diff options
Diffstat (limited to 'llvm/include/llvm/IR/IntrinsicsARM.td')
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsARM.td | 361 |
1 files changed, 308 insertions, 53 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 518ad7079225..df74e446b965 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -19,7 +19,7 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". // A space-consuming intrinsic primarily for testing ARMConstantIslands. The // first argument is the number of bytes this "instruction" takes up, the second // and return value are essentially chains, used to force ordering during ISel. -def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>; +def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>]>; // 16-bit multiplications def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">, @@ -262,59 +262,59 @@ def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty], // Coprocessor def int_arm_ldc : GCCBuiltin<"__builtin_arm_ldc">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_arm_ldcl : GCCBuiltin<"__builtin_arm_ldcl">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_arm_ldc2 : GCCBuiltin<"__builtin_arm_ldc2">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_arm_ldc2l : GCCBuiltin<"__builtin_arm_ldc2l">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; // Move to coprocessor def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>; def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>; // Move from coprocessor def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">, MSBuiltin<"_MoveFromCoprocessor">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>; def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">, MSBuiltin<"_MoveFromCoprocessor2">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>; // Coprocessor data processing def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>; def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>; // Move from two registers to coprocessor def int_arm_mcrr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>; def int_arm_mcrr2 : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>; def int_arm_mrrc : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; def int_arm_mrrc2 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; //===----------------------------------------------------------------------===// // CRC32 @@ -695,16 +695,16 @@ def int_arm_neon_vst4 : Intrinsic<[], def int_arm_neon_vst1x2 : Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; def int_arm_neon_vst1x3 : Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; def int_arm_neon_vst1x4 : Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; // Vector store N-element structure from one lane. // Source operands are: the address, the N vectors, the lane number, and @@ -773,6 +773,33 @@ class Neon_Dot_Intrinsic def int_arm_neon_udot : Neon_Dot_Intrinsic; def int_arm_neon_sdot : Neon_Dot_Intrinsic; +// v8.6-A Matrix Multiply Intrinsics +class Neon_MatMul_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, + LLVMMatchType<1>], + [IntrNoMem]>; +def int_arm_neon_ummla : Neon_MatMul_Intrinsic; +def int_arm_neon_smmla : Neon_MatMul_Intrinsic; +def int_arm_neon_usmmla : Neon_MatMul_Intrinsic; +def int_arm_neon_usdot : Neon_Dot_Intrinsic; + +// v8.6-A Bfloat Intrinsics +def int_arm_neon_vcvtfp2bf + : Intrinsic<[llvm_anyvector_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_arm_neon_vcvtbfp2bf + : Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>; + +def int_arm_neon_bfdot : Neon_Dot_Intrinsic; +def int_arm_neon_bfmmla : Neon_MatMul_Intrinsic; + +class Neon_FML_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem]>; +def int_arm_neon_bfmlalb : Neon_FML_Intrinsic; +def int_arm_neon_bfmlalt : Neon_FML_Intrinsic; + def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; @@ -795,14 +822,8 @@ def int_arm_mve_pred_i2v : Intrinsic< [llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>; def int_arm_mve_pred_v2i : Intrinsic< [llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>; - -multiclass IntrinsicSignSuffix<list<LLVMType> rets, list<LLVMType> params = [], - list<IntrinsicProperty> props = [], - string name = "", - list<SDNodeProperty> sdprops = []> { - def _s: Intrinsic<rets, params, props, name, sdprops>; - def _u: Intrinsic<rets, params, props, name, sdprops>; -} +def int_arm_mve_vreinterpretq : Intrinsic< + [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; def int_arm_mve_min_predicated: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, @@ -876,11 +897,18 @@ def int_arm_mve_qsub_predicated: Intrinsic<[llvm_anyvector_ty], def int_arm_mve_hsub_predicated: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; - -defm int_arm_mve_minv: IntrinsicSignSuffix<[llvm_i32_ty], - [llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; -defm int_arm_mve_maxv: IntrinsicSignSuffix<[llvm_i32_ty], - [llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; +def int_arm_mve_vmina_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], + [IntrNoMem]>; +def int_arm_mve_vmaxa_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], + [IntrNoMem]>; +def int_arm_mve_vminnma_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], + [IntrNoMem]>; +def int_arm_mve_vmaxnma_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], + [IntrNoMem]>; multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params, LLVMType pred = llvm_anyvector_ty, @@ -897,8 +925,40 @@ multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params, LLVMMatchType<0>, rets[0])], props>; } +multiclass MVE_minmaxv { + defm v: MVEPredicated<[llvm_i32_ty], + [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>; + defm av: MVEPredicated<[llvm_i32_ty], + [llvm_i32_ty, llvm_anyvector_ty]>; + defm nmv: MVEPredicated<[llvm_anyfloat_ty], + [LLVMMatchType<0>, llvm_anyvector_ty]>; + defm nmav: MVEPredicated<[llvm_anyfloat_ty], + [LLVMMatchType<0>, llvm_anyvector_ty]>; +} +defm int_arm_mve_min: MVE_minmaxv; +defm int_arm_mve_max: MVE_minmaxv; + +defm int_arm_mve_addv: MVEPredicated<[llvm_i32_ty], + [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>; +defm int_arm_mve_addlv: MVEPredicated<[llvm_i64_ty], + [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>; + +// Intrinsic with a predicated and a non-predicated case. The predicated case +// has two additional parameters: inactive (the value for inactive lanes, can +// be undef) and predicate. +multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags, + list<LLVMType> params, LLVMType inactive, + LLVMType predicate, + list<IntrinsicProperty> props = [IntrNoMem]> { + def "": Intrinsic<rets, flags # params, props>; + def _predicated: Intrinsic<rets, flags # [inactive] # params # [predicate], + props>; +} + defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty], [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty>; +defm int_arm_mve_vcvt_widen: MVEMXPredicated<[llvm_v4f32_ty], [], + [llvm_v8f16_ty, llvm_i32_ty], llvm_v4f32_ty, llvm_v4i1_ty>; defm int_arm_mve_vldr_gather_base: MVEPredicated< [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty], @@ -992,10 +1052,25 @@ def int_arm_mve_vabd: Intrinsic< def int_arm_mve_vadc: Intrinsic< [llvm_anyvector_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; +def int_arm_mve_vsbc: Intrinsic< + [llvm_anyvector_ty, llvm_i32_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; def int_arm_mve_vadc_predicated: Intrinsic< [llvm_anyvector_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; +def int_arm_mve_vsbc_predicated: Intrinsic< + [llvm_anyvector_ty, llvm_i32_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, + llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; +def int_arm_mve_vshlc: Intrinsic< + [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */, + llvm_i32_ty /* shift count */], [IntrNoMem]>; +def int_arm_mve_vshlc_predicated: Intrinsic< + [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */, + llvm_i32_ty /* shift count */, llvm_anyvector_ty], [IntrNoMem]>; def int_arm_mve_vmulh: Intrinsic< [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */], @@ -1030,21 +1105,9 @@ def int_arm_mve_vmull_poly: Intrinsic< [llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem]>; -// Intrinsic with a predicated and a non-predicated case. The predicated case -// has two additional parameters: inactive (the value for inactive lanes, can -// be undef) and predicate. -multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags, - list<LLVMType> params, LLVMType inactive, - LLVMType predicate, - list<IntrinsicProperty> props = [IntrNoMem]> { - def "": Intrinsic<rets, flags # params, props>; - def _predicated: Intrinsic<rets, flags # [inactive] # params # [predicate], - props>; -} - // The first two parameters are compile-time constants: // * Halving: 0 means halving (vhcaddq), 1 means non-halving (vcaddq) -// instruction. Note: the flag is inverted to match the corresonding +// instruction. Note: the flag is inverted to match the corresponding // bit in the instruction encoding // * Rotation angle: 0 mean 90 deg, 1 means 180 deg defm int_arm_mve_vcaddq : MVEMXPredicated< @@ -1068,12 +1131,11 @@ defm int_arm_mve_vcmlaq : MVEPredicated< [llvm_i32_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], llvm_anyvector_ty>; -def int_arm_mve_vld2q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem]>; -def int_arm_mve_vld4q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem]>; +def int_arm_mve_vld2q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>; +def int_arm_mve_vld4q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_arm_mve_vst2q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem]>; -def int_arm_mve_vst4q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem] ->; +def int_arm_mve_vst2q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem, IntrArgMemOnly]>; +def int_arm_mve_vst4q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem, IntrArgMemOnly]>; // MVE vector absolute difference and accumulate across vector // The first operand is an 'unsigned' flag. The remaining operands are: @@ -1121,4 +1183,197 @@ defm int_arm_mve_vrmlldavha: MVEPredicated< [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], llvm_anyvector_ty>; + +defm int_arm_mve_vidup: MVEMXPredicated< + [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [], + [llvm_i32_ty /* base */, llvm_i32_ty /* step */], + LLVMMatchType<0>, llvm_anyvector_ty>; +defm int_arm_mve_vddup: MVEMXPredicated< + [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [], + [llvm_i32_ty /* base */, llvm_i32_ty /* step */], + LLVMMatchType<0>, llvm_anyvector_ty>; +defm int_arm_mve_viwdup: MVEMXPredicated< + [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [], + [llvm_i32_ty /* base */, llvm_i32_ty /* limit */, llvm_i32_ty /* step */], + LLVMMatchType<0>, llvm_anyvector_ty>; +defm int_arm_mve_vdwdup: MVEMXPredicated< + [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [], + [llvm_i32_ty /* base */, llvm_i32_ty /* limit */, llvm_i32_ty /* step */], + LLVMMatchType<0>, llvm_anyvector_ty>; + +// Flags: +// * unsigned +defm int_arm_mve_vcvt_fix: MVEMXPredicated< + [llvm_anyvector_ty /* output */], [llvm_i32_ty], + [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */], + LLVMMatchType<0>, llvm_anyvector_ty>; + +def int_arm_mve_vcvt_fp_int_predicated: Intrinsic< + [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, + llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */], + [IntrNoMem]>; + +foreach suffix = ["a","n","p","m"] in { + defm "int_arm_mve_vcvt"#suffix: MVEMXPredicated< + [llvm_anyvector_ty /* output */], [llvm_i32_ty /* unsigned */], + [llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>; +} + +def int_arm_mve_vrintn: Intrinsic< + [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; +def int_arm_mve_vcls: Intrinsic< + [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; + +defm int_arm_mve_vbrsr: MVEMXPredicated< + [llvm_anyvector_ty], [], + [LLVMMatchType<0>, llvm_i32_ty], LLVMMatchType<0>, llvm_anyvector_ty>; + +def int_arm_mve_vqdmull: Intrinsic< + [llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_mve_vqdmull_predicated: Intrinsic< + [llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty, + LLVMMatchType<0>], + [IntrNoMem]>; + +class MVESimpleUnaryPredicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; + +def int_arm_mve_mvn_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_abs_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_neg_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_qabs_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_qneg_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_clz_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_cls_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintz_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintm_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintp_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrinta_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintx_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintn_predicated: MVESimpleUnaryPredicated; + +def int_arm_mve_vrev_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_i32_ty /* size to reverse */, + llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; + +def int_arm_mve_vmovl_predicated: Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, llvm_i32_ty /* top half */, + llvm_anyvector_ty /* predicate */, LLVMMatchType<0>], [IntrNoMem]>; +def int_arm_mve_vmovn_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i32_ty /* top half */, + llvm_anyvector_ty /* predicate */], [IntrNoMem]>; + +def int_arm_mve_vqmovn: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, + llvm_i32_ty /* unsigned output */, llvm_i32_ty /* unsigned input */, + llvm_i32_ty /* top half */], [IntrNoMem]>; +def int_arm_mve_vqmovn_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, + llvm_i32_ty /* unsigned output */, llvm_i32_ty /* unsigned input */, + llvm_i32_ty /* top half */, llvm_anyvector_ty /* pred */], [IntrNoMem]>; + +def int_arm_mve_fma_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */, + LLVMMatchType<0> /* addend */, llvm_anyvector_ty /* pred */], [IntrNoMem]>; +def int_arm_mve_vmla_n_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */, + llvm_i32_ty /* mult op #2 (scalar) */, llvm_anyvector_ty /* pred */], + [IntrNoMem]>; +def int_arm_mve_vmlas_n_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */, + llvm_i32_ty /* addend (scalar) */, llvm_anyvector_ty /* pred */], + [IntrNoMem]>; + +defm int_arm_mve_vqdmlah: MVEPredicated<[llvm_anyvector_ty], + [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */, + llvm_i32_ty /* mult op #2 (scalar) */]>; +defm int_arm_mve_vqrdmlah: MVEPredicated<[llvm_anyvector_ty], + [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */, + llvm_i32_ty /* mult op #2 (scalar) */]>; +defm int_arm_mve_vqdmlash: MVEPredicated<[llvm_anyvector_ty], + [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */, + llvm_i32_ty /* addend (scalar) */]>; +defm int_arm_mve_vqrdmlash: MVEPredicated<[llvm_anyvector_ty], + [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */, + llvm_i32_ty /* addend (scalar) */]>; + +defm int_arm_mve_vqdmlad: MVEPredicated<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, + llvm_i32_ty /* exchange */, llvm_i32_ty /* round */, + llvm_i32_ty /* subtract */]>; + +// CDE (Custom Datapath Extension) + +multiclass CDEGPRIntrinsics<list<LLVMType> args> { + def "" : Intrinsic< + [llvm_i32_ty], + !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>; + def a : Intrinsic< + [llvm_i32_ty], + !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc */], args, + [llvm_i32_ty /* imm */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; + + def d: Intrinsic< + [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */], + !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>; + def da: Intrinsic< + [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */], + !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc_lo */, + llvm_i32_ty /* acc_hi */], args, [llvm_i32_ty /* imm */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 3)>>]>; +} + +defm int_arm_cde_cx1: CDEGPRIntrinsics<[]>; +defm int_arm_cde_cx2: CDEGPRIntrinsics<[llvm_i32_ty]>; +defm int_arm_cde_cx3: CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>; + +multiclass CDEVCXIntrinsics<list<LLVMType> args> { + def "" : Intrinsic< + [llvm_anyfloat_ty], + !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>; + def a : Intrinsic< + [llvm_anyfloat_ty], + !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */], + args, [llvm_i32_ty /* imm */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; +} + +defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>; +defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>; +defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>, LLVMMatchType<0>]>; + +multiclass CDEVCXVecIntrinsics<list<LLVMType> args> { + def "" : Intrinsic< + [llvm_v16i8_ty], + !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>; + def a : Intrinsic< + [llvm_v16i8_ty], + !listconcat([llvm_i32_ty /* coproc */, llvm_v16i8_ty /* acc */], + args, [llvm_i32_ty /* imm */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; + + def _predicated : Intrinsic< + [llvm_anyvector_ty], + !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* inactive */], + args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; + def a_predicated : Intrinsic< + [llvm_anyvector_ty], + !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */], + args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]), + [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; +} + +defm int_arm_cde_vcx1q : CDEVCXVecIntrinsics<[]>; +defm int_arm_cde_vcx2q : CDEVCXVecIntrinsics<[llvm_v16i8_ty]>; +defm int_arm_cde_vcx3q : CDEVCXVecIntrinsics<[llvm_v16i8_ty, llvm_v16i8_ty]>; + } // end TargetPrefix |