diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.td')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 108 |
1 file changed, 69 insertions, 39 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index ebccc07edc7a..c8a697c8b82f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -33,6 +33,8 @@ def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">, AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">; def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">, AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">; +def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">, + AssemblerPredicate<(all_of HasV9_3aOps), "armv9.3a">; def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">, AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">; @@ -198,6 +200,10 @@ def HasBRBE : Predicate<"Subtarget->hasBRBE()">, AssemblerPredicate<(all_of FeatureBRBE), "brbe">; def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">, AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">; +def HasHBC : Predicate<"Subtarget->hasHBC()">, + AssemblerPredicate<(all_of FeatureHBC), "hbc">; +def HasMOPS : Predicate<"Subtarget->hasMOPS()">, + AssemblerPredicate<(all_of FeatureMOPS), "mops">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -2362,7 +2368,12 @@ def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. //===----------------------------------------------------------------------===// -def Bcc : BranchCond; +def Bcc : BranchCond<0, "b">; + +// Armv8.8-A variant form which hints to the branch predictor that +// this branch is very likely to go the same way nearly all the time +// (even though it is not known at compile time _which_ way that is). 
+def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>; //===----------------------------------------------------------------------===// // Compare-and-branch instructions. @@ -4500,9 +4511,9 @@ defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>; defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", - int_aarch64_neon_sqadd>; + int_aarch64_neon_sqrdmlah>; defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", - int_aarch64_neon_sqsub>; + int_aarch64_neon_sqrdmlsh>; // Extra saturate patterns, other than the intrinsics matches above defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>; @@ -4769,15 +4780,11 @@ defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; let Predicates = [HasRDM] in { defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; - def : Pat<(i32 (int_aarch64_neon_sqadd - (i32 FPR32:$Rd), - (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), + def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn), + (i32 FPR32:$Rm))), (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(i32 (int_aarch64_neon_sqsub - (i32 FPR32:$Rd), - (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), + def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn), + (i32 FPR32:$Rm))), (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; } @@ -5342,19 +5349,6 @@ def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))), (v2i32 (trunc (v2i64 V128:$Vm))))), (UZP1v4i32 V128:$Vn, V128:$Vm)>; -def : Pat<(v16i8 (concat_vectors - (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), - (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))), - (UZP2v16i8 V128:$Vn, V128:$Vm)>; -def : Pat<(v8i16 
(concat_vectors - (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))), - (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))), - (UZP2v8i16 V128:$Vn, V128:$Vm)>; -def : Pat<(v4i32 (concat_vectors - (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))), - (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))), - (UZP2v4i32 V128:$Vn, V128:$Vm)>; - //---------------------------------------------------------------------------- // AdvSIMD TBL/TBX instructions //---------------------------------------------------------------------------- @@ -5376,10 +5370,10 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), //---------------------------------------------------------------------------- -// AdvSIMD scalar CPY instruction +// AdvSIMD scalar DUP instruction //---------------------------------------------------------------------------- -defm CPY : SIMDScalarCPY<"mov">; +defm DUP : SIMDScalarDUP<"mov">; //---------------------------------------------------------------------------- // AdvSIMD scalar pairwise instructions @@ -5790,7 +5784,7 @@ defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; // Floating point vector extractions are codegen'd as either a sequence of -// subregister extractions, or a MOV (aka CPY here, alias for DUP) if +// subregister extractions, or a MOV (aka DUP here) if // the lane number is anything other than zero. 
def : Pat<(vector_extract (v2f64 V128:$Rn), 0), (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; @@ -5803,13 +5797,13 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), 0), def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), - (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>; + (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>; def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), - (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>; + (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>; def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), - (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; + (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx), - (bf16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; + (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; // All concat_vectors operations are canonicalised to act on i64 vectors for // AArch64. In the general case we need an instruction, which had just as well be @@ -6407,9 +6401,9 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", int_aarch64_neon_sqsub>; defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", - int_aarch64_neon_sqadd>; + int_aarch64_neon_sqrdmlah>; defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", - int_aarch64_neon_sqsub>; + int_aarch64_neon_sqrdmlsh>; defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; @@ -6425,6 +6419,22 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), VectorIndexS:$idx)), (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; +// Match add node and also treat an 'or' node as an 'add' if the or'ed operands +have no common bits. 
+def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs), + [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{ + if (N->getOpcode() == ISD::ADD) + return true; + return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1)); +}]> { + let GISelPredicateCode = [{ + // Only handle G_ADD for now. FIXME. build capability to compute whether + // operands of G_OR have common bits set or not. + return MI.getOpcode() == TargetOpcode::G_ADD; + }]; +} + + //---------------------------------------------------------------------------- // AdvSIMD scalar shift instructions //---------------------------------------------------------------------------- @@ -6530,7 +6540,7 @@ defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", (AArch64srshri node:$MHS, node:$RHS))>>; defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, + TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", int_aarch64_neon_uqrshrn>; @@ -6543,7 +6553,7 @@ defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", (AArch64urshri node:$MHS, node:$RHS))>>; defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, + TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))>>; //---------------------------------------------------------------------------- @@ -6585,7 +6595,7 @@ defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; + TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", int_aarch64_neon_vcvtfxu2fp>; defm UQRSHRN : 
SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", @@ -6601,7 +6611,7 @@ defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; + TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; // RADDHN patterns for when RSHRN shifts by half the size of the vector element def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))), @@ -8106,7 +8116,7 @@ class NTStore128Pat<ValueType VT> : Pat<(nontemporalstore (VT FPR128:$Rt), (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub), - (CPYi64 FPR128:$Rt, (i64 1)), + (DUPi64 FPR128:$Rt, (i64 1)), GPR64sp:$Rn, simm7s8:$offset)>; def : NTStore128Pat<v2i64>; @@ -8118,7 +8128,7 @@ class NTStore64Pat<ValueType VT> : Pat<(nontemporalstore (VT FPR64:$Rt), (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub), - (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), + (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), GPR64sp:$Rn, simm7s4:$offset)>; // FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64? 
@@ -8319,6 +8329,26 @@ let Predicates = [HasLS64] in { def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>; } +let Predicates = [HasMOPS] in { + defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">; + defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">; + defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">; + + defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">; + defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">; + defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">; + + defm SETP : MOPSMemorySetInsns<0b00, "setp">; + defm SETM : MOPSMemorySetInsns<0b01, "setm">; + defm SETE : MOPSMemorySetInsns<0b10, "sete">; +} +let Predicates = [HasMOPS, HasMTE] in { + defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">; + defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">; + // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td + defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; +} + let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in def StoreSwiftAsyncContext : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset), |