Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.td')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td | 108
1 file changed, 69 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ebccc07edc7a..c8a697c8b82f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -33,6 +33,8 @@ def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">;
+def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
+ AssemblerPredicate<(all_of HasV9_3aOps), "armv9.3a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">;
@@ -198,6 +200,10 @@ def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
AssemblerPredicate<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">;
+def HasHBC : Predicate<"Subtarget->hasHBC()">,
+ AssemblerPredicate<(all_of FeatureHBC), "hbc">;
+def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
+ AssemblerPredicate<(all_of FeatureMOPS), "mops">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -2362,7 +2368,12 @@ def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
-def Bcc : BranchCond;
+def Bcc : BranchCond<0, "b">;
+
+// Armv8.8-A variant form which hints to the branch predictor that
+// this branch is very likely to go the same way nearly all the time
+// (even though it is not known at compile time _which_ way that is).
+def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
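A minimal C++ sketch of the kind of branch BC.cond is aimed at (a hypothetical checked_sum function, not taken from this patch): the check below is strongly biased toward falling through, yet which way it goes cannot be proven at compile time, which is exactly the bias the +hbc hint lets a compiler communicate to the branch predictor.

#include <cstddef>
#include <cstdint>

// Hypothetical example, for illustration only. For typical inputs the inner
// check is taken almost never, so a compiler targeting +hbc could emit it as
// bc.cond rather than b.cond to pass that bias on to the hardware.
int64_t checked_sum(const int64_t *p, std::size_t n) {
  int64_t s = 0;
  for (std::size_t i = 0; i != n; ++i) {
    if (p[i] < 0)   // strongly biased, but the direction is not knowable statically
      return -1;
    s += p[i];
  }
  return s;
}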
@@ -4500,9 +4511,9 @@ defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
- int_aarch64_neon_sqadd>;
+ int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
- int_aarch64_neon_sqsub>;
+ int_aarch64_neon_sqrdmlsh>;
// Extra saturate patterns, other than the intrinsics matches above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
@@ -4769,15 +4780,11 @@ defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
- def : Pat<(i32 (int_aarch64_neon_sqadd
- (i32 FPR32:$Rd),
- (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
- (i32 FPR32:$Rm))))),
+ def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))),
(SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
- def : Pat<(i32 (int_aarch64_neon_sqsub
- (i32 FPR32:$Rd),
- (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
- (i32 FPR32:$Rm))))),
+ def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))),
(SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}
@@ -5342,19 +5349,6 @@ def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
(v2i32 (trunc (v2i64 V128:$Vm))))),
(UZP1v4i32 V128:$Vn, V128:$Vm)>;
-def : Pat<(v16i8 (concat_vectors
- (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
- (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
- (UZP2v16i8 V128:$Vn, V128:$Vm)>;
-def : Pat<(v8i16 (concat_vectors
- (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
- (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
- (UZP2v8i16 V128:$Vn, V128:$Vm)>;
-def : Pat<(v4i32 (concat_vectors
- (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
- (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
- (UZP2v4i32 V128:$Vn, V128:$Vm)>;
-
//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------
@@ -5376,10 +5370,10 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
//----------------------------------------------------------------------------
-// AdvSIMD scalar CPY instruction
+// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------
-defm CPY : SIMDScalarCPY<"mov">;
+defm DUP : SIMDScalarDUP<"mov">;
//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
@@ -5790,7 +5784,7 @@ defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
// Floating point vector extractions are codegen'd as either a sequence of
-// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
+// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
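A small illustration of the lane-0 versus nonzero-lane distinction described in the comment above, written with standard NEON intrinsics rather than anything from this patch:

#include <arm_neon.h>

// Hypothetical example: extracting lane 0 of a float64x2_t is just a read of
// the d subregister, while extracting lane 1 is lowered to "mov d0, v0.d[1]",
// i.e. the DUPi64 pattern below.
double lane0(float64x2_t v) { return vgetq_lane_f64(v, 0); }
double lane1(float64x2_t v) { return vgetq_lane_f64(v, 1); }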
@@ -5803,13 +5797,13 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
- (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
+ (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
- (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
+ (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
- (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
+ (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
- (bf16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
+ (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
@@ -6407,9 +6401,9 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
- int_aarch64_neon_sqadd>;
+ int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
- int_aarch64_neon_sqsub>;
+ int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
@@ -6425,6 +6419,22 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
VectorIndexS:$idx)),
(SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
+// Match add node and also treat an 'or' node as an 'add' if the or'ed operands
+// have no common bits.
+def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
+ [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
+ if (N->getOpcode() == ISD::ADD)
+ return true;
+ return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
+}]> {
+ let GISelPredicateCode = [{
+ // Only handle G_ADD for now. FIXME: Build the capability to compute whether
+ // operands of G_OR have common bits set or not.
+ return MI.getOpcode() == TargetOpcode::G_ADD;
+ }];
+}
+
+
//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
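A short C++ sketch (illustrative values, not part of the patch) of the identity add_and_or_is_add relies on: when the two operands share no set bits, OR and ADD produce the same result, so an accumulate the DAG happens to express with 'or' can still feed the SSRA/USRA patterns that use this fragment.

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical values: hi occupies bits [15:8] and lo occupies bits [7:0],
  // so the operands of the OR have no common bits set.
  std::uint8_t hi = 0xAB, lo = 0xCD;
  std::uint16_t via_or  = static_cast<std::uint16_t>((hi << 8) | lo);
  std::uint16_t via_add = static_cast<std::uint16_t>((hi << 8) + lo);
  assert(via_or == via_add);  // both are 0xABCD
  return 0;
}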
@@ -6530,7 +6540,7 @@ defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
(AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
- TriOpFrag<(add node:$LHS,
+ TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;
@@ -6543,7 +6553,7 @@ defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
(AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
- TriOpFrag<(add node:$LHS,
+ TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vlshr node:$MHS, node:$RHS))>>;
//----------------------------------------------------------------------------
@@ -6585,7 +6595,7 @@ defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
- TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
+ TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
@@ -6601,7 +6611,7 @@ defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
- TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
+ TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
@@ -8106,7 +8116,7 @@ class NTStore128Pat<ValueType VT> :
Pat<(nontemporalstore (VT FPR128:$Rt),
(am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
(STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
- (CPYi64 FPR128:$Rt, (i64 1)),
+ (DUPi64 FPR128:$Rt, (i64 1)),
GPR64sp:$Rn, simm7s8:$offset)>;
def : NTStore128Pat<v2i64>;
@@ -8118,7 +8128,7 @@ class NTStore64Pat<ValueType VT> :
Pat<(nontemporalstore (VT FPR64:$Rt),
(am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
(STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
- (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
+ (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
GPR64sp:$Rn, simm7s4:$offset)>;
// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
@@ -8319,6 +8329,26 @@ let Predicates = [HasLS64] in {
def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}
+let Predicates = [HasMOPS] in {
+ defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
+ defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
+ defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
+
+ defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
+ defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
+ defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
+
+ defm SETP : MOPSMemorySetInsns<0b00, "setp">;
+ defm SETM : MOPSMemorySetInsns<0b01, "setm">;
+ defm SETE : MOPSMemorySetInsns<0b10, "sete">;
+}
+let Predicates = [HasMOPS, HasMTE] in {
+ defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
+ defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
+ // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
+ defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
+}
+
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in
def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
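As a rough guide (inferred from the instruction class names above, not stated in the patch), the MOPS prologue/main/epilogue triples correspond to the standard memory routines they are designed to accelerate. The C++ below is only a sketch of the library-level operations a +mops compiler could lower to these sequences.

#include <cstddef>
#include <cstring>

// Hypothetical mapping, for illustration only:
//   cpyfp/cpyfm/cpyfe -> forward-only copy (memcpy)
//   cpyp/cpym/cpye    -> overlap-safe copy (memmove)
//   setp/setm/sete    -> fill              (memset)
void mops_candidates(char *dst, const char *src, char *buf, std::size_t n) {
  std::memcpy(dst, src, n);                   // candidate for the CPYF* sequence
  if (n)
    std::memmove(buf, buf + 1, n - 1);        // overlapping copy: CPY* sequence
  std::memset(dst, 0, n);                     // candidate for the SET* sequence
}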