diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-03 18:04:11 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-19 21:24:24 +0000 |
commit | 0c85e2760f6b5016c16d29f8c2f63f3ba2cf5298 (patch) | |
tree | d6c9033fa7ca2f632ddc81d371ef3faf921652db /contrib/llvm-project/llvm | |
parent | 92d4d6f1f60e5d9cb2c7e0dd5d632987e54741e8 (diff) | |
download | src-0c85e2760f6b5016c16d29f8c2f63f3ba2cf5298.tar.gz src-0c85e2760f6b5016c16d29f8c2f63f3ba2cf5298.zip |
Merge llvm-project main llvmorg-18-init-16003-gfc5f51cf5af4
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvm-project main llvmorg-18-init-16003-gfc5f51cf5af4.
PR: 276104
MFC after: 1 month
(cherry picked from commit 647cbc5de815c5651677bf8582797f716ec7b48d)
Diffstat (limited to 'contrib/llvm-project/llvm')
182 files changed, 4011 insertions, 2494 deletions
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/GenericUniformityImpl.h b/contrib/llvm-project/llvm/include/llvm/ADT/GenericUniformityImpl.h index b7d0a1228ebf..d397b937d78c 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/GenericUniformityImpl.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/GenericUniformityImpl.h @@ -33,6 +33,12 @@ /// the propagation of the impact of divergent control flow on the divergence of /// values (sync dependencies). /// +/// NOTE: In general, no interface exists for a transform to update +/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a +/// transitive dependence, but it also does not provide an interface for +/// updating itself. Given that, transforms should not preserve uniformity in +/// their getAnalysisUsage() callback. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h b/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h index 02fa28fc856d..a5b9eec50c82 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h @@ -89,30 +89,26 @@ public: /// Check for string equality. This is more efficient than compare() when /// the relative ordering of inequal strings isn't needed. - bool equals(StringRef RHS) const { - return str().equals(RHS); - } + [[nodiscard]] bool equals(StringRef RHS) const { return str().equals(RHS); } /// Check for string equality, ignoring case. - bool equals_insensitive(StringRef RHS) const { + [[nodiscard]] bool equals_insensitive(StringRef RHS) const { return str().equals_insensitive(RHS); } /// compare - Compare two strings; the result is negative, zero, or positive /// if this string is lexicographically less than, equal to, or greater than /// the \p RHS. 
- int compare(StringRef RHS) const { - return str().compare(RHS); - } + [[nodiscard]] int compare(StringRef RHS) const { return str().compare(RHS); } /// compare_insensitive - Compare two strings, ignoring case. - int compare_insensitive(StringRef RHS) const { + [[nodiscard]] int compare_insensitive(StringRef RHS) const { return str().compare_insensitive(RHS); } /// compare_numeric - Compare two strings, treating sequences of digits as /// numbers. - int compare_numeric(StringRef RHS) const { + [[nodiscard]] int compare_numeric(StringRef RHS) const { return str().compare_numeric(RHS); } @@ -121,10 +117,14 @@ public: /// @{ /// starts_with - Check if this string starts with the given \p Prefix. - bool starts_with(StringRef Prefix) const { return str().starts_with(Prefix); } + [[nodiscard]] bool starts_with(StringRef Prefix) const { + return str().starts_with(Prefix); + } /// ends_with - Check if this string ends with the given \p Suffix. - bool ends_with(StringRef Suffix) const { return str().ends_with(Suffix); } + [[nodiscard]] bool ends_with(StringRef Suffix) const { + return str().ends_with(Suffix); + } /// @} /// @name String Searching @@ -134,7 +134,7 @@ public: /// /// \return - The index of the first occurrence of \p C, or npos if not /// found. - size_t find(char C, size_t From = 0) const { + [[nodiscard]] size_t find(char C, size_t From = 0) const { return str().find(C, From); } @@ -142,7 +142,7 @@ public: /// /// \returns The index of the first occurrence of \p Str, or npos if not /// found. - size_t find(StringRef Str, size_t From = 0) const { + [[nodiscard]] size_t find(StringRef Str, size_t From = 0) const { return str().find(Str, From); } @@ -150,7 +150,7 @@ public: /// /// \returns The index of the last occurrence of \p C, or npos if not /// found. 
- size_t rfind(char C, size_t From = StringRef::npos) const { + [[nodiscard]] size_t rfind(char C, size_t From = StringRef::npos) const { return str().rfind(C, From); } @@ -158,13 +158,11 @@ public: /// /// \returns The index of the last occurrence of \p Str, or npos if not /// found. - size_t rfind(StringRef Str) const { - return str().rfind(Str); - } + [[nodiscard]] size_t rfind(StringRef Str) const { return str().rfind(Str); } /// Find the first character in the string that is \p C, or npos if not /// found. Same as find. - size_t find_first_of(char C, size_t From = 0) const { + [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const { return str().find_first_of(C, From); } @@ -172,13 +170,13 @@ public: /// not found. /// /// Complexity: O(size() + Chars.size()) - size_t find_first_of(StringRef Chars, size_t From = 0) const { + [[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = 0) const { return str().find_first_of(Chars, From); } /// Find the first character in the string that is not \p C or npos if not /// found. - size_t find_first_not_of(char C, size_t From = 0) const { + [[nodiscard]] size_t find_first_not_of(char C, size_t From = 0) const { return str().find_first_not_of(C, From); } @@ -186,13 +184,15 @@ public: /// \p Chars, or npos if not found. /// /// Complexity: O(size() + Chars.size()) - size_t find_first_not_of(StringRef Chars, size_t From = 0) const { + [[nodiscard]] size_t find_first_not_of(StringRef Chars, + size_t From = 0) const { return str().find_first_not_of(Chars, From); } /// Find the last character in the string that is \p C, or npos if not /// found. - size_t find_last_of(char C, size_t From = StringRef::npos) const { + [[nodiscard]] size_t find_last_of(char C, + size_t From = StringRef::npos) const { return str().find_last_of(C, From); } @@ -200,8 +200,8 @@ public: /// found. 
/// /// Complexity: O(size() + Chars.size()) - size_t find_last_of( - StringRef Chars, size_t From = StringRef::npos) const { + [[nodiscard]] size_t find_last_of(StringRef Chars, + size_t From = StringRef::npos) const { return str().find_last_of(Chars, From); } @@ -210,15 +210,11 @@ public: /// @{ /// Return the number of occurrences of \p C in the string. - size_t count(char C) const { - return str().count(C); - } + [[nodiscard]] size_t count(char C) const { return str().count(C); } /// Return the number of non-overlapped occurrences of \p Str in the /// string. - size_t count(StringRef Str) const { - return str().count(Str); - } + [[nodiscard]] size_t count(StringRef Str) const { return str().count(Str); } /// @} /// @name Substring Operations @@ -233,7 +229,8 @@ public: /// \param N The number of characters to included in the substring. If \p N /// exceeds the number of characters remaining in the string, the string /// suffix (starting with \p Start) will be returned. - StringRef substr(size_t Start, size_t N = StringRef::npos) const { + [[nodiscard]] StringRef substr(size_t Start, + size_t N = StringRef::npos) const { return str().substr(Start, N); } @@ -247,14 +244,16 @@ public: /// substring. If this is npos, or less than \p Start, or exceeds the /// number of characters remaining in the string, the string suffix /// (starting with \p Start) will be returned. - StringRef slice(size_t Start, size_t End) const { + [[nodiscard]] StringRef slice(size_t Start, size_t End) const { return str().slice(Start, End); } // Extra methods. /// Explicit conversion to StringRef. - StringRef str() const { return StringRef(this->data(), this->size()); } + [[nodiscard]] StringRef str() const { + return StringRef(this->data(), this->size()); + } // TODO: Make this const, if it's safe... 
const char* c_str() { diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h index 5d3bc64bf8b4..7b02b618f7cb 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h @@ -54,9 +54,6 @@ class ConstraintSystem { /// constraint system. DenseMap<Value *, unsigned> Value2Index; - /// Current greatest common divisor for all coefficients in the system. - uint32_t GCD = 1; - // Eliminate constraints from the system using Fourier–Motzkin elimination. bool eliminateUsingFM(); @@ -88,10 +85,6 @@ public: for (const auto &[Idx, C] : enumerate(R)) { if (C == 0) continue; - auto A = std::abs(C); - GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD}) - .getZExtValue(); - NewRow.emplace_back(C, Idx); } if (Constraints.empty()) diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h index 735be3680aea..048912beaba5 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1243,6 +1243,18 @@ public: ArrayRef<const Value *> Args = ArrayRef<const Value *>(), const Instruction *CxtI = nullptr) const; + /// Returns the cost estimation for alternating opcode pattern that can be + /// lowered to a single instruction on the target. In X86 this is for the + /// addsub instruction which corresponds to a Shuffle + Fadd + FSub pattern in + /// IR. This function expects two opcodes: \p Opcode0 and \p Opcode1 being + /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0` + /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected. + /// \p VecTy is the vector type of the instruction to be generated. 
+ InstructionCost getAltInstrCost( + VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, + const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; + /// \return The cost of a shuffle instruction of kind Kind and of type Tp. /// The exact mask may be passed as Mask, or else the array will be empty. /// The index and subtype parameters are used by the subvector insertion and @@ -1944,6 +1956,10 @@ public: unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0; + virtual InstructionCost getAltInstrCost( + VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, + const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0; virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, @@ -2555,6 +2571,12 @@ public: return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, Args, CxtI); } + InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, + unsigned Opcode1, + const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind) const override { + return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind); + } InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 1d8f523e9792..7ad3ce512a35 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -554,6 +554,13 @@ public: return 1; } + InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, + unsigned Opcode1, + const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind) const { + return 
InstructionCost::getInvalid(); + } + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index e7debc652a0a..dcc1a4580b14 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -769,9 +769,6 @@ public: bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); - /// Fold boolean selects to logical operations. - bool matchSelectToLogical(MachineInstr &MI, BuildFnTy &MatchInfo); - bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info); /// Transform G_ADD(x, G_SUB(y, x)) to y. @@ -814,6 +811,9 @@ public: // Given a binop \p MI, commute operands 1 and 2. void applyCommuteBinOpOperands(MachineInstr &MI); + /// Combine selects. + bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo); + private: /// Checks for legality of an indexed variant of \p LdSt. bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const; @@ -904,6 +904,18 @@ private: /// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0 bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal, Register FalseVal, BuildFnTy &MatchInfo); + + /// Try to fold selects to logical operations. 
+ bool tryFoldBoolSelectToLogic(GSelect *Select, BuildFnTy &MatchInfo); + + bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo); + + bool isOneOrOneSplat(Register Src, bool AllowUndefs); + bool isZeroOrZeroSplat(Register Src, bool AllowUndefs); + bool isConstantSplatVector(Register Src, int64_t SplatValue, + bool AllowUndefs); + + std::optional<APInt> getConstantOrConstantSplatVector(Register Src); }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index abbef03d02cb..669104307fa0 100644 --- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2562,6 +2562,13 @@ public: AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly); + InsertPointTy createAtomicCompare(const LocationDescription &Loc, + AtomicOpValue &X, AtomicOpValue &V, + AtomicOpValue &R, Value *E, Value *D, + AtomicOrdering AO, + omp::OMPAtomicCompareOp Op, + bool IsXBinopExpr, bool IsPostfixUpdate, + bool IsFailOnly, AtomicOrdering Failure); /// Create the control flow structure of a canonical OpenMP loop. 
/// diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index cb48f54b13a6..531b11123545 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +def global_ptr_ty : LLVMQualPointerType<1>; + class AMDGPUReadPreloadRegisterIntrinsic : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; @@ -2353,10 +2355,10 @@ def int_amdgcn_s_get_waveid_in_workgroup : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; -class AMDGPUAtomicRtn<LLVMType vt> : Intrinsic < +class AMDGPUAtomicRtn<LLVMType vt, LLVMType pt = llvm_anyptr_ty> : Intrinsic < [vt], - [llvm_anyptr_ty, // vaddr - vt], // vdata(VGPR) + [pt, // vaddr + vt], // vdata(VGPR) [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>; @@ -2486,6 +2488,8 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var" [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>; +def int_amdgcn_global_atomic_ordered_add_b64 : AMDGPUAtomicRtn<llvm_i64_ty, global_ptr_ty>; + def int_amdgcn_flat_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>; def int_amdgcn_flat_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>; def int_amdgcn_global_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>; diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index 2de2cf4185d8..84cac3ef700e 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -127,12 +127,20 
@@ enum : int32_t { #undef COMPUTE_PGM_RSRC1 // Compute program resource register 2. Must match hardware definition. +// GFX6+. #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH) +// [GFX6-GFX11]. +#define COMPUTE_PGM_RSRC2_GFX6_GFX11(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX11_##NAME, SHIFT, WIDTH) +// GFX12+. +#define COMPUTE_PGM_RSRC2_GFX12_PLUS(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX12_PLUS_##NAME, SHIFT, WIDTH) enum : int32_t { COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1), COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5), - COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1), + COMPUTE_PGM_RSRC2_GFX6_GFX11(ENABLE_TRAP_HANDLER, 6, 1), + COMPUTE_PGM_RSRC2_GFX12_PLUS(RESERVED1, 6, 1), COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1), COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1), COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1), @@ -166,23 +174,37 @@ enum : int32_t { // Compute program resource register 3 for GFX10+. Must match hardware // definition. -// [GFX10]. -#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \ - AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH) // GFX10+. #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH) +// [GFX10]. +#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_##NAME, SHIFT, WIDTH) +// [GFX10-GFX11]. +#define COMPUTE_PGM_RSRC3_GFX10_GFX11(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_GFX11_##NAME, SHIFT, WIDTH) // GFX11+. #define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH) +// [GFX11]. +#define COMPUTE_PGM_RSRC3_GFX11(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_##NAME, SHIFT, WIDTH) +// GFX12+. 
+#define COMPUTE_PGM_RSRC3_GFX12_PLUS(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX12_PLUS_##NAME, SHIFT, WIDTH) enum : int32_t { - COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), - COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8), - COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6), - COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1), - COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1), - COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19), - COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1), + COMPUTE_PGM_RSRC3_GFX10_GFX11(SHARED_VGPR_COUNT, 0, 4), + COMPUTE_PGM_RSRC3_GFX12_PLUS(RESERVED0, 0, 4), + COMPUTE_PGM_RSRC3_GFX10(RESERVED1, 4, 8), + COMPUTE_PGM_RSRC3_GFX11(INST_PREF_SIZE, 4, 6), + COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_START, 10, 1), + COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_END, 11, 1), + COMPUTE_PGM_RSRC3_GFX12_PLUS(INST_PREF_SIZE, 4, 8), + COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED2, 12, 1), + COMPUTE_PGM_RSRC3_GFX10_GFX11(RESERVED3, 13, 1), + COMPUTE_PGM_RSRC3_GFX12_PLUS(GLG_EN, 13, 1), + COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED4, 14, 17), + COMPUTE_PGM_RSRC3_GFX10(RESERVED5, 31, 1), COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1), }; #undef COMPUTE_PGM_RSRC3_GFX10_PLUS diff --git a/contrib/llvm-project/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/contrib/llvm-project/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h index b0683ac2e32c..3aceb247a26c 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -70,7 +70,8 @@ enum attributeBits { ATTR_EVEXKZ = 0x1 << 11, ATTR_EVEXB = 0x1 << 12, ATTR_REX2 = 0x1 << 13, - ATTR_max = 0x1 << 14, + ATTR_EVEXNF = 0x1 << 14, + ATTR_max = 0x1 << 15, }; // Combinations of the above attributes that are relevant to instruction @@ -137,12 +138,15 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_L_W_XD, 5, "requires VEX, L, W and XD prefix") \ ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and 
OpSize") \ ENUM_ENTRY(IC_EVEX, 1, "requires an EVEX prefix") \ + ENUM_ENTRY(IC_EVEX_NF, 2, "requires EVEX and NF prefix") \ ENUM_ENTRY(IC_EVEX_XS, 2, "requires EVEX and the XS prefix") \ ENUM_ENTRY(IC_EVEX_XD, 2, "requires EVEX and the XD prefix") \ ENUM_ENTRY(IC_EVEX_OPSIZE, 2, "requires EVEX and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_NF, 3, "requires EVEX, NF and the OpSize prefix") \ ENUM_ENTRY(IC_EVEX_OPSIZE_ADSIZE, 3, \ "requires EVEX, OPSIZE and the ADSIZE prefix") \ ENUM_ENTRY(IC_EVEX_W, 3, "requires EVEX and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_NF, 4, "requires EVEX, W and NF prefix") \ ENUM_ENTRY(IC_EVEX_W_XS, 4, "requires EVEX, W, and XS prefix") \ ENUM_ENTRY(IC_EVEX_W_XD, 4, "requires EVEX, W, and XD prefix") \ ENUM_ENTRY(IC_EVEX_W_OPSIZE, 4, "requires EVEX, W, and OpSize") \ @@ -187,10 +191,13 @@ enum attributeBits { ENUM_ENTRY(IC_EVEX_L2_W_XD_K, 4, "requires EVEX_K, L2, W and XD prefix") \ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K, 4, "requires EVEX_K, L2, W and OpSize") \ ENUM_ENTRY(IC_EVEX_B, 1, "requires an EVEX_B prefix") \ + ENUM_ENTRY(IC_EVEX_B_NF, 2, "requires EVEX_NF and EVEX_B prefix") \ ENUM_ENTRY(IC_EVEX_XS_B, 2, "requires EVEX_B and the XS prefix") \ ENUM_ENTRY(IC_EVEX_XD_B, 2, "requires EVEX_B and the XD prefix") \ ENUM_ENTRY(IC_EVEX_OPSIZE_B, 2, "requires EVEX_B and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_B_NF, 3, "requires EVEX_B, NF and Opsize prefix") \ ENUM_ENTRY(IC_EVEX_W_B, 3, "requires EVEX_B and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_B_NF, 4, "requires EVEX_NF, EVEX_B and the W prefix") \ ENUM_ENTRY(IC_EVEX_W_XS_B, 4, "requires EVEX_B, W, and XS prefix") \ ENUM_ENTRY(IC_EVEX_W_XD_B, 4, "requires EVEX_B, W, and XD prefix") \ ENUM_ENTRY(IC_EVEX_W_OPSIZE_B, 4, "requires EVEX_B, W, and OpSize") \ diff --git a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td index 77db371adaf7..6bda80681432 100644 --- 
a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -437,13 +437,6 @@ def select_constant_cmp: GICombineRule< (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }]) >; -def select_to_logical : GICombineRule< - (defs root:$root, build_fn_matchinfo:$matchinfo), - (match (wip_match_opcode G_SELECT):$root, - [{ return Helper.matchSelectToLogical(*${root}, ${matchinfo}); }]), - (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }]) ->; - // Fold (C op x) -> (x op C) // TODO: handle more isCommutable opcodes // TODO: handle compares (currently not marked as isCommutable) @@ -1242,6 +1235,12 @@ def select_to_minmax: GICombineRule< [{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; +def match_selects : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SELECT):$root, + [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -1282,7 +1281,7 @@ def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend, def phi_combines : GICombineGroup<[extend_through_phis]>; def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp, - select_to_logical]>; + match_selects]>; def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd, mul_by_neg_one, idempotent_prop]>; diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp index 8a802515b6f4..35bdd869a88d 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp @@ -29,7 +29,6 @@ bool ConstraintSystem::eliminateUsingFM() { assert(!Constraints.empty() && "should only be called for non-empty constraint systems"); - uint32_t NewGCD = 1; unsigned LastIdx = NumVariables - 1; // First, either remove the variable in place if it is 0 or add the row to @@ -96,24 +95,20 @@ bool ConstraintSystem::eliminateUsingFM() { IdxUpper++; } - if (MulOverflow(UpperV, ((-1) * LowerLast / GCD), M1)) + if (MulOverflow(UpperV, ((-1) * LowerLast), M1)) return false; if (IdxLower < LowerRow.size() && LowerRow[IdxLower].Id == CurrentId) { LowerV = LowerRow[IdxLower].Coefficient; IdxLower++; } - if (MulOverflow(LowerV, (UpperLast / GCD), M2)) + if (MulOverflow(LowerV, (UpperLast), M2)) return false; if (AddOverflow(M1, M2, N)) return false; if (N == 0) continue; NR.emplace_back(N, CurrentId); - - NewGCD = - APIntOps::GreatestCommonDivisor({32, (uint32_t)N}, {32, NewGCD}) - .getZExtValue(); } if (NR.empty()) continue; @@ -124,7 +119,6 @@ bool ConstraintSystem::eliminateUsingFM() { } } NumVariables -= 1; - GCD = NewGCD; return true; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp index 
5beac5547d65..78a833476334 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp @@ -1189,14 +1189,26 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = simplifyDivRem(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - // If this is an exact divide by a constant, then the dividend (Op0) must have - // at least as many trailing zeros as the divisor to divide evenly. If it has - // less trailing zeros, then the result must be poison. const APInt *DivC; - if (IsExact && match(Op1, m_APInt(DivC)) && DivC->countr_zero()) { - KnownBits KnownOp0 = computeKnownBits(Op0, /* Depth */ 0, Q); - if (KnownOp0.countMaxTrailingZeros() < DivC->countr_zero()) - return PoisonValue::get(Op0->getType()); + if (IsExact && match(Op1, m_APInt(DivC))) { + // If this is an exact divide by a constant, then the dividend (Op0) must + // have at least as many trailing zeros as the divisor to divide evenly. If + // it has less trailing zeros, then the result must be poison. + if (DivC->countr_zero()) { + KnownBits KnownOp0 = computeKnownBits(Op0, /* Depth */ 0, Q); + if (KnownOp0.countMaxTrailingZeros() < DivC->countr_zero()) + return PoisonValue::get(Op0->getType()); + } + + // udiv exact (mul nsw X, C), C --> X + // sdiv exact (mul nuw X, C), C --> X + // where C is not a power of 2. + Value *X; + if (!DivC->isPowerOf2() && + (Opcode == Instruction::UDiv + ? 
match(Op0, m_NSWMul(m_Value(X), m_Specific(Op1))) + : match(Op0, m_NUWMul(m_Value(X), m_Specific(Op1))))) + return X; } return nullptr; @@ -4857,14 +4869,12 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, // select ?, poison, X -> X // select ?, undef, X -> X if (isa<PoisonValue>(TrueVal) || - (Q.isUndefValue(TrueVal) && - isGuaranteedNotToBePoison(FalseVal, Q.AC, Q.CxtI, Q.DT))) + (Q.isUndefValue(TrueVal) && impliesPoison(FalseVal, Cond))) return FalseVal; // select ?, X, poison -> X // select ?, X, undef -> X if (isa<PoisonValue>(FalseVal) || - (Q.isUndefValue(FalseVal) && - isGuaranteedNotToBePoison(TrueVal, Q.AC, Q.CxtI, Q.DT))) + (Q.isUndefValue(FalseVal) && impliesPoison(TrueVal, Cond))) return TrueVal; // Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC'' diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp index 89cc7ea15ec1..360fc594ef7c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp @@ -434,6 +434,28 @@ class LazyValueInfoImpl { void solve(); + // For the following methods, if UseBlockValue is true, the function may + // push additional values to the worklist and return nullopt. If + // UseBlockValue is false, it will never return nullopt. 
+ + std::optional<ValueLatticeElement> + getValueFromSimpleICmpCondition(CmpInst::Predicate Pred, Value *RHS, + const APInt &Offset, Instruction *CxtI, + bool UseBlockValue); + + std::optional<ValueLatticeElement> + getValueFromICmpCondition(Value *Val, ICmpInst *ICI, bool isTrueDest, + bool UseBlockValue); + + std::optional<ValueLatticeElement> + getValueFromCondition(Value *Val, Value *Cond, bool IsTrueDest, + bool UseBlockValue, unsigned Depth = 0); + + std::optional<ValueLatticeElement> getEdgeValueLocal(Value *Val, + BasicBlock *BBFrom, + BasicBlock *BBTo, + bool UseBlockValue); + public: /// This is the query interface to determine the lattice value for the /// specified Value* at the context instruction (if specified) or at the @@ -755,14 +777,10 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) { return Result; } -static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, - bool isTrueDest = true, - unsigned Depth = 0); - // If we can determine a constraint on the value given conditions assumed by // the program, intersect those constraints with BBLV void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( - Value *Val, ValueLatticeElement &BBLV, Instruction *BBI) { + Value *Val, ValueLatticeElement &BBLV, Instruction *BBI) { BBI = BBI ? 
BBI : dyn_cast<Instruction>(Val); if (!BBI) return; @@ -779,17 +797,21 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( if (I->getParent() != BB || !isValidAssumeForContext(I, BBI)) continue; - BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0))); + BBLV = intersect(BBLV, *getValueFromCondition(Val, I->getArgOperand(0), + /*IsTrueDest*/ true, + /*UseBlockValue*/ false)); } // If guards are not used in the module, don't spend time looking for them if (GuardDecl && !GuardDecl->use_empty() && BBI->getIterator() != BB->begin()) { - for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()), - BB->rend())) { + for (Instruction &I : + make_range(std::next(BBI->getIterator().getReverse()), BB->rend())) { Value *Cond = nullptr; if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond)))) - BBLV = intersect(BBLV, getValueFromCondition(Val, Cond)); + BBLV = intersect(BBLV, + *getValueFromCondition(Val, Cond, /*IsTrueDest*/ true, + /*UseBlockValue*/ false)); } } @@ -886,10 +908,14 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) { // If the value is undef, a different value may be chosen in // the select condition. 
if (isGuaranteedNotToBeUndef(Cond, AC)) { - TrueVal = intersect(TrueVal, - getValueFromCondition(SI->getTrueValue(), Cond, true)); - FalseVal = intersect( - FalseVal, getValueFromCondition(SI->getFalseValue(), Cond, false)); + TrueVal = + intersect(TrueVal, *getValueFromCondition(SI->getTrueValue(), Cond, + /*IsTrueDest*/ true, + /*UseBlockValue*/ false)); + FalseVal = + intersect(FalseVal, *getValueFromCondition(SI->getFalseValue(), Cond, + /*IsTrueDest*/ false, + /*UseBlockValue*/ false)); } ValueLatticeElement Result = TrueVal; @@ -950,9 +976,11 @@ LazyValueInfoImpl::solveBlockValueBinaryOpImpl( // lets us pick up facts from expressions like "and i32 (call i32 // @foo()), 32" std::optional<ConstantRange> LHSRes = getRangeFor(I->getOperand(0), I, BB); + if (!LHSRes) + return std::nullopt; + std::optional<ConstantRange> RHSRes = getRangeFor(I->getOperand(1), I, BB); - if (!LHSRes || !RHSRes) - // More work to do before applying this transfer rule. + if (!RHSRes) return std::nullopt; const ConstantRange &LHSRange = *LHSRes; @@ -1068,15 +1096,26 @@ static bool matchICmpOperand(APInt &Offset, Value *LHS, Value *Val, } /// Get value range for a "(Val + Offset) Pred RHS" condition. 
-static ValueLatticeElement getValueFromSimpleICmpCondition( - CmpInst::Predicate Pred, Value *RHS, const APInt &Offset) { +std::optional<ValueLatticeElement> +LazyValueInfoImpl::getValueFromSimpleICmpCondition(CmpInst::Predicate Pred, + Value *RHS, + const APInt &Offset, + Instruction *CxtI, + bool UseBlockValue) { ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(), /*isFullSet=*/true); - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { RHSRange = ConstantRange(CI->getValue()); - else if (Instruction *I = dyn_cast<Instruction>(RHS)) + } else if (UseBlockValue) { + std::optional<ValueLatticeElement> R = + getBlockValue(RHS, CxtI->getParent(), CxtI); + if (!R) + return std::nullopt; + RHSRange = toConstantRange(*R, RHS->getType()); + } else if (Instruction *I = dyn_cast<Instruction>(RHS)) { if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) RHSRange = getConstantRangeFromMetadata(*Ranges); + } ConstantRange TrueValues = ConstantRange::makeAllowedICmpRegion(Pred, RHSRange); @@ -1103,8 +1142,8 @@ getRangeViaSLT(CmpInst::Predicate Pred, APInt RHS, return std::nullopt; } -static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, - bool isTrueDest) { +std::optional<ValueLatticeElement> LazyValueInfoImpl::getValueFromICmpCondition( + Value *Val, ICmpInst *ICI, bool isTrueDest, bool UseBlockValue) { Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); @@ -1128,11 +1167,13 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, unsigned BitWidth = Ty->getScalarSizeInBits(); APInt Offset(BitWidth, 0); if (matchICmpOperand(Offset, LHS, Val, EdgePred)) - return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset); + return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset, ICI, + UseBlockValue); CmpInst::Predicate SwappedPred = CmpInst::getSwappedPredicate(EdgePred); if (matchICmpOperand(Offset, RHS, Val, SwappedPred)) - return 
getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset); + return getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset, ICI, + UseBlockValue); const APInt *Mask, *C; if (match(LHS, m_And(m_Specific(Val), m_APInt(Mask))) && @@ -1212,10 +1253,12 @@ static ValueLatticeElement getValueFromOverflowCondition( return ValueLatticeElement::getRange(NWR); } -static ValueLatticeElement getValueFromCondition( - Value *Val, Value *Cond, bool IsTrueDest, unsigned Depth) { +std::optional<ValueLatticeElement> +LazyValueInfoImpl::getValueFromCondition(Value *Val, Value *Cond, + bool IsTrueDest, bool UseBlockValue, + unsigned Depth) { if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond)) - return getValueFromICmpCondition(Val, ICI, IsTrueDest); + return getValueFromICmpCondition(Val, ICI, IsTrueDest, UseBlockValue); if (auto *EVI = dyn_cast<ExtractValueInst>(Cond)) if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand())) @@ -1227,7 +1270,7 @@ static ValueLatticeElement getValueFromCondition( Value *N; if (match(Cond, m_Not(m_Value(N)))) - return getValueFromCondition(Val, N, !IsTrueDest, Depth); + return getValueFromCondition(Val, N, !IsTrueDest, UseBlockValue, Depth); Value *L, *R; bool IsAnd; @@ -1238,19 +1281,25 @@ static ValueLatticeElement getValueFromCondition( else return ValueLatticeElement::getOverdefined(); - ValueLatticeElement LV = getValueFromCondition(Val, L, IsTrueDest, Depth); - ValueLatticeElement RV = getValueFromCondition(Val, R, IsTrueDest, Depth); + std::optional<ValueLatticeElement> LV = + getValueFromCondition(Val, L, IsTrueDest, UseBlockValue, Depth); + if (!LV) + return std::nullopt; + std::optional<ValueLatticeElement> RV = + getValueFromCondition(Val, R, IsTrueDest, UseBlockValue, Depth); + if (!RV) + return std::nullopt; // if (L && R) -> intersect L and R // if (!(L || R)) -> intersect !L and !R // if (L || R) -> union L and R // if (!(L && R)) -> union !L and !R if (IsTrueDest ^ IsAnd) { - LV.mergeIn(RV); - return LV; + LV->mergeIn(*RV); + 
return *LV; } - return intersect(LV, RV); + return intersect(*LV, *RV); } // Return true if Usr has Op as an operand, otherwise false. @@ -1302,8 +1351,9 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op, } /// Compute the value of Val on the edge BBFrom -> BBTo. -static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, - BasicBlock *BBTo) { +std::optional<ValueLatticeElement> +LazyValueInfoImpl::getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, + BasicBlock *BBTo, bool UseBlockValue) { // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we // know that v != 0. if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { @@ -1324,13 +1374,16 @@ static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // If the condition of the branch is an equality comparison, we may be // able to infer the value. - ValueLatticeElement Result = getValueFromCondition(Val, Condition, - isTrueDest); - if (!Result.isOverdefined()) + std::optional<ValueLatticeElement> Result = + getValueFromCondition(Val, Condition, isTrueDest, UseBlockValue); + if (!Result) + return std::nullopt; + + if (!Result->isOverdefined()) return Result; if (User *Usr = dyn_cast<User>(Val)) { - assert(Result.isOverdefined() && "Result isn't overdefined"); + assert(Result->isOverdefined() && "Result isn't overdefined"); // Check with isOperationFoldable() first to avoid linearly iterating // over the operands unnecessarily which can be expensive for // instructions with many operands. 
@@ -1356,8 +1409,8 @@ static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // br i1 %Condition, label %then, label %else for (unsigned i = 0; i < Usr->getNumOperands(); ++i) { Value *Op = Usr->getOperand(i); - ValueLatticeElement OpLatticeVal = - getValueFromCondition(Op, Condition, isTrueDest); + ValueLatticeElement OpLatticeVal = *getValueFromCondition( + Op, Condition, isTrueDest, /*UseBlockValue*/ false); if (std::optional<APInt> OpConst = OpLatticeVal.asConstantInteger()) { Result = constantFoldUser(Usr, Op, *OpConst, DL); @@ -1367,7 +1420,7 @@ static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, } } } - if (!Result.isOverdefined()) + if (!Result->isOverdefined()) return Result; } } @@ -1432,8 +1485,12 @@ LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, if (Constant *VC = dyn_cast<Constant>(Val)) return ValueLatticeElement::get(VC); - ValueLatticeElement LocalResult = getEdgeValueLocal(Val, BBFrom, BBTo); - if (hasSingleValue(LocalResult)) + std::optional<ValueLatticeElement> LocalResult = + getEdgeValueLocal(Val, BBFrom, BBTo, /*UseBlockValue*/ true); + if (!LocalResult) + return std::nullopt; + + if (hasSingleValue(*LocalResult)) // Can't get any more precise here return LocalResult; @@ -1453,7 +1510,7 @@ LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, // but then the result is not cached. 
intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, CxtI); - return intersect(LocalResult, InBlock); + return intersect(*LocalResult, InBlock); } ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, @@ -1499,10 +1556,12 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, std::optional<ValueLatticeElement> Result = getEdgeValue(V, FromBB, ToBB, CxtI); - if (!Result) { + while (!Result) { + // As the worklist only explicitly tracks block values (but not edge values) + // we may have to call solve() multiple times, as the edge value calculation + // may request additional block values. solve(); Result = getEdgeValue(V, FromBB, ToBB, CxtI); - assert(Result && "More work to do after problem solved?"); } LLVM_DEBUG(dbgs() << " Result = " << *Result << "\n"); @@ -1528,13 +1587,17 @@ ValueLatticeElement LazyValueInfoImpl::getValueAtUse(const Use &U) { if (!isGuaranteedNotToBeUndef(SI->getCondition(), AC)) break; if (CurrU->getOperandNo() == 1) - CondVal = getValueFromCondition(V, SI->getCondition(), true); + CondVal = + *getValueFromCondition(V, SI->getCondition(), /*IsTrueDest*/ true, + /*UseBlockValue*/ false); else if (CurrU->getOperandNo() == 2) - CondVal = getValueFromCondition(V, SI->getCondition(), false); + CondVal = + *getValueFromCondition(V, SI->getCondition(), /*IsTrueDest*/ false, + /*UseBlockValue*/ false); } else if (auto *PHI = dyn_cast<PHINode>(CurrI)) { // TODO: Use non-local query? 
- CondVal = - getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU), PHI->getParent()); + CondVal = *getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU), + PHI->getParent(), /*UseBlockValue*/ false); } if (CondVal) VL = intersect(VL, *CondVal); diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp index 3f76dfdaac31..67246afa2314 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -862,6 +862,15 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost( return Cost; } +InstructionCost TargetTransformInfo::getAltInstrCost( + VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, + const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const { + InstructionCost Cost = + TTIImpl->getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + InstructionCost TargetTransformInfo::getShuffleCost( ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp index cac2602d455f..16d78c1ded6d 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp @@ -983,45 +983,11 @@ static void computeKnownBitsFromOperator(const Operator *I, break; } case Instruction::Select: { - const Value *LHS = nullptr, *RHS = nullptr; - SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; - if (SelectPatternResult::isMinOrMax(SPF)) { - computeKnownBits(RHS, Known, Depth + 1, Q); - computeKnownBits(LHS, Known2, Depth + 1, Q); - switch (SPF) { - default: - llvm_unreachable("Unhandled select pattern flavor!"); - case SPF_SMAX: - Known = KnownBits::smax(Known, 
Known2); - break; - case SPF_SMIN: - Known = KnownBits::smin(Known, Known2); - break; - case SPF_UMAX: - Known = KnownBits::umax(Known, Known2); - break; - case SPF_UMIN: - Known = KnownBits::umin(Known, Known2); - break; - } - break; - } - computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); // Only known if known in both the LHS and RHS. Known = Known.intersectWith(Known2); - - if (SPF == SPF_ABS) { - // RHS from matchSelectPattern returns the negation part of abs pattern. - // If the negate has an NSW flag we can assume the sign bit of the result - // will be 0 because that makes abs(INT_MIN) undefined. - if (match(RHS, m_Neg(m_Specific(LHS))) && - Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RHS))) - Known.Zero.setSignBit(); - } - break; } case Instruction::FPTrunc: diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp index f90fca9d937f..5b57f0a25cec 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp @@ -123,6 +123,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) { + assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!"); + switch (ID) { case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 8907f6fa4ff3..a027d0c21ba0 100644 --- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -4218,6 +4218,9 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord( // Check whether we have enough values to read a partition name. if (OpNum + 1 < Record.size()) { + // Check Strtab has enough values for the partition. 
+ if (Record[OpNum] + Record[OpNum + 1] > Strtab.size()) + return error("Malformed partition, too large."); NewGA->setPartition( StringRef(Strtab.data() + Record[OpNum], Record[OpNum + 1])); OpNum += 2; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 91a64d59e154..8b15bdb0aca3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5940,62 +5940,6 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA( return false; } -bool CombinerHelper::matchSelectToLogical(MachineInstr &MI, - BuildFnTy &MatchInfo) { - GSelect &Sel = cast<GSelect>(MI); - Register DstReg = Sel.getReg(0); - Register Cond = Sel.getCondReg(); - Register TrueReg = Sel.getTrueReg(); - Register FalseReg = Sel.getFalseReg(); - - auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI); - auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI); - - const LLT CondTy = MRI.getType(Cond); - const LLT OpTy = MRI.getType(TrueReg); - if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1) - return false; - - // We have a boolean select. 
- - // select Cond, Cond, F --> or Cond, F - // select Cond, 1, F --> or Cond, F - auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI); - if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildOr(DstReg, Cond, FalseReg); - }; - return true; - } - - // select Cond, T, Cond --> and Cond, T - // select Cond, T, 0 --> and Cond, T - auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI); - if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildAnd(DstReg, Cond, TrueReg); - }; - return true; - } - - // select Cond, T, 1 --> or (not Cond), T - if (MaybeCstFalse && MaybeCstFalse->isOne()) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg); - }; - return true; - } - - // select Cond, 0, F --> and (not Cond), F - if (MaybeCstTrue && MaybeCstTrue->isZero()) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg); - }; - return true; - } - return false; -} - bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &IdxToPropagate) { bool PropagateNaN; @@ -6318,3 +6262,300 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) { MI.getOperand(2).setReg(LHSReg); Observer.changedInstr(MI); } + +bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) { + LLT SrcTy = MRI.getType(Src); + if (SrcTy.isFixedVector()) + return isConstantSplatVector(Src, 1, AllowUndefs); + if (SrcTy.isScalar()) { + if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr) + return true; + auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); + return IConstant && IConstant->Value == 1; + } + return false; // scalable vector +} + +bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) { + LLT SrcTy = MRI.getType(Src); + if (SrcTy.isFixedVector()) + return 
isConstantSplatVector(Src, 0, AllowUndefs); + if (SrcTy.isScalar()) { + if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr) + return true; + auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); + return IConstant && IConstant->Value == 0; + } + return false; // scalable vector +} + +// Ignores COPYs during conformance checks. +// FIXME scalable vectors. +bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue, + bool AllowUndefs) { + GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI); + if (!BuildVector) + return false; + unsigned NumSources = BuildVector->getNumSources(); + + for (unsigned I = 0; I < NumSources; ++I) { + GImplicitDef *ImplicitDef = + getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI); + if (ImplicitDef && AllowUndefs) + continue; + if (ImplicitDef && !AllowUndefs) + return false; + std::optional<ValueAndVReg> IConstant = + getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI); + if (IConstant && IConstant->Value == SplatValue) + continue; + return false; + } + return true; +} + +// Ignores COPYs during lookups. 
+// FIXME scalable vectors +std::optional<APInt> +CombinerHelper::getConstantOrConstantSplatVector(Register Src) { + auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); + if (IConstant) + return IConstant->Value; + + GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI); + if (!BuildVector) + return std::nullopt; + unsigned NumSources = BuildVector->getNumSources(); + + std::optional<APInt> Value = std::nullopt; + for (unsigned I = 0; I < NumSources; ++I) { + std::optional<ValueAndVReg> IConstant = + getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI); + if (!IConstant) + return std::nullopt; + if (!Value) + Value = IConstant->Value; + else if (*Value != IConstant->Value) + return std::nullopt; + } + return Value; +} + +// TODO: use knownbits to determine zeros +bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, + BuildFnTy &MatchInfo) { + uint32_t Flags = Select->getFlags(); + Register Dest = Select->getReg(0); + Register Cond = Select->getCondReg(); + Register True = Select->getTrueReg(); + Register False = Select->getFalseReg(); + LLT CondTy = MRI.getType(Select->getCondReg()); + LLT TrueTy = MRI.getType(Select->getTrueReg()); + + // We only do this combine for scalar boolean conditions. + if (CondTy != LLT::scalar(1)) + return false; + + // Both are scalars. 
+ std::optional<ValueAndVReg> TrueOpt = + getIConstantVRegValWithLookThrough(True, MRI); + std::optional<ValueAndVReg> FalseOpt = + getIConstantVRegValWithLookThrough(False, MRI); + + if (!TrueOpt || !FalseOpt) + return false; + + APInt TrueValue = TrueOpt->Value; + APInt FalseValue = FalseOpt->Value; + + // select Cond, 1, 0 --> zext (Cond) + if (TrueValue.isOne() && FalseValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + B.buildZExtOrTrunc(Dest, Cond); + }; + return true; + } + + // select Cond, -1, 0 --> sext (Cond) + if (TrueValue.isAllOnes() && FalseValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + B.buildSExtOrTrunc(Dest, Cond); + }; + return true; + } + + // select Cond, 0, 1 --> zext (!Cond) + if (TrueValue.isZero() && FalseValue.isOne()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + B.buildZExtOrTrunc(Dest, Inner); + }; + return true; + } + + // select Cond, 0, -1 --> sext (!Cond) + if (TrueValue.isZero() && FalseValue.isAllOnes()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + B.buildSExtOrTrunc(Dest, Inner); + }; + return true; + } + + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (TrueValue - 1 == FalseValue) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Inner, Cond); + B.buildAdd(Dest, Inner, False); + }; + return true; + } + + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (TrueValue + 1 == FalseValue) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildSExtOrTrunc(Inner, Cond); + B.buildAdd(Dest, Inner, 
False); + }; + return true; + } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (TrueValue.isPowerOf2() && FalseValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Inner, Cond); + // The shift amount must be scalar. + LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy; + auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2()); + B.buildShl(Dest, Inner, ShAmtC, Flags); + }; + return true; + } + // select Cond, -1, C --> or (sext Cond), C + if (TrueValue.isAllOnes()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildSExtOrTrunc(Inner, Cond); + B.buildOr(Dest, Inner, False, Flags); + }; + return true; + } + + // select Cond, C, -1 --> or (sext (not Cond)), C + if (FalseValue.isAllOnes()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Not = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Not, Cond); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildSExtOrTrunc(Inner, Not); + B.buildOr(Dest, Inner, True, Flags); + }; + return true; + } + + return false; +} + +// TODO: use knownbits to determine zeros +bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, + BuildFnTy &MatchInfo) { + uint32_t Flags = Select->getFlags(); + Register DstReg = Select->getReg(0); + Register Cond = Select->getCondReg(); + Register True = Select->getTrueReg(); + Register False = Select->getFalseReg(); + LLT CondTy = MRI.getType(Select->getCondReg()); + LLT TrueTy = MRI.getType(Select->getTrueReg()); + + // Boolean or fixed vector of booleans. 
+ if (CondTy.isScalableVector() || + (CondTy.isFixedVector() && + CondTy.getElementType().getScalarSizeInBits() != 1) || + CondTy.getScalarSizeInBits() != 1) + return false; + + if (CondTy != TrueTy) + return false; + + // select Cond, Cond, F --> or Cond, F + // select Cond, 1, F --> or Cond, F + if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Cond); + B.buildOr(DstReg, Ext, False, Flags); + }; + return true; + } + + // select Cond, T, Cond --> and Cond, T + // select Cond, T, 0 --> and Cond, T + if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Cond); + B.buildAnd(DstReg, Ext, True); + }; + return true; + } + + // select Cond, T, 1 --> or (not Cond), T + if (isOneOrOneSplat(False, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + // First the not. + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + // Then an ext to match the destination register. + Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Inner); + B.buildOr(DstReg, Ext, True, Flags); + }; + return true; + } + + // select Cond, 0, F --> and (not Cond), F + if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + // First the not. + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + // Then an ext to match the destination register. 
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Inner); + B.buildAnd(DstReg, Ext, False); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { + GSelect *Select = cast<GSelect>(&MI); + + if (tryFoldSelectOfConstants(Select, MatchInfo)) + return true; + + if (tryFoldBoolSelectToLogic(Select, MatchInfo)) + return true; + + return false; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp index a032b31a1fc7..51e944d0279f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -175,8 +175,46 @@ public: if (MachineInstr *MI = I->second.MI) { std::optional<DestSourcePair> CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr); - markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()}, - TRI); + + MCRegister Def = CopyOperands->Destination->getReg().asMCReg(); + MCRegister Src = CopyOperands->Source->getReg().asMCReg(); + + markRegsUnavailable(Def, TRI); + + // Since we clobber the destination of a copy, the semantic of Src's + // "DefRegs" to contain Def is no longer effectual. We will also need + // to remove the record from the copy maps that indicates Src defined + // Def. Failing to do so might cause the target to miss some + // opportunities to further eliminate redundant copy instructions. 
+ // Consider the following sequence during the + // ForwardCopyPropagateBlock procedure: + // L1: r0 = COPY r9 <- TrackMI + // L2: r0 = COPY r8 <- TrackMI (Remove r9 defined r0 from tracker) + // L3: use r0 <- Remove L2 from MaybeDeadCopies + // L4: early-clobber r9 <- Clobber r9 (L2 is still valid in tracker) + // L5: r0 = COPY r8 <- Remove NopCopy + for (MCRegUnit SrcUnit : TRI.regunits(Src)) { + auto SrcCopy = Copies.find(SrcUnit); + if (SrcCopy != Copies.end() && SrcCopy->second.LastSeenUseInCopy) { + // If SrcCopy defines multiple values, we only need + // to erase the record for Def in DefRegs. + for (auto itr = SrcCopy->second.DefRegs.begin(); + itr != SrcCopy->second.DefRegs.end(); itr++) { + if (*itr == Def) { + SrcCopy->second.DefRegs.erase(itr); + // If DefReg becomes empty after removal, we can remove the + // SrcCopy from the tracker's copy maps. We only remove those + // entries solely record the Def is defined by Src. If an + // entry also contains the definition record of other Def' + // registers, it cannot be cleared. + if (SrcCopy->second.DefRegs.empty() && !SrcCopy->second.MI) { + Copies.erase(SrcCopy); + } + break; + } + } + } + } } // Now we can erase the copy. 
Copies.erase(I); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0d46c7868d87..eafa95ce7fcf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -546,6 +546,7 @@ namespace { SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFP_TO_BF16(SDNode *N); + SDValue visitBF16_TO_FP(SDNode *N); SDValue visitVECREDUCE(SDNode *N); SDValue visitVPOp(SDNode *N); SDValue visitGET_FPENV_MEM(SDNode *N); @@ -2047,6 +2048,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); case ISD::FP_TO_BF16: return visitFP_TO_BF16(N); + case ISD::BF16_TO_FP: return visitBF16_TO_FP(N); case ISD::FREEZE: return visitFREEZE(N); case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N); case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N); @@ -26256,14 +26258,17 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { } SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { + auto Op = N->getOpcode(); + assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) && + "opcode should be FP16_TO_FP or BF16_TO_FP."); SDValue N0 = N->getOperand(0); - // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) + // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or + // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op) if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) { ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); if (AndConst && AndConst->getAPIntValue() == 0xffff) { - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), - N0.getOperand(0)); + return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0)); } } @@ -26280,6 +26285,11 @@ SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) { return SDValue(); } +SDValue 
DAGCombiner::visitBF16_TO_FP(SDNode *N) { + // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op) + return visitFP16_TO_FP(N); +} + SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N0.getValueType(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a27febe15db8..34fa1f5a7ed1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -495,7 +495,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no // constraints on the %dst register, COPY can target all legal register // classes. - unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned SubIdx = Node->getConstantOperandVal(1); const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); @@ -611,7 +611,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. 
- unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned DstRCIdx = Node->getConstantOperandVal(1); const TargetRegisterClass *DstRC = TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); Register NewVReg = MRI->createVirtualRegister(DstRC); @@ -629,7 +629,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, void InstrEmitter::EmitRegSequence(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned) { - unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + unsigned DstRCIdx = Node->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); @@ -1309,8 +1309,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - unsigned Flags = - cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned Flags = Node->getConstantOperandVal(i); const InlineAsm::Flag F(Flags); const unsigned NumVals = F.getNumOperandRegisters(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index f73ddfee2b90..e3acb58327a8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -492,8 +492,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, --NumOps; // Ignore the glue operand. 
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - unsigned Flags = - cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned Flags = Node->getConstantOperandVal(i); const InlineAsm::Flag F(Flags); unsigned NumVals = F.getNumOperandRegisters(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 47c137d2bcad..dcecb2e0e7fa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -331,7 +331,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned Opcode = Node->getMachineOpcode(); if (Opcode == TargetOpcode::REG_SEQUENCE) { - unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + unsigned DstRCIdx = Node->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); RegClass = RC->getID(); Cost = RegSequenceCost; @@ -1369,8 +1369,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { --NumOps; // Ignore the glue operand. 
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - unsigned Flags = - cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned Flags = Node->getConstantOperandVal(i); const InlineAsm::Flag F(Flags); unsigned NumVals = F.getNumOperandRegisters(); @@ -2298,8 +2297,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { continue; } if (POpc == TargetOpcode::REG_SEQUENCE) { - unsigned DstRCIdx = - cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue(); + unsigned DstRCIdx = PN->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned RCId = RC->getID(); // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 81facf92e55a..0e17bba2398e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5470,7 +5470,7 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, Ops[i].getOperand(0).getValueType() != VT || (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) || !isa<ConstantSDNode>(Ops[i].getOperand(1)) || - cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) { + Ops[i].getConstantOperandAPInt(1) != i) { IsIdentity = false; break; } @@ -7408,7 +7408,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { Src.getOperand(0).getOpcode() == ISD::GlobalAddress && Src.getOperand(1).getOpcode() == ISD::Constant) { G = cast<GlobalAddressSDNode>(Src.getOperand(0)); - SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue(); + SrcDelta = Src.getConstantOperandVal(1); } if (!G) return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3dc6e4bbcf46..f28211ac113c 100644 --- 
a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -4181,8 +4181,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { Msg << "\nIn function: " << MF->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; - unsigned iid = - cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); + unsigned iid = N->getConstantOperandVal(HasInputChain); if (iid < Intrinsic::num_intrinsics) Msg << "intrinsic %" << Intrinsic::getBaseName((Intrinsic::ID)iid); else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp index f65ec27ff875..5a058bd712a3 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp @@ -105,8 +105,7 @@ llvm::orc::createDWARFContext(LinkGraph &G) { auto SecData = getSectionData(Sec); auto Name = Sec.getName(); // DWARFContext expects the section name to not start with a dot - if (Name.starts_with(".")) - Name = Name.drop_front(); + Name.consume_front("."); LLVM_DEBUG(dbgs() << "Creating DWARFContext section " << Name << " with size " << SecData.size() << "\n"); DWARFSectionData[Name] = diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index a19e17029810..e259c393d07e 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -768,11 +768,11 @@ Error LLJITBuilderState::prepareForConstruction() { // create a default one. 
if (!SetupProcessSymbolsJITDylib && LinkProcessSymbolsByDefault) { LLVM_DEBUG(dbgs() << "Creating default Process JD setup function\n"); - SetupProcessSymbolsJITDylib = [this](LLJIT &J) -> Expected<JITDylibSP> { + SetupProcessSymbolsJITDylib = [](LLJIT &J) -> Expected<JITDylibSP> { auto &JD = J.getExecutionSession().createBareJITDylib("<Process Symbols>"); - auto G = orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( - DL->getGlobalPrefix()); + auto G = EPCDynamicLibrarySearchGenerator::GetForTargetProcess( + J.getExecutionSession()); if (!G) return G.takeError(); JD.addGenerator(std::move(*G)); diff --git a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index ce428f78dc84..f6cf358119fb 100644 --- a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -6026,6 +6026,17 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly) { + AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO); + return createAtomicCompare(Loc, X, V, R, E, D, AO, Op, IsXBinopExpr, + IsPostfixUpdate, IsFailOnly, Failure); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( + const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, + AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, + omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, + bool IsFailOnly, AtomicOrdering Failure) { + if (!updateToLocation(Loc)) return Loc.IP; @@ -6040,7 +6051,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( bool IsInteger = E->getType()->isIntegerTy(); if (Op == OMPAtomicCompareOp::EQ) { - AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO); AtomicCmpXchgInst *Result = nullptr; if (!IsInteger) { IntegerType *IntCastTy = 
diff --git a/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp b/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp index c64e9c04e199..58e4b74f4b22 100644 --- a/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp +++ b/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp @@ -86,13 +86,12 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) { Args.push_back("-passes=gvn"); } else if (Opt == "sccp") { Args.push_back("-passes=sccp"); - } else if (Opt == "loop_predication") { Args.push_back("-passes=loop-predication"); } else if (Opt == "guard_widening") { Args.push_back("-passes=guard-widening"); } else if (Opt == "loop_rotate") { - Args.push_back("-passes=loop(rotate)"); + Args.push_back("-passes=loop-rotate"); } else if (Opt == "loop_unswitch") { Args.push_back("-passes=loop(simple-loop-unswitch)"); } else if (Opt == "loop_unroll") { @@ -107,7 +106,18 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) { Args.push_back("-passes=loop-reduce"); } else if (Opt == "irce") { Args.push_back("-passes=irce"); - + } else if (Opt == "dse") { + Args.push_back("-passes=dse"); + } else if (Opt == "loop_idiom") { + Args.push_back("-passes=loop-idiom"); + } else if (Opt == "reassociate") { + Args.push_back("-passes=reassociate"); + } else if (Opt == "lower_matrix_intrinsics") { + Args.push_back("-passes=lower-matrix-intrinsics"); + } else if (Opt == "memcpyopt") { + Args.push_back("-passes=memcpyopt"); + } else if (Opt == "sroa") { + Args.push_back("-passes=sroa"); } else if (Triple(Opt).getArch()) { Args.push_back("-mtriple=" + Opt.str()); } else { diff --git a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp index eab05eed428e..c6dc42e8ac88 100644 --- a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp +++ b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp @@ -2115,6 +2115,10 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) { if (F.hasFnAttribute(Attribute::OptimizeNone)) return 
/*Changed*/ false; + // FIXME: https://github.com/llvm/llvm-project/issues/76545 + if (F.hasFnAttribute(Attribute::SanitizeHWAddress)) + return /*Changed*/ false; + bool Changed = false; auto *DL = &F.getParent()->getDataLayout(); // Collect a map of {backing storage : dbg.declares} (currently "backing diff --git a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp index fd48d5080ff6..e43f111113b4 100644 --- a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp @@ -1526,8 +1526,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, StringRef Name = SectionName; // For user-defined custom sections, strip the prefix - if (Name.starts_with(".custom_section.")) - Name = Name.substr(strlen(".custom_section.")); + Name.consume_front(".custom_section."); MCSymbol *Begin = Sec.getBeginSymbol(); if (Begin) { diff --git a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp index dfe86a45df32..ccc29d0cb73d 100644 --- a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp +++ b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp @@ -1484,6 +1484,11 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) { } uint32_t BodySize = FunctionEnd - Ctx.Ptr; + // Ensure that Function is within Ctx's buffer. + if (Ctx.Ptr + BodySize > Ctx.End) { + return make_error<GenericBinaryError>("Function extends beyond buffer", + object_error::parse_failed); + } Function.Body = ArrayRef<uint8_t>(Ctx.Ptr, BodySize); // This will be set later when reading in the linking metadata section. 
Function.Comdat = UINT32_MAX; @@ -1662,10 +1667,18 @@ Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const { Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { auto &Sym = getWasmSymbol(Symb); if (Sym.Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION && - isDefinedFunctionIndex(Sym.Info.ElementIndex)) - return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset; - else - return getSymbolValue(Symb); + isDefinedFunctionIndex(Sym.Info.ElementIndex)) { + // For object files, use the section offset. The linker relies on this. + // For linked files, use the file offset. This behavior matches the way + // browsers print stack traces and is useful for binary size analysis. + // (see https://webassembly.github.io/spec/web-api/index.html#conventions) + uint32_t Adjustment = isRelocatableObject() || isSharedObject() + ? 0 + : Sections[CodeSection].Offset; + return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset + + Adjustment; + } + return getSymbolValue(Symb); } uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const { diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp index 8f62df79d5b7..b547cf7181b1 100644 --- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp @@ -539,7 +539,7 @@ Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { const IntPtrT FPtr = swap(I->FunctionPointer); if (!FPtr) continue; - Symtab.mapAddress(FPtr, I->NameRef); + Symtab.mapAddress(FPtr, swap(I->NameRef)); } return success(); } diff --git a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp index 7256e9a29329..a9b7e209915a 100644 --- a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp @@ -75,7 +75,6 @@ 
static const RISCVSupportedExtension SupportedExtensions[] = { {"xcvmac", RISCVExtensionVersion{1, 0}}, {"xcvmem", RISCVExtensionVersion{1, 0}}, {"xcvsimd", RISCVExtensionVersion{1, 0}}, - {"xsfcie", RISCVExtensionVersion{1, 0}}, {"xsfvcp", RISCVExtensionVersion{1, 0}}, {"xsfvfnrclipxfqf", RISCVExtensionVersion{1, 0}}, {"xsfvfwmaccqqq", RISCVExtensionVersion{1, 0}}, @@ -191,11 +190,17 @@ static const RISCVSupportedExtension SupportedExtensions[] = { static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"zacas", RISCVExtensionVersion{1, 0}}, + {"zcmop", RISCVExtensionVersion{0, 2}}, + {"zfbfmin", RISCVExtensionVersion{0, 8}}, {"zicfilp", RISCVExtensionVersion{0, 4}}, + {"zicfiss", RISCVExtensionVersion{0, 4}}, + {"zicond", RISCVExtensionVersion{1, 0}}, + {"zimop", RISCVExtensionVersion{0, 1}}, + {"ztso", RISCVExtensionVersion{0, 1}}, {"zvfbfmin", RISCVExtensionVersion{0, 8}}, @@ -1006,6 +1011,7 @@ static const char *ImpliedExtsZcb[] = {"zca"}; static const char *ImpliedExtsZcd[] = {"d", "zca"}; static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"}; static const char *ImpliedExtsZcf[] = {"f", "zca"}; +static const char *ImpliedExtsZcmop[] = {"zca"}; static const char *ImpliedExtsZcmp[] = {"zca"}; static const char *ImpliedExtsZcmt[] = {"zca", "zicsr"}; static const char *ImpliedExtsZdinx[] = {"zfinx"}; @@ -1017,6 +1023,7 @@ static const char *ImpliedExtsZfinx[] = {"zicsr"}; static const char *ImpliedExtsZhinx[] = {"zhinxmin"}; static const char *ImpliedExtsZhinxmin[] = {"zfinx"}; static const char *ImpliedExtsZicntr[] = {"zicsr"}; +static const char *ImpliedExtsZicfiss[] = {"zicsr", "zimop"}; static const char *ImpliedExtsZihpm[] = {"zicsr"}; static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", @@ -1078,6 +1085,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"zcd"}, {ImpliedExtsZcd}}, {{"zce"}, {ImpliedExtsZce}}, {{"zcf"}, {ImpliedExtsZcf}}, + {{"zcmop"}, 
{ImpliedExtsZcmop}}, {{"zcmp"}, {ImpliedExtsZcmp}}, {{"zcmt"}, {ImpliedExtsZcmt}}, {{"zdinx"}, {ImpliedExtsZdinx}}, @@ -1088,6 +1096,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"zfinx"}, {ImpliedExtsZfinx}}, {{"zhinx"}, {ImpliedExtsZhinx}}, {{"zhinxmin"}, {ImpliedExtsZhinxmin}}, + {{"zicfiss"}, {ImpliedExtsZicfiss}}, {{"zicntr"}, {ImpliedExtsZicntr}}, {{"zihpm"}, {ImpliedExtsZihpm}}, {{"zk"}, {ImpliedExtsZk}}, diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc index 168a63bb2d96..2bf68b7972e7 100644 --- a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc +++ b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc @@ -154,7 +154,10 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr) { return ""; llvm::sys::path::make_preferred(PathNameUTF8); - return std::string(PathNameUTF8.data()); + + SmallString<256> RealPath; + sys::fs::real_path(PathNameUTF8, RealPath); + return std::string(RealPath); } UniqueID file_status::getUniqueID() const { diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 463ec41b94e9..476d99c2a7e0 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1950,7 +1950,7 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg, unsigned Op) { unsigned TileNum = 0; if (BaseReg != AArch64::ZA) - TileNum = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + TileNum = N->getConstantOperandVal(2); if (!SelectSMETile(BaseReg, TileNum)) return; @@ -2145,8 +2145,7 @@ void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, const EVT ResTys[] = {MVT::Untyped, MVT::Other}; - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); + unsigned LaneNo = 
N->getConstantOperandVal(NumVecs + 2); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), N->getOperand(NumVecs + 3), N->getOperand(0)}; @@ -2185,8 +2184,7 @@ void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, const EVT ResTys[] = {MVT::i64, // Type of the write back register RegSeq->getValueType(0), MVT::Other}; - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); + unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, @@ -2237,8 +2235,7 @@ void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, SDValue RegSeq = createQTuple(Regs); - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); + unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), N->getOperand(NumVecs + 3), N->getOperand(0)}; @@ -2269,8 +2266,7 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, const EVT ResTys[] = {MVT::i64, // Type of the write back register MVT::Other}; - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); + unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), N->getOperand(NumVecs + 2), // Base Register @@ -2576,8 +2572,8 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, case AArch64::UBFMXri: Opc = NOpc; Opd0 = N->getOperand(0); - Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); + Immr = N->getConstantOperandVal(1); + Imms = N->getConstantOperandVal(2); return true; } // Unreachable @@ -3877,7 +3873,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { assert(isa<ConstantSDNode>(N->getOperand(2)) && "Expected a constant integer expression."); 
unsigned Reg = PMapper->Encoding; - uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + uint64_t Immed = N->getConstantOperandVal(2); CurDAG->SelectNodeTo( N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); @@ -4173,8 +4169,7 @@ bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { SDValue IRG_SP = N->getOperand(2); if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || - cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != - Intrinsic::aarch64_irg_sp) { + IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) { return false; } @@ -4183,7 +4178,7 @@ bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); SDValue FiOp = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); - int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); + int TagOffset = N->getConstantOperandVal(3); SDNode *Out = CurDAG->getMachineNode( AArch64::TAGPstack, DL, MVT::i64, @@ -4203,7 +4198,7 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { // General case for unrelated pointers in Op1 and Op2. SDLoc DL(N); - int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); + int TagOffset = N->getConstantOperandVal(3); SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, {N->getOperand(1), N->getOperand(2)}); SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, @@ -4219,7 +4214,7 @@ bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) { assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!"); // Bail when not a "cast" like insert_subvector. 
- if (cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() != 0) + if (N->getConstantOperandVal(2) != 0) return false; if (!N->getOperand(0).isUndef()) return false; @@ -4250,7 +4245,7 @@ bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) { assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!"); // Bail when not a "cast" like extract_subvector. - if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 0) + if (N->getConstantOperandVal(1) != 0) return false; // Bail when normal isel can do the job. @@ -4422,7 +4417,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { return; } case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(1); switch (IntNo) { default: break; @@ -5179,7 +5174,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { } } break; case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { default: break; @@ -5782,7 +5777,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { break; } case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(1); if (Node->getNumOperands() >= 3) VT = Node->getOperand(2)->getValueType(0); switch (IntNo) { @@ -6806,7 +6801,7 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN) return EVT(); - switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) { + switch (Root->getConstantOperandVal(1)) { default: return EVT(); case Intrinsic::aarch64_sme_ldr: diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index dffe69bdb900..102fd0c3dae2 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2196,7 +2196,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( } case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); switch (IntNo) { default: break; @@ -3922,9 +3922,9 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { // 4: bool isDataCache static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); - unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + unsigned IsWrite = Op.getConstantOperandVal(2); + unsigned Locality = Op.getConstantOperandVal(3); + unsigned IsData = Op.getConstantOperandVal(4); bool IsStream = !Locality; // When the locality number is set @@ -4973,10 +4973,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(2); - unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - unsigned Locality = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - unsigned IsStream = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); - unsigned IsData = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue(); + unsigned IsWrite = Op.getConstantOperandVal(3); + unsigned Locality = Op.getConstantOperandVal(4); + unsigned IsStream = Op.getConstantOperandVal(5); + unsigned IsData = Op.getConstantOperandVal(6); unsigned PrfOp = (IsWrite << 4) | // Load/Store bit (!IsData << 3) | // IsDataCache bit (Locality << 1) | // Cache level bits @@ -5039,7 +5039,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue 
AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. @@ -5218,8 +5218,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::aarch64_sve_ptrue: - return getPTrue(DAG, dl, Op.getValueType(), - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); + return getPTrue(DAG, dl, Op.getValueType(), Op.getConstantOperandVal(1)); case Intrinsic::aarch64_sve_clz: return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); @@ -6478,7 +6477,7 @@ static unsigned getIntrinsicID(const SDNode *N) { default: return Intrinsic::not_intrinsic; case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); if (IID < Intrinsic::num_intrinsics) return IID; return Intrinsic::not_intrinsic; @@ -10009,7 +10008,7 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64); while (Depth--) @@ -10076,7 +10075,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDValue ReturnAddress; if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); @@ -10942,7 +10941,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue 
Op, Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); // Update the minimum and maximum lane number seen. - unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); + unsigned EltNo = V.getConstantOperandVal(1); Source->MinElt = std::min(Source->MinElt, EltNo); Source->MaxElt = std::max(Source->MaxElt, EltNo); } @@ -13329,7 +13328,7 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, "Only cases that extract a fixed length vector are supported!"); EVT InVT = Op.getOperand(0).getValueType(); - unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Idx = Op.getConstantOperandVal(1); unsigned Size = Op.getValueSizeInBits(); // If we don't have legal types yet, do nothing @@ -13375,7 +13374,7 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, "Only expect to lower inserts into scalable vectors!"); EVT InVT = Op.getOperand(1).getValueType(); - unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Idx = Op.getConstantOperandVal(2); SDValue Vec0 = Op.getOperand(0); SDValue Vec1 = Op.getOperand(1); @@ -13715,11 +13714,10 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs); - bool IsSplatUniform = - SrcVT.getVectorElementType().getSizeInBits() >= SplatBitSize; - bool IsZero = IsCnst && SplatValue == 0 && IsSplatUniform; - bool IsOne = IsCnst && SplatValue == 1 && IsSplatUniform; - bool IsMinusOne = IsCnst && SplatValue.isAllOnes() && IsSplatUniform; + bool IsZero = IsCnst && SplatValue == 0; + bool IsOne = + IsCnst && SrcVT.getScalarSizeInBits() == SplatBitSize && SplatValue == 1; + bool IsMinusOne = IsCnst && SplatValue.isAllOnes(); if (SrcVT.getVectorElementType().isFloatingPoint()) { switch (CC) { @@ -14247,7 +14245,7 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, assert(VT != MVT::i64 && "Expected illegal VSCALE node"); SDLoc 
DL(Op); - APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue(); + APInt MulImm = Op.getConstantOperandAPInt(0); return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sext(64)), DL, VT); } @@ -18343,7 +18341,7 @@ static bool isEssentiallyExtractHighSubvector(SDValue N) { return false; if (N.getOperand(0).getValueType().isScalableVector()) return false; - return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() == + return N.getConstantOperandAPInt(1) == N.getOperand(0).getValueType().getVectorNumElements() / 2; } @@ -18399,8 +18397,8 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) { // TODO: we want the operands of the Cmp not the csel SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3); SetCCInfo.IsAArch64 = true; - SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>( - cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + SetCCInfo.Info.AArch64.CC = + static_cast<AArch64CC::CondCode>(Op.getConstantOperandVal(2)); // Check that the operands matches the constraints: // (1) Both operands must be constants. 
@@ -21585,7 +21583,7 @@ static SDValue performNEONPostLDSTCombine(SDNode *N, bool IsDupOp = false; unsigned NewOpc = 0; unsigned NumVecs = 0; - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(1); switch (IntNo) { default: llvm_unreachable("unexpected intrinsic for Neon base update"); case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post; @@ -22501,7 +22499,7 @@ static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { - unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned Bit = N->getConstantOperandVal(2); bool Invert = false; SDValue TestSrc = N->getOperand(1); SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG); @@ -23789,7 +23787,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performMULLCombine(N, DCI, DAG); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { case Intrinsic::aarch64_sve_prfb_gather_scalar_offset: return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfh_gather_scalar_offset: @@ -23940,8 +23938,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED); case Intrinsic::aarch64_rndr: case Intrinsic::aarch64_rndrrs: { - unsigned IntrinsicID = - cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntrinsicID = N->getConstantOperandVal(1); auto Register = (IntrinsicID == Intrinsic::aarch64_rndr ? 
AArch64SysReg::RNDR : AArch64SysReg::RNDRRS); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index e3220d103ae0..a21b4b77166e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -896,7 +896,7 @@ static DecodeStatus DecodePCRelLabel16(MCInst &Inst, unsigned Imm, // Immediate is encoded as the top 16-bits of an unsigned 18-bit negative // PC-relative offset. uint64_t ImmVal = Imm; - if (ImmVal < 0 || ImmVal > (1 << 16)) + if (ImmVal > (1 << 16)) return Fail; ImmVal = -ImmVal; if (!Decoder->tryAddingSymbolicOperand(Inst, (ImmVal << 2), Addr, diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 8b909f53c844..1d0e8be80d07 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -623,6 +623,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor({s32, s64}) .legalFor(PackedVectorAllTypeList) .maxScalar(0, s64) + .clampNumElements(0, v8s8, v16s8) + .clampNumElements(0, v4s16, v8s16) + .clampNumElements(0, v2s32, v4s32) + .clampMaxNumElements(0, s64, 2) .lower(); // FP conversions @@ -1406,7 +1410,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::aarch64_neon_umax: case Intrinsic::aarch64_neon_umin: case Intrinsic::aarch64_neon_fmax: - case Intrinsic::aarch64_neon_fmin: { + case Intrinsic::aarch64_neon_fmin: + case Intrinsic::aarch64_neon_fmaxnm: + case Intrinsic::aarch64_neon_fminnm: { MachineIRBuilder MIB(MI); if (IntrinsicID == Intrinsic::aarch64_neon_smax) MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), 
MI.getOperand(3)); @@ -1422,6 +1428,12 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, else if (IntrinsicID == Intrinsic::aarch64_neon_fmin) MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)}, {MI.getOperand(2), MI.getOperand(3)}); + else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm) + MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)}, + {MI.getOperand(2), MI.getOperand(3)}); + else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm) + MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)}, + {MI.getOperand(2), MI.getOperand(3)}); MI.eraseFromParent(); return true; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b0eac567ec9f..bffea82ab8f4 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -377,7 +377,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, return Subtarget->getRegisterInfo()->getRegClass(RegClass); } case AMDGPU::REG_SEQUENCE: { - unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned RCID = N->getConstantOperandVal(0); const TargetRegisterClass *SuperRC = Subtarget->getRegisterInfo()->getRegClass(RCID); @@ -724,7 +724,7 @@ bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const { assert(N->getOpcode() == ISD::AND); - const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + const APInt &RHS = N->getConstantOperandAPInt(1); if (RHS.countr_one() >= ShAmtBits) return true; @@ -2672,7 +2672,7 @@ void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) { } void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { - unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntrID = N->getConstantOperandVal(1); switch (IntrID) { case Intrinsic::amdgcn_ds_append: 
case Intrinsic::amdgcn_ds_consume: { @@ -2690,7 +2690,7 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { } void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { - unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntrID = N->getConstantOperandVal(0); unsigned Opcode; switch (IntrID) { case Intrinsic::amdgcn_wqm: @@ -2731,7 +2731,7 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { } void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { - unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntrID = N->getConstantOperandVal(1); switch (IntrID) { case Intrinsic::amdgcn_ds_gws_init: case Intrinsic::amdgcn_ds_gws_barrier: diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 541a5b62450d..8fbc90a6db9f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -682,7 +682,7 @@ static bool hasSourceMods(const SDNode *N) { case ISD::BITCAST: return false; case ISD::INTRINSIC_WO_CHAIN: { - switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) { + switch (N->getConstantOperandVal(0)) { case Intrinsic::amdgcn_interp_p1: case Intrinsic::amdgcn_interp_p2: case Intrinsic::amdgcn_interp_mov: @@ -837,7 +837,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { case ISD::TokenFactor: return true; case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntrID = N->getConstantOperandVal(0); switch (IntrID) { case Intrinsic::amdgcn_readfirstlane: case Intrinsic::amdgcn_readlane: @@ -1489,7 +1489,7 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SmallVector<SDValue, 8> Args; - unsigned Start = 
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Start = Op.getConstantOperandVal(1); EVT VT = Op.getValueType(); EVT SrcVT = Op.getOperand(0).getValueType(); @@ -2502,8 +2502,7 @@ static bool valueIsKnownNeverF32Denorm(SDValue Src) { case ISD::FFREXP: return true; case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrinsicID = - cast<ConstantSDNode>(Src.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Src.getConstantOperandVal(0); switch (IntrinsicID) { case Intrinsic::amdgcn_frexp_mant: return true; @@ -3601,7 +3600,7 @@ static SDValue simplifyMul24(SDNode *Node24, SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1); unsigned NewOpcode = Node24->getOpcode(); if (IsIntrin) { - unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue(); + unsigned IID = Node24->getConstantOperandVal(0); switch (IID) { case Intrinsic::amdgcn_mul_i24: NewOpcode = AMDGPUISD::MUL_I24; @@ -3821,7 +3820,7 @@ SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine( SDNode *N, DAGCombinerInfo &DCI) const { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); switch (IID) { case Intrinsic::amdgcn_mul_i24: case Intrinsic::amdgcn_mul_u24: @@ -5652,7 +5651,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( break; } case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IID = Op.getConstantOperandVal(0); switch (IID) { case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::amdgcn_workitem_id_y: @@ -5834,8 +5833,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, return SNaN; } case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrinsicID - = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Op.getConstantOperandVal(0); // TODO: Handle more intrinsics switch (IntrinsicID) { case 
Intrinsic::amdgcn_cubeid: diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index eaf72d7157ee..36e07d944c94 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -642,6 +642,7 @@ defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op; defm int_amdgcn_ds_fadd_v2bf16 : noret_op; +defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op; defm int_amdgcn_flat_atomic_fmin_num : noret_op; defm int_amdgcn_flat_atomic_fmax_num : noret_op; defm int_amdgcn_global_atomic_fmin_num : noret_op; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index c9412f720c62..fba060464a6e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4690,6 +4690,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_global_atomic_fadd_v2bf16: case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: + case Intrinsic::amdgcn_global_atomic_ordered_add_b64: return getDefaultMappingAllVGPR(MI); case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td index e83e644d13f3..2d8dc9d47225 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -11,7 +11,7 @@ def SGPRRegBank : RegisterBank<"SGPR", >; def VGPRRegBank : RegisterBank<"VGPR", - [VGPR_LO16, 
VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024] + [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024] >; // It is helpful to distinguish conditions from ordinary SGPRs. diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index db5d2bbcf5bb..fc47b02c98e0 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -346,8 +346,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage( IsSGPR = true; Width = 1; } else if (AMDGPU::VGPR_32RegClass.contains(Reg) || - AMDGPU::VGPR_LO16RegClass.contains(Reg) || - AMDGPU::VGPR_HI16RegClass.contains(Reg)) { + AMDGPU::VGPR_16RegClass.contains(Reg)) { IsSGPR = false; Width = 1; } else if (AMDGPU::AGPR_32RegClass.contains(Reg) || diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp index 459400e3359c..79e9312034da 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp @@ -85,7 +85,6 @@ public: AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<UniformityInfoWrapperPass>(); AU.setPreservesCFG(); } }; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index beb670669581..4cc8871a00fe 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -243,6 
+243,7 @@ def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>; def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>; def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>; def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>; +def : SourceOfDivergence<int_amdgcn_global_atomic_ordered_add_b64>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 9bc3ba161c9e..1bfb7c0edd80 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -109,9 +109,6 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const { // FIXME: preserve PostDominatorTreeWrapperPass } - // No divergent values are changed, only blocks and branch edges. 
- AU.addPreserved<UniformityInfoWrapperPass>(); - // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 3b69a37728ea..abd7e911beef 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5416,11 +5416,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val, ValRange); } else if (ID == ".amdhsa_shared_vgpr_count") { - if (IVersion.Major < 10) - return Error(IDRange.Start, "directive requires gfx10+", IDRange); + if (IVersion.Major < 10 || IVersion.Major >= 12) + return Error(IDRange.Start, "directive requires gfx10 or gfx11", + IDRange); SharedVGPRCount = Val; PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, - COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, + COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( @@ -5522,7 +5523,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { (AccumOffset / 4 - 1)); } - if (IVersion.Major >= 10) { + if (IVersion.Major >= 10 && IVersion.Major < 12) { // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { return TokError("shared_vgpr_count directive not valid on " diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 7939d0036568..67be7b0fd642 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1284,9 +1284,8 @@ 
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx, bool IsHi) const { - unsigned RCID = - IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID; - return createRegOperand(RCID, RegIdx); + unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0); + return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16); } // Decode Literals for insts which always have a literal in the encoding @@ -2000,34 +1999,60 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1) return MCDisassembler::Fail; } else if (isGFX10Plus()) { - if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) { - PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count", - COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); + // Bits [0-3]. + if (!isGFX12Plus()) { + if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) { + PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count", + COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT); + } else { + PRINT_PSEUDO_DIRECTIVE_COMMENT( + "SHARED_VGPR_COUNT", + COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT); + } } else { - PRINT_PSEUDO_DIRECTIVE_COMMENT( - "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0) + return MCDisassembler::Fail; } - if (isGFX11Plus()) { + // Bits [4-11]. 
+ if (isGFX11()) { PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE", - COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE); + COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE); PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START", - COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START); + COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START); PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END", - COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END); + COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END); + } else if (isGFX12Plus()) { + PRINT_PSEUDO_DIRECTIVE_COMMENT( + "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE); + } else { + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED1) + return MCDisassembler::Fail; + } + + // Bits [12]. + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2) + return MCDisassembler::Fail; + + // Bits [13]. + if (isGFX12Plus()) { + PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN", + COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN); } else { - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0) + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3) return MCDisassembler::Fail; } - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1) + // Bits [14-30]. + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4) return MCDisassembler::Fail; + // Bits [31]. 
if (isGFX11Plus()) { PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP", - COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START); + COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP); } else { - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2) + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED5) return MCDisassembler::Fail; } } else if (FourByteBuffer) { diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td index 0dd2b3f5c2c9..615f8cd54d8f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -926,9 +926,11 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_usho defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; -} // End is_flat_global = 1 - +let SubtargetPredicate = isGFX12Plus in { + defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>; +} // End SubtargetPredicate = isGFX12Plus +} // End is_flat_global = 1 let SubtargetPredicate = HasFlatScratchInsts in { defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>; @@ -1529,6 +1531,10 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64> defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>; +let OtherPredicates = [isGFX12Plus] in { + defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>; +} + let OtherPredicates = [isGFX10Plus] in { defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", 
"atomic_load_fmax_global", f32>; @@ -2654,6 +2660,7 @@ defm GLOBAL_ATOMIC_DEC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_A defm GLOBAL_ATOMIC_MIN_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">; defm GLOBAL_ATOMIC_MAX_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">; defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">; +defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073, "GLOBAL_ATOMIC_ORDERED_ADD_B64", "global_atomic_ordered_add_b64">; // ENC_VSCRATCH. defm SCRATCH_LOAD_U8 : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index a855cf585205..e135a4e25dd1 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -475,8 +475,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS); + } + if (IVersion.Major >= 10 && IVersion.Major < 12) { PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3, - amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); + amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT); } if (IVersion.Major >= 12) PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1, diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index c1ba9c514874..9a2fb0bc37b2 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp 
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -424,8 +424,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const return lowerADDRSPACECAST(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); - unsigned IntrinsicID = - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned IntrinsicID = Op.getConstantOperandVal(1); switch (IntrinsicID) { case Intrinsic::r600_store_swizzle: { SDLoc DL(Op); @@ -449,8 +448,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const break; } case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrinsicID = - cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Op.getConstantOperandVal(0); EVT VT = Op.getValueType(); SDLoc DL(Op); switch (IntrinsicID) { diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index fc119aa61d01..0e857e6ac71b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1240,6 +1240,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_fmax_num: + case Intrinsic::amdgcn_global_atomic_ordered_add_b64: case Intrinsic::amdgcn_flat_atomic_fadd: case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmax: @@ -5304,7 +5305,7 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op, assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 || VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 || - VT == MVT::v32f32); + VT == MVT::v32f32 || VT == MVT::v32f16 || VT == MVT::v32i16); SDValue Lo0, Hi0; SDValue Op0 = Op.getOperand(0); @@ -5388,7 +5389,7 @@ SDValue 
SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return SDValue(); // Get the rounding mode from the last operand - int RoundMode = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + int RoundMode = Op.getConstantOperandVal(1); if (RoundMode == (int)RoundingMode::TowardPositive) Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD; else if (RoundMode == (int)RoundingMode::TowardNegative) @@ -5698,7 +5699,7 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); switch (IID) { case Intrinsic::amdgcn_make_buffer_rsrc: Results.push_back(lowerPointerAsRsrcIntrin(N, DAG)); @@ -5836,7 +5837,7 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) { unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const { if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) { - switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) { + switch (Intr->getConstantOperandVal(1)) { case Intrinsic::amdgcn_if: return AMDGPUISD::IF; case Intrinsic::amdgcn_else: @@ -5985,7 +5986,7 @@ SDValue SITargetLowering::LowerRETURNADDR(SDValue Op, MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); // Checking the depth - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) + if (Op.getConstantOperandVal(0) != 0) return DAG.getConstant(0, DL, VT); MachineFunction &MF = DAG.getMachineFunction(); @@ -7634,7 +7635,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Op.getConstantOperandVal(0); // TODO: Should this propagate fast-math-flags? 
@@ -7788,7 +7789,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(), SDLoc(Op), MVT::i32); case Intrinsic::amdgcn_s_buffer_load: { - unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned CPol = Op.getConstantOperandVal(3); if (CPol & ~((Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12) ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12)) @@ -8038,7 +8039,7 @@ SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG, SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { - unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned IntrID = Op.getConstantOperandVal(1); SDLoc DL(Op); switch (IntrID) { @@ -8134,8 +8135,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, } case Intrinsic::amdgcn_buffer_load: case Intrinsic::amdgcn_buffer_load_format: { - unsigned Glc = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); - unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue(); + unsigned Glc = Op.getConstantOperandVal(5); + unsigned Slc = Op.getConstantOperandVal(6); unsigned IdxEn = getIdxEn(Op.getOperand(3)); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -8223,10 +8224,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, EVT LoadVT = Op.getValueType(); auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget); - unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue(); - unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue(); - unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue(); - unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue(); + unsigned Dfmt = Op.getConstantOperandVal(7); + unsigned Nfmt = Op.getConstantOperandVal(8); + unsigned Glc = Op.getConstantOperandVal(9); + unsigned Slc = Op.getConstantOperandVal(10); unsigned IdxEn = 
getIdxEn(Op.getOperand(3)); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -8313,7 +8314,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_buffer_atomic_or: case Intrinsic::amdgcn_buffer_atomic_xor: case Intrinsic::amdgcn_buffer_atomic_fadd: { - unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue(); + unsigned Slc = Op.getConstantOperandVal(6); unsigned IdxEn = getIdxEn(Op.getOperand(4)); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -8474,7 +8475,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC); case Intrinsic::amdgcn_buffer_atomic_cmpswap: { - unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue(); + unsigned Slc = Op.getConstantOperandVal(7); unsigned IdxEn = getIdxEn(Op.getOperand(5)); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -8878,7 +8879,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Chain = Op.getOperand(0); - unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned IntrinsicID = Op.getConstantOperandVal(1); MachineFunction &MF = DAG.getMachineFunction(); switch (IntrinsicID) { @@ -8943,10 +8944,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); if (IsD16) VData = handleD16VData(VData, DAG); - unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue(); - unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue(); - unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue(); - unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue(); + unsigned Dfmt = Op.getConstantOperandVal(8); + unsigned Nfmt = Op.getConstantOperandVal(9); + unsigned Glc = Op.getConstantOperandVal(10); + unsigned Slc = Op.getConstantOperandVal(11); unsigned IdxEn = getIdxEn(Op.getOperand(4)); 
SDValue Ops[] = { Chain, @@ -9029,8 +9030,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); if (IsD16) VData = handleD16VData(VData, DAG); - unsigned Glc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue(); - unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue(); + unsigned Glc = Op.getConstantOperandVal(6); + unsigned Slc = Op.getConstantOperandVal(7); unsigned IdxEn = getIdxEn(Op.getOperand(4)); SDValue Ops[] = { Chain, @@ -12069,8 +12070,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, return false; } case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrinsicID - = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Op.getConstantOperandVal(0); // TODO: Handle more intrinsics switch (IntrinsicID) { case Intrinsic::amdgcn_cvt_pkrtz: @@ -15008,7 +15008,7 @@ void SITargetLowering::computeKnownBitsForTargetNode(const SDValue Op, unsigned Opc = Op.getOpcode(); switch (Opc) { case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IID = Op.getConstantOperandVal(0); switch (IID) { case Intrinsic::amdgcn_mbcnt_lo: case Intrinsic::amdgcn_mbcnt_hi: { @@ -15251,11 +15251,9 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N, case ISD::CALLSEQ_END: return true; case ISD::INTRINSIC_WO_CHAIN: - return AMDGPU::isIntrinsicSourceOfDivergence( - cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()); + return AMDGPU::isIntrinsicSourceOfDivergence(N->getConstantOperandVal(0)); case ISD::INTRINSIC_W_CHAIN: - return AMDGPU::isIntrinsicSourceOfDivergence( - cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()); + return AMDGPU::isIntrinsicSourceOfDivergence(N->getConstantOperandVal(1)); case AMDGPUISD::ATOMIC_CMP_SWAP: case AMDGPUISD::ATOMIC_LOAD_FMIN: case AMDGPUISD::ATOMIC_LOAD_FMAX: diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 
b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ebe23a5eac57..396d22c7ec18 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -273,8 +273,8 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, // subtract the index by one. Offset0Idx -= get(Opc0).NumDefs; Offset1Idx -= get(Opc1).NumDefs; - Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue(); - Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue(); + Offset0 = Load0->getConstantOperandVal(Offset0Idx); + Offset1 = Load1->getConstantOperandVal(Offset1Idx); return true; } @@ -955,12 +955,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg); bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg); bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg); - bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) || - AMDGPU::SReg_LO16RegClass.contains(DestReg) || - AMDGPU::AGPR_LO16RegClass.contains(DestReg); - bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || - AMDGPU::SReg_LO16RegClass.contains(SrcReg) || - AMDGPU::AGPR_LO16RegClass.contains(SrcReg); + bool DstLow = !AMDGPU::isHi(DestReg, RI); + bool SrcLow = !AMDGPU::isHi(SrcReg, RI); MCRegister NewDestReg = RI.get32BitRegister(DestReg); MCRegister NewSrcReg = RI.get32BitRegister(SrcReg); @@ -7202,6 +7198,18 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, Register DstReg = Inst.getOperand(0).getReg(); const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); + // If it's a copy of a VGPR to a physical SGPR, insert a V_READFIRSTLANE and + // hope for the best. + if (Inst.isCopy() && DstReg.isPhysical() && + RI.isVGPR(MRI, Inst.getOperand(1).getReg())) { + // TODO: Only works for 32 bit registers. 
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), + get(AMDGPU::V_READFIRSTLANE_B32), Inst.getOperand(0).getReg()) + .add(Inst.getOperand(1)); + Inst.eraseFromParent(); + return; + } + if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() && NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { // Instead of creating a copy where src and dst are the same register diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td index f9bc623abcd0..8310c6b57dad 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1487,8 +1487,18 @@ foreach Index = 0-31 in { // 16-bit bitcast def : BitConvert <i16, f16, VGPR_32>; def : BitConvert <f16, i16, VGPR_32>; +def : BitConvert <f16, bf16, VGPR_32>; +def : BitConvert <bf16, f16, VGPR_32>; + def : BitConvert <i16, f16, SReg_32>; def : BitConvert <f16, i16, SReg_32>; +def : BitConvert <f16, bf16, SReg_32>; +def : BitConvert <bf16, f16, SReg_32>; + +def : BitConvert <i16, bf16, VGPR_32>; +def : BitConvert <bf16, i16, VGPR_32>; +def : BitConvert <i16, bf16, SReg_32>; +def : BitConvert <bf16, i16, SReg_32>; // 32-bit bitcast def : BitConvert <i32, f32, VGPR_32>; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 021d797344c5..a93cf5cad411 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -330,8 +330,10 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) RegPressureIgnoredUnits.resize(getNumRegUnits()); RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin()); - for (auto Reg : AMDGPU::VGPR_HI16RegClass) - RegPressureIgnoredUnits.set(*regunits(Reg).begin()); + for (auto Reg : AMDGPU::VGPR_16RegClass) { + if 
(AMDGPU::isHi(Reg, *this)) + RegPressureIgnoredUnits.set(*regunits(Reg).begin()); + } // HACK: Until this is fully tablegen'd. static llvm::once_flag InitializeRegSplitPartsFlag; @@ -2661,7 +2663,7 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const { if (BitWidth == 1) return &AMDGPU::VReg_1RegClass; if (BitWidth == 16) - return &AMDGPU::VGPR_LO16RegClass; + return &AMDGPU::VGPR_16RegClass; if (BitWidth == 32) return &AMDGPU::VGPR_32RegClass; return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth) @@ -2808,8 +2810,6 @@ getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) { const TargetRegisterClass * SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const { - if (BitWidth == 16) - return &AMDGPU::VGPR_LO16RegClass; if (BitWidth == 32) return &AMDGPU::AV_32RegClass; return ST.needsAlignedVGPRs() @@ -3041,8 +3041,6 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, default: return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF); case AMDGPU::VGPR_32RegClassID: - case AMDGPU::VGPR_LO16RegClassID: - case AMDGPU::VGPR_HI16RegClassID: return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF)); case AMDGPU::SGPR_32RegClassID: case AMDGPU::SGPR_LO16RegClassID: diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 981da13fe089..c94b894c5841 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -376,7 +376,7 @@ def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> { let HasSGPR = 1; } -def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> { +def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add M0_LO16)> { let CopyCost = 1; let Size = 16; let isAllocatable = 0; @@ -385,7 +385,7 @@ def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add 
M0_LO16)> { // TODO: Do we need to set DwarfRegAlias on register tuples? -def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, +def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add (sequence "SGPR%u_LO16", 0, 105))> { let AllocationPriority = 0; let Size = 16; @@ -393,7 +393,7 @@ def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, let HasSGPR = 1; } -def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, +def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add (sequence "SGPR%u_HI16", 0, 105))> { let isAllocatable = 0; let Size = 16; @@ -402,7 +402,7 @@ def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, } // SGPR 32-bit registers -def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, (add (sequence "SGPR%u", 0, 105))> { // Give all SGPR classes higher priority than VGPR classes, because // we want to spill SGPRs to VGPRs. 
@@ -451,14 +451,14 @@ def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s" def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">; // Trap handler TMP 32-bit registers -def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, +def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16, v2bf16], 32, (add (sequence "TTMP%u", 0, 15))> { let isAllocatable = 0; let HasSGPR = 1; } // Trap handler TMP 16-bit registers -def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, +def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add (sequence "TTMP%u_LO16", 0, 15))> { let Size = 16; let isAllocatable = 0; @@ -584,24 +584,10 @@ class RegisterTypes<list<ValueType> reg_types> { list<ValueType> types = reg_types; } -def Reg16Types : RegisterTypes<[i16, f16]>; -def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>; +def Reg16Types : RegisterTypes<[i16, f16, bf16]>; +def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>; let HasVGPR = 1 in { -def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, - (add (sequence "VGPR%u_LO16", 0, 255))> { - let AllocationPriority = 0; - let Size = 16; - let GeneratePressureSet = 0; -} - -def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, - (add (sequence "VGPR%u_HI16", 0, 255))> { - let AllocationPriority = 0; - let Size = 16; - let GeneratePressureSet = 0; -} - // VOP3 and VINTERP can access 256 lo and 256 hi registers. 
def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, (add (interleave (sequence "VGPR%u_LO16", 0, 255), @@ -697,7 +683,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, } // AccVGPR 32-bit registers -def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, (add (sequence "AGPR%u", 0, 255))> { let AllocationPriority = 0; let Size = 32; @@ -749,7 +735,7 @@ def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">; // Register classes used as source and destination //===----------------------------------------------------------------------===// -def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, (add FP_REG, SP_REG)> { let isAllocatable = 0; let CopyCost = -1; @@ -757,7 +743,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16 let BaseClassOrder = 10000; } -def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32, +def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16, v8bf16], 32, (add PRIVATE_RSRC_REG)> { let isAllocatable = 0; let CopyCost = -1; @@ -774,7 +760,7 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32, let GeneratePressureSet = 0, HasSGPR = 1 in { // Subset of SReg_32 without M0 for SMRD instructions and alike. // See comments in SIInstructions.td for more info. 
-def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO, SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI, @@ -783,7 +769,7 @@ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2 let AllocationPriority = 0; } -def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, +def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16, XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16, TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16, @@ -796,17 +782,17 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, let BaseClassOrder = 16; } -def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, (add SReg_32_XM0_XEXEC, M0_CLASS)> { let AllocationPriority = 0; } -def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, (add SReg_32_XEXEC, EXEC_LO)> { let AllocationPriority = 0; } -def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { let AllocationPriority = 0; } @@ -814,7 +800,7 @@ def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i } // End GeneratePressureSet = 0 // 
Register class for all scalar registers (SGPRs + Special Registers) -def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, (add SReg_32_XM0, M0_CLASS)> { let AllocationPriority = 0; let HasSGPR = 1; @@ -822,13 +808,13 @@ def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], } let GeneratePressureSet = 0 in { -def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, (add SReg_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; let HasSGPR = 1; } -def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, +def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16, v4bf16], 32, (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 1; @@ -850,13 +836,13 @@ def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, let HasSGPR = 1; } -def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, +def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16], 32, (add TTMP_64Regs)> { let isAllocatable = 0; let HasSGPR = 1; } -def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, +def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32, (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> { let CopyCost = 1; @@ -864,7 +850,7 @@ def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16 let HasSGPR = 1; } -def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, +def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32, (add 
SReg_64_XEXEC, EXEC)> { let CopyCost = 1; let AllocationPriority = 1; @@ -919,11 +905,11 @@ multiclass SRegClass<int numRegs, } defm "" : SRegClass<3, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>; -defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>; +defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], SGPR_128Regs, TTMP_128Regs>; defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>; defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>; defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>; -defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>; +defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], SGPR_256Regs, TTMP_256Regs>; defm "" : SRegClass<9, [v9i32, v9f32], SGPR_288Regs, TTMP_288Regs>; defm "" : SRegClass<10, [v10i32, v10f32], SGPR_320Regs, TTMP_320Regs>; defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>; @@ -934,7 +920,7 @@ defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512 defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>; } -def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, (add VGPR_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; let HasVGPR = 1; @@ -969,15 +955,15 @@ multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> { } } -defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4], +defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4bf16, v4i16, p0, p1, p4], (add VGPR_64)>; defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>; -defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>; +defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add 
VGPR_128)>; defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>; defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>; defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>; -defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>; +defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], (add VGPR_256)>; defm VReg_288 : VRegClass<9, [v9i32, v9f32], (add VGPR_288)>; defm VReg_320 : VRegClass<10, [v10i32, v10f32], (add VGPR_320)>; defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>; @@ -1007,7 +993,7 @@ multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> { defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16], (add AGPR_64)>; defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>; -defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>; +defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add AGPR_128)>; defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>; defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>; defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>; @@ -1046,14 +1032,14 @@ def VS_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, let HasVGPR = 1; } -def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; let HasVGPR = 1; let HasSGPR = 1; } -def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, (add VGPR_32_Lo128, SReg_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; let HasVGPR = 1; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp 
b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 0f92a56237ac..a91d77175234 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2296,8 +2296,6 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { // (move from MC* level to Target* level). Return size in bits. unsigned getRegBitWidth(unsigned RCID) { switch (RCID) { - case AMDGPU::VGPR_LO16RegClassID: - case AMDGPU::VGPR_HI16RegClassID: case AMDGPU::SGPR_LO16RegClassID: case AMDGPU::AGPR_LO16RegClassID: return 16; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index d3cefb339d9e..7f52501b5d90 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -190,9 +190,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like, // because dealing with the write to high half of the register is // difficult. 
def : GCNPat < - (build_vector f16:$elt0, (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)), - (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)), - (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))), + (build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)), + (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)), + (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))), (v2f16 (mixhi_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, @@ -203,9 +203,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like, def : GCNPat < (build_vector f16:$elt0, - (AMDGPUclamp (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)), + (AMDGPUclamp (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)), (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)), - (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))), + (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))))), (v2f16 (mixhi_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, @@ -215,12 +215,12 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like, def : GCNPat < (AMDGPUclamp (build_vector - (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)), + (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)), (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)), - (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers)))), - (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)), + (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers))))), + (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)), (f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)), - (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers)))))), + (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers))))))), (v2f16 (mixhi_inst 
$hi_src0_modifiers, $hi_src0, $hi_src1_modifiers, $hi_src1, $hi_src2_modifiers, $hi_src2, @@ -243,8 +243,8 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like, >; def : GCNPat < - (build_vector f16:$elt0, (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)), - (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers))))), + (build_vector f16:$elt0, (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)), + (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers)))))), (v2f16 (mixhi_inst $src0_modifiers, $src0, $src1_modifiers, $src1, (i32 0), (i32 0), diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp index 5d9a366f5ed5..2265f5db6737 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp @@ -751,7 +751,7 @@ SDValue ARCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); - assert(cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0 && + assert(Op.getConstantOperandVal(0) == 0 && "Only support lowering frame addr of current frame."); Register FrameReg = ARI.getFrameRegister(MF); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index a0776296b8eb..ef02dc997011 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -4499,8 +4499,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, default: break; case ARM::LDRrs: case ARM::LDRBrs: { - unsigned ShOpVal = - cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + unsigned ShOpVal = DefNode->getConstantOperandVal(2); unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (ShImm == 0 || 
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) @@ -4512,8 +4511,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2LDRHs: case ARM::t2LDRSHs: { // Thumb2 mode: lsl only. - unsigned ShAmt = - cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + unsigned ShAmt = DefNode->getConstantOperandVal(2); if (ShAmt == 0 || ShAmt == 2) Latency = *Latency - 1; break; @@ -4526,8 +4524,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, default: break; case ARM::LDRrs: case ARM::LDRBrs: { - unsigned ShOpVal = - cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + unsigned ShOpVal = DefNode->getConstantOperandVal(2); unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (ShImm == 0 || ((ShImm == 1 || ShImm == 2 || ShImm == 3) && diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 984d8d3e0b08..adc429b61bbc 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2422,8 +2422,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); SDValue Chain = N->getOperand(0); - unsigned Lane = - cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); + unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs); EVT VT = N->getOperand(Vec0Idx).getValueType(); bool is64BitVector = VT.is64BitVector(); @@ -2587,7 +2586,7 @@ void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, Ops.push_back(N->getOperand(2)); // vector of base addresses - int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); + int32_t ImmValue = N->getConstantOperandVal(3); Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset if (Predicated) @@ -2622,7 +2621,7 @@ void 
ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, // The shift count if (Immediate) { - int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); + int32_t ImmValue = N->getConstantOperandVal(3); Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count } else { Ops.push_back(N->getOperand(3)); @@ -2630,7 +2629,7 @@ void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, // The immediate saturation operand, if any if (HasSaturationOperand) { - int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); + int32_t SatOp = N->getConstantOperandVal(4); int SatBit = (SatOp == 64 ? 0 : 1); Ops.push_back(getI32Imm(SatBit, Loc)); } @@ -2685,7 +2684,7 @@ void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { // and then an immediate shift count Ops.push_back(N->getOperand(1)); Ops.push_back(N->getOperand(2)); - int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); + int32_t ImmValue = N->getConstantOperandVal(3); Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count if (Predicated) @@ -4138,14 +4137,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (InGlue.getOpcode() == ARMISD::CMPZ) { if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { SDValue Int = InGlue.getOperand(0); - uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); + uint64_t ID = Int->getConstantOperandVal(1); // Handle low-overhead loops. 
if (ID == Intrinsic::loop_decrement_reg) { SDValue Elements = Int.getOperand(2); - SDValue Size = CurDAG->getTargetConstant( - cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, - MVT::i32); + SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3), + dl, MVT::i32); SDValue Args[] = { Elements, Size, Int.getOperand(0) }; SDNode *LoopDec = @@ -4715,7 +4713,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(1); switch (IntNo) { default: break; @@ -4732,9 +4730,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) { Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); SmallVector<SDValue, 5> Ops; - Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ - Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ - Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ + Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */ + Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */ + Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */ // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded // instruction will always be '1111' but it is possible in assembly language to specify @@ -5181,7 +5179,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { } case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(0); switch (IntNo) { default: break; diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp index d00b7853816e..9f3bcffc7a99 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ 
b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -4110,7 +4110,7 @@ SDValue ARMTargetLowering::LowerINTRINSIC_VOID( SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. @@ -4289,13 +4289,13 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, return Op.getOperand(0); SDLoc dl(Op); - unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; + unsigned isRead = ~Op.getConstantOperandVal(2) & 1; if (!isRead && (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) // ARMv7 with MP extension has PLDW. return Op.getOperand(0); - unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + unsigned isData = Op.getConstantOperandVal(4); if (Subtarget->isThumb()) { // Invert the bits. 
isRead = ~isRead & 1; @@ -4800,7 +4800,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) && LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) && !isSignedIntSetCC(CC)) { - unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue(); + unsigned Mask = LHS.getConstantOperandVal(1); auto *RHSC = cast<ConstantSDNode>(RHS.getNode()); uint64_t RHSV = RHSC->getZExtValue(); if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) { @@ -4823,9 +4823,8 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, isa<ConstantSDNode>(RHS) && cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U && CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) && - cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) { - unsigned ShiftAmt = - cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1; + LHS.getConstantOperandVal(1) < 31) { + unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1; SDValue Shift = DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, MVT::i32), LHS.getOperand(0), @@ -6112,7 +6111,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ EVT VT = Op.getValueType(); SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(4, dl, MVT::i32); @@ -6135,7 +6134,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); Register FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) @@ -8221,7 +8220,7 
@@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); // Update the minimum and maximum lane number seen. - unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); + unsigned EltNo = V.getConstantOperandVal(1); Source->MinElt = std::min(Source->MinElt, EltNo); Source->MaxElt = std::max(Source->MaxElt, EltNo); } @@ -9034,7 +9033,7 @@ static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, SDValue Conv = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0)); - unsigned Lane = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Lane = Op.getConstantOperandVal(2); unsigned LaneWidth = getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8; unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth; @@ -9097,7 +9096,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, SDValue Conv = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0)); - unsigned Lane = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Lane = Op.getConstantOperandVal(1); unsigned LaneWidth = getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8; SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv, @@ -10682,7 +10681,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(0); unsigned Opc = 0; if (IntNo == Intrinsic::arm_smlald) Opc = ARMISD::SMLALD; @@ -14842,14 +14841,14 @@ static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) { assert(N->getOpcode() == ARMISD::BFI); SDValue From = N->getOperand(1); - ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue(); + ToMask = ~N->getConstantOperandAPInt(2); FromMask = 
APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount()); // If the Base came from a SHR #C, we can deduce that it is really testing bit // #C in the base of the SHR. if (From->getOpcode() == ISD::SRL && isa<ConstantSDNode>(From->getOperand(1))) { - APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue(); + APInt Shift = From->getConstantOperandAPInt(1); assert(Shift.getLimitedValue() < 32 && "Shift too large!"); FromMask <<= Shift.getLimitedValue(31); From = From->getOperand(0); @@ -14908,7 +14907,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) { ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); if (!N11C) return SDValue(); - unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned InvMask = N->getConstantOperandVal(2); unsigned LSB = llvm::countr_zero(~InvMask); unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB; assert(Width < @@ -15448,8 +15447,7 @@ static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); - ARMCC::CondCodes Cond = - (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2); SDLoc dl(N); // vcmp X, 0, cc -> vcmpz X, cc @@ -15794,7 +15792,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, unsigned NewOpc = 0; unsigned NumVecs = 0; if (Target.isIntrinsic) { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(1); switch (IntNo) { default: llvm_unreachable("unexpected intrinsic for Neon base update"); @@ -16254,12 +16252,10 @@ static SDValue PerformMVEVLDCombine(SDNode *N, // For the stores, where there are multiple intrinsics we only actually want // to post-inc the last of the them. 
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); - if (IntNo == Intrinsic::arm_mve_vst2q && - cast<ConstantSDNode>(N->getOperand(5))->getZExtValue() != 1) + unsigned IntNo = N->getConstantOperandVal(1); + if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1) return SDValue(); - if (IntNo == Intrinsic::arm_mve_vst4q && - cast<ConstantSDNode>(N->getOperand(7))->getZExtValue() != 3) + if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3) return SDValue(); // Search for a use of the address operand that is an increment. @@ -16381,7 +16377,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return false; unsigned NumVecs = 0; unsigned NewOpc = 0; - unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); + unsigned IntNo = VLD->getConstantOperandVal(1); if (IntNo == Intrinsic::arm_neon_vld2lane) { NumVecs = 2; NewOpc = ARMISD::VLD2DUP; @@ -16397,8 +16393,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // First check that all the vldN-lane uses are VDUPLANEs and that the lane // numbers match the load. - unsigned VLDLaneNo = - cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); + unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3); for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); UI != UE; ++UI) { // Ignore uses of the chain result. @@ -16406,7 +16401,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { continue; SDNode *User = *UI; if (User->getOpcode() != ARMISD::VDUPLANE || - VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) + VLDLaneNo != User->getConstantOperandVal(1)) return false; } @@ -16479,7 +16474,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N, // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 
unsigned EltSize = Op.getScalarValueSizeInBits(); // The canonical VMOV for a zero vector uses a 32-bit element size. - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(0); unsigned EltBits; if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0) EltSize = 8; @@ -17479,7 +17474,7 @@ static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) { SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(0); switch (IntNo) { default: // Don't do anything for most intrinsics. @@ -17669,7 +17664,7 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N, case Intrinsic::arm_mve_addv: { // Turn this intrinsic straight into the appropriate ARMISD::VADDV node, // which allow PerformADDVecReduce to turn it into VADDLV when possible. - bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + bool Unsigned = N->getConstantOperandVal(2); unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs; return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1)); } @@ -17678,7 +17673,7 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N, case Intrinsic::arm_mve_addlv_predicated: { // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR // which recombines the two outputs into an i64 - bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + bool Unsigned = N->getConstantOperandVal(2); unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ? (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) : (Unsigned ? 
ARMISD::VADDLVpu : ARMISD::VADDLVps); @@ -18193,7 +18188,7 @@ static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate); } case ISD::INTRINSIC_W_CHAIN: { - unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue(); + unsigned IntOp = N.getConstantOperandVal(1); if (IntOp != Intrinsic::test_start_loop_iterations && IntOp != Intrinsic::loop_decrement_reg) return SDValue(); @@ -18271,7 +18266,7 @@ static SDValue PerformHWLoopCombine(SDNode *N, SDLoc dl(Int); SelectionDAG &DAG = DCI.DAG; SDValue Elements = Int.getOperand(2); - unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); + unsigned IntOp = Int->getConstantOperandVal(1); assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR) && "expected single br user"); SDNode *Br = *N->use_begin(); @@ -18305,8 +18300,8 @@ static SDValue PerformHWLoopCombine(SDNode *N, DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0)); return Res; } else { - SDValue Size = DAG.getTargetConstant( - cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32); + SDValue Size = + DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32); SDValue Args[] = { Int.getOperand(0), Elements, Size, }; SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl, DAG.getVTList(MVT::i32, MVT::Other), Args); @@ -19051,7 +19046,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { case Intrinsic::arm_neon_vld1: case Intrinsic::arm_neon_vld1x2: case Intrinsic::arm_neon_vld1x3: diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp index 196122e45ab8..e67a1e2ed509 100644 --- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp +++ 
b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp @@ -335,7 +335,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::STORE>(SDNode *N) { return false; } - int CST = (int)cast<ConstantSDNode>(BasePtr.getOperand(1))->getZExtValue(); + int CST = (int)BasePtr.getConstantOperandVal(1); SDValue Chain = ST->getChain(); EVT VT = ST->getValue().getValueType(); SDLoc DL(N); diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp index cd1dcfaea0eb..d36bfb188ed3 100644 --- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -298,8 +298,7 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { SDValue SrcHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0), DAG.getConstant(1, dl, MVT::i16)); - uint64_t ShiftAmount = - cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + uint64_t ShiftAmount = N->getConstantOperandVal(1); if (ShiftAmount == 16) { // Special case these two operations because they appear to be used by the // generic codegen parts to lower 32-bit numbers. 
@@ -367,7 +366,7 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { } } - uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + uint64_t ShiftAmount = N->getConstantOperandVal(1); SDValue Victim = N->getOperand(0); switch (Op.getOpcode()) { diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp index 909c7c005735..d8139958e9fc 100644 --- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -193,7 +193,7 @@ void BPFDAGToDAGISel::Select(SDNode *Node) { default: break; case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(1); switch (IntNo) { case Intrinsic::bpf_load_byte: case Intrinsic::bpf_load_half: @@ -469,7 +469,7 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, if (BaseV.getOpcode() != ISD::INTRINSIC_W_CHAIN) return; - unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue(); + unsigned IntNo = BaseV->getConstantOperandVal(1); uint64_t MaskV = MaskN->getZExtValue(); if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) || diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp index e3b4a2dc048a..90f70b83a02d 100644 --- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp @@ -1219,7 +1219,7 @@ SDValue CSKYTargetLowering::LowerFRAMEADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); Register FrameReg = RI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) @@ 
-1240,7 +1240,7 @@ SDValue CSKYTargetLowering::LowerRETURNADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(4, dl, MVT::i32); diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index f930015026a5..eb5c59672224 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -192,7 +192,7 @@ MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) { return nullptr; SDLoc dl(IntN); - unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue(); + unsigned IntNo = IntN->getConstantOperandVal(1); static std::map<unsigned,unsigned> LoadPciMap = { { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci }, @@ -284,18 +284,18 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) { // can provide an address of an unsigned variable to store the result of // a sign-extending intrinsic into (or the other way around). 
ISD::LoadExtType IntExt; - switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) { - case Intrinsic::hexagon_circ_ldub: - case Intrinsic::hexagon_circ_lduh: - IntExt = ISD::ZEXTLOAD; - break; - case Intrinsic::hexagon_circ_ldw: - case Intrinsic::hexagon_circ_ldd: - IntExt = ISD::NON_EXTLOAD; - break; - default: - IntExt = ISD::SEXTLOAD; - break; + switch (C->getConstantOperandVal(1)) { + case Intrinsic::hexagon_circ_ldub: + case Intrinsic::hexagon_circ_lduh: + IntExt = ISD::ZEXTLOAD; + break; + case Intrinsic::hexagon_circ_ldw: + case Intrinsic::hexagon_circ_ldd: + IntExt = ISD::NON_EXTLOAD; + break; + default: + IntExt = ISD::SEXTLOAD; + break; } if (N->getExtensionType() != IntExt) return false; @@ -325,7 +325,7 @@ bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) { return false; const SDLoc &dl(IntN); - unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue(); + unsigned IntNo = IntN->getConstantOperandVal(1); static const std::map<unsigned, unsigned> LoadBrevMap = { { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr }, @@ -366,7 +366,7 @@ bool HexagonDAGToDAGISel::SelectNewCircIntrinsic(SDNode *IntN) { return false; SDLoc DL(IntN); - unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue(); + unsigned IntNo = IntN->getConstantOperandVal(1); SmallVector<SDValue, 7> Ops; static std::map<unsigned,unsigned> LoadNPcMap = { @@ -641,7 +641,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { if (SelectNewCircIntrinsic(N)) return; - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(1); if (IntNo == Intrinsic::hexagon_V6_vgathermw || IntNo == Intrinsic::hexagon_V6_vgathermw_128B || IntNo == Intrinsic::hexagon_V6_vgathermh || @@ -665,7 +665,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { } void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { - unsigned IID = 
cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); unsigned Bits; switch (IID) { case Intrinsic::hexagon_S2_vsplatrb: diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index e08566718d7c..fb156f2583e8 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -2895,7 +2895,7 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32); unsigned Opcode; - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(1); switch (IntNo) { default: llvm_unreachable("Unexpected HVX gather intrinsic."); @@ -2934,7 +2934,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32); unsigned Opcode; - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(1); switch (IntNo) { default: llvm_unreachable("Unexpected HVX gather intrinsic."); @@ -2963,7 +2963,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { } void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); SDNode *Result; switch (IID) { case Intrinsic::hexagon_V6_vaddcarry: { diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index a7d452e7227d..51138091f4a5 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -669,8 +669,7 @@ HexagonTargetLowering::LowerINLINEASM(SDValue Op, 
SelectionDAG &DAG) const { --NumOps; // Ignore the flag operand. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - const InlineAsm::Flag Flags( - cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue()); + const InlineAsm::Flag Flags(Op.getConstantOperandVal(i)); unsigned NumVals = Flags.getNumOperandRegisters(); ++i; // Skip the ID value. @@ -729,7 +728,7 @@ SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op, SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(1); // Lower the hexagon_prefetch builtin to DCFETCH, as above. if (IntNo == Intrinsic::hexagon_prefetch) { SDValue Addr = Op.getOperand(2); @@ -1176,7 +1175,7 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(4, dl, MVT::i32); @@ -1198,7 +1197,7 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, HRI.getFrameRegister(), VT); while (Depth--) diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index db416a500f59..665e2d79c83d 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -2127,7 +2127,7 @@ HexagonTargetLowering::LowerHvxFunnelShift(SDValue 
Op, SDValue HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); SmallVector<SDValue> Ops(Op->ops().begin(), Op->ops().end()); auto Swap = [&](SDValue P) { @@ -2922,7 +2922,7 @@ SDValue HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == HexagonISD::TL_EXTEND || Op.getOpcode() == HexagonISD::TL_TRUNCATE); - unsigned Opc = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Opc = Op.getConstantOperandVal(2); return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0)); } diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp index cbb5c2b998e2..17d7ffb586f4 100644 --- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp @@ -1057,7 +1057,7 @@ SDValue LanaiTargetLowering::LowerRETURNADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); const unsigned Offset = -4; @@ -1080,7 +1080,7 @@ SDValue LanaiTargetLowering::LowerFRAMEADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Lanai::FP, VT); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); while (Depth--) { const unsigned Offset = -8; SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index 276374afee38..66a37fce5dda 
100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -85,7 +85,7 @@ class LoongArchAsmParser : public MCTargetAsmParser { // "emitLoadAddress*" functions. void emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg, const MCExpr *Symbol, SmallVectorImpl<Inst> &Insts, - SMLoc IDLoc, MCStreamer &Out); + SMLoc IDLoc, MCStreamer &Out, bool RelaxHint = false); // Helper to emit pseudo instruction "la.abs $rd, sym". void emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); @@ -748,12 +748,14 @@ bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, void LoongArchAsmParser::emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg, const MCExpr *Symbol, SmallVectorImpl<Inst> &Insts, - SMLoc IDLoc, MCStreamer &Out) { + SMLoc IDLoc, MCStreamer &Out, + bool RelaxHint) { MCContext &Ctx = getContext(); for (LoongArchAsmParser::Inst &Inst : Insts) { unsigned Opc = Inst.Opc; LoongArchMCExpr::VariantKind VK = Inst.VK; - const LoongArchMCExpr *LE = LoongArchMCExpr::create(Symbol, VK, Ctx); + const LoongArchMCExpr *LE = + LoongArchMCExpr::create(Symbol, VK, Ctx, RelaxHint); switch (Opc) { default: llvm_unreachable("unexpected opcode"); @@ -854,7 +856,7 @@ void LoongArchAsmParser::emitLoadAddressPcrel(MCInst &Inst, SMLoc IDLoc, Insts.push_back( LoongArchAsmParser::Inst(ADDI, LoongArchMCExpr::VK_LoongArch_PCALA_LO12)); - emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); + emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true); } void LoongArchAsmParser::emitLoadAddressPcrelLarge(MCInst &Inst, SMLoc IDLoc, @@ -900,7 +902,7 @@ void LoongArchAsmParser::emitLoadAddressGot(MCInst &Inst, SMLoc IDLoc, Insts.push_back( LoongArchAsmParser::Inst(LD, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12)); - emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); + emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true); } void 
LoongArchAsmParser::emitLoadAddressGotLarge(MCInst &Inst, SMLoc IDLoc, diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 4794a131edae..e14bbadf9ed2 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -286,7 +286,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UNDEF, VT, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::SETCC, VT, Legal); @@ -406,6 +406,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerWRITE_REGISTER(Op, DAG); case ISD::INSERT_VECTOR_ELT: return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: @@ -514,6 +516,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, } SDValue +LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + EVT VecTy = Op->getOperand(0)->getValueType(0); + SDValue Idx = Op->getOperand(1); + EVT EltTy = VecTy.getVectorElementType(); + unsigned NumElts = VecTy.getVectorNumElements(); + + if (isa<ConstantSDNode>(Idx) && + (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || + EltTy == MVT::f64 || + cast<ConstantSDNode>(Idx)->getZExtValue() < NumElts / 2)) + return Op; + + return SDValue(); +} + +SDValue LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { if (isa<ConstantSDNode>(Op->getOperand(2))) @@ -569,7 +588,7 @@ SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, EVT VT = 
Op.getValueType(); SDLoc DL(Op); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); int GRLenInBytes = Subtarget.getGRLen() / 8; while (Depth--) { @@ -588,7 +607,7 @@ SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, return SDValue(); // Currently only support lowering return address for current frame. - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) { + if (Op.getConstantOperandVal(0) != 0) { DAG.getContext()->emitError( "return address can only be determined for the current frame"); return SDValue(); @@ -1244,7 +1263,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG); case Intrinsic::loongarch_csrrd_w: case Intrinsic::loongarch_csrrd_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(2); return !isUInt<14>(Imm) ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, @@ -1252,7 +1271,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, } case Intrinsic::loongarch_csrwr_w: case Intrinsic::loongarch_csrwr_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(3); return !isUInt<14>(Imm) ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, @@ -1261,7 +1280,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, } case Intrinsic::loongarch_csrxchg_w: case Intrinsic::loongarch_csrxchg_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(4); return !isUInt<14>(Imm) ? 
emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, @@ -1287,7 +1306,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, {Chain, Op.getOperand(2)}); } case Intrinsic::loongarch_lddir_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(3); return !isUInt<8>(Imm) ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : Op; @@ -1295,7 +1314,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::loongarch_movfcsr2gr: { if (!Subtarget.hasBasicF()) return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG); - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(2); return !isUInt<2>(Imm) ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, @@ -1441,7 +1460,7 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, ASRT_LE_GT_CASE(asrtgt_d) #undef ASRT_LE_GT_CASE case Intrinsic::loongarch_ldpte_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(3); return !Subtarget.is64Bit() ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) @@ -1454,53 +1473,53 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, : SDValue(); case Intrinsic::loongarch_lasx_xvstelm_b: return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<5>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<5>(Op.getConstantOperandVal(5))) ? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : SDValue(); case Intrinsic::loongarch_lsx_vstelm_b: return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<4>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : SDValue(); case Intrinsic::loongarch_lasx_xvstelm_h: return (!isShiftedInt<8, 1>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<4>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 2", DAG) : SDValue(); case Intrinsic::loongarch_lsx_vstelm_h: return (!isShiftedInt<8, 1>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<3>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 2", DAG) : SDValue(); case Intrinsic::loongarch_lasx_xvstelm_w: return (!isShiftedInt<8, 2>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<3>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 4", DAG) : SDValue(); case Intrinsic::loongarch_lsx_vstelm_w: return (!isShiftedInt<8, 2>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<2>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 4", DAG) : SDValue(); case Intrinsic::loongarch_lasx_xvstelm_d: return (!isShiftedInt<8, 3>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<2>(Op.getConstantOperandVal(5))) ? 
emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 8", DAG) : SDValue(); case Intrinsic::loongarch_lsx_vstelm_d: return (!isShiftedInt<8, 3>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<1>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<1>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 8", DAG) : SDValue(); @@ -1673,7 +1692,7 @@ replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp) { const StringRef ErrorMsgOOR = "argument out of range"; - unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue(); + unsigned Imm = Node->getConstantOperandVal(2); if (!isUInt<N>(Imm)) { emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR, /*WithChain=*/false); @@ -1976,7 +1995,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( break; } case Intrinsic::loongarch_csrwr_w: { - unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); + unsigned Imm = N->getConstantOperandVal(3); if (!isUInt<14>(Imm)) { emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; @@ -1991,7 +2010,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( break; } case Intrinsic::loongarch_csrxchg_w: { - unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); + unsigned Imm = N->getConstantOperandVal(4); if (!isUInt<14>(Imm)) { emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 2d73a7394946..6f8878f9ccd5 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -279,6 +279,7 @@ private: SDValue lowerFRAMEADDR(SDValue Op, 
SelectionDAG &DAG) const; SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index ec6983d0f487..b3c11bc5423d 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1571,11 +1571,11 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; -// XVREPL128VEI_{W/D} +// XVREPLVE0_{W/D} def : Pat<(lasxsplatf32 FPR32:$fj), - (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; + (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>; def : Pat<(lasxsplatf64 FPR64:$fj), - (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; + (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>; // Loads/Stores foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { @@ -1590,42 +1590,18 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; -def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), - (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; -def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), - (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; -def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), - (f32 (EXTRACT_SUBREG 
(XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; -def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)), - (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; - -// Vector extraction with variable index. -def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), - (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, - i64:$rk), - sub_32)), - GPR), (i64 24))>; -def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), - (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, - i64:$rk), - sub_32)), - GPR), (i64 16))>; -def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), - (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), - sub_32)), - GPR)>; -def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), - (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), - sub_64)), - GPR)>; -def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), - (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; -def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), - (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; +def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)), + (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>; +def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)), + (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>; +def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)), + (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>; +def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), + (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; // vselect -def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, - (v32i8 (SplatPat_uimm8 uimm8:$imm)))), +def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)), + LASX256:$xj)), (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td 
b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index e468176885d7..5569c2cd15b5 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1731,8 +1731,8 @@ def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; // vselect -def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, - (v16i8 (SplatPat_uimm8 uimm8:$imm)))), +def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)), + LSX128:$vj)), (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index 45169becca37..d2ea062dc09a 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/EndianStream.h" @@ -120,12 +121,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { assert(MO.isExpr() && "getExprOpValue expects only expressions"); + bool RelaxCandidate = false; + bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax); const MCExpr *Expr = MO.getExpr(); MCExpr::ExprKind Kind = Expr->getKind(); LoongArch::Fixups FixupKind = LoongArch::fixup_loongarch_invalid; if (Kind == MCExpr::Target) { const LoongArchMCExpr *LAExpr = cast<LoongArchMCExpr>(Expr); + 
RelaxCandidate = LAExpr->getRelaxHint(); switch (LAExpr->getKind()) { case LoongArchMCExpr::VK_LoongArch_None: case LoongArchMCExpr::VK_LoongArch_Invalid: @@ -270,6 +274,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, Fixups.push_back( MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc())); + + // Emit an R_LARCH_RELAX if linker relaxation is enabled and LAExpr has relax + // hint. + if (EnableRelax && RelaxCandidate) { + const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx); + Fixups.push_back(MCFixup::create( + 0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_relax), MI.getLoc())); + } + return 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp index 993111552a31..82c992b1cc8c 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp @@ -25,9 +25,10 @@ using namespace llvm; #define DEBUG_TYPE "loongarch-mcexpr" -const LoongArchMCExpr * -LoongArchMCExpr::create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) { - return new (Ctx) LoongArchMCExpr(Expr, Kind); +const LoongArchMCExpr *LoongArchMCExpr::create(const MCExpr *Expr, + VariantKind Kind, MCContext &Ctx, + bool Hint) { + return new (Ctx) LoongArchMCExpr(Expr, Kind, Hint); } void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h index 0945cf82db86..93251f824103 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h @@ -67,16 +67,18 @@ public: private: const MCExpr *Expr; const VariantKind Kind; + 
const bool RelaxHint; - explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind) - : Expr(Expr), Kind(Kind) {} + explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind, bool Hint) + : Expr(Expr), Kind(Kind), RelaxHint(Hint) {} public: static const LoongArchMCExpr *create(const MCExpr *Expr, VariantKind Kind, - MCContext &Ctx); + MCContext &Ctx, bool Hint = false); VariantKind getKind() const { return Kind; } const MCExpr *getSubExpr() const { return Expr; } + bool getRelaxHint() const { return RelaxHint; } void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp index 7bd382107773..7fcc65beaa65 100644 --- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp @@ -161,6 +161,16 @@ bool M68kExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV16rf), MVT::i32, MVT::i16); + case M68k::MOVSXd16q8: + return TII->ExpandMOVSZX_RM(MIB, true, TII->get(M68k::MOV8dq), MVT::i16, + MVT::i8); + case M68k::MOVSXd32q8: + return TII->ExpandMOVSZX_RM(MIB, true, TII->get(M68k::MOV8dq), MVT::i32, + MVT::i8); + case M68k::MOVSXd32q16: + return TII->ExpandMOVSZX_RM(MIB, true, TII->get(M68k::MOV16dq), MVT::i32, + MVT::i16); + case M68k::MOVZXd16q8: return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV8dq), MVT::i16, MVT::i8); diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp index 0830cc7feb22..c4d7a0dec7f3 100644 --- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -94,11 +94,10 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine 
&TM, setOperationAction(OP, MVT::i16, Expand); } - // FIXME It would be better to use a custom lowering for (auto OP : {ISD::SMULO, ISD::UMULO}) { - setOperationAction(OP, MVT::i8, Expand); - setOperationAction(OP, MVT::i16, Expand); - setOperationAction(OP, MVT::i32, Expand); + setOperationAction(OP, MVT::i8, Custom); + setOperationAction(OP, MVT::i16, Custom); + setOperationAction(OP, MVT::i32, Custom); } for (auto OP : {ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}) @@ -1533,46 +1532,119 @@ bool M68kTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, return VT.bitsLE(MVT::i32) || Subtarget.atLeastM68020(); } -SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { - // Lower the "add/sub/mul with overflow" instruction into a regular ins plus - // a "setcc" instruction that checks the overflow flag. The "brcond" lowering - // looks for this combo and may remove the "setcc" instruction if the "setcc" - // has only one use. +static bool isOverflowArithmetic(unsigned Opcode) { + switch (Opcode) { + case ISD::UADDO: + case ISD::SADDO: + case ISD::USUBO: + case ISD::SSUBO: + case ISD::UMULO: + case ISD::SMULO: + return true; + default: + return false; + } +} + +static void lowerOverflowArithmetic(SDValue Op, SelectionDAG &DAG, + SDValue &Result, SDValue &CCR, + unsigned &CC) { SDNode *N = Op.getNode(); + EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - unsigned BaseOp = 0; - unsigned Cond = 0; SDLoc DL(Op); + + unsigned TruncOp = 0; + auto PromoteMULO = [&](unsigned ExtOp) { + // We don't have 8-bit multiplications, so promote i8 version of U/SMULO + // to i16. + // Ideally this should be done by legalizer but sadly there is no promotion + // rule for U/SMULO at this moment. 
+ if (VT == MVT::i8) { + LHS = DAG.getNode(ExtOp, DL, MVT::i16, LHS); + RHS = DAG.getNode(ExtOp, DL, MVT::i16, RHS); + VT = MVT::i16; + TruncOp = ISD::TRUNCATE; + } + }; + + bool NoOverflow = false; + unsigned BaseOp = 0; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: BaseOp = M68kISD::ADD; - Cond = M68k::COND_VS; + CC = M68k::COND_VS; break; case ISD::UADDO: BaseOp = M68kISD::ADD; - Cond = M68k::COND_CS; + CC = M68k::COND_CS; break; case ISD::SSUBO: BaseOp = M68kISD::SUB; - Cond = M68k::COND_VS; + CC = M68k::COND_VS; break; case ISD::USUBO: BaseOp = M68kISD::SUB; - Cond = M68k::COND_CS; + CC = M68k::COND_CS; + break; + case ISD::UMULO: + PromoteMULO(ISD::ZERO_EXTEND); + NoOverflow = VT != MVT::i32; + BaseOp = NoOverflow ? ISD::MUL : M68kISD::UMUL; + CC = M68k::COND_VS; + break; + case ISD::SMULO: + PromoteMULO(ISD::SIGN_EXTEND); + NoOverflow = VT != MVT::i32; + BaseOp = NoOverflow ? ISD::MUL : M68kISD::SMUL; + CC = M68k::COND_VS; break; } - // Also sets CCR. - SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i8); + SDVTList VTs; + if (NoOverflow) + VTs = DAG.getVTList(VT); + else + // Also sets CCR. + VTs = DAG.getVTList(VT, MVT::i8); + SDValue Arith = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); - SDValue SetCC = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1), - DAG.getConstant(Cond, DL, MVT::i8), - SDValue(Arith.getNode(), 1)); + Result = Arith.getValue(0); + if (TruncOp) + // Right now the only place to truncate is from i16 to i8. 
+ Result = DAG.getNode(TruncOp, DL, MVT::i8, Arith); - return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Arith, SetCC); + if (NoOverflow) + CCR = DAG.getConstant(0, DL, N->getValueType(1)); + else + CCR = Arith.getValue(1); +} + +SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { + SDNode *N = Op.getNode(); + SDLoc DL(Op); + + // Lower the "add/sub/mul with overflow" instruction into a regular ins plus + // a "setcc" instruction that checks the overflow flag. + SDValue Result, CCR; + unsigned CC; + lowerOverflowArithmetic(Op, DAG, Result, CCR, CC); + + SDValue Overflow; + if (isa<ConstantSDNode>(CCR)) { + // It's likely a result of operations that will not overflow + // hence no setcc is needed. + Overflow = CCR; + } else { + // Generate a M68kISD::SETCC. + Overflow = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1), + DAG.getConstant(CC, DL, MVT::i8), CCR); + } + + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Overflow); } /// Create a BTST (Bit Test) node - Test bit \p BitNo in \p Src and set @@ -2206,8 +2278,7 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { isNullConstant(Cond.getOperand(1).getOperand(0))) { SDValue Cmp = Cond.getOperand(1); - unsigned CondCode = - cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue(); + unsigned CondCode = Cond.getConstantOperandVal(0); if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == M68k::COND_EQ || CondCode == M68k::COND_NE)) { @@ -2269,55 +2340,12 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = Cmp; addTest = false; } - } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || - CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || - CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) { - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - unsigned MxOpcode; - unsigned MxCond; - SDVTList VTs; - switch (CondOpcode) { - case ISD::UADDO: - 
MxOpcode = M68kISD::ADD; - MxCond = M68k::COND_CS; - break; - case ISD::SADDO: - MxOpcode = M68kISD::ADD; - MxCond = M68k::COND_VS; - break; - case ISD::USUBO: - MxOpcode = M68kISD::SUB; - MxCond = M68k::COND_CS; - break; - case ISD::SSUBO: - MxOpcode = M68kISD::SUB; - MxCond = M68k::COND_VS; - break; - case ISD::UMULO: - MxOpcode = M68kISD::UMUL; - MxCond = M68k::COND_VS; - break; - case ISD::SMULO: - MxOpcode = M68kISD::SMUL; - MxCond = M68k::COND_VS; - break; - default: - llvm_unreachable("unexpected overflowing operator"); - } - if (CondOpcode == ISD::UMULO) - VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i32); - else - VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); - - SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS); - - if (CondOpcode == ISD::UMULO) - Cond = MxOp.getValue(2); - else - Cond = MxOp.getValue(1); - - CC = DAG.getConstant(MxCond, DL, MVT::i8); + } else if (isOverflowArithmetic(CondOpcode)) { + // Result is unused here. + SDValue Result; + unsigned CCode; + lowerOverflowArithmetic(Cond, DAG, Result, Cond, CCode); + CC = DAG.getConstant(CCode, DL, MVT::i8); addTest = false; } @@ -2377,6 +2405,17 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // Simple optimization when Cond is a constant to avoid generating + // M68kISD::CMOV if possible. + // TODO: Generalize this to use SelectionDAG::computeKnownBits. + if (auto *Const = dyn_cast<ConstantSDNode>(Cond.getNode())) { + const APInt &C = Const->getAPIntValue(); + if (C.countr_zero() >= 5) + return Op2; + else if (C.countr_one() >= 5) + return Op1; + } + // M68kISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. 
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); @@ -2466,61 +2505,15 @@ SDValue M68kTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { } } CondOpcode = Cond.getOpcode(); - if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || - CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO) { - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - unsigned MxOpcode; - unsigned MxCond; - SDVTList VTs; - // Keep this in sync with LowerXALUO, otherwise we might create redundant - // instructions that can't be removed afterwards (i.e. M68kISD::ADD and - // M68kISD::INC). - switch (CondOpcode) { - case ISD::UADDO: - MxOpcode = M68kISD::ADD; - MxCond = M68k::COND_CS; - break; - case ISD::SADDO: - MxOpcode = M68kISD::ADD; - MxCond = M68k::COND_VS; - break; - case ISD::USUBO: - MxOpcode = M68kISD::SUB; - MxCond = M68k::COND_CS; - break; - case ISD::SSUBO: - MxOpcode = M68kISD::SUB; - MxCond = M68k::COND_VS; - break; - case ISD::UMULO: - MxOpcode = M68kISD::UMUL; - MxCond = M68k::COND_VS; - break; - case ISD::SMULO: - MxOpcode = M68kISD::SMUL; - MxCond = M68k::COND_VS; - break; - default: - llvm_unreachable("unexpected overflowing operator"); - } + if (isOverflowArithmetic(CondOpcode)) { + SDValue Result; + unsigned CCode; + lowerOverflowArithmetic(Cond, DAG, Result, Cond, CCode); if (Inverted) - MxCond = M68k::GetOppositeBranchCondition((M68k::CondCode)MxCond); + CCode = M68k::GetOppositeBranchCondition((M68k::CondCode)CCode); + CC = DAG.getConstant(CCode, DL, MVT::i8); - if (CondOpcode == ISD::UMULO) - VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i8); - else - VTs = DAG.getVTList(LHS.getValueType(), MVT::i8); - - SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS); - - if (CondOpcode == ISD::UMULO) - Cond = MxOp.getValue(2); - else - Cond = MxOp.getValue(1); - - CC = DAG.getConstant(MxCond, DL, MVT::i8); AddTest = false; } else { unsigned CondOpc; @@ -3394,7 +3387,7 @@ SDValue 
M68kTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDNode *Node = Op.getNode(); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); - unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Align = Op.getConstantOperandVal(2); EVT VT = Node->getValueType(0); // Chain the dynamic stack allocation so that it doesn't modify the stack diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h index 02427a4e749e..d00907775f92 100644 --- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h @@ -194,6 +194,15 @@ private: unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG) const; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override { + // In many cases, `GA` doesn't give the correct offset to fold. It's + // hard to know if the real offset actually fits into the displacement + // of the perspective addressing mode. + // Thus, we disable offset folding altogether and leave that to ISel + // patterns. 
+ return false; + } + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; /// Emit a load of return address if tail call diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrArithmetic.td index 15d2049f62cb..3532e56e7417 100644 --- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrArithmetic.td +++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrArithmetic.td @@ -590,8 +590,9 @@ class MxDiMuOp_DD<string MN, bits<4> CMD, bit SIGNED = false, } // $dreg <- $dreg op $dreg -class MxDiMuOp_DD_Long<string MN, bits<10> CMD, bit SIGNED = false> - : MxInst<(outs MxDRD32:$dst), (ins MxDRD32:$src, MxDRD32:$opd), MN#"\t$opd, $dst", []> { +class MxDiMuOp_DD_Long<string MN, SDNode NODE, bits<10> CMD, bit SIGNED = false> + : MxInst<(outs MxDRD32:$dst), (ins MxDRD32:$src, MxDRD32:$opd), MN#"\t$opd, $dst", + [(set i32:$dst, CCR, (NODE i32:$src, i32:$opd))]> { let Inst = (ascend (descend CMD, /*MODE*/0b000, /*REGISTER*/(operand "$opd", 3)), @@ -622,11 +623,9 @@ class MxDiMuOp_DI<string MN, bits<4> CMD, bit SIGNED = false, } // let Constraints } // Defs = [CCR] -multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> { - let isCommutable = isComm in { - def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, /*SIGNED*/true, MxDRD32, MxDRD16>; - def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, /*SIGNED*/false, MxDRD32, MxDRD16>; - } +multiclass MxDiMuOp<string MN, bits<4> CMD> { + def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, /*SIGNED*/true, MxDRD32, MxDRD16>; + def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, /*SIGNED*/false, MxDRD32, MxDRD16>; def "S"#NAME#"d32i16" : MxDiMuOp_DI<MN#"s", CMD, /*SIGNED*/true, MxDRD32, Mxi16imm>; def "U"#NAME#"d32i16" : MxDiMuOp_DI<MN#"u", CMD, /*SIGNED*/false, MxDRD32, Mxi16imm>; @@ -634,8 +633,8 @@ multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> { defm DIV : MxDiMuOp<"div", 0x8>; -def SDIVd32d32 : MxDiMuOp_DD_Long<"divs.l", 0x131, 
/*SIGNED*/true>; -def UDIVd32d32 : MxDiMuOp_DD_Long<"divu.l", 0x131, /*SIGNED*/false>; +def SDIVd32d32 : MxDiMuOp_DD_Long<"divs.l", sdiv, 0x131, /*SIGNED*/true>; +def UDIVd32d32 : MxDiMuOp_DD_Long<"divu.l", udiv, 0x131, /*SIGNED*/false>; // This is used to cast immediates to 16-bits for operations which don't // support smaller immediate sizes. @@ -685,60 +684,53 @@ def : Pat<(urem i16:$dst, i16:$opd), (LSR32di (LSR32di (UDIVd32d16 (MOVZXd32d16 $dst), $opd), 8), 8), MxSubRegIndex16Lo)>; - -// RR i32 -def : Pat<(sdiv i32:$dst, i32:$opd), (SDIVd32d32 $dst, $opd)>; - -def : Pat<(udiv i32:$dst, i32:$opd), (UDIVd32d32 $dst, $opd)>; - - // RI i8 -def : Pat<(sdiv i8:$dst, MximmSExt8:$opd), +def : Pat<(sdiv i8:$dst, Mxi8immSExt8:$opd), (EXTRACT_SUBREG (SDIVd32i16 (MOVSXd32d8 $dst), (as_i16imm $opd)), MxSubRegIndex8Lo)>; -def : Pat<(udiv i8:$dst, MximmSExt8:$opd), +def : Pat<(udiv i8:$dst, Mxi8immSExt8:$opd), (EXTRACT_SUBREG (UDIVd32i16 (MOVZXd32d8 $dst), (as_i16imm $opd)), MxSubRegIndex8Lo)>; -def : Pat<(srem i8:$dst, MximmSExt8:$opd), +def : Pat<(srem i8:$dst, Mxi8immSExt8:$opd), (EXTRACT_SUBREG (ASR32di (ASR32di (SDIVd32i16 (MOVSXd32d8 $dst), (as_i16imm $opd)), 8), 8), MxSubRegIndex8Lo)>; -def : Pat<(urem i8:$dst, MximmSExt8:$opd), +def : Pat<(urem i8:$dst, Mxi8immSExt8:$opd), (EXTRACT_SUBREG (LSR32di (LSR32di (UDIVd32i16 (MOVZXd32d8 $dst), (as_i16imm $opd)), 8), 8), MxSubRegIndex8Lo)>; // RI i16 -def : Pat<(sdiv i16:$dst, MximmSExt16:$opd), +def : Pat<(sdiv i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (SDIVd32i16 (MOVSXd32d16 $dst), imm:$opd), MxSubRegIndex16Lo)>; -def : Pat<(udiv i16:$dst, MximmSExt16:$opd), +def : Pat<(udiv i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (UDIVd32i16 (MOVZXd32d16 $dst), imm:$opd), MxSubRegIndex16Lo)>; -def : Pat<(srem i16:$dst, MximmSExt16:$opd), +def : Pat<(srem i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (ASR32di (ASR32di (SDIVd32i16 (MOVSXd32d16 $dst), imm:$opd), 8), 8), MxSubRegIndex16Lo)>; -def : Pat<(urem i16:$dst, 
MximmSExt16:$opd), +def : Pat<(urem i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (LSR32di (LSR32di (UDIVd32i16 (MOVZXd32d16 $dst), imm:$opd), 8), 8), MxSubRegIndex16Lo)>; -defm MUL : MxDiMuOp<"mul", 0xC, 1>; +defm MUL : MxDiMuOp<"mul", 0xC>; -def SMULd32d32 : MxDiMuOp_DD_Long<"muls.l", 0x130, /*SIGNED*/true>; -def UMULd32d32 : MxDiMuOp_DD_Long<"mulu.l", 0x130, /*SIGNED*/false>; +def SMULd32d32 : MxDiMuOp_DD_Long<"muls.l", MxSMul, 0x130, /*SIGNED*/true>; +def UMULd32d32 : MxDiMuOp_DD_Long<"mulu.l", MxUMul, 0x130, /*SIGNED*/false>; // RR def : Pat<(mul i16:$dst, i16:$opd), @@ -760,17 +752,17 @@ def : Pat<(mul i32:$dst, i32:$opd), (SMULd32d32 $dst, $opd)>; // RI -def : Pat<(mul i16:$dst, MximmSExt16:$opd), +def : Pat<(mul i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (SMULd32i16 (MOVXd32d16 $dst), imm:$opd), MxSubRegIndex16Lo)>; -def : Pat<(mulhs i16:$dst, MximmSExt16:$opd), +def : Pat<(mulhs i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (ASR32di (ASR32di (SMULd32i16 (MOVXd32d16 $dst), imm:$opd), 8), 8), MxSubRegIndex16Lo)>; -def : Pat<(mulhu i16:$dst, MximmSExt16:$opd), +def : Pat<(mulhu i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (LSR32di (LSR32di (UMULd32i16 (MOVXd32d16 $dst), imm:$opd), 8), 8), MxSubRegIndex16Lo)>; @@ -881,16 +873,16 @@ foreach N = ["add", "addc"] in { (ADD32df MxDRD32:$src, MxType32.FOp:$opd)>; // add reg, imm - def : Pat<(!cast<SDNode>(N) i8: $src, MximmSExt8:$opd), + def : Pat<(!cast<SDNode>(N) i8: $src, Mxi8immSExt8:$opd), (ADD8di MxDRD8 :$src, imm:$opd)>; - def : Pat<(!cast<SDNode>(N) i16:$src, MximmSExt16:$opd), + def : Pat<(!cast<SDNode>(N) i16:$src, Mxi16immSExt16:$opd), (ADD16di MxDRD16:$src, imm:$opd)>; // LEAp is more complex and thus will be selected over normal ADD32ri but it cannot // be used with data registers, here by adding complexity to a simple ADD32ri insts // we make sure it will be selected over LEAp let AddedComplexity = 15 in { - def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd), + def : 
Pat<(!cast<SDNode>(N) i32:$src, Mxi32immSExt32:$opd), (ADD32di MxDRD32:$src, imm:$opd)>; } // AddedComplexity = 15 @@ -949,11 +941,11 @@ foreach N = ["sub", "subc"] in { (SUB32df MxDRD32:$src, MxType32.FOp:$opd)>; // sub reg, imm - def : Pat<(!cast<SDNode>(N) i8 :$src, MximmSExt8 :$opd), + def : Pat<(!cast<SDNode>(N) i8 :$src, Mxi8immSExt8 :$opd), (SUB8di MxDRD8 :$src, imm:$opd)>; - def : Pat<(!cast<SDNode>(N) i16:$src, MximmSExt16:$opd), + def : Pat<(!cast<SDNode>(N) i16:$src, Mxi16immSExt16:$opd), (SUB16di MxDRD16:$src, imm:$opd)>; - def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd), + def : Pat<(!cast<SDNode>(N) i32:$src, Mxi32immSExt32:$opd), (SUB32di MxDRD32:$src, imm:$opd)>; // sub imm, (An) @@ -982,11 +974,11 @@ multiclass BitwisePat<string INST, SDNode OP> { def : Pat<(OP i32:$src, i32:$opd), (!cast<MxInst>(INST#"32dd") MxDRD32:$src, MxDRD32:$opd)>; // op reg, imm - def : Pat<(OP i8: $src, MximmSExt8 :$opd), + def : Pat<(OP i8: $src, Mxi8immSExt8 :$opd), (!cast<MxInst>(INST#"8di") MxDRD8 :$src, imm:$opd)>; - def : Pat<(OP i16:$src, MximmSExt16:$opd), + def : Pat<(OP i16:$src, Mxi16immSExt16:$opd), (!cast<MxInst>(INST#"16di") MxDRD16:$src, imm:$opd)>; - def : Pat<(OP i32:$src, MximmSExt32:$opd), + def : Pat<(OP i32:$src, Mxi32immSExt32:$opd), (!cast<MxInst>(INST#"32di") MxDRD32:$src, imm:$opd)>; } diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td index 624093661d19..fa7e7aa0ed46 100644 --- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td +++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td @@ -554,18 +554,21 @@ def: Pat<(MxSExtLoadi16i8 MxCP_ARID:$src), (EXTRACT_SUBREG (MOVSXd32p8 MxARID8:$src), MxSubRegIndex16Lo)>; def: Pat<(MxSExtLoadi16i8 MxCP_ARII:$src), (EXTRACT_SUBREG (MOVSXd32f8 MxARII8:$src), MxSubRegIndex16Lo)>; +def: Pat<(MxSExtLoadi16i8 MxCP_PCD:$src), (MOVSXd16q8 MxPCD8:$src)>; // i32 <- sext i8 def: Pat<(i32 (sext i8:$src)), 
(MOVSXd32d8 MxDRD8:$src)>; def: Pat<(MxSExtLoadi32i8 MxCP_ARI :$src), (MOVSXd32j8 MxARI8 :$src)>; def: Pat<(MxSExtLoadi32i8 MxCP_ARID:$src), (MOVSXd32p8 MxARID8:$src)>; def: Pat<(MxSExtLoadi32i8 MxCP_ARII:$src), (MOVSXd32f8 MxARII8:$src)>; +def: Pat<(MxSExtLoadi32i8 MxCP_PCD:$src), (MOVSXd32q8 MxPCD8:$src)>; // i32 <- sext i16 def: Pat<(i32 (sext i16:$src)), (MOVSXd32d16 MxDRD16:$src)>; def: Pat<(MxSExtLoadi32i16 MxCP_ARI :$src), (MOVSXd32j16 MxARI16 :$src)>; def: Pat<(MxSExtLoadi32i16 MxCP_ARID:$src), (MOVSXd32p16 MxARID16:$src)>; def: Pat<(MxSExtLoadi32i16 MxCP_ARII:$src), (MOVSXd32f16 MxARII16:$src)>; +def: Pat<(MxSExtLoadi32i16 MxCP_PCD:$src), (MOVSXd32q16 MxPCD16:$src)>; // i16 <- zext i8 def: Pat<(i16 (zext i8:$src)), diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrFormats.td index 38d3127ac6a6..99bac7a59939 100644 --- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrFormats.td @@ -17,22 +17,22 @@ /// 03 M68000 (An) j address register indirect /// 04 M68000 (An)+ o address register indirect with postincrement /// 05 M68000 -(An) e address register indirect with predecrement -/// 06 M68000 (i,An) p address register indirect with displacement -/// 10 M68000 (i,An,Xn.L) f address register indirect with index and scale = 1 -/// 07 M68000 (i,An,Xn.W) F address register indirect with index and scale = 1 -/// 12 M68020 (i,An,Xn.L,SCALE) g address register indirect with index -/// 11 M68020 (i,An,Xn.W,SCALE) G address register indirect with index +/// 06 M68000 (d16,An) p address register indirect with displacement +/// 10 M68000 (d8,An,Xn.L) f address register indirect with index and scale = 1 +/// 07 M68000 (d8,An,Xn.W) F address register indirect with index and scale = 1 +/// 12 M68020 (d8,An,Xn.L,SCALE) g address register indirect with index +/// 11 M68020 (d8,An,Xn.W,SCALE) G address register indirect with index 
/// 14 M68020 ([bd,An],Xn.L,SCALE,od) u memory indirect postindexed mode /// 13 M68020 ([bd,An],Xn.W,SCALE,od) U memory indirect postindexed mode /// 16 M68020 ([bd,An,Xn.L,SCALE],od) v memory indirect preindexed mode /// 15 M68020 ([bd,An,Xn.W,SCALE],od) V memory indirect preindexed mode /// 20 M68000 abs.L b absolute long address /// 17 M68000 abs.W B absolute short address -/// 21 M68000 (i,PC) q program counter with displacement -/// 23 M68000 (i,PC,Xn.L) k program counter with index and scale = 1 -/// 22 M68000 (i,PC,Xn.W) K program counter with index and scale = 1 -/// 25 M68020 (i,PC,Xn.L,SCALE) l program counter with index -/// 24 M68020 (i,PC,Xn.W,SCALE) L program counter with index +/// 21 M68000 (d16,PC) q program counter with displacement +/// 23 M68000 (d8,PC,Xn.L) k program counter with index and scale = 1 +/// 22 M68000 (d8,PC,Xn.W) K program counter with index and scale = 1 +/// 25 M68020 (d8,PC,Xn.L,SCALE) l program counter with index +/// 24 M68020 (d8,PC,Xn.W,SCALE) L program counter with index /// 27 M68020 ([bd,PC],Xn.L,SCALE,od) x program counter memory indirect postindexed mode /// 26 M68020 ([bd,PC],Xn.W,SCALE,od) X program counter memory indirect postindexed mode /// 31 M68020 ([bd,PC,Xn.L,SCALE],od) y program counter memory indirect preindexed mode diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td index dc66e103361a..84eb8e56da76 100644 --- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td @@ -55,15 +55,6 @@ def MxSDT_BiArithCCRInOut : SDTypeProfile<2, 3, [ /* CCR */ SDTCisSameAs<1, 4> ]>; -// RES1, RES2, CCR <- op LHS, RHS -def MxSDT_2BiArithCCROut : SDTypeProfile<3, 2, [ - /* RES 1 */ SDTCisInt<0>, - /* RES 2 */ SDTCisSameAs<0, 1>, - /* CCR */ SDTCisVT<1, i8>, - /* LHS */ SDTCisSameAs<0, 2>, - /* RHS */ SDTCisSameAs<0, 3> -]>; - def MxSDT_CmpTest : SDTypeProfile<1, 2, [ /* CCR */ 
SDTCisVT<0, i8>, /* Ops */ SDTCisSameAs<1, 2> @@ -134,7 +125,7 @@ def MxAddX : SDNode<"M68kISD::ADDX", MxSDT_BiArithCCRInOut>; def MxSubX : SDNode<"M68kISD::SUBX", MxSDT_BiArithCCRInOut>; def MxSMul : SDNode<"M68kISD::SMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>; -def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_2BiArithCCROut, [SDNPCommutative]>; +def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>; def MxCmp : SDNode<"M68kISD::CMP", MxSDT_CmpTest>; def MxBtst : SDNode<"M68kISD::BTST", MxSDT_CmpTest>; @@ -522,9 +513,14 @@ def MxCP_PCI : ComplexPattern<iPTR, 2, "SelectPCI", // Pattern Fragments //===----------------------------------------------------------------------===// -def MximmSExt8 : PatLeaf<(i8 imm)>; -def MximmSExt16 : PatLeaf<(i16 imm)>; -def MximmSExt32 : PatLeaf<(i32 imm)>; +def Mxi8immSExt8 : PatLeaf<(i8 imm)>; +def MximmSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>; + +def Mxi16immSExt16 : PatLeaf<(i16 imm)>; +def MximmSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; + +def Mxi32immSExt32 : PatLeaf<(i32 imm)>; +def MximmSExt32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>; // Used for Shifts and Rotations, since M68k immediates in these instructions // are 1 <= i <= 8. 
Generally, if immediate is bigger than 8 it will be moved @@ -717,7 +713,7 @@ foreach size = [8, 16, 32] in { // #imm def MxOp#size#AddrMode_i : MxImmOpBundle<size, !cast<MxOperand>("Mxi"#size#"imm"), - !cast<PatFrag>("MximmSExt"#size)>; + !cast<PatFrag>("Mxi"#size#"immSExt"#size)>; } // foreach size = [8, 16, 32] foreach size = [16, 32] in { @@ -747,7 +743,7 @@ class MxType8Class<string rLet, MxOperand reg> MxAL8, MxCP_AL, MxPCD8, MxCP_PCD, MxPCI8, MxCP_PCI, - Mxi8imm, MximmSExt8, + Mxi8imm, Mxi8immSExt8, Mxloadi8>; def MxType8 : MxType8Class<?,?>; @@ -762,7 +758,7 @@ class MxType16Class<string rLet, MxOperand reg> MxAL16, MxCP_AL, MxPCD16, MxCP_PCD, MxPCI16, MxCP_PCI, - Mxi16imm, MximmSExt16, + Mxi16imm, Mxi16immSExt16, Mxloadi16>; def MxType16 : MxType16Class<?,?>; @@ -777,7 +773,7 @@ class MxType32Class<string rLet, MxOperand reg> MxAL32, MxCP_AL, MxPCD32, MxCP_PCD, MxPCI32, MxCP_PCI, - Mxi32imm, MximmSExt32, + Mxi32imm, Mxi32immSExt32, Mxloadi32>; def MxType32 : MxType32Class<?,?>; diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index ee7762c296bf..d3b59138a5a9 100644 --- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -964,7 +964,7 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op, if (!isa<ConstantSDNode>(N->getOperand(1))) return Op; - uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + uint64_t ShiftAmount = N->getConstantOperandVal(1); // Expand the stuff into sequence of shifts. 
SDValue Victim = N->getOperand(0); @@ -1269,7 +1269,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDLoc dl(Op); EVT PtrVT = Op.getValueType(); @@ -1295,7 +1295,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::R4, VT); while (Depth--) diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td index ac679c4c01bc..c0e7eef8dd9d 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -164,20 +164,20 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>, GPR_64; /// Shift Instructions let AdditionalPredicates = [NotInMicroMips] in { - def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, shl, + def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, mshl_64, immZExt6>, SRA_FM<0x38, 0>, ISA_MIPS3; - def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, srl, + def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, msrl_64, immZExt6>, SRA_FM<0x3a, 0>, ISA_MIPS3; - def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, sra, + def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, msra_64, immZExt6>, SRA_FM<0x3b, 0>, ISA_MIPS3; - def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, shl>, + def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, mshl_64>, SRLV_FM<0x14, 0>, ISA_MIPS3; - def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, sra>, + def DSRAV : 
shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, msra_64>, SRLV_FM<0x17, 0>, ISA_MIPS3; - def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, srl>, + def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, msrl_64>, SRLV_FM<0x16, 0>, ISA_MIPS3; def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd, II_DSLL32>, SRA_FM<0x3c, 0>, ISA_MIPS3; diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index 77ce8ba890a8..01b41f3b2159 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/CFG.h" @@ -31,6 +32,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -324,6 +326,24 @@ bool MipsDAGToDAGISel::SelectInlineAsmMemoryOperand( return true; } +bool MipsDAGToDAGISel::isUnneededShiftMask(SDNode *N, + unsigned ShAmtBits) const { + assert(N->getOpcode() == ISD::AND && "Unexpected opcode"); + + const APInt &RHS = N->getConstantOperandAPInt(1); + if (RHS.countr_one() >= ShAmtBits) { + LLVM_DEBUG( + dbgs() + << DEBUG_TYPE + << " Need optimize 'and & shl/srl/sra' and operand value bits is " + << RHS.countr_one() << "\n"); + return true; + } + + KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); + return (Known.Zero | RHS).countr_one() >= ShAmtBits; +} + char MipsDAGToDAGISel::ID = 0; INITIALIZE_PASS(MipsDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) diff --git 
a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h index e41cb08712ca..52207d0f6284 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h @@ -143,6 +143,7 @@ private: bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector<SDValue> &OutOps) override; + bool isUnneededShiftMask(SDNode *N, unsigned ShAmtBits) const; }; } diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp index a0cab8024386..483eba4e4f47 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -2508,7 +2508,7 @@ SDValue MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const { SDValue MipsTargetLowering:: lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // check the depth - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) { + if (Op.getConstantOperandVal(0) != 0) { DAG.getContext()->emitError( "return address can be determined only for current frame"); return SDValue(); @@ -2529,7 +2529,7 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op, return SDValue(); // check the depth - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) { + if (Op.getConstantOperandVal(0) != 0) { DAG.getContext()->emitError( "return address can be determined only for current frame"); return SDValue(); diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrCompiler.td new file mode 100644 index 000000000000..8ae3d71978b1 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrCompiler.td @@ -0,0 +1,33 @@ +//===- MipsInstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===// +// +// Part of 
the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the various pseudo instructions used by the compiler, +// as well as Pat patterns used during instruction selection. +// +//===----------------------------------------------------------------------===// + + +def shiftMask_32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{ + return isUnneededShiftMask(N, 5); +}]>; + +def shiftMask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm), [{ + return isUnneededShiftMask(N, 6); +}]>; + +foreach width = [32, 64] in { +defvar shiftMask = !cast<SDPatternOperator>("shiftMask_"#width); +def mshl_#width : PatFrags<(ops node:$src0, node:$src1), + [(shl node:$src0, node:$src1), (shl node:$src0, (shiftMask node:$src1))]>; + +def msrl_#width : PatFrags<(ops node:$src0, node:$src1), + [(srl node:$src0, node:$src1), (srl node:$src0, (shiftMask node:$src1))]>; + +def msra_#width : PatFrags<(ops node:$src0, node:$src1), + [(sra node:$src0, node:$src1), (sra node:$src0, (shiftMask node:$src1))]>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td index 75270857ea13..4b6f4b22e71b 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// // Mips profiles and nodes //===----------------------------------------------------------------------===// +include "MipsInstrCompiler.td" def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, @@ -2079,17 +2080,17 @@ let AdditionalPredicates = [NotInMicroMips] in { let 
AdditionalPredicates = [NotInMicroMips] in { /// Shift Instructions - def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, shl, + def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, mshl_32, immZExt5>, SRA_FM<0, 0>, ISA_MIPS1; - def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, srl, + def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, msrl_32, immZExt5>, SRA_FM<2, 0>, ISA_MIPS1; - def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, sra, + def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, msra_32, immZExt5>, SRA_FM<3, 0>, ISA_MIPS1; - def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, shl>, + def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, mshl_32>, SRLV_FM<4, 0>, ISA_MIPS1; - def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, II_SRLV, srl>, + def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, II_SRLV, msrl_32>, SRLV_FM<6, 0>, ISA_MIPS1; - def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, sra>, + def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, msra_32>, SRLV_FM<7, 0>, ISA_MIPS1; // Rotate Instructions diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 8c865afd4207..0ed87ee0809a 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -831,8 +831,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::INTRINSIC_W_CHAIN: { - const unsigned IntrinsicOpcode = - cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const unsigned IntrinsicOpcode = Node->getConstantOperandVal(1); switch (IntrinsicOpcode) { default: break; @@ -885,7 +884,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::INTRINSIC_WO_CHAIN: { - switch (cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue()) { + switch 
(Node->getConstantOperandVal(0)) { default: break; @@ -901,8 +900,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::INTRINSIC_VOID: { - const unsigned IntrinsicOpcode = - cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const unsigned IntrinsicOpcode = Node->getConstantOperandVal(1); switch (IntrinsicOpcode) { default: break; diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 5c34067c8888..e9788fa7ed73 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1519,7 +1519,7 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); EVT ResTy = Op->getValueType(0); APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) - << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue(); + << Op->getConstantOperandAPInt(2); SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); @@ -1528,7 +1528,7 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue(); + unsigned Intrinsic = Op->getConstantOperandVal(0); switch (Intrinsic) { default: return SDValue(); @@ -2300,7 +2300,7 @@ static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { - unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); + unsigned Intr = Op->getConstantOperandVal(1); switch (Intr) { default: return SDValue(); @@ -2375,7 +2375,7 @@ static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 
SelectionDAG &DAG) const { - unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); + unsigned Intr = Op->getConstantOperandVal(1); switch (Intr) { default: return SDValue(); diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 894a8636f458..815c46edb6fa 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -513,7 +513,7 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) { } bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { - unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(1); switch (IID) { default: return false; @@ -730,7 +730,7 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, } bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); switch (IID) { default: return false; @@ -1246,7 +1246,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { Op1 = N->getOperand(2); Mem = cast<MemIntrinsicSDNode>(N); - unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(1); switch (IID) { default: return false; diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b57d185bb638..ed96339240d9 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4902,8 +4902,7 @@ bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) { return false; if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN || - 
cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() != - Intrinsic::loop_decrement) + LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement) return false; if (!isa<ConstantSDNode>(RHS)) @@ -6011,7 +6010,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Op #3 is the Dest MBB // Op #4 is the Flag. // Prevent PPC::PRED_* from being selected into LI. - unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned PCC = N->getConstantOperandVal(1); if (EnableBranchHint) PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3)); diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 385b3b74c34d..8f27e6677afa 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2817,8 +2817,8 @@ bool PPCTargetLowering::SelectAddressRegImm( return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). - assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() - && "Cannot handle constant offsets yet!"); + assert(!N.getOperand(1).getConstantOperandVal(1) && + "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || @@ -3824,8 +3824,7 @@ SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { // Check all operands that may contain the LR. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - const InlineAsm::Flag Flags( - cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue()); + const InlineAsm::Flag Flags(Op.getConstantOperandVal(i)); unsigned NumVals = Flags.getNumOperandRegisters(); ++i; // Skip the ID value. 
@@ -10442,8 +10441,7 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG, /// information about the intrinsic. static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget) { - unsigned IntrinsicID = - cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Intrin.getConstantOperandVal(0); CompareOpc = -1; isDot = false; switch (IntrinsicID) { @@ -10728,8 +10726,7 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, /// lower, do it, otherwise return null. SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { - unsigned IntrinsicID = - cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Op.getConstantOperandVal(0); SDLoc dl(Op); @@ -10947,7 +10944,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // Unpack the result based on how the target uses it. unsigned BitNo; // Bit # of CR6. bool InvertBit; // Invert result? - switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { + switch (Op.getConstantOperandVal(1)) { default: // Can't happen, don't crash on invalid number though. case 0: // Return the value of the EQ bit of CR6. BitNo = 0; InvertBit = false; @@ -10983,7 +10980,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, // the beginning of the argument list. int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1; SDLoc DL(Op); - switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) { + switch (Op.getConstantOperandVal(ArgStart)) { case Intrinsic::ppc_cfence: { assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument."); SDValue Val = Op.getOperand(ArgStart + 1); @@ -11548,7 +11545,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return SDValue(); // Custom lower is only done for high or low doubleword. 
- int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue(); + int Idx = Op0.getConstantOperandVal(1); if (Idx % 2 != 0) return SDValue(); @@ -11717,8 +11714,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, break; } case ISD::INTRINSIC_W_CHAIN: { - if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != - Intrinsic::loop_decrement) + if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement) break; assert(N->getValueType(0) == MVT::i1 && @@ -11734,7 +11730,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, break; } case ISD::INTRINSIC_WO_CHAIN: { - switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) { + switch (N->getConstantOperandVal(0)) { case Intrinsic::ppc_pack_longdouble: Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, N->getOperand(2), N->getOperand(1))); @@ -13654,7 +13650,7 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { EVT VT; - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { default: return false; case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: @@ -13682,7 +13678,7 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, if (N->getOpcode() == ISD::INTRINSIC_VOID) { EVT VT; - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { default: return false; case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: @@ -15546,8 +15542,7 @@ SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN, } static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) { - unsigned IntrinsicID = - cast<ConstantSDNode>(Intrin.getOperand(1))->getZExtValue(); + unsigned IntrinsicID = Intrin.getConstantOperandVal(1); if (IntrinsicID == Intrinsic::ppc_stdcx) StoreWidth = 8; else if (IntrinsicID == Intrinsic::ppc_stwcx) @@ -15979,7 +15974,7 @@ SDValue 
PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::INTRINSIC_WO_CHAIN: { bool isLittleEndian = Subtarget.isLittleEndian(); - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) { @@ -15992,36 +15987,34 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, .zext(Add.getScalarValueSizeInBits()))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode *U : BasePtr->uses()) { - if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN && - cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) { - // We've found another LVSL/LVSR, and this address is an aligned - // multiple of that one. The results will be the same, so use the - // one we've just found instead. + if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN && + U->getConstantOperandVal(0) == IID) { + // We've found another LVSL/LVSR, and this address is an aligned + // multiple of that one. The results will be the same, so use the + // one we've just found instead. 
- return SDValue(U, 0); - } + return SDValue(U, 0); + } } } if (isa<ConstantSDNode>(Add->getOperand(1))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode *U : BasePtr->uses()) { - if (U->getOpcode() == ISD::ADD && - isa<ConstantSDNode>(U->getOperand(1)) && - (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - - cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) % - (1ULL << Bits) == - 0) { - SDNode *OtherAdd = U; - for (SDNode *V : OtherAdd->uses()) { - if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN && - cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() == - IID) { - return SDValue(V, 0); - } + if (U->getOpcode() == ISD::ADD && + isa<ConstantSDNode>(U->getOperand(1)) && + (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) % + (1ULL << Bits) == + 0) { + SDNode *OtherAdd = U; + for (SDNode *V : OtherAdd->uses()) { + if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN && + V->getConstantOperandVal(0) == IID) { + return SDValue(V, 0); } } } + } } } @@ -16061,30 +16054,30 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::INTRINSIC_W_CHAIN: - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { - default: - break; - case Intrinsic::ppc_altivec_vsum4sbs: - case Intrinsic::ppc_altivec_vsum4shs: - case Intrinsic::ppc_altivec_vsum4ubs: { - // These sum-across intrinsics only have a chain due to the side effect - // that they may set the SAT bit. If we know the SAT bit will not be set - // for some inputs, we can replace any uses of their chain with the input - // chain. - if (BuildVectorSDNode *BVN = - dyn_cast<BuildVectorSDNode>(N->getOperand(3))) { - APInt APSplatBits, APSplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - bool BVNIsConstantSplat = BVN->isConstantSplat( - APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, - !Subtarget.isLittleEndian()); - // If the constant splat vector is 0, the SAT bit will not be set. 
- if (BVNIsConstantSplat && APSplatBits == 0) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0)); + switch (N->getConstantOperandVal(1)) { + default: + break; + case Intrinsic::ppc_altivec_vsum4sbs: + case Intrinsic::ppc_altivec_vsum4shs: + case Intrinsic::ppc_altivec_vsum4ubs: { + // These sum-across intrinsics only have a chain due to the side effect + // that they may set the SAT bit. If we know the SAT bit will not be set + // for some inputs, we can replace any uses of their chain with the + // input chain. + if (BuildVectorSDNode *BVN = + dyn_cast<BuildVectorSDNode>(N->getOperand(3))) { + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool BVNIsConstantSplat = BVN->isConstantSplat( + APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, + !Subtarget.isLittleEndian()); + // If the constant splat vector is 0, the SAT bit will not be set. + if (BVNIsConstantSplat && APSplatBits == 0) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0)); + } + return SDValue(); } - return SDValue(); - } case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: // For little endian, VSX loads require generating lxvd2x/xxswapd. @@ -16098,7 +16091,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // For little endian, VSX stores require generating xxswapd/stxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. if (Subtarget.needsSwapsForVSXMemOps()) { - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { default: break; case Intrinsic::ppc_vsx_stxvw4x: @@ -16327,7 +16320,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; - switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { + switch (LHS.getConstantOperandVal(1)) { default: // Can't happen, don't crash on invalid number though. 
case 0: // Branch on the value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; @@ -16406,7 +16399,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; } case ISD::INTRINSIC_WO_CHAIN: { - switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { + switch (Op.getConstantOperandVal(0)) { default: break; case Intrinsic::ppc_altivec_vcmpbfp_p: case Intrinsic::ppc_altivec_vcmpeqfp_p: @@ -16433,7 +16426,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; } case ISD::INTRINSIC_W_CHAIN: { - switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { + switch (Op.getConstantOperandVal(1)) { default: break; case Intrinsic::ppc_load2r: @@ -16868,7 +16861,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, return SDValue(); SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); // Make sure the function does not optimize away the store of the RA to // the stack. @@ -16901,7 +16894,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -18086,8 +18079,7 @@ static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates. else FlagSet |= PPC::MOF_RPlusR; // Register. - } else if (RHS.getOpcode() == PPCISD::Lo && - !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue()) + } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1)) FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo. 
else FlagSet |= PPC::MOF_RPlusR; @@ -18131,7 +18123,7 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N, unsigned ParentOp = Parent->getOpcode(); if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) || (ParentOp == ISD::INTRINSIC_VOID))) { - unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue(); + unsigned ID = Parent->getConstantOperandVal(1); if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) { SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) ? Parent->getOperand(2) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index f3ea0f597eec..4759aa951664 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1832,57 +1832,18 @@ ParseStatus RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) { if (getParser().parseIdentifier(Identifier)) return ParseStatus::Failure; - // Check for CSR names conflicts. - // Custom CSR names might conflict with CSR names in privileged spec. - // E.g. - SiFive mnscratch(0x350) and privileged spec mnscratch(0x740). - auto CheckCSRNameConflict = [&]() { - if (!(RISCVSysReg::lookupSysRegByName(Identifier))) { - Error(S, "system register use requires an option to be enabled"); - return true; - } - return false; - }; - - // First check for vendor specific CSRs. 
- auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByName(Identifier); - if (SiFiveReg) { - if (SiFiveReg->haveVendorRequiredFeatures(getSTI().getFeatureBits())) { - Operands.push_back( - RISCVOperand::createSysReg(Identifier, S, SiFiveReg->Encoding)); - return ParseStatus::Success; - } - if (CheckCSRNameConflict()) - return ParseStatus::Failure; - } - auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier); if (!SysReg) + SysReg = RISCVSysReg::lookupSysRegByAltName(Identifier); + if (!SysReg) if ((SysReg = RISCVSysReg::lookupSysRegByDeprecatedName(Identifier))) Warning(S, "'" + Identifier + "' is a deprecated alias for '" + SysReg->Name + "'"); - // Check for CSR encoding conflicts. - // Custom CSR encoding might conflict with CSR encoding in privileged spec. - // E.g. - SiFive mnscratch(0x350) and privileged spec miselect(0x350). - auto CheckCSREncodingConflict = [&]() { - auto Reg = RISCVSysReg::lookupSiFiveRegByEncoding(SysReg->Encoding); - if (Reg && Reg->haveVendorRequiredFeatures(getSTI().getFeatureBits())) { - Warning(S, "'" + Identifier + "' CSR is not available on the current " + - "subtarget. Instead '" + Reg->Name + - "' CSR will be used."); - Operands.push_back( - RISCVOperand::createSysReg(Reg->Name, S, Reg->Encoding)); - return true; - } - return false; - }; - - // Accept a named SysReg if the required features are present. + // Accept a named Sys Reg if the required features are present. 
if (SysReg) { if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits())) return Error(S, "system register use requires an option to be enabled"); - if (CheckCSREncodingConflict()) - return ParseStatus::Success; Operands.push_back( RISCVOperand::createSysReg(Identifier, S, SysReg->Encoding)); return ParseStatus::Success; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 53e2b6b4d94e..ed80da14c795 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -74,6 +74,17 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + MCRegister Reg = RISCV::X0 + RegNo; + if (Reg != RISCV::X1 && Reg != RISCV::X5) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { @@ -359,6 +370,10 @@ static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address, static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder); +static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, + uint64_t Address, + const MCDisassembler *Decoder); + #include "RISCVGenDisassemblerTables.inc" static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn, @@ -373,6 +388,16 @@ static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn, return MCDisassembler::Success; } +static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, + uint64_t Address, + const MCDisassembler *Decoder) { + 
uint32_t Rs1 = fieldFromInstruction(Insn, 7, 5); + DecodeStatus Result = DecodeGPRX1X5RegisterClass(Inst, Rs1, Address, Decoder); + (void)Result; + assert(Result == MCDisassembler::Success && "Invalid register"); + return MCDisassembler::Success; +} + static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, uint32_t Insn, uint64_t Address, const MCDisassembler *Decoder) { @@ -462,10 +487,8 @@ static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address, return MCDisassembler::Success; } -// spimm is based on rlist now. static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - // TODO: check if spimm matches rlist Inst.addOperand(MCOperand::createImm(Imm)); return MCDisassembler::Success; } @@ -568,8 +591,6 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, TRY_TO_DECODE_FEATURE( RISCV::FeatureVendorXSfvfnrclipxfqf, DecoderTableXSfvfnrclipxfqf32, "SiFive FP32-to-int8 Ranged Clip Instructions opcode table"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfcie, DecoderTableXSfcie32, - "Sifive CIE custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbitmanip, DecoderTableXCVbitmanip32, "CORE-V Bit Manipulation custom opcode table"); @@ -600,6 +621,8 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, TRY_TO_DECODE_AND_ADD_SP(!STI.hasFeature(RISCV::Feature64Bit), DecoderTableRISCV32Only_16, "RISCV32Only_16 table (16-bit Instruction)"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZicfiss, DecoderTableZicfiss16, + "RVZicfiss table (Shadow Stack)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZcmt, DecoderTableRVZcmt16, "Zcmt table (16-bit Table Jump Instructions)"); TRY_TO_DECODE_FEATURE( diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 28ec999157c6..079906d1958c 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -101,7 +101,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower(); auto &RotateActions = getActionDefinitionsBuilder({G_ROTL, G_ROTR}); - if (ST.hasStdExtZbb()) { + if (ST.hasStdExtZbb() || ST.hasStdExtZbkb()) { RotateActions.legalFor({{s32, sXLen}, {sXLen, sXLen}}); // Widen s32 rotate amount to s64 so SDAG patterns will match. if (ST.is64Bit()) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 66a46a485f53..74d0db545e55 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -27,7 +27,6 @@ extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures]; namespace RISCVSysReg { #define GET_SysRegsList_IMPL -#define GET_SiFiveRegsList_IMPL #include "RISCVGenSearchableTables.inc" } // namespace RISCVSysReg diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 30ed36525e29..c32210fc1419 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -401,6 +401,7 @@ int getLoadFPImm(APFloat FPImm); namespace RISCVSysReg { struct SysReg { const char *Name; + const char *AltName; const char *DeprecatedName; unsigned Encoding; // FIXME: add these additional fields when needed. @@ -424,22 +425,9 @@ struct SysReg { return true; return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; } - - bool haveVendorRequiredFeatures(const FeatureBitset &ActiveFeatures) const { - // Not in 32-bit mode. 
- if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit]) - return false; - // No required feature associated with the system register. - if (FeaturesRequired.none()) - return false; - return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; - } }; -struct SiFiveReg : SysReg {}; - #define GET_SysRegsList_DECL -#define GET_SiFiveRegsList_DECL #include "RISCVGenSearchableTables.inc" } // end namespace RISCVSysReg diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 195dda0b8b14..bd899495812f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -121,11 +121,8 @@ void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); - auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByEncoding(Imm); auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm); - if (SiFiveReg && SiFiveReg->haveVendorRequiredFeatures(STI.getFeatureBits())) - markup(O, Markup::Register) << SiFiveReg->Name; - else if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits())) + if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits())) markup(O, Markup::Register) << SysReg->Name; else markup(O, Markup::Register) << formatImm(Imm); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallingConv.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallingConv.td index 130a6ecc143d..3dd0b3723828 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -14,7 +14,7 @@ // RISCVISelLowering.cpp (CC_RISCV). 
def CSR_ILP32_LP64 - : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; + : CalleeSavedRegs<(add X1, X8, X9, (sequence "X%u", 18, 27))>; def CSR_ILP32F_LP64F : CalleeSavedRegs<(add CSR_ILP32_LP64, @@ -29,7 +29,7 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; // Interrupt handler needs to save/restore all registers that are used, // both Caller and Callee saved registers. -def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 3, 31))>; +def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 5, 31))>; // Same as CSR_Interrupt, but including all 32-bit FP registers. def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td index a66dd135ae5f..59b202606dad 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -687,6 +687,28 @@ def HasStdExtZicond : Predicate<"Subtarget->hasStdExtZicond()">, AssemblerPredicate<(all_of FeatureStdExtZicond), "'Zicond' (Integer Conditional Operations)">; +def FeatureStdExtZimop : SubtargetFeature<"experimental-zimop", "HasStdExtZimop", "true", + "'Zimop' (May-Be-Operations)">; +def HasStdExtZimop : Predicate<"Subtarget->hasStdExtZimop()">, + AssemblerPredicate<(all_of FeatureStdExtZimop), + "'Zimop' (May-Be-Operations)">; + +def FeatureStdExtZcmop : SubtargetFeature<"experimental-zcmop", "HasStdExtZcmop", "true", + "'Zcmop' (Compressed May-Be-Operations)", + [FeatureStdExtZca]>; +def HasStdExtZcmop : Predicate<"Subtarget->hasStdExtZcmop()">, + AssemblerPredicate<(all_of FeatureStdExtZcmop), + "'Zcmop' (Compressed May-Be-Operations)">; + +def FeatureStdExtZicfiss + : SubtargetFeature<"experimental-zicfiss", "HasStdExtZicfiss", "true", + "'Zicfiss' (Shadow stack)", + [FeatureStdExtZicsr, FeatureStdExtZimop]>; +def HasStdExtZicfiss : Predicate<"Subtarget->hasStdExtZicfiss()">, + 
AssemblerPredicate<(all_of FeatureStdExtZicfiss), + "'Zicfiss' (Shadow stack)">; +def NoHasStdExtZicfiss : Predicate<"!Subtarget->hasStdExtZicfiss()">; + def FeatureStdExtSmaia : SubtargetFeature<"smaia", "HasStdExtSmaia", "true", "'Smaia' (Smaia encompasses all added CSRs and all " @@ -813,13 +835,6 @@ def HasVendorXSfvcp : Predicate<"Subtarget->hasVendorXSfvcp()">, AssemblerPredicate<(all_of FeatureVendorXSfvcp), "'XSfvcp' (SiFive Custom Vector Coprocessor Interface Instructions)">; -def FeatureVendorXSfcie - : SubtargetFeature<"xsfcie", "HasVendorXSfcie", "true", - "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">; -def HasVendorXSfcie : Predicate<"Subtarget->hasVendorXSfcie()">, - AssemblerPredicate<(all_of FeatureVendorXSfcie), - "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">; - def FeatureVendorXSfvqmaccdod : SubtargetFeature<"xsfvqmaccdod", "HasVendorXSfvqmaccdod", "true", "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))", diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 098a320c9153..bfa3bf3cc74e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1360,7 +1360,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) break; - uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + uint64_t C2 = N0.getConstantOperandVal(1); // Constant should be a mask. if (!isMask_64(C2)) @@ -1604,7 +1604,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; } case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(1); switch (IntNo) { // By default we do not custom select any intrinsic. 
default: @@ -1825,7 +1825,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; } case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(1); switch (IntNo) { case Intrinsic::riscv_vsseg2: case Intrinsic::riscv_vsseg3: diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c2508a158837..03a59f8a8b57 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1374,8 +1374,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, - ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, - ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); + ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, + ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -7235,7 +7235,7 @@ SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); while (Depth--) { int Offset = -(XLenInBytes * 2); SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, @@ -7260,7 +7260,7 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); if (Depth) { int Off = -XLenInBytes; SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); @@ -11731,7 +11731,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, break; } case 
ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(0); switch (IntNo) { default: llvm_unreachable( @@ -12850,9 +12850,9 @@ struct CombineResult; /// Helper class for folding sign/zero extensions. /// In particular, this class is used for the following combines: -/// add_vl -> vwadd(u) | vwadd(u)_w -/// sub_vl -> vwsub(u) | vwsub(u)_w -/// mul_vl -> vwmul(u) | vwmul_su +/// add | add_vl -> vwadd(u) | vwadd(u)_w +/// sub | sub_vl -> vwsub(u) | vwsub(u)_w +/// mul | mul_vl -> vwmul(u) | vwmul_su /// /// An object of this class represents an operand of the operation we want to /// combine. @@ -12897,6 +12897,8 @@ struct NodeExtensionHelper { /// E.g., for zext(a), this would return a. SDValue getSource() const { switch (OrigOperand.getOpcode()) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return OrigOperand.getOperand(0); @@ -12913,7 +12915,8 @@ struct NodeExtensionHelper { /// Get or create a value that can feed \p Root with the given extension \p /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. /// \see ::getSource(). - SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, + SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, std::optional<bool> SExt) const { if (!SExt.has_value()) return OrigOperand; @@ -12928,8 +12931,10 @@ struct NodeExtensionHelper { // If we need an extension, we should be changing the type. 
SDLoc DL(Root); - auto [Mask, VL] = getMaskAndVL(Root); + auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); switch (OrigOperand.getOpcode()) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); @@ -12969,12 +12974,15 @@ struct NodeExtensionHelper { /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { + case ISD::ADD: case RISCVISD::ADD_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; + case ISD::MUL: case RISCVISD::MUL_VL: return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; + case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -12987,7 +12995,8 @@ struct NodeExtensionHelper { /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> /// newOpcode(a, b). static unsigned getSUOpcode(unsigned Opcode) { - assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); + assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && + "SU is only supported for MUL"); return RISCVISD::VWMULSU_VL; } @@ -12995,8 +13004,10 @@ struct NodeExtensionHelper { /// newOpcode(a, b). static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { + case ISD::ADD: case RISCVISD::ADD_VL: return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; + case ISD::SUB: case RISCVISD::SUB_VL: return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; default: @@ -13006,19 +13017,33 @@ struct NodeExtensionHelper { using CombineToTry = std::function<std::optional<CombineResult>( SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, - const NodeExtensionHelper & /*RHS*/)>; + const NodeExtensionHelper & /*RHS*/, SelectionDAG &, + const RISCVSubtarget &)>; /// Check if this node needs to be fully folded or extended for all users. 
bool needToPromoteOtherUsers() const { return EnforceOneUse; } /// Helper method to set the various fields of this struct based on the /// type of \p Root. - void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { + void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { SupportsZExt = false; SupportsSExt = false; EnforceOneUse = true; CheckMask = true; - switch (OrigOperand.getOpcode()) { + unsigned Opc = OrigOperand.getOpcode(); + switch (Opc) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: { + if (OrigOperand.getValueType().isVector()) { + SupportsZExt = Opc == ISD::ZERO_EXTEND; + SupportsSExt = Opc == ISD::SIGN_EXTEND; + SDLoc DL(Root); + MVT VT = Root->getSimpleValueType(0); + std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); + } + break; + } case RISCVISD::VZEXT_VL: SupportsZExt = true; Mask = OrigOperand.getOperand(1); @@ -13074,8 +13099,16 @@ struct NodeExtensionHelper { } /// Check if \p Root supports any extension folding combines. - static bool isSupportedRoot(const SDNode *Root) { + static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) { switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(Root->getValueType(0))) + return false; + return Root->getValueType(0).isScalableVector(); + } case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: @@ -13090,9 +13123,10 @@ struct NodeExtensionHelper { } /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). 
- NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { - assert(isSupportedRoot(Root) && "Trying to build an helper with an " - "unsupported root"); + NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an " + "unsupported root"); assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); OrigOperand = Root->getOperand(OperandIdx); @@ -13108,7 +13142,7 @@ struct NodeExtensionHelper { SupportsZExt = Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; SupportsSExt = !SupportsZExt; - std::tie(Mask, VL) = getMaskAndVL(Root); + std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget); CheckMask = true; // There's no existing extension here, so we don't have to worry about // making sure it gets removed. @@ -13117,7 +13151,7 @@ struct NodeExtensionHelper { } [[fallthrough]]; default: - fillUpExtensionSupport(Root, DAG); + fillUpExtensionSupport(Root, DAG, Subtarget); break; } } @@ -13133,14 +13167,27 @@ struct NodeExtensionHelper { } /// Helper function to get the Mask and VL from \p Root. - static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { - assert(isSupportedRoot(Root) && "Unexpected root"); - return std::make_pair(Root->getOperand(3), Root->getOperand(4)); + static std::pair<SDValue, SDValue> + getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(isSupportedRoot(Root, DAG) && "Unexpected root"); + switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: { + SDLoc DL(Root); + MVT VT = Root->getSimpleValueType(0); + return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); + } + default: + return std::make_pair(Root->getOperand(3), Root->getOperand(4)); + } } /// Check if the Mask and VL of this operand are compatible with \p Root. 
- bool areVLAndMaskCompatible(const SDNode *Root) const { - auto [Mask, VL] = getMaskAndVL(Root); + bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) const { + auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); return isMaskCompatible(Mask) && isVLCompatible(VL); } @@ -13148,11 +13195,14 @@ struct NodeExtensionHelper { /// foldings that are supported by this class. static bool isCommutative(const SDNode *N) { switch (N->getOpcode()) { + case ISD::ADD: + case ISD::MUL: case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return true; + case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -13197,14 +13247,25 @@ struct CombineResult { /// Return a value that uses TargetOpcode and that can be used to replace /// Root. /// The actual replacement is *not* done in that method. - SDValue materialize(SelectionDAG &DAG) const { + SDValue materialize(SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) const { SDValue Mask, VL, Merge; - std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root); - Merge = Root->getOperand(2); + std::tie(Mask, VL) = + NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); + switch (Root->getOpcode()) { + default: + Merge = Root->getOperand(2); + break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + Merge = DAG.getUNDEF(Root->getValueType(0)); + break; + } return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), - LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS), - RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge, - Mask, VL); + LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS), + RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS), + Merge, Mask, VL); } }; @@ -13221,15 +13282,16 @@ struct CombineResult { static std::optional<CombineResult> canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, const NodeExtensionHelper &RHS, bool AllowSExt, - bool 
AllowZExt) { + bool AllowZExt, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); - if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) + if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || + !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/false), - Root, LHS, /*SExtLHS=*/false, RHS, - /*SExtRHS=*/false); + Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false); if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/true), @@ -13246,9 +13308,10 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/true); + /*AllowZExt=*/true, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) @@ -13257,8 +13320,9 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. 
static std::optional<CombineResult> canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { - if (!RHS.areVLAndMaskCompatible(Root)) + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar @@ -13282,9 +13346,10 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/false); + /*AllowZExt=*/false, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) @@ -13293,9 +13358,10 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, - /*AllowZExt=*/true); + /*AllowZExt=*/true, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) @@ -13304,10 +13370,13 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. 
static std::optional<CombineResult> canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!LHS.SupportsSExt || !RHS.SupportsZExt) return std::nullopt; - if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) + if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || + !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); @@ -13317,6 +13386,8 @@ SmallVector<NodeExtensionHelper::CombineToTry> NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { SmallVector<CombineToTry> Strategies; switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: case RISCVISD::ADD_VL: case RISCVISD::SUB_VL: // add|sub -> vwadd(u)|vwsub(u) @@ -13324,6 +13395,7 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { // add|sub -> vwadd(u)_w|vwsub(u)_w Strategies.push_back(canFoldToVW_W); break; + case ISD::MUL: case RISCVISD::MUL_VL: // mul -> vwmul(u) Strategies.push_back(canFoldToVWWithSameExtension); @@ -13354,12 +13426,14 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { /// mul_vl -> vwmul(u) | vwmul_su /// vwadd_w(u) -> vwadd(u) /// vwub_w(u) -> vwadd(u) -static SDValue -combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { +static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; - assert(NodeExtensionHelper::isSupportedRoot(N) && - "Shouldn't have called this method"); + if (!NodeExtensionHelper::isSupportedRoot(N, DAG)) + return SDValue(); + SmallVector<SDNode *> Worklist; SmallSet<SDNode *, 8> Inserted; Worklist.push_back(N); @@ -13368,11 +13442,11 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, 
TargetLowering::DAGCombinerInfo &DCI) { while (!Worklist.empty()) { SDNode *Root = Worklist.pop_back_val(); - if (!NodeExtensionHelper::isSupportedRoot(Root)) + if (!NodeExtensionHelper::isSupportedRoot(Root, DAG)) return SDValue(); - NodeExtensionHelper LHS(N, 0, DAG); - NodeExtensionHelper RHS(N, 1, DAG); + NodeExtensionHelper LHS(N, 0, DAG, Subtarget); + NodeExtensionHelper RHS(N, 1, DAG, Subtarget); auto AppendUsersIfNeeded = [&Worklist, &Inserted](const NodeExtensionHelper &Op) { if (Op.needToPromoteOtherUsers()) { @@ -13399,7 +13473,8 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (NodeExtensionHelper::CombineToTry FoldingStrategy : FoldingStrategies) { - std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS); + std::optional<CombineResult> Res = + FoldingStrategy(N, LHS, RHS, DAG, Subtarget); if (Res) { Matched = true; CombinesToApply.push_back(*Res); @@ -13428,7 +13503,7 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; ValuesToReplace.reserve(CombinesToApply.size()); for (CombineResult Res : CombinesToApply) { - SDValue NewValue = Res.materialize(DAG); + SDValue NewValue = Res.materialize(DAG, Subtarget); if (!InputRootReplacement) { assert(Res.Root == N && "First element is expected to be the current node"); @@ -14078,7 +14153,7 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, for (SDNode *U : N0->uses()) { if (U->getOpcode() != ISD::SRA || !isa<ConstantSDNode>(U->getOperand(1)) || - cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32) + U->getConstantOperandVal(1) > 32) return SDValue(); } @@ -14700,13 +14775,20 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - assert(N->getOpcode() == RISCVISD::ADD_VL); + + assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == 
ISD::ADD); + + if (N->getValueType(0).isFixedLengthVector()) + return SDValue(); + SDValue Addend = N->getOperand(0); SDValue MulOp = N->getOperand(1); - SDValue AddMergeOp = N->getOperand(2); - if (!AddMergeOp.isUndef()) - return SDValue(); + if (N->getOpcode() == RISCVISD::ADD_VL) { + SDValue AddMergeOp = N->getOperand(2); + if (!AddMergeOp.isUndef()) + return SDValue(); + } auto IsVWMulOpc = [](unsigned Opc) { switch (Opc) { @@ -14730,8 +14812,16 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, if (!MulMergeOp.isUndef()) return SDValue(); - SDValue AddMask = N->getOperand(3); - SDValue AddVL = N->getOperand(4); + auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (N->getOpcode() == ISD::ADD) { + SDLoc DL(N); + return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, + Subtarget); + } + return std::make_pair(N->getOperand(3), N->getOperand(4)); + }(N, DAG, Subtarget); + SDValue MulMask = MulOp.getOperand(3); SDValue MulVL = MulOp.getOperand(4); @@ -14997,10 +15087,18 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(ISD::AND, DL, VT, NewFMV, DAG.getConstant(~SignBit, DL, VT)); } - case ISD::ADD: + case ISD::ADD: { + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; + if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) + return V; return performADDCombine(N, DAG, Subtarget); - case ISD::SUB: + } + case ISD::SUB: { + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; return performSUBCombine(N, DAG, Subtarget); + } case ISD::AND: return performANDCombine(N, DCI, Subtarget); case ISD::OR: @@ -15008,6 +15106,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return performXORCombine(N, DAG, Subtarget); case ISD::MUL: + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; return performMULCombine(N, DAG); case ISD::FADD: case ISD::UMAX: @@ -15484,7 +15584,7 @@ SDValue 
RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } case RISCVISD::ADD_VL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI)) + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) return V; return combineToVWMACC(N, DAG, Subtarget); case RISCVISD::SUB_VL: @@ -15493,7 +15593,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: case RISCVISD::MUL_VL: - return combineBinOp_VLToVWBinOp_VL(N, DCI); + return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); case RISCVISD::VFMADD_VL: case RISCVISD::VFNMADD_VL: case RISCVISD::VFMSUB_VL: diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp index de2227f82192..e487cc8b2e20 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp @@ -198,13 +198,23 @@ char RISCVInsertWriteVXRM::ID = 0; INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME, false, false) +static bool ignoresVXRM(const MachineInstr &MI) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { + default: + return false; + case RISCV::VNCLIP_WI: + case RISCV::VNCLIPU_WI: + return MI.getOperand(3).getImm() == 0; + } +} + bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) { BlockData &BBInfo = BlockInfo[MBB.getNumber()]; bool NeedVXRMWrite = false; for (const MachineInstr &MI : MBB) { int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); - if (VXRMIdx >= 0) { + if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); if (!BBInfo.VXRMUse.isValid()) @@ -356,7 +366,7 @@ void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) { for (MachineInstr &MI : MBB) { int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); - if (VXRMIdx >= 0) { + if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { unsigned NewVXRMImm = 
MI.getOperand(VXRMIdx).getImm(); if (PendingInsert || !Info.isStatic() || diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 1dcff7eb563e..cd98438eed88 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2282,9 +2282,14 @@ bool RISCVInstrInfo::shouldClusterMemOps( return false; } - // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets - // indicate they likely share a cache line. - return ClusterSize <= 4; + unsigned CacheLineSize = + BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize(); + // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget. + CacheLineSize = CacheLineSize ? CacheLineSize : 64; + // Cluster if the memory operations are on the same or a neighbouring cache + // line, but limit the maximum ClusterSize to avoid creating too much + // additional register pressure. 
+ return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize; } // Set BaseReg (the base register operand), Offset (the byte offset being diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td index edc08187d8f7..35e8edf5d2fa 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -2111,13 +2111,16 @@ include "RISCVInstrInfoZk.td" include "RISCVInstrInfoV.td" include "RISCVInstrInfoZvk.td" -// Integer -include "RISCVInstrInfoZicbo.td" -include "RISCVInstrInfoZicond.td" - // Compressed include "RISCVInstrInfoC.td" include "RISCVInstrInfoZc.td" +include "RISCVInstrInfoZcmop.td" + +// Integer +include "RISCVInstrInfoZimop.td" +include "RISCVInstrInfoZicbo.td" +include "RISCVInstrInfoZicond.td" +include "RISCVInstrInfoZicfiss.td" //===----------------------------------------------------------------------===// // Vendor extensions diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 488ffa73f4e4..30deeaa06448 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -257,13 +257,13 @@ class SegRegClass<LMULInfo m, int nf> { // Vector register and vector group type information. 
//===----------------------------------------------------------------------===// -class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M, +class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, LMULInfo M, ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR> { ValueType Vector = Vec; ValueType Mask = Mas; int SEW = Sew; int Log2SEW = !logtwo(Sew); - VReg RegClass = Reg; + VReg RegClass = M.vrclass; LMULInfo LMul = M; ValueType Scalar = Scal; RegisterClass ScalarRegClass = ScalarReg; @@ -279,9 +279,9 @@ class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M, } class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew, - VReg Reg, LMULInfo M, ValueType Scal = XLenVT, + LMULInfo M, ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR> - : VTypeInfo<Vec, Mas, Sew, Reg, M, Scal, ScalarReg> { + : VTypeInfo<Vec, Mas, Sew, M, Scal, ScalarReg> { ValueType VectorM1 = VecM1; } @@ -289,70 +289,70 @@ defset list<VTypeInfo> AllVectors = { defset list<VTypeInfo> AllIntegerVectors = { defset list<VTypeInfo> NoGroupIntegerVectors = { defset list<VTypeInfo> FractionalGroupIntegerVectors = { - def VI8MF8: VTypeInfo<vint8mf8_t, vbool64_t, 8, VR, V_MF8>; - def VI8MF4: VTypeInfo<vint8mf4_t, vbool32_t, 8, VR, V_MF4>; - def VI8MF2: VTypeInfo<vint8mf2_t, vbool16_t, 8, VR, V_MF2>; - def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, VR, V_MF4>; - def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, VR, V_MF2>; - def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, VR, V_MF2>; + def VI8MF8: VTypeInfo<vint8mf8_t, vbool64_t, 8, V_MF8>; + def VI8MF4: VTypeInfo<vint8mf4_t, vbool32_t, 8, V_MF4>; + def VI8MF2: VTypeInfo<vint8mf2_t, vbool16_t, 8, V_MF2>; + def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, V_MF4>; + def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, V_MF2>; + def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, V_MF2>; } - def VI8M1: VTypeInfo<vint8m1_t, vbool8_t, 8, VR, V_M1>; - def VI16M1: VTypeInfo<vint16m1_t, vbool16_t, 
16, VR, V_M1>; - def VI32M1: VTypeInfo<vint32m1_t, vbool32_t, 32, VR, V_M1>; - def VI64M1: VTypeInfo<vint64m1_t, vbool64_t, 64, VR, V_M1>; + def VI8M1: VTypeInfo<vint8m1_t, vbool8_t, 8, V_M1>; + def VI16M1: VTypeInfo<vint16m1_t, vbool16_t, 16, V_M1>; + def VI32M1: VTypeInfo<vint32m1_t, vbool32_t, 32, V_M1>; + def VI64M1: VTypeInfo<vint64m1_t, vbool64_t, 64, V_M1>; } defset list<GroupVTypeInfo> GroupIntegerVectors = { - def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, VRM2, V_M2>; - def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, VRM4, V_M4>; - def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, VRM8, V_M8>; + def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, V_M2>; + def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, V_M4>; + def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, V_M8>; - def VI16M2: GroupVTypeInfo<vint16m2_t,vint16m1_t,vbool8_t, 16,VRM2, V_M2>; - def VI16M4: GroupVTypeInfo<vint16m4_t,vint16m1_t,vbool4_t, 16,VRM4, V_M4>; - def VI16M8: GroupVTypeInfo<vint16m8_t,vint16m1_t,vbool2_t, 16,VRM8, V_M8>; + def VI16M2: GroupVTypeInfo<vint16m2_t, vint16m1_t, vbool8_t, 16, V_M2>; + def VI16M4: GroupVTypeInfo<vint16m4_t, vint16m1_t, vbool4_t, 16, V_M4>; + def VI16M8: GroupVTypeInfo<vint16m8_t, vint16m1_t, vbool2_t, 16, V_M8>; - def VI32M2: GroupVTypeInfo<vint32m2_t,vint32m1_t,vbool16_t,32,VRM2, V_M2>; - def VI32M4: GroupVTypeInfo<vint32m4_t,vint32m1_t,vbool8_t, 32,VRM4, V_M4>; - def VI32M8: GroupVTypeInfo<vint32m8_t,vint32m1_t,vbool4_t, 32,VRM8, V_M8>; + def VI32M2: GroupVTypeInfo<vint32m2_t, vint32m1_t, vbool16_t, 32, V_M2>; + def VI32M4: GroupVTypeInfo<vint32m4_t, vint32m1_t, vbool8_t, 32, V_M4>; + def VI32M8: GroupVTypeInfo<vint32m8_t, vint32m1_t, vbool4_t, 32, V_M8>; - def VI64M2: GroupVTypeInfo<vint64m2_t,vint64m1_t,vbool32_t,64,VRM2, V_M2>; - def VI64M4: GroupVTypeInfo<vint64m4_t,vint64m1_t,vbool16_t,64,VRM4, V_M4>; - def VI64M8: GroupVTypeInfo<vint64m8_t,vint64m1_t,vbool8_t, 64,VRM8, V_M8>; + 
def VI64M2: GroupVTypeInfo<vint64m2_t, vint64m1_t, vbool32_t, 64, V_M2>; + def VI64M4: GroupVTypeInfo<vint64m4_t, vint64m1_t, vbool16_t, 64, V_M4>; + def VI64M8: GroupVTypeInfo<vint64m8_t, vint64m1_t, vbool8_t, 64, V_M8>; } } defset list<VTypeInfo> AllFloatVectors = { defset list<VTypeInfo> NoGroupFloatVectors = { defset list<VTypeInfo> FractionalGroupFloatVectors = { - def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, VR, V_MF4, f16, FPR16>; - def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, VR, V_MF2, f16, FPR16>; - def VF32MF2: VTypeInfo<vfloat32mf2_t,vbool64_t, 32, VR, V_MF2, f32, FPR32>; + def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, V_MF4, f16, FPR16>; + def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, V_MF2, f16, FPR16>; + def VF32MF2: VTypeInfo<vfloat32mf2_t, vbool64_t, 32, V_MF2, f32, FPR32>; } - def VF16M1: VTypeInfo<vfloat16m1_t, vbool16_t, 16, VR, V_M1, f16, FPR16>; - def VF32M1: VTypeInfo<vfloat32m1_t, vbool32_t, 32, VR, V_M1, f32, FPR32>; - def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, VR, V_M1, f64, FPR64>; + def VF16M1: VTypeInfo<vfloat16m1_t, vbool16_t, 16, V_M1, f16, FPR16>; + def VF32M1: VTypeInfo<vfloat32m1_t, vbool32_t, 32, V_M1, f32, FPR32>; + def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, V_M1, f64, FPR64>; } defset list<GroupVTypeInfo> GroupFloatVectors = { def VF16M2: GroupVTypeInfo<vfloat16m2_t, vfloat16m1_t, vbool8_t, 16, - VRM2, V_M2, f16, FPR16>; + V_M2, f16, FPR16>; def VF16M4: GroupVTypeInfo<vfloat16m4_t, vfloat16m1_t, vbool4_t, 16, - VRM4, V_M4, f16, FPR16>; + V_M4, f16, FPR16>; def VF16M8: GroupVTypeInfo<vfloat16m8_t, vfloat16m1_t, vbool2_t, 16, - VRM8, V_M8, f16, FPR16>; + V_M8, f16, FPR16>; def VF32M2: GroupVTypeInfo<vfloat32m2_t, vfloat32m1_t, vbool16_t, 32, - VRM2, V_M2, f32, FPR32>; + V_M2, f32, FPR32>; def VF32M4: GroupVTypeInfo<vfloat32m4_t, vfloat32m1_t, vbool8_t, 32, - VRM4, V_M4, f32, FPR32>; + V_M4, f32, FPR32>; def VF32M8: GroupVTypeInfo<vfloat32m8_t, vfloat32m1_t, vbool4_t, 32, - VRM8, V_M8, 
f32, FPR32>; + V_M8, f32, FPR32>; def VF64M2: GroupVTypeInfo<vfloat64m2_t, vfloat64m1_t, vbool32_t, 64, - VRM2, V_M2, f64, FPR64>; + V_M2, f64, FPR64>; def VF64M4: GroupVTypeInfo<vfloat64m4_t, vfloat64m1_t, vbool16_t, 64, - VRM4, V_M4, f64, FPR64>; + V_M4, f64, FPR64>; def VF64M8: GroupVTypeInfo<vfloat64m8_t, vfloat64m1_t, vbool8_t, 64, - VRM8, V_M8, f64, FPR64>; + V_M8, f64, FPR64>; } } } @@ -360,19 +360,19 @@ defset list<VTypeInfo> AllVectors = { defset list<VTypeInfo> AllBFloatVectors = { defset list<VTypeInfo> NoGroupBFloatVectors = { defset list<VTypeInfo> FractionalGroupBFloatVectors = { - def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, VR, V_MF4, bf16, FPR16>; - def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, VR, V_MF2, bf16, FPR16>; + def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, V_MF4, bf16, FPR16>; + def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, V_MF2, bf16, FPR16>; } - def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, VR, V_M1, bf16, FPR16>; + def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, V_M1, bf16, FPR16>; } defset list<GroupVTypeInfo> GroupBFloatVectors = { def VBF16M2: GroupVTypeInfo<vbfloat16m2_t, vbfloat16m1_t, vbool8_t, 16, - VRM2, V_M2, bf16, FPR16>; + V_M2, bf16, FPR16>; def VBF16M4: GroupVTypeInfo<vbfloat16m4_t, vbfloat16m1_t, vbool4_t, 16, - VRM4, V_M4, bf16, FPR16>; + V_M4, bf16, FPR16>; def VBF16M8: GroupVTypeInfo<vbfloat16m8_t, vbfloat16m1_t, vbool2_t, 16, - VRM8, V_M8, bf16, FPR16>; + V_M8, bf16, FPR16>; } } @@ -1069,7 +1069,8 @@ class VPseudoUnaryMask<VReg RetClass, class VPseudoUnaryMaskRoundingMode<VReg RetClass, VReg OpClass, - string Constraint = ""> : + string Constraint = "", + int TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, VMaskOp:$vm, ixlenimm:$rm, @@ -1079,6 +1080,7 @@ class VPseudoUnaryMaskRoundingMode<VReg RetClass, let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = 
$merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1106,7 +1108,8 @@ class VPseudoUnaryMask_NoExcept<VReg RetClass, class VPseudoUnaryNoMask_FRM<VReg RetClass, VReg OpClass, - string Constraint = ""> : + string Constraint = "", + int TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, @@ -1115,6 +1118,7 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass, let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1123,7 +1127,8 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass, class VPseudoUnaryMask_FRM<VReg RetClass, VReg OpClass, - string Constraint = ""> : + string Constraint = "", + int TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, VMaskOp:$vm, ixlenimm:$frm, @@ -1133,6 +1138,7 @@ class VPseudoUnaryMask_FRM<VReg RetClass, let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1528,7 +1534,8 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, bit CarryIn, - string Constraint> : + string Constraint, + int TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), !if(CarryIn, (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, @@ -1540,6 +1547,7 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass, let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 0; @@ -2447,10 +2455,11 @@ 
multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>; } -multiclass VPseudoTiedBinaryV_VM<LMULInfo m> { +multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1> { def "_VVM" # "_" # m.MX: VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, - m.vrclass, m.vrclass, m, 1, "">; + m.vrclass, m.vrclass, m, 1, "", + TargetConstraintType>; } multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, @@ -2462,10 +2471,11 @@ multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, m.vrclass, GPR, m, CarryIn, Constraint, TargetConstraintType>; } -multiclass VPseudoTiedBinaryV_XM<LMULInfo m> { +multiclass VPseudoTiedBinaryV_XM<LMULInfo m, int TargetConstraintType = 1> { def "_VXM" # "_" # m.MX: VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, - m.vrclass, GPR, m, 1, "">; + m.vrclass, GPR, m, 1, "", + TargetConstraintType>; } multiclass VPseudoVMRG_FM { @@ -2596,45 +2606,48 @@ multiclass VPseudoVRCP_V_RM { } } -multiclass PseudoVEXT_VF2<int TargetConstraintType = 1> { +multiclass PseudoVEXT_VF2 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF2 in { defvar mx = m.MX; + defvar CurrTypeConstraints = !if(!or(!eq(mx, "MF4"), !eq(mx, "MF2"), !eq(mx, "M1")), 1, 3); let VLMul = m.value in { - def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>, + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints, CurrTypeConstraints>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; def "_" # mx # "_MASK" : - VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>, + VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints, CurrTypeConstraints>, RISCVMaskedPseudo<MaskIdx=2>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; } } } -multiclass PseudoVEXT_VF4<int TargetConstraintType = 1> { +multiclass PseudoVEXT_VF4 { defvar constraints 
= "@earlyclobber $rd"; foreach m = MxListVF4 in { defvar mx = m.MX; + defvar CurrTypeConstraints = !if(!or(!eq(mx, "MF2"), !eq(mx, "M1"), !eq(mx, "M2")), 1, 3); let VLMul = m.value in { - def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>, + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints, CurrTypeConstraints>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; def "_" # mx # "_MASK" : - VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>, + VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints, CurrTypeConstraints>, RISCVMaskedPseudo<MaskIdx=2>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; } } } -multiclass PseudoVEXT_VF8<int TargetConstraintType = 1> { +multiclass PseudoVEXT_VF8 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF8 in { defvar mx = m.MX; + defvar CurrTypeConstraints = !if(!or(!eq(mx, "M1"), !eq(mx, "M2"), !eq(mx, "M4")), 1, 3); let VLMul = m.value in { - def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>, + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints, CurrTypeConstraints>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; def "_" # mx # "_MASK" : - VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>, + VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints, CurrTypeConstraints>, RISCVMaskedPseudo<MaskIdx=2>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; } @@ -3619,7 +3632,7 @@ multiclass VPseudoConversionRoundingMode<VReg RetClass, let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>; def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class, - Constraint>, + Constraint, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=2>; } } @@ -3628,12 +3641,13 @@ multiclass 
VPseudoConversionRoundingMode<VReg RetClass, multiclass VPseudoConversionRM<VReg RetClass, VReg Op1Class, LMULInfo MInfo, - string Constraint = ""> { + string Constraint = "", + int TargetConstraintType = 1> { let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoUnaryNoMask_FRM<RetClass, Op1Class, - Constraint>; + Constraint, TargetConstraintType>; def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class, - Constraint>, + Constraint, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=2>; } } @@ -3761,7 +3775,7 @@ multiclass VPseudoVNCVTI_W_RM { multiclass VPseudoVNCVTI_RM_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListW in { - defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>, + defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>, SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX, forceMergeOpRead=true>; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 33bdc3366aa3..5b50a4a78c01 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2338,6 +2338,64 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">; defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">; defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">; +// 12.5. 
Vector Narrowing Fixed-Point Clip Instructions +class VPatTruncSatClipMaxMinBase<string inst, + VTypeInfo vti, + VTypeInfo wti, + SDPatternOperator op1, + int op1_value, + SDPatternOperator op2, + int op2_value> : + Pat<(vti.Vector (riscv_trunc_vector_vl + (wti.Vector (op1 + (wti.Vector (op2 + (wti.Vector wti.RegClass:$rs1), + (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))), + (wti.Vector undef),(wti.Mask V0), VLOpFrag)), + (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))), + (wti.Vector undef), (wti.Mask V0), VLOpFrag)), + (vti.Mask V0), VLOpFrag)), + (!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK") + (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, + (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>; + +class VPatTruncSatClipUMin<VTypeInfo vti, + VTypeInfo wti, + int uminval> : + Pat<(vti.Vector (riscv_trunc_vector_vl + (wti.Vector (riscv_umin_vl + (wti.Vector wti.RegClass:$rs1), + (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))), + (wti.Vector undef), (wti.Mask V0), VLOpFrag)), + (vti.Mask V0), VLOpFrag)), + (!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK") + (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, + (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>; + +multiclass VPatTruncSatClipMaxMin<string inst, VTypeInfo vti, VTypeInfo wti, + SDPatternOperator max, int maxval, SDPatternOperator min, int minval> { + def : VPatTruncSatClipMaxMinBase<inst, vti, wti, max, maxval, min, minval>; + def : VPatTruncSatClipMaxMinBase<inst, vti, wti, min, minval, max, maxval>; +} + +multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> { + defvar sew = vti.SEW; + defvar uminval = !sub(!shl(1, sew), 1); + defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1); + defvar smaxval = !sub(0, !shl(1, !sub(sew, 1))); + + let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates, + GetVTypePredicates<wti>.Predicates) in { + defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, 
wti, riscv_smin_vl, + sminval, riscv_smax_vl, smaxval>; + def : VPatTruncSatClipUMin<vti, wti, uminval>; + } + +} + +foreach vtiToWti = AllWidenableIntVectors in + defm : VPatTruncSatClip<vtiToWti.Vti, vtiToWti.Wti>; + // 13. Vector Floating-Point Instructions // 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 0b1d5b664df9..31f832dfd84c 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -349,20 +349,26 @@ multiclass VPseudoSiFiveVMACC<string mx, VReg vd_type, VReg vs2_type, : VPseudoTernaryNoMaskWithPolicy<vd_type, V_M1.vrclass, vs2_type, Constraint>; } -multiclass VPseudoSiFiveVQMACC<string Constraint = ""> { +multiclass VPseudoSiFiveVQMACCDOD<string Constraint = ""> { foreach m = MxListVF8 in let VLMul = m.value in defm NAME : VPseudoSiFiveVMACC<m.MX, m.vrclass, m.vrclass, Constraint>; } +multiclass VPseudoSiFiveVQMACCQOQ<string Constraint = ""> { + foreach m = [V_MF2, V_M1, V_M2, V_M4] in + let VLMul = m.value in + defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>; +} + multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> { - foreach m = MxListFW in + foreach m = MxListVF2 in let VLMul = m.value in defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>; } multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> { - foreach i = [0, 1, 2, 3, 4] in + foreach i = 0-4 in let hasSideEffects = 0 in defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<MxListW[i].vrclass, MxListVF4[i].vrclass, @@ -400,17 +406,17 @@ let Predicates = [HasVendorXSfvcp] in { } let Predicates = [HasVendorXSfvqmaccdod] in { - defm VQMACCU_2x8x2 : VPseudoSiFiveVQMACC; - defm VQMACC_2x8x2 : VPseudoSiFiveVQMACC; - defm VQMACCUS_2x8x2 : VPseudoSiFiveVQMACC; - defm VQMACCSU_2x8x2 : 
VPseudoSiFiveVQMACC; + defm VQMACCU_2x8x2 : VPseudoSiFiveVQMACCDOD; + defm VQMACC_2x8x2 : VPseudoSiFiveVQMACCDOD; + defm VQMACCUS_2x8x2 : VPseudoSiFiveVQMACCDOD; + defm VQMACCSU_2x8x2 : VPseudoSiFiveVQMACCDOD; } let Predicates = [HasVendorXSfvqmaccqoq] in { - defm VQMACCU_4x8x4 : VPseudoSiFiveVQMACC; - defm VQMACC_4x8x4 : VPseudoSiFiveVQMACC; - defm VQMACCUS_4x8x4 : VPseudoSiFiveVQMACC; - defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACC; + defm VQMACCU_4x8x4 : VPseudoSiFiveVQMACCQOQ; + defm VQMACC_4x8x4 : VPseudoSiFiveVQMACCQOQ; + defm VQMACCUS_4x8x4 : VPseudoSiFiveVQMACCQOQ; + defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACCQOQ; } let Predicates = [HasVendorXSfvfwmaccqqq] in { @@ -566,16 +572,25 @@ multiclass VPatVMACC<string intrinsic, string instruction, string kind, } } -defset list<VTypeInfoToWide> VQMACCInfoPairs = { +defset list<VTypeInfoToWide> VQMACCDODInfoPairs = { def : VTypeInfoToWide<VI8M1, VI32M1>; def : VTypeInfoToWide<VI8M2, VI32M2>; def : VTypeInfoToWide<VI8M4, VI32M4>; def : VTypeInfoToWide<VI8M8, VI32M8>; } -multiclass VPatVQMACC<string intrinsic, string instruction, string kind> - : VPatVMACC<intrinsic, instruction, kind, VQMACCInfoPairs, vint8m1_t>; +defset list<VTypeInfoToWide> VQMACCQOQInfoPairs = { + def : VTypeInfoToWide<VI8MF2, VI32M1>; + def : VTypeInfoToWide<VI8M1, VI32M2>; + def : VTypeInfoToWide<VI8M2, VI32M4>; + def : VTypeInfoToWide<VI8M4, VI32M8>; +} + +multiclass VPatVQMACCDOD<string intrinsic, string instruction, string kind> + : VPatVMACC<intrinsic, instruction, kind, VQMACCDODInfoPairs, vint8m1_t>; +multiclass VPatVQMACCQOQ<string intrinsic, string instruction, string kind> + : VPatVMACC<intrinsic, instruction, kind, VQMACCQOQInfoPairs, vint8m1_t>; multiclass VPatVFWMACC<string intrinsic, string instruction, string kind> : VPatVMACC<intrinsic, instruction, kind, AllWidenableBFloatToFloatVectors, @@ -637,17 +652,17 @@ let Predicates = [HasVendorXSfvcp] in { } let Predicates = [HasVendorXSfvqmaccdod] in { - defm : VPatVQMACC<"vqmaccu_2x8x2", 
"VQMACCU", "2x8x2">; - defm : VPatVQMACC<"vqmacc_2x8x2", "VQMACC", "2x8x2">; - defm : VPatVQMACC<"vqmaccus_2x8x2", "VQMACCUS", "2x8x2">; - defm : VPatVQMACC<"vqmaccsu_2x8x2", "VQMACCSU", "2x8x2">; + defm : VPatVQMACCDOD<"vqmaccu_2x8x2", "VQMACCU", "2x8x2">; + defm : VPatVQMACCDOD<"vqmacc_2x8x2", "VQMACC", "2x8x2">; + defm : VPatVQMACCDOD<"vqmaccus_2x8x2", "VQMACCUS", "2x8x2">; + defm : VPatVQMACCDOD<"vqmaccsu_2x8x2", "VQMACCSU", "2x8x2">; } let Predicates = [HasVendorXSfvqmaccqoq] in { - defm : VPatVQMACC<"vqmaccu_4x8x4", "VQMACCU", "4x8x4">; - defm : VPatVQMACC<"vqmacc_4x8x4", "VQMACC", "4x8x4">; - defm : VPatVQMACC<"vqmaccus_4x8x4", "VQMACCUS", "4x8x4">; - defm : VPatVQMACC<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">; + defm : VPatVQMACCQOQ<"vqmaccu_4x8x4", "VQMACCU", "4x8x4">; + defm : VPatVQMACCQOQ<"vqmacc_4x8x4", "VQMACC", "4x8x4">; + defm : VPatVQMACCQOQ<"vqmaccus_4x8x4", "VQMACCUS", "4x8x4">; + defm : VPatVQMACCQOQ<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">; } let Predicates = [HasVendorXSfvfwmaccqqq] in { @@ -658,27 +673,3 @@ let Predicates = [HasVendorXSfvfnrclipxfqf] in { defm : VPatVFNRCLIP<"vfnrclip_xu_f_qf", "VFNRCLIP_XU_F_QF">; defm : VPatVFNRCLIP<"vfnrclip_x_f_qf", "VFNRCLIP_X_F_QF">; } - -let Predicates = [HasVendorXSfcie] in { -let hasSideEffects = 1, mayLoad = 0, mayStore = 0, DecoderNamespace = "XSfcie" in { -def SF_CFLUSH_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cflush.d.l1","$rs1">, - Sched<[]> { - let rd = 0; - let imm12 = {0b1111,0b1100,0b0000}; -} - -def SF_CDISCARD_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cdiscard.d.l1","$rs1">, - Sched<[]> { - let rd = 0; - let imm12 = {0b1111,0b1100,0b0010}; -} - -def SF_CEASE : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "cease","">, Sched<[]> { - let rs1 = 0; - let rd = 0; - let imm12 = {0b0011,0b0000,0b0101}; -} -} -def : InstAlias<"cflush.d.l1", (SF_CFLUSH_D_L1 X0)>; -def : InstAlias<"cdiscard.d.l1", (SF_CDISCARD_D_L1 X0)>; -} // Predicates = [HasVendorXScie] diff --git 
a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td index a78f36244468..3506204d6c25 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td @@ -56,9 +56,8 @@ def rlist : Operand<OtherVT> { int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) return false; - if (!isUInt<4>(Imm)) return false; // 0~3 Reserved for EABI - return (Imm >= 4) && (Imm <= 15); + return isUInt<4>(Imm) && Imm >= 4; }]; } @@ -70,7 +69,7 @@ def spimm : Operand<OtherVT> { int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) return false; - return isShiftedUInt<5, 4>(Imm); + return isShiftedUInt<2, 4>(Imm); }]; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td new file mode 100644 index 000000000000..6fbfde5ef488 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td @@ -0,0 +1,34 @@ +//===-- RISCVInstrInfoZcmop.td -----------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard Compressed +// May-Be-Operations Extension (Zcmop). +// This version is still experimental as the 'Zcmop' extension hasn't been +// ratified yet. It is based on v0.2 of the specification. 
+// +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class CMOPInst<bits<3> imm3, string opcodestr> + : RVInst16CI<0b011, 0b01, (outs), (ins), opcodestr, ""> { + let Inst{6-2} = 0; + let Inst{7} = 1; + let Inst{10-8} = imm3; + let Inst{12-11} = 0; +} + +// CMOP1, CMOP5 is used by Zicfiss. +let Predicates = [HasStdExtZcmop, NoHasStdExtZicfiss] in { + def CMOP1 : CMOPInst<0, "cmop.1">, Sched<[]>; + def CMOP5 : CMOPInst<2, "cmop.5">, Sched<[]>; +} + +foreach n = [3, 7, 9, 11, 13, 15] in { + let Predicates = [HasStdExtZcmop] in + def CMOP # n : CMOPInst<!srl(n, 1), "cmop." # n>, Sched<[]>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td new file mode 100644 index 000000000000..49a57f86cccd --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td @@ -0,0 +1,72 @@ +//===------ RISCVInstrInfoZicfiss.td - RISC-V Zicfiss -*- tablegen -*------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction class templates +//===----------------------------------------------------------------------===// + +class RVC_SSInst<bits<5> rs1val, RegisterClass reg_class, string opcodestr> : + RVInst16<(outs), (ins reg_class:$rs1), opcodestr, "$rs1", [], InstFormatOther> { + let Inst{15-13} = 0b011; + let Inst{12} = 0; + let Inst{11-7} = rs1val; + let Inst{6-2} = 0b00000; + let Inst{1-0} = 0b01; + let DecoderMethod = "decodeCSSPushPopchk"; +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtZicfiss] in { +let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +def SSPOPCHK : RVInstI<0b100, OPC_SYSTEM, (outs), (ins GPRX1X5:$rs1), "sspopchk", + "$rs1"> { + let rd = 0; + let imm12 = 0b110011011100; +} // Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 1, mayStore = 0 + +let Uses = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +def SSRDP : RVInstI<0b100, OPC_SYSTEM, (outs GPRNoX0:$rd), (ins), "ssrdp", "$rd"> { + let imm12 = 0b110011011100; + let rs1 = 0b00000; +} +} // Uses = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 0 + +let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +def SSPUSH : RVInstR<0b1100111, 0b100, OPC_SYSTEM, (outs), (ins GPRX1X5:$rs2), + "sspush", "$rs2"> { + let rd = 0b00000; + let rs1 = 0b00000; +} +} // Predicates = [HasStdExtZicfiss] + +let Predicates = [HasStdExtZicfiss, HasStdExtZcmop], + DecoderNamespace = "Zicfiss" in { +let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +def C_SSPUSH : RVC_SSInst<0b00001, GPRX1, "c.sspush">; + +let Uses = 
[SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +def C_SSPOPCHK : RVC_SSInst<0b00101, GPRX5, "c.sspopchk">; +} // Predicates = [HasStdExtZicfiss, HasStdExtZcmop] + +let Predicates = [HasStdExtZicfiss] in +defm SSAMOSWAP_W : AMO_rr_aq_rl<0b01001, 0b010, "ssamoswap.w">; + +let Predicates = [HasStdExtZicfiss, IsRV64] in +defm SSAMOSWAP_D : AMO_rr_aq_rl<0b01001, 0b011, "ssamoswap.d">; + +//===----------------------------------------------------------------------===/ +// Compress Instruction tablegen backend. +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtZicfiss, HasStdExtZcmop] in { +def : CompressPat<(SSPUSH X1), (C_SSPUSH X1)>; +def : CompressPat<(SSPOPCHK X5), (C_SSPOPCHK X5)>; +} // Predicates = [HasStdExtZicfiss, HasStdExtZcmop] diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td new file mode 100644 index 000000000000..1e8c70046c63 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td @@ -0,0 +1,59 @@ +//===-- RISCVInstrInfoZimop.td -----------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard +// May-Be-Operations Extension (Zimop). +// This version is still experimental as the 'Zimop' extension hasn't been +// ratified yet. It is based on v0.1 of the specification. 
+// +//===----------------------------------------------------------------------===// + +class RVInstIMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, RISCVOpcode opcode, + dag outs, dag ins, string opcodestr, string argstr> + : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> { + let Inst{31} = imm7{6}; + let Inst{30} = imm5{4}; + let Inst{29-28} = imm7{5-4}; + let Inst{27-26} = imm5{3-2}; + let Inst{25-22} = imm7{3-0}; + let Inst{21-20} = imm5{1-0}; +} + +class RVInstRMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcode, + dag outs, dag ins, string opcodestr, string argstr> + : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> { + let Inst{31} = imm4{3}; + let Inst{30} = imm3{2}; + let Inst{29-28} = imm4{2-1}; + let Inst{27-26} = imm3{1-0}; + let Inst{25} = imm4{0}; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, + RISCVOpcode opcode, string opcodestr> + : RVInstIMopr<imm7, imm5, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1), + opcodestr, "$rd, $rs1">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, + RISCVOpcode opcode, string opcodestr> + : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), + opcodestr, "$rd, $rs1, $rs2">; + +foreach i = 0...31 in { + let Predicates = [HasStdExtZimop] in + def MOPR#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>, + Sched<[]>; +} + +foreach i = 0...7 in { + let Predicates = [HasStdExtZimop] in + def MOPRR#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>, + Sched<[]>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td index 6362a3bef6f2..ba8996e710ed 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -174,8 +174,7 @@ def SIFIVE_S76 : 
RISCVProcessorModel<"sifive-s76", FeatureStdExtF, FeatureStdExtD, FeatureStdExtC, - FeatureStdExtZihintpause, - FeatureVendorXSfcie], + FeatureStdExtZihintpause], [TuneSiFive7]>; def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54", diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index a3c19115bd31..24f8d600f1ea 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -127,6 +127,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, RISCV::X27); } + // Shadow stack pointer. + markSuperRegs(Reserved, RISCV::SSP); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index c59c9b294d79..840fd149d681 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -137,6 +137,8 @@ def GPR : GPRRegisterClass<(add (sequence "X%u", 10, 17), (sequence "X%u", 0, 4))>; def GPRX0 : GPRRegisterClass<(add X0)>; +def GPRX1 : GPRRegisterClass<(add X1)>; +def GPRX5 : GPRRegisterClass<(add X5)>; def GPRNoX0 : GPRRegisterClass<(sub GPR, X0)>; @@ -165,6 +167,8 @@ def SP : GPRRegisterClass<(add X2)>; def SR07 : GPRRegisterClass<(add (sequence "X%u", 8, 9), (sequence "X%u", 18, 23))>; +def GPRX1X5 : GPRRegisterClass<(add X1, X5)>; + // Floating point registers let RegAltNameIndices = [ABIRegAltName] in { def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; @@ -591,3 +595,6 @@ foreach m = LMULList in { // Special registers def FFLAGS : RISCVReg<0, "fflags">; def FRM : RISCVReg<0, "frm">; + +// Shadow Stack register +def SSP : RISCVReg<0, "ssp">; diff --git 
a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td index 953df7b15e2f..43475e825b46 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -19,9 +19,11 @@ include "llvm/TableGen/SearchableTable.td" class SysReg<string name, bits<12> op> { string Name = name; - // A maximum of one deprecated name is supported right now. It generates a - // diagnostic when the name is used to encourage software to migrate away from - // the name. + // A maximum of one alias is supported right now. + string AltName = name; + // A maximum of one deprecated name is supported right now. Unlike the + // `AltName` alias, a `DeprecatedName` generates a diagnostic when the name is + // used to encourage software to migrate away from the name. string DeprecatedName = ""; bits<12> Encoding = op; // FIXME: add these additional fields when needed. @@ -41,7 +43,7 @@ def SysRegsList : GenericTable { let FilterClass = "SysReg"; // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed. let Fields = [ - "Name", "DeprecatedName", "Encoding", "FeaturesRequired", + "Name", "AltName", "DeprecatedName", "Encoding", "FeaturesRequired", "isRV32Only", ]; @@ -54,32 +56,13 @@ def lookupSysRegByName : SearchIndex { let Key = [ "Name" ]; } -def lookupSysRegByDeprecatedName : SearchIndex { +def lookupSysRegByAltName : SearchIndex { let Table = SysRegsList; - let Key = [ "DeprecatedName" ]; -} - -class SiFiveReg<string name, bits<12> op> : SysReg<name, op>; - -def SiFiveRegsList : GenericTable { - let FilterClass = "SiFiveReg"; - // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed. 
- let Fields = [ - "Name", "DeprecatedName", "Encoding", "FeaturesRequired", - "isRV32Only", - ]; - - let PrimaryKey = [ "Encoding" ]; - let PrimaryKeyName = "lookupSiFiveRegByEncoding"; + let Key = [ "AltName" ]; } -def lookupSiFiveRegByName : SearchIndex { - let Table = SiFiveRegsList; - let Key = [ "Name" ]; -} - -def lookupSiFiveRegByDeprecatedName : SearchIndex { - let Table = SiFiveRegsList; +def lookupSysRegByDeprecatedName : SearchIndex { + let Table = SysRegsList; let Key = [ "DeprecatedName" ]; } @@ -309,7 +292,7 @@ foreach i = 3...31 in //===----------------------------------------------------------------------===// // Machine Counter Setup //===----------------------------------------------------------------------===// -let DeprecatedName = "mucounteren" in // Privileged spec v1.9.1 Name +let AltName = "mucounteren" in // Privileged spec v1.9.1 Name def : SysReg<"mcountinhibit", 0x320>; // mhpmevent3-mhpmevent31 at 0x323-0x33F. @@ -323,20 +306,6 @@ foreach i = 3...31 in { } //===----------------------------------------------------------------------===// -// SiFive Custom Machine Mode Registers -//===----------------------------------------------------------------------===// - -let FeaturesRequired = [{ {RISCV::FeatureVendorXSfcie} }] in { -def : SiFiveReg<"mnscratch", 0x350>; -def : SiFiveReg<"mnepc", 0x351>; -def : SiFiveReg<"mncause", 0x352>; -def : SiFiveReg<"mnstatus", 0x353>; -def : SiFiveReg<"mbpm", 0x7C0>; -def : SiFiveReg<"mfd", 0x7C1>; -def : SiFiveReg<"mpd", 0x7C8>; -} - -//===----------------------------------------------------------------------===// // Debug/ Trace Registers (shared with Debug Mode) //===----------------------------------------------------------------------===// def : SysReg<"tselect", 0x7A0>; @@ -353,7 +322,7 @@ def : SysReg<"dpc", 0x7B1>; // "dscratch" is an alternative name for "dscratch0" which appeared in earlier // drafts of the RISC-V debug spec -let DeprecatedName = "dscratch" in +let AltName = "dscratch" in def : 
SysReg<"dscratch0", 0x7B2>; def : SysReg<"dscratch1", 0x7B3>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 96ecc771863e..4c955744b37d 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -359,7 +359,8 @@ public: const TargetTransformInfo::LSRCost &C2); bool shouldFoldTerminatingConditionAfterLSR() const { - return true; + // FIXME: Enabling this causes miscompiles. + return false; } }; diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 3a34a0bfae46..6c009b9e8dde 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -959,8 +959,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName( // N is the number of elements of the vector. Type *Ty; - if (TypeStr.starts_with("atomic_")) - TypeStr = TypeStr.substr(strlen("atomic_")); + TypeStr.consume_front("atomic_"); if (TypeStr.starts_with("void")) { Ty = Type::getVoidTy(Ctx); @@ -1007,8 +1006,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName( // Handle "typeN*" or "type vector[N]*". 
bool IsPtrToVec = TypeStr.consume_back("*"); - if (TypeStr.starts_with(" vector[")) { - TypeStr = TypeStr.substr(strlen(" vector[")); + if (TypeStr.consume_front(" vector[")) { TypeStr = TypeStr.substr(0, TypeStr.find(']')); } TypeStr.getAsInteger(10, VecElts); diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 4f0801479211..78bdf3ae9a84 100644 --- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -2050,7 +2050,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, LHS.getOperand(3).getOpcode() == SPISD::CMPFCC_V9))) && isOneConstant(LHS.getOperand(0)) && isNullConstant(LHS.getOperand(1))) { SDValue CMPCC = LHS.getOperand(3); - SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue(); + SPCC = LHS.getConstantOperandVal(2); LHS = CMPCC.getOperand(0); RHS = CMPCC.getOperand(1); } @@ -3186,7 +3186,7 @@ static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) { SDValue SparcTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 559f2ca476d7..045c4c0aac07 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2186,7 +2186,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // the mask of valid CC values if so. 
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid) { - unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(1); switch (Id) { case Intrinsic::s390_tbegin: Opcode = SystemZISD::TBEGIN; @@ -2212,7 +2212,7 @@ static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, // CC value as its final argument. Provide the associated SystemZISD // opcode and the mask of valid CC values if so. static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpkshs: case Intrinsic::s390_vpksfs: @@ -2600,10 +2600,9 @@ static bool shouldSwapCmpOperands(const Comparison &C) { return true; if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) return true; - if (C.ICmpType != SystemZICMP::SignedOnly && - Opcode0 == ISD::AND && + if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && C.Op0.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff) + C.Op0.getConstantOperandVal(1) == 0xffffffff) return true; return false; @@ -3429,11 +3428,9 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { return (Neg.getOpcode() == ISD::SUB && Neg.getOperand(0).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 && - Neg.getOperand(1) == Pos && - (Pos == CmpOp || - (Pos.getOpcode() == ISD::SIGN_EXTEND && - Pos.getOperand(0) == CmpOp))); + Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos && + (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && + Pos.getOperand(0) == CmpOp))); } // Return the absolute or negative absolute of Op; IsNegative decides which. 
@@ -3740,7 +3737,7 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, MFI.setFrameAddressIsTaken(true); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // By definition, the frame address is the address of the back chain. (In @@ -3776,7 +3773,7 @@ SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, return SDValue(); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); if (Depth > 0) { @@ -4226,7 +4223,7 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { if (HighOp.getOpcode() == ISD::AND && HighOp.getOperand(1).getOpcode() == ISD::Constant) { SDValue HighOp0 = HighOp.getOperand(0); - uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue(); + uint64_t Mask = HighOp.getConstantOperandVal(1); if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) HighOp = HighOp0; } @@ -4485,10 +4482,10 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); - SyncScope::ID FenceSSID = static_cast<SyncScope::ID>( - cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + AtomicOrdering FenceOrdering = + static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); + SyncScope::ID FenceSSID = + static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. 
@@ -4773,13 +4770,13 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const { - bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + bool IsData = Op.getConstantOperandVal(4); if (!IsData) // Just preserve the chain. return Op.getOperand(0); SDLoc DL(Op); - bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + bool IsWrite = Op.getConstantOperandVal(2); unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), @@ -4825,7 +4822,7 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); } - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::thread_pointer: return lowerThreadPointer(SDLoc(Op), DAG); @@ -5628,7 +5625,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, Op = Op.getOperand(0); if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op.getOperand(1).getOpcode() == ISD::Constant) { - unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Elem = Op.getConstantOperandVal(1); if (!GS.add(Op.getOperand(0), Elem)) return SDValue(); FoundOne = true; @@ -6727,8 +6724,7 @@ SDValue SystemZTargetLowering::combineLOAD( int Index = 1; if (User->getOpcode() == ISD::SRL && User->getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(User->getOperand(1))->getZExtValue() == 64 && - User->hasOneUse()) { + User->getConstantOperandVal(1) == 64 && User->hasOneUse()) { User = *User->use_begin(); Index = 0; } @@ -6857,7 +6853,7 @@ static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) { std::swap(Op0, Op1); if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || Op1.getOperand(1).getOpcode() 
!= ISD::Constant || - cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue() != 64) + Op1.getConstantOperandVal(1) != 64) return false; Op1 = Op1.getOperand(0); @@ -7149,20 +7145,18 @@ SDValue SystemZTargetLowering::combineFP_ROUND( unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(OpNo); - if (N->getValueType(0) == MVT::f32 && - Op0.hasOneUse() && + if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getValueType() == MVT::v2f64 && Op0.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) { + Op0.getConstantOperandVal(1) == 0) { SDValue Vec = Op0.getOperand(0); for (auto *U : Vec->uses()) { - if (U != Op0.getNode() && - U->hasOneUse() && + if (U != Op0.getNode() && U->hasOneUse() && U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && U->getOperand(0) == Vec && U->getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) { + U->getConstantOperandVal(1) == 1) { SDValue OtherRound = SDValue(*U->use_begin(), 0); if (OtherRound.getOpcode() == N->getOpcode() && OtherRound.getOperand(OpNo) == SDValue(U, 0) && @@ -7215,20 +7209,18 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(OpNo); - if (N->getValueType(0) == MVT::f64 && - Op0.hasOneUse() && + if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getValueType() == MVT::v4f32 && Op0.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) { + Op0.getConstantOperandVal(1) == 0) { SDValue Vec = Op0.getOperand(0); for (auto *U : Vec->uses()) { - if (U != Op0.getNode() && - U->hasOneUse() && + if (U != Op0.getNode() && U->hasOneUse() && U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && U->getOperand(0) == Vec && U->getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) { + U->getConstantOperandVal(1) == 2) { SDValue OtherExtend = SDValue(*U->use_begin(), 0); if (OtherExtend.getOpcode() == N->getOpcode() && OtherExtend.getOperand(OpNo) == SDValue(U, 0) && @@ -7605,7 +7597,7 @@ SDValue SystemZTargetLowering::combineINTRINSIC( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned Id = N->getConstantOperandVal(1); switch (Id) { // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 // or larger is simply a vector load. 
@@ -7679,7 +7671,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, APInt SrcDemE; unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: @@ -7723,7 +7715,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, SrcDemE = APInt(NumElts, 0); if (!DemandedElts[OpNo - 1]) break; - unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Mask = Op.getConstantOperandVal(3); unsigned MaskBit = ((OpNo - 1) ? 1 : 4); // Demand input element 0 or 1, given by the mask bit value. SrcDemE.setBit((Mask & MaskBit)? 1 : 0); @@ -7732,7 +7724,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, case Intrinsic::s390_vsldb: { // VECTOR SHIFT LEFT DOUBLE BY BYTE assert(VT == MVT::v16i8 && "Unexpected type."); - unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned FirstIdx = Op.getConstantOperandVal(3); assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); unsigned NumSrc0Els = 16 - FirstIdx; SrcDemE = APInt(NumElts, 0); @@ -7808,7 +7800,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { bool IsLogical = false; - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: @@ -7908,7 +7900,7 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode( return 1; unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case 
Intrinsic::s390_vpksf: diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td index af6cf340f8a3..d98bb886c185 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -507,11 +507,11 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs), // Signed and unsigned comparisons. def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ - unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned Type = N->getConstantOperandVal(2); return Type != SystemZICMP::UnsignedOnly; }]>; def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ - unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned Type = N->getConstantOperandVal(2); return Type != SystemZICMP::SignedOnly; }]>; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp index 0267aefd1e91..0e41a2d7aa03 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp @@ -1101,10 +1101,10 @@ Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder, SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); - SyncScope::ID FenceSSID = static_cast<SyncScope::ID>( - cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + AtomicOrdering FenceOrdering = + static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); + SyncScope::ID FenceSSID = + static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); // VE uses Release consistency, so need a fence instruction if it is a // cross-thread fence. 
@@ -1766,7 +1766,7 @@ static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG, SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); switch (IntNo) { default: // Don't custom lower most intrinsics. return SDValue(); @@ -2937,8 +2937,8 @@ static bool isI32Insn(const SDNode *User, const SDNode *N) { if (User->getOperand(1).getNode() != N && User->getOperand(2).getNode() != N && isa<ConstantSDNode>(User->getOperand(3))) { - VECC::CondCode VECCVal = static_cast<VECC::CondCode>( - cast<ConstantSDNode>(User->getOperand(3))->getZExtValue()); + VECC::CondCode VECCVal = + static_cast<VECC::CondCode>(User->getConstantOperandVal(3)); return isIntVECondCode(VECCVal); } [[fallthrough]]; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index bc5f562d9589..051f6caa8c04 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -108,6 +108,8 @@ class X86AsmParser : public MCTargetAsmParser { // Does this instruction use apx extended register? bool UseApxExtendedReg = false; + // Is this instruction explicitly required not to update flags? + bool ForcedNoFlag = false; private: SMLoc consumeToken() { @@ -2312,8 +2314,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, // Drop the optional '.'. StringRef DotDispStr = Tok.getString(); - if (DotDispStr.starts_with(".")) - DotDispStr = DotDispStr.drop_front(1); + DotDispStr.consume_front("."); StringRef TrailingDot; // .Imm gets lexed as a real. 
@@ -3126,6 +3127,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, ForcedVEXEncoding = VEXEncoding_Default; ForcedDispEncoding = DispEncoding_Default; UseApxExtendedReg = false; + ForcedNoFlag = false; // Parse pseudo prefixes. while (true) { @@ -3150,6 +3152,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, ForcedDispEncoding = DispEncoding_Disp8; else if (Prefix == "disp32") ForcedDispEncoding = DispEncoding_Disp32; + else if (Prefix == "nf") + ForcedNoFlag = true; else return Error(NameLoc, "unknown prefix"); @@ -3997,6 +4001,8 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) { if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID)) return Match_Unsupported; + if (ForcedNoFlag != !!(MCID.TSFlags & X86II::EVEX_NF)) + return Match_Unsupported; if (ForcedVEXEncoding == VEXEncoding_EVEX && (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX) diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 59e2008f5632..347dc0d4ed43 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -1169,7 +1169,11 @@ static int getInstructionID(struct InternalInstruction *insn, attrMask |= ATTR_EVEXKZ; if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) attrMask |= ATTR_EVEXB; - if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) + // nf bit is the MSB of aaa + if (nfFromEVEX4of4(insn->vectorExtensionPrefix[3]) && + insn->opcodeType == MAP4) + attrMask |= ATTR_EVEXNF; + else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) attrMask |= ATTR_EVEXK; if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) attrMask |= ATTR_VEXL; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h 
b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index decc45091941..4c7b1c094522 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -103,6 +103,7 @@ namespace X86Disassembler { #define bFromEVEX4of4(evex) bitFromOffset4(evex) #define v2FromEVEX4of4(evex) invertedBitFromOffset3(evex) #define aaaFromEVEX4of4(evex) threeBitsFromOffset0(evex) +#define nfFromEVEX4of4(evex) bitFromOffset2(evex) // These enums represent Intel registers for use by the decoder. #define REGS_8BIT \ diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index b0fcaef5f4b0..e006dd877360 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -870,7 +870,10 @@ enum : uint64_t { ExplicitVEXPrefix = 2ULL << ExplicitOpPrefixShift, /// For instructions that are promoted to EVEX space for EGPR. ExplicitEVEXPrefix = 3ULL << ExplicitOpPrefixShift, - ExplicitOpPrefixMask = 3ULL << ExplicitOpPrefixShift + ExplicitOpPrefixMask = 3ULL << ExplicitOpPrefixShift, + /// EVEX_NF - Set if this instruction has EVEX.NF field set. + EVEX_NFShift = ExplicitOpPrefixShift + 2, + EVEX_NF = 1ULL << EVEX_NFShift }; /// \returns true if the instruction with given opcode is a prefix. @@ -992,6 +995,12 @@ inline unsigned getOperandBias(const MCInstrDesc &Desc) { } } +/// \returns true if the instruction has a NDD (new data destination). +inline bool hasNewDataDest(uint64_t TSFlags) { + return (TSFlags & X86II::OpMapMask) == X86II::T_MAP4 && + (TSFlags & X86II::EVEX_B) && (TSFlags & X86II::VEX_4V); +} + /// \returns operand # for the first field of the memory operand or -1 if no /// memory operands. /// NOTE: This ignores tied operands. 
If there is a tied register which is @@ -1018,7 +1027,7 @@ inline int getMemoryOperandNo(uint64_t TSFlags) { return -1; case X86II::MRMDestMem: case X86II::MRMDestMemFSIB: - return 0; + return hasNewDataDest(TSFlags); case X86II::MRMSrcMem: case X86II::MRMSrcMemFSIB: // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp index cab2f0a2e1c1..1947313a9dfb 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -369,6 +369,9 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O, else if (Flags & X86::IP_HAS_REPEAT) O << "\trep\t"; + if (TSFlags & X86II::EVEX_NF) + O << "\t{nf}"; + // These all require a pseudo prefix if ((Flags & X86::IP_USE_VEX) || (TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix) diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 9e1f1eb97e70..924956295e7c 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -251,6 +251,7 @@ public: void setAAA(const MCInst &MI, unsigned OpNum) { EVEX_aaa = getRegEncoding(MI, OpNum); } + void setNF(bool V) { EVEX_aaa |= V << 2; } X86OpcodePrefixHelper(const MCRegisterInfo &MRI) : W(0), R(0), X(0), B(0), M(0), R2(0), X2(0), B2(0), VEX_4V(0), VEX_L(0), @@ -987,9 +988,11 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, } Prefix.setW(TSFlags & X86II::REX_W); + Prefix.setNF(TSFlags & X86II::EVEX_NF); bool HasEVEX_K = TSFlags & X86II::EVEX_K; bool HasVEX_4V = TSFlags & X86II::VEX_4V; + bool IsND = 
X86II::hasNewDataDest(TSFlags); // IsND implies HasVEX_4V bool HasEVEX_RC = TSFlags & X86II::EVEX_RC; switch (TSFlags & X86II::OpMapMask) { @@ -1049,6 +1052,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, bool EncodeRC = false; uint8_t EVEX_rc = 0; + unsigned CurOp = X86II::getOperandBias(Desc); switch (TSFlags & X86II::FormMask) { @@ -1073,16 +1077,21 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, // MemAddr, src1(VEX_4V), src2(ModR/M) // MemAddr, src1(ModR/M), imm8 // + // NDD: + // dst(VEX_4V), MemAddr, src1(ModR/M) Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg); Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg); Prefix.setV2(MI, MemOperand + X86::AddrIndexReg, HasVEX_4V); + if (IsND) + Prefix.set4VV2(MI, CurOp++); + CurOp += X86::AddrNumOperands; if (HasEVEX_K) Prefix.setAAA(MI, CurOp++); - if (HasVEX_4V) + if (!IsND && HasVEX_4V) Prefix.set4VV2(MI, CurOp++); Prefix.setRR2(MI, CurOp++); @@ -1098,12 +1107,18 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, // // FMA4: // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(Imm[7:4]) + // + // NDD: + // dst(VEX_4V), src1(ModR/M), MemAddr + if (IsND) + Prefix.set4VV2(MI, CurOp++); + Prefix.setRR2(MI, CurOp++); if (HasEVEX_K) Prefix.setAAA(MI, CurOp++); - if (HasVEX_4V) + if (!IsND && HasVEX_4V) Prefix.set4VV2(MI, CurOp++); Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg); @@ -1160,12 +1175,17 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, // // FMA4: // dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M), + // + // NDD: + // dst(VEX_4V), src1(ModR/M.reg), src2(ModR/M) + if (IsND) + Prefix.set4VV2(MI, CurOp++); Prefix.setRR2(MI, CurOp++); if (HasEVEX_K) Prefix.setAAA(MI, CurOp++); - if (HasVEX_4V) + if (!IsND && HasVEX_4V) Prefix.set4VV2(MI, CurOp++); Prefix.setBB2(MI, CurOp); @@ -1209,6 +1229,11 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, // dst(ModR/M), src(ModR/M) 
// dst(ModR/M), src(ModR/M), imm8 // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + // + // NDD: + // dst(VEX_4V), src1(ModR/M), src2(ModR/M) + if (IsND) + Prefix.set4VV2(MI, CurOp++); Prefix.setBB2(MI, CurOp); Prefix.setX(MI, CurOp, 4); ++CurOp; @@ -1216,7 +1241,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, if (HasEVEX_K) Prefix.setAAA(MI, CurOp++); - if (HasVEX_4V) + if (!IsND && HasVEX_4V) Prefix.set4VV2(MI, CurOp++); Prefix.setRR2(MI, CurOp++); @@ -1508,6 +1533,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, unsigned OpcodeOffset = 0; + bool IsND = X86II::hasNewDataDest(TSFlags); + uint64_t Form = TSFlags & X86II::FormMask; switch (Form) { default: @@ -1576,6 +1603,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) ++SrcRegNum; + if (IsND) // Skip the NDD operand encoded in EVEX_VVVV + ++CurOp; emitRegModRMByte(MI.getOperand(CurOp), getX86RegNum(MI.getOperand(SrcRegNum)), CB); @@ -1602,6 +1631,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) ++SrcRegNum; + if (IsND) // Skip new data destination + ++CurOp; + bool ForceSIB = (Form == X86II::MRMDestMemFSIB); emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, Kind, StartByte, CB, Fixups, STI, ForceSIB); @@ -1669,6 +1701,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, case X86II::MRMSrcMem: { unsigned FirstMemOp = CurOp + 1; + if (IsND) // Skip new data destination + CurOp++; + if (HasEVEX_K) // Skip writemask ++FirstMemOp; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td index 5fd6828f4312..e89ddcc570c9 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td @@ -1256,11 +1256,6 @@ def ProcessorFeatures { list<SubtargetFeature> SRFFeatures = !listconcat(ADLFeatures, 
SRFAdditionalFeatures); - // Grandridge - list<SubtargetFeature> GRRAdditionalFeatures = [FeatureRAOINT]; - list<SubtargetFeature> GRRFeatures = - !listconcat(SRFFeatures, GRRAdditionalFeatures); - // Arrowlake S list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16, FeatureSHA512, @@ -1706,10 +1701,10 @@ foreach P = ["goldmont_plus", "goldmont-plus"] in { } def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures, ProcessorFeatures.TRMTuning>; -def : ProcModel<"sierraforest", AlderlakePModel, ProcessorFeatures.SRFFeatures, - ProcessorFeatures.TRMTuning>; -def : ProcModel<"grandridge", AlderlakePModel, ProcessorFeatures.GRRFeatures, +foreach P = ["sierraforest", "grandridge"] in { + def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures, ProcessorFeatures.TRMTuning>; +} // "Arrandale" along with corei3 and corei5 foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in { diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 77a997588c4f..73b10cf3067e 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -487,7 +487,7 @@ namespace { // from PatFrags in tablegen. 
bool isUnneededShiftMask(SDNode *N, unsigned Width) const { assert(N->getOpcode() == ISD::AND && "Unexpected opcode"); - const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + const APInt &Val = N->getConstantOperandAPInt(1); if (Val.countr_one() >= Width) return true; @@ -5233,7 +5233,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { break; case X86ISD::VPTERNLOG: { - uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue(); + uint8_t Imm = Node->getConstantOperandVal(3); if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), Imm)) return; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index 63bdf24d6b4f..1e4b1361f98a 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2267,6 +2267,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); } setOperationAction(ISD::FP_ROUND, MVT::v8bf16, Custom); addLegalFPImmediate(APFloat::getZero(APFloat::BFloat())); @@ -2282,6 +2284,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32bf16, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32bf16, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { @@ -3737,9 +3741,11 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, // type. This ensures they get CSE'd. 
But if the integer type is not // available, use a floating-point +0.0 instead. SDValue Vec; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!Subtarget.hasSSE2() && VT.is128BitVector()) { Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32); - } else if (VT.isFloatingPoint()) { + } else if (VT.isFloatingPoint() && + TLI.isTypeLegal(VT.getVectorElementType())) { Vec = DAG.getConstantFP(+0.0, dl, VT); } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && @@ -31752,7 +31758,7 @@ static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) { static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + unsigned IsData = Op.getConstantOperandVal(4); // We don't support non-data prefetch without PREFETCHI. // Just preserve the chain. diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp index 6c23928228d2..9aa70dff5f93 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -135,8 +135,7 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, int64_t D = static_cast<int64_t>(S_V.second); unsigned IID = 0; for (const auto &HintType : HintTypes) { - if (Name.starts_with(HintType.first)) { - Name = Name.drop_front(HintType.first.size()); + if (Name.consume_front(HintType.first)) { IID = HintType.second; break; } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td index 7f3e193d9a1b..c47bee070e04 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td @@ -14,35 +14,45 @@ //===----------------------------------------------------------------------===// // 
AMX instructions -let Predicates = [HasAMXTILE, In64BitMode] in { - let SchedRW = [WriteSystem] in { - let hasSideEffects = 1, - Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in - def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src), - "ldtilecfg\t$src", - [(int_x86_ldtilecfg addr:$src)]>, VEX, T8; - let hasSideEffects = 1 in - def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src), - "sttilecfg\t$src", - [(int_x86_sttilecfg addr:$src)]>, VEX, T8, PD; - let mayLoad = 1 in - def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), - (ins sibmem:$src), - "tileloadd\t{$src, $dst|$dst, $src}", []>, - VEX, T8, XD; - let mayLoad = 1 in - def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), - (ins sibmem:$src), - "tileloaddt1\t{$src, $dst|$dst, $src}", []>, - VEX, T8, PD; +multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> { +let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in { + let hasSideEffects = 1, + Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in + def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src), + "ldtilecfg\t$src", + [(int_x86_ldtilecfg addr:$src)]>, + T8, PS; + let hasSideEffects = 1 in + def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src), + "sttilecfg\t$src", + [(int_x86_sttilecfg addr:$src)]>, + T8, PD; + let mayLoad = 1 in + def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), + (ins sibmem:$src), + "tileloadd\t{$src, $dst|$dst, $src}", []>, + T8, XD; + let mayLoad = 1 in + def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), + (ins sibmem:$src), + "tileloaddt1\t{$src, $dst|$dst, $src}", []>, + T8, PD; + let mayStore = 1 in + def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs), + (ins sibmem:$dst, TILE:$src), + "tilestored\t{$src, $dst|$dst, $src}", []>, + T8, XS; +} +} + +let SchedRW = [WriteSystem] in { + defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX; + defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8; + + let Predicates = [HasAMXTILE, In64BitMode] in 
{ let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in def TILERELEASE : I<0x49, MRM_C0, (outs), (ins), - "tilerelease", [(int_x86_tilerelease)]>, VEX, T8; - let mayStore = 1 in - def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs), - (ins sibmem:$dst, TILE:$src), - "tilestored\t{$src, $dst|$dst, $src}", []>, - VEX, T8, XS; + "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS; def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins), "tilezero\t$dst", []>, VEX, T8, XD; @@ -82,8 +92,8 @@ let Predicates = [HasAMXTILE, In64BitMode] in { def PTILEZERO : PseudoI<(outs), (ins u8imm:$src), [(int_x86_tilezero timm:$src)]>; } - } // SchedRW -} // HasAMXTILE + } // Predicates +} // SchedRW let Predicates = [HasAMXINT8, In64BitMode] in { let SchedRW = [WriteSystem] in { diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td index 7c3c1d5fe42b..c3a673f97d34 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1447,6 +1447,17 @@ def : Pat<(vselect_mask VK8WM:$mask, (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; } +let Predicates = [HasBF16] in { + def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)), + (VBROADCASTF64X4rm addr:$src)>; + def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF32X4rm addr:$src)>; +} + +let Predicates = [HasBF16, HasVLX] in + def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF32X4Z256rm addr:$src)>; + let Predicates = [HasVLX, HasDQI] in { defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X, diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td index 936db48bb9df..6b0c1b8c28c9 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td +++ 
b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -44,591 +44,298 @@ def PLEA32r : PseudoI<(outs GR32:$dst), (ins anymem:$src), []>; def PLEA64r : PseudoI<(outs GR64:$dst), (ins anymem:$src), []>; } -// BinOpRR - Instructions that read "reg, reg". -class BinOpRR<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p> - : ITy<o, MRMDestReg, t, out, (ins t.RegClass:$src1, t.RegClass:$src2), m, - binop_args, p>, Sched<[WriteALU]>; -// BinOpRR_F - Instructions that read "reg, reg" and write EFLAGS only. -class BinOpRR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRR<o, m, t, (outs), - [(set EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2))]>, - DefEFLAGS; -// BinOpRR_F_Rev - Reversed encoding of BinOpRR_F -class BinOpRR_F_Rev<bits<8> o, string m, X86TypeInfo t> - : BinOpRR_F<o, m, t, null_frag>, DisassembleOnly { - let Form = MRMSrcReg; -} -// BinOpRR_RF - Instructions that read "reg, reg", and write "reg", EFLAGS. -class BinOpRR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRR<o, m, t, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, t.RegClass:$src2))]>, DefEFLAGS; -// BinOpRR_RF_Rev - Reversed encoding of BinOpRR_RF. -class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t> - : BinOpRR_RF<o, m, t, null_frag>, DisassembleOnly { - let Form = MRMSrcReg; -} -// BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write -// EFLAGS. -class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRR<o, m, t, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, t.RegClass:$src2, - EFLAGS))]>, DefEFLAGS, UseEFLAGS { - let SchedRW = [WriteADC]; -} -// BinOpRRF_RF_Rev - Reversed encoding of BinOpRRF_RF -class BinOpRRF_RF_Rev<bits<8> o, string m, X86TypeInfo t> - : BinOpRRF_RF<o, m, t, null_frag>, DisassembleOnly { - let Form = MRMSrcReg; -} - -// BinOpRM - Instructions that read "reg, [mem]". 
-class BinOpRM<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p> - : ITy<o, MRMSrcMem, t, out, (ins t.RegClass:$src1, t.MemOperand:$src2), m, - binop_args, p>, - Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]> { - let mayLoad = 1; -} -// BinOpRM_F - Instructions that read "reg, [mem]" and write EFLAGS only. -class BinOpRM_F<bits<8> o, string m, X86TypeInfo t, SDNode node> - : BinOpRM<o, m, t, (outs), - [(set EFLAGS, (node t.RegClass:$src1, - (t.LoadNode addr:$src2)))]>, DefEFLAGS; -// BinOpRM_RF - Instructions that read "reg, reg", and write "reg", EFLAGS. -class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRM<o, m, t, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, - (t.LoadNode addr:$src2)))]>, DefEFLAGS; -// BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write -// EFLAGS. -class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRM<o, m, t, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>, - DefEFLAGS, UseEFLAGS { - let SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold, - // base, scale, index, offset, segment. - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - // implicit register read. - WriteADC.ReadAfterFold]; -} - -// BinOpRI - Instructions that read "reg, imm". -class BinOpRI<bits<8> o, string m, X86TypeInfo t, Format f, dag out, list<dag> p> - : ITy<o, f, t, out, (ins t.RegClass:$src1, t.ImmOperand:$src2), m, - binop_args, p>, Sched<[WriteALU]> { - let ImmT = t.ImmEncoding; -} -// BinOpRI_F - Instructions that read "reg, imm" and write EFLAGS only. -class BinOpRI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, - Format f> - : BinOpRI<o, m, t, f, (outs), - [(set EFLAGS, (node t.RegClass:$src1, - t.ImmOperator:$src2))]>, DefEFLAGS; -// BinOpRI_RF - Instructions that read "reg, imm" and write "reg", EFLAGS. 
-class BinOpRI_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> - : BinOpRI<o, m, t, f, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, t.ImmOperator:$src2))]>, DefEFLAGS; -// BinOpRIF_RF - Instructions that read "reg, imm", write "reg" and read/write -// EFLAGS. -class BinOpRIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> - : BinOpRI<o, m, t, f, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, t.ImmOperator:$src2, - EFLAGS))]>, DefEFLAGS, UseEFLAGS { - let SchedRW = [WriteADC]; -} -// BinOpRI8 - Instructions that read "reg, imm8". -class BinOpRI8<bits<8> o, string m, X86TypeInfo t, Format f, dag out> - : ITy<o, f, t, out, (ins t.RegClass:$src1, t.Imm8Operand:$src2), m, - binop_args, []>, Sched<[WriteALU]> { - let ImmT = Imm8; -} -// BinOpRI8_F - Instructions that read "reg, imm8" and write EFLAGS only. -class BinOpRI8_F<bits<8> o, string m, X86TypeInfo t, Format f> - : BinOpRI8<o, m, t, f, (outs)>, DefEFLAGS; -// BinOpRI8_RF - Instructions that read "reg, imm8" and write "reg", EFLAGS. -class BinOpRI8_RF<bits<8> o, string m, X86TypeInfo t, Format f> - : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS; -// BinOpRI8F_RF - Instructions that read "reg, imm", write "reg" and read/write -// EFLAGS. -class BinOpRI8F_RF<bits<8> o, string m, X86TypeInfo t, Format f> - : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS, UseEFLAGS { - let SchedRW = [WriteADC]; -} - -// BinOpMR - Instructions that read "[mem], reg". -class BinOpMR<bits<8> o, string m, X86TypeInfo t, list<dag> p> - : ITy<o, MRMDestMem, t, (outs), (ins t.MemOperand:$src1, t.RegClass:$src2), m, - binop_args, p> { - let mayLoad = 1; -} -// BinOpMR_F - Instructions that read "[mem], imm8" and write EFLAGS only. 
-class BinOpMR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpMR<o, m, t, - [(set EFLAGS, (node (t.LoadNode addr:$src1), t.RegClass:$src2))]>, - Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>, DefEFLAGS; -// BinOpMR_MF - Instructions that read "[mem], reg" and write "[mem]", EFLAGS. -class BinOpMR_MF<bits<8> o, string m, X86TypeInfo t, SDNode node> - : BinOpMR<o, m, t, - [(store (node (load addr:$src1), t.RegClass:$src2), addr:$src1), - (implicit EFLAGS)]>, - Sched<[WriteALURMW, - // base, scale, index, offset, segment - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - WriteALU.ReadAfterFold]>, // reg - DefEFLAGS { - let mayStore = 1; -} -// BinOpMRF_MF - Instructions that read "[mem], reg", write "[mem]" and -// read/write EFLAGS. -class BinOpMRF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node> - : BinOpMR<o, m, t, - [(store (node (load addr:$src1), t.RegClass:$src2, EFLAGS), - addr:$src1), (implicit EFLAGS)]>, - Sched<[WriteADCRMW, - // base, scale, index, offset, segment - ReadDefault, ReadDefault, ReadDefault, - ReadDefault, ReadDefault, - WriteALU.ReadAfterFold, // reg - WriteALU.ReadAfterFold]>, // EFLAGS - DefEFLAGS, UseEFLAGS { - let mayStore = 1; -} - -// BinOpMI - Instructions that read "[mem], imm". -class BinOpMI<bits<8> o, string m, X86TypeInfo t, Format f, list<dag> p> - : ITy<o, f, t, (outs), (ins t.MemOperand:$src1, t.ImmOperand:$src2), m, - binop_args, p> { - let ImmT = t.ImmEncoding; - let mayLoad = 1; -} -// BinOpMI_F - Instructions that read "[mem], imm" and write EFLAGS only. -class BinOpMI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, - Format f> - : BinOpMI<o, m, t, f, - [(set EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>, - Sched<[WriteALU.Folded]>, DefEFLAGS; -// BinOpMI_MF - Instructions that read "[mem], imm" and write "[mem]", EFLAGS. 
-class BinOpMI_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> - : BinOpMI<o, m, t, f, - [(store (node (t.VT (load addr:$src1)), - t.ImmOperator:$src2), addr:$src1), (implicit EFLAGS)]>, - Sched<[WriteALURMW]>, DefEFLAGS { - let mayStore = 1; -} -// BinOpMIF_MF - Instructions that read "[mem], imm", write "[mem]" and -// read/write EFLAGS. -class BinOpMIF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> - : BinOpMI<o, m, t, f, - [(store (node (t.VT (load addr:$src1)), - t.ImmOperator:$src2, EFLAGS), addr:$src1), (implicit EFLAGS)]>, - Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS { - let mayStore = 1; -} - -// BinOpMI8 - Instructions that read "[mem], imm8". -class BinOpMI8<string m, X86TypeInfo t, Format f> - : ITy<0x83, f, t, (outs), (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m, - binop_args, []> { - let ImmT = Imm8; - let mayLoad = 1; -} -// BinOpMI8_F - Instructions that read "[mem], imm8" and write EFLAGS only. -class BinOpMI8_F<string m, X86TypeInfo t, Format f> - : BinOpMI8<m, t, f>, Sched<[WriteALU.Folded]>, DefEFLAGS; -// BinOpMI8_MF - Instructions that read "[mem], imm8" and write "[mem]", EFLAGS. -class BinOpMI8_MF<string m, X86TypeInfo t, Format f> - : BinOpMI8<m, t, f>, Sched<[WriteALURMW]>, DefEFLAGS { - let mayStore = 1; -} -// BinOpMI8F_MF - Instructions that read "[mem], imm8", write "[mem]" and -// read/write EFLAGS. -class BinOpMI8F_MF<string m, X86TypeInfo t, Format f> - : BinOpMI8<m, t, f>, Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS { - let mayStore = 1; -} - -// BinOpAI - Instructions that read "a-reg imm" (Accumulator register). -class BinOpAI<bits<8> o, string m, X86TypeInfo t, Register areg, string args> - : ITy<o, RawFrm, t, (outs), (ins t.ImmOperand:$src), m, args, []>, - Sched<[WriteALU]> { - let ImmT = t.ImmEncoding; - let Uses = [areg]; -} -// BinOpAI_F - Instructions that read "a-reg imm" and write EFLAGS only. 
-class BinOpAI_F<bits<8> o, string m, X86TypeInfo t, Register areg, string args> - : BinOpAI<o, m, t, areg, args>, DefEFLAGS; - -// BinOpAI_AF - Instructions that read "a-reg imm" and write a-reg/EFLAGS. -class BinOpAI_AF<bits<8> o, string m, X86TypeInfo t, Register areg, - string args> : BinOpAI<o, m, t, areg, args> { - let Defs = [areg, EFLAGS]; -} -// BinOpAIF_AF - Instructions that read "a-reg imm", write a-reg and read/write -// EFLAGS. -class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg, - string args> : BinOpAI<o, m, t, areg, args> { - let Uses = [areg, EFLAGS]; - let Defs = [areg, EFLAGS]; - let SchedRW = [WriteADC]; +//===----------------------------------------------------------------------===// +// MUL/IMUL and DIV/IDIV Instructions +// +class MulDivOpR<bits<8> o, Format f, string m, X86TypeInfo t, + X86FoldableSchedWrite sched, list<dag> p> + : UnaryOpR<o, f, m, "$src1", t, (outs), p> { + let SchedRW = [sched]; } -// UnaryOpR - Instructions that read "reg" and write "reg". -class UnaryOpR<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p> - : ITy<o, f, t, (outs t.RegClass:$dst), - (ins t.RegClass:$src1), m, "$dst", p>, Sched<[WriteALU]>; - -// UnaryOpM - Instructions that read "[mem]" and writes "[mem]". -class UnaryOpM<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p> - : ITy<o, f, t, (outs), (ins t.MemOperand:$dst), m, "$dst", p>, - Sched<[WriteALURMW]> { - let mayLoad = 1; - let mayStore = 1; +class MulDivOpM<bits<8> o, Format f, string m, X86TypeInfo t, + X86FoldableSchedWrite sched, list<dag> p> + : UnaryOpM<o, f, m, "$src1", t, (outs), p> { + let SchedRW = + [sched.Folded, + // Memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Register reads (implicit or explicit). + sched.ReadAfterFold, sched.ReadAfterFold]; } -// INCDECR - Instructions like "inc reg". 
-class INCDECR<Format f, string m, X86TypeInfo t, SDPatternOperator node> - : UnaryOpR<0xFF, f, m, t, - [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, 1))]>, - DefEFLAGS { - let isConvertibleToThreeAddress = 1; // Can xform into LEA. +multiclass Mul<bits<8> o, string m, Format RegMRM, Format MemMRM, SDPatternOperator node> { + // AL is really implied by AX, but the registers in Defs must match the + // SDNode results (i8, i32). + // + // FIXME: Used for 8-bit mul, ignore result upper 8 bits. + // This probably ought to be moved to a def : Pat<> if the + // syntax can be accepted. + let Defs = [AL,EFLAGS,AX], Uses = [AL] in + def 8r : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, + [(set AL, (node AL, GR8:$src1)), (implicit EFLAGS)]>; + let Defs = [AX,DX,EFLAGS], Uses = [AX] in + def 16r : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, OpSize16; + let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in + def 32r : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, OpSize32; + let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in + def 64r : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>; + let Defs = [AL,EFLAGS,AX], Uses = [AL] in + def 8m : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, + [(set AL, (node AL, (loadi8 addr:$src1))), (implicit EFLAGS)]>; + let Defs = [AX,DX,EFLAGS], Uses = [AX] in + def 16m : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, OpSize16; + let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in + def 32m : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, OpSize32; + let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in + def 64m : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, Requires<[In64BitMode]>; } -// INCDECM - Instructions like "inc [mem]". -class INCDECM<Format f, string m, X86TypeInfo t, int num> - : UnaryOpM<0xFF, f, m, t, - [(store (add (t.LoadNode addr:$dst), num), addr:$dst), - (implicit EFLAGS)]>, DefEFLAGS; - -// INCDECR_ALT - Instructions like "inc reg" short forms. 
-class INCDECR_ALT<bits<8> o, string m, X86TypeInfo t> - : UnaryOpR<o, AddRegFrm, m, t, []>, DefEFLAGS { - // Short forms only valid in 32-bit mode. Selected during MCInst lowering. - let Predicates = [Not64BitMode]; +defm MUL : Mul<0xF7, "mul", MRM4r, MRM4m, mul>; +defm IMUL : Mul<0xF7, "imul", MRM5r, MRM5m, null_frag>; + +multiclass Div<bits<8> o, string m, Format RegMRM, Format MemMRM> { + defvar sched8 = !if(!eq(m, "div"), WriteDiv8, WriteIDiv8); + defvar sched16 = !if(!eq(m, "div"), WriteDiv16, WriteIDiv16); + defvar sched32 = !if(!eq(m, "div"), WriteDiv32, WriteIDiv32); + defvar sched64 = !if(!eq(m, "div"), WriteDiv64, WriteIDiv64); + let Defs = [AL,AH,EFLAGS], Uses = [AX] in + def 8r : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>; + let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in + def 16r : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, OpSize16; + let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in + def 32r : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, OpSize32; + let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in + def 64r : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>; + let Defs = [AL,AH,EFLAGS], Uses = [AX] in + def 8m : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>; + let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in + def 16m : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, OpSize16; + let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in + def 32m : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, OpSize32; + let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in + def 64m : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, Requires<[In64BitMode]>; } - -// MulOpR - Instructions like "mul reg". -class MulOpR<bits<8> o, Format f, string m, X86TypeInfo t, - X86FoldableSchedWrite sched, list<dag> p> - : ITy<o, f, t, (outs), (ins t.RegClass:$src), m, "$src", p>, Sched<[sched]>; - -// MulOpM - Instructions like "mul [mem]". 
-class MulOpM<bits<8> o, Format f, string m, X86TypeInfo t, - X86FoldableSchedWrite sched, list<dag> p> - : ITy<o, f, t, (outs), (ins t.MemOperand:$src), m, - "$src", p>, SchedLoadReg<sched> { - let mayLoad = 1; +let hasSideEffects = 1 in { // so that we don't speculatively execute +defm DIV: Div<0xF7, "div", MRM6r, MRM6m>; +defm IDIV: Div<0xF7, "idiv", MRM7r, MRM7m>; } -// NegOpR - Instructions like "neg reg". -class NegOpR<bits<8> o, string m, X86TypeInfo t> - : UnaryOpR<o, MRM3r, m, t, - [(set t.RegClass:$dst, (ineg t.RegClass:$src1)), - (implicit EFLAGS)]>, DefEFLAGS; - -// NegOpM - Instructions like "neg [mem]". -class NegOpM<bits<8> o, string m, X86TypeInfo t> - : UnaryOpM<o, MRM3m, m, t, - [(store (ineg (t.LoadNode addr:$dst)), addr:$dst), - (implicit EFLAGS)]>, DefEFLAGS; - -// NOTE: NOT does not set EFLAGS! -// NotOpR - Instructions like "not reg". -class NotOpR<bits<8> o, string m, X86TypeInfo t> - : UnaryOpR<o, MRM2r, m, t, [(set t.RegClass:$dst, (not t.RegClass:$src1))]>; - -// NotOpM - Instructions like "neg [mem]". -class NotOpM<bits<8> o, string m, X86TypeInfo t> - : UnaryOpM<o, MRM2m, m, t, - [(store (not (t.LoadNode addr:$dst)), addr:$dst)]>; - -// IMulOpRR - Instructions like "imul reg, reg, i8". -class IMulOpRR<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpRR_RF<o, m, t, X86smul_flag>, TB { +class IMulOpRR<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRR_RF<0xAF, "imul", t, X86smul_flag>, TB { let Form = MRMSrcReg; let SchedRW = [sched]; // X = IMUL Y, Z --> X = IMUL Z, Y let isCommutable = 1; } - -// IMulOpRM - Instructions like "imul reg, reg, [mem]". 
-class IMulOpRM<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpRM_RF<o, m, t, X86smul_flag>, TB { +class IMulOpRM<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRM_RF<0xAF, "imul", t, X86smul_flag>, TB { let Form = MRMSrcMem; let SchedRW = [sched.Folded, sched.ReadAfterFold]; } -// IMulOpRRI8 - Instructions like "imul reg, reg, i8". -class IMulOpRRI8<bits<8> o, string m, X86TypeInfo t, - X86FoldableSchedWrite sched> - : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst), - (ins t.RegClass:$src1, t.Imm8Operand:$src2), m, - "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched]>, DefEFLAGS { - let ImmT = Imm8; -} +def IMUL16rr : IMulOpRR<Xi16, WriteIMul16Reg>, OpSize16; +def IMUL32rr : IMulOpRR<Xi32, WriteIMul32Reg>, OpSize32; +def IMUL64rr : IMulOpRR<Xi64, WriteIMul64Reg>; +def IMUL16rm : IMulOpRM<Xi16, WriteIMul16Reg>, OpSize16; +def IMUL32rm : IMulOpRM<Xi32, WriteIMul32Reg>, OpSize32; +def IMUL64rm : IMulOpRM<Xi64, WriteIMul64Reg>; -// IMulOpRRI - Instructions like "imul reg, reg, i16/i32/i64". -class IMulOpRRI<bits<8> o, string m, X86TypeInfo t, - X86FoldableSchedWrite sched> - : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst), - (ins t.RegClass:$src1, t.ImmOperand:$src2), m, - "{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1, - t.ImmNoSuOperator:$src2))]>, - Sched<[sched]>, DefEFLAGS { - let ImmT = t.ImmEncoding; +class IMulOpRI8_R<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRI8<0x6B, "imul", binop_ndd_args, t, MRMSrcReg, + (outs t.RegClass:$dst)>, DefEFLAGS { + let SchedRW = [sched]; } - -// IMulOpRMI8 - Instructions like "imul reg, [mem], i8". 
-class IMulOpRMI8<bits<8> o, string m, X86TypeInfo t, - X86FoldableSchedWrite sched> - : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst), - (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m, - "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched.Folded]>, +class IMulOpRI_R<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRI<0x69, "imul", binop_ndd_args, t, MRMSrcReg, + (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1, + t.ImmNoSuOperator:$src2))]>, DefEFLAGS { + let SchedRW = [sched]; +} +class IMulOpMI8_R<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpMI8<"imul", binop_ndd_args, t, MRMSrcMem, (outs t.RegClass:$dst)>, DefEFLAGS { - let ImmT = Imm8; - let mayLoad = 1; + let Opcode = 0x6B; + let SchedRW = [sched.Folded]; } - -// IMulOpRMI - Instructions like "imul reg, [mem], i16/i32/i64". -class IMulOpRMI<bits<8> o, string m, X86TypeInfo t, - X86FoldableSchedWrite sched> - : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst), - (ins t.MemOperand:$src1, t.ImmOperand:$src2), m, - "{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set t.RegClass:$dst, EFLAGS, - (X86smul_flag (t.LoadNode addr:$src1), t.ImmNoSuOperator:$src2))]>, - Sched<[sched.Folded]>, DefEFLAGS { - let ImmT = t.ImmEncoding; +class IMulOpMI_R<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpMI<0x69, "imul", binop_ndd_args, t, MRMSrcMem, + (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (X86smul_flag (t.LoadNode addr:$src1), + t.ImmNoSuOperator:$src2))]>, + DefEFLAGS { + let SchedRW = [sched.Folded]; } +def IMUL16rri8 : IMulOpRI8_R<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rri8 : IMulOpRI8_R<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rri8 : IMulOpRI8_R<Xi64, WriteIMul64Imm>; +def IMUL16rri : IMulOpRI_R<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rri : IMulOpRI_R<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rri32 : IMulOpRI_R<Xi64, WriteIMul64Imm>; + +def IMUL16rmi8 : IMulOpMI8_R<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rmi8 : IMulOpMI8_R<Xi32, 
WriteIMul32Imm>, OpSize32; +def IMUL64rmi8 : IMulOpMI8_R<Xi64, WriteIMul64Imm>; +def IMUL16rmi : IMulOpMI_R<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rmi : IMulOpMI_R<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rmi32 : IMulOpMI_R<Xi64, WriteIMul64Imm>; -let Constraints = "$src1 = $dst" in { -def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>, OpSize16; -def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>, OpSize32; -def INC8r : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>; -def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>, OpSize16; -def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>, OpSize32; -def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>; - -def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>, OpSize16; -def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>, OpSize32; -def DEC8r : INCDECR<MRM1r, "dec", Xi8, X86sub_flag_nocf>; -def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>, OpSize16; -def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>, OpSize32; -def DEC64r : INCDECR<MRM1r, "dec", Xi64, X86sub_flag_nocf>; +//===----------------------------------------------------------------------===// +// INC and DEC Instructions +// +class IncOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM0r, "inc", t, null_frag> { + let Pattern = [(set t.RegClass:$dst, EFLAGS, + (X86add_flag_nocf t.RegClass:$src1, 1))]; +} +class DecOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM1r, "dec", t, null_frag> { + let Pattern = [(set t.RegClass:$dst, EFLAGS, + (X86sub_flag_nocf t.RegClass:$src1, 1))]; +} +class IncOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM0m, "inc", t, null_frag> { + let Pattern = [(store (add (t.LoadNode addr:$src1), 1), addr:$src1), + (implicit EFLAGS)]; +} +class DecOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM1m, "dec", t, null_frag> { + let Pattern = [(store (add (t.LoadNode addr:$src1), -1), addr:$src1), + (implicit EFLAGS)]; +} +// IncDec_Alt - Instructions like "inc reg" short forms. +// Short forms only valid in 32-bit mode. 
Selected during MCInst lowering. +class IncDec_Alt<bits<8> o, string m, X86TypeInfo t> + : UnaryOpR_RF<o, AddRegFrm, m, t, null_frag>, Requires<[Not64BitMode]>; + +let isConvertibleToThreeAddress = 1 in { +def INC16r_alt : IncDec_Alt<0x40, "inc", Xi16>, OpSize16; +def INC32r_alt : IncDec_Alt<0x40, "inc", Xi32>, OpSize32; +def DEC16r_alt : IncDec_Alt<0x48, "dec", Xi16>, OpSize16; +def DEC32r_alt : IncDec_Alt<0x48, "dec", Xi32>, OpSize32; +def INC8r : IncOpR_RF<Xi8>; +def INC16r : IncOpR_RF<Xi16>, OpSize16; +def INC32r : IncOpR_RF<Xi32>, OpSize32; +def INC64r : IncOpR_RF<Xi64>; +def DEC8r : DecOpR_RF<Xi8>; +def DEC16r : DecOpR_RF<Xi16>, OpSize16; +def DEC32r : DecOpR_RF<Xi32>, OpSize32; +def DEC64r : DecOpR_RF<Xi64>; } - let Predicates = [UseIncDec] in { -def INC8m : INCDECM<MRM0m, "inc", Xi8, 1>; -def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>, OpSize16; -def INC32m : INCDECM<MRM0m, "inc", Xi32, 1>, OpSize32; -def DEC8m : INCDECM<MRM1m, "dec", Xi8, -1>; -def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>, OpSize16; -def DEC32m : INCDECM<MRM1m, "dec", Xi32, -1>, OpSize32; +def INC8m : IncOpM_M<Xi8>; +def INC16m : IncOpM_M<Xi16>, OpSize16; +def INC32m : IncOpM_M<Xi32>, OpSize32; +def DEC8m : DecOpM_M<Xi8>; +def DEC16m : DecOpM_M<Xi16>, OpSize16; +def DEC32m : DecOpM_M<Xi32>, OpSize32; } let Predicates = [UseIncDec, In64BitMode] in { -def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>; -def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>; +def INC64m : IncOpM_M<Xi64>; +def DEC64m : DecOpM_M<Xi64>; } -// Extra precision multiplication - -// AL is really implied by AX, but the registers in Defs must match the -// SDNode results (i8, i32). -// AL,AH = AL*GR8 -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8, - // FIXME: Used for 8-bit mul, ignore result upper 8 bits. - // This probably ought to be moved to a def : Pat<> if the - // syntax can be accepted. 
- [(set AL, (mul AL, GR8:$src)), (implicit EFLAGS)]>; -// AX,DX = AX*GR16 -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def MUL16r : MulOpR<0xF7, MRM4r, "mul", Xi16, WriteIMul16, []>, OpSize16; -// EAX,EDX = EAX*GR32 -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def MUL32r : MulOpR<0xF7, MRM4r, "mul", Xi32, WriteIMul32, - [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>, OpSize32; -// RAX,RDX = RAX*GR64 -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def MUL64r : MulOpR<0xF7, MRM4r, "mul", Xi64, WriteIMul64, - [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>; -// AL,AH = AL*[mem8] -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8m : MulOpM<0xF6, MRM4m, "mul", Xi8, WriteIMul8, - // FIXME: Used for 8-bit mul, ignore result upper 8 bits. - // This probably ought to be moved to a def : Pat<> if the - // syntax can be accepted. - [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)]>; -// AX,DX = AX*[mem16] -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def MUL16m : MulOpM<0xF7, MRM4m, "mul", Xi16, WriteIMul16, []>, OpSize16; -// EAX,EDX = EAX*[mem32] -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def MUL32m : MulOpM<0xF7, MRM4m, "mul", Xi32, WriteIMul32, []>, OpSize32; -// RAX,RDX = RAX*[mem64] -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def MUL64m : MulOpM<0xF7, MRM4m, "mul", Xi64, WriteIMul64, []>, - Requires<[In64BitMode]>; - -// AL,AH = AL*GR8 -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def IMUL8r : MulOpR<0xF6, MRM5r, "imul", Xi8, WriteIMul8, []>; -// AX,DX = AX*GR16 -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16r : MulOpR<0xF7, MRM5r, "imul", Xi16, WriteIMul16, []>, OpSize16; -// EAX,EDX = EAX*GR32 -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32r : MulOpR<0xF7, MRM5r, "imul", Xi32, WriteIMul32, []>, OpSize32; -// RAX,RDX = RAX*GR64 -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def IMUL64r : MulOpR<0xF7, MRM5r, "imul", Xi64, WriteIMul64, []>; - -// AL,AH = AL*[mem8] -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def 
IMUL8m : MulOpM<0xF6, MRM5m, "imul", Xi8, WriteIMul8, []>; -// AX,DX = AX*[mem16] -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16m : MulOpM<0xF7, MRM5m, "imul", Xi16, WriteIMul16, []>, OpSize16; -// EAX,EDX = EAX*[mem32] -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32m : MulOpM<0xF7, MRM5m, "imul", Xi32, WriteIMul32, []>, OpSize32; -// RAX,RDX = RAX*[mem64] -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def IMUL64m : MulOpM<0xF7, MRM5m, "imul", Xi64, WriteIMul64, []>, - Requires<[In64BitMode]>; - -let Constraints = "$src1 = $dst" in { -// Register-Register Signed Integer Multiply -def IMUL16rr : IMulOpRR<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16; -def IMUL32rr : IMulOpRR<0xAF, "imul", Xi32, WriteIMul32Reg>, OpSize32; -def IMUL64rr : IMulOpRR<0xAF, "imul", Xi64, WriteIMul64Reg>; - -// Register-Memory Signed Integer Multiply -def IMUL16rm : IMulOpRM<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16; -def IMUL32rm : IMulOpRM<0xAF, "imul", Xi32, WriteIMul32Reg>, OpSize32; -def IMUL64rm : IMulOpRM<0xAF, "imul", Xi64, WriteIMul64Reg>; +//===----------------------------------------------------------------------===// +// NEG and NOT Instructions +// +class NegOpR_R<X86TypeInfo t, bit ndd = 0> + : UnaryOpR_R<0xF7, MRM3r, "neg", t, ineg, ndd>; +class NegOpR_RF<X86TypeInfo t, bit ndd = 0> + : UnaryOpR_RF<0xF7, MRM3r, "neg", t, ineg, ndd>; +class NegOpM_M<X86TypeInfo t> : UnaryOpM_M<0xF7, MRM3m, "neg", t, null_frag>; +class NegOpM_MF<X86TypeInfo t> : UnaryOpM_MF<0xF7, MRM3m, "neg", t, ineg>; +class NegOpM_R<X86TypeInfo t> : UnaryOpM_R<0xF7, MRM3m, "neg", t, null_frag>; +class NegOpM_RF<X86TypeInfo t> : UnaryOpM_RF<0xF7, MRM3m, "neg", t, ineg>; + +class NotOpR_R<X86TypeInfo t, bit ndd = 0> + : UnaryOpR_R<0xF7, MRM2r, "not", t, not, ndd>; +class NotOpM_M<X86TypeInfo t> : UnaryOpM_M<0xF7, MRM2m, "not", t, not>; +class NotOpM_R<X86TypeInfo t> : UnaryOpM_R<0xF7, MRM2m, "not", t, not>; + +let Predicates = [NoNDD] in { +def NEG8r : NegOpR_RF<Xi8>; +def NEG16r : 
NegOpR_RF<Xi16>, OpSize16; +def NEG32r : NegOpR_RF<Xi32>, OpSize32; +def NEG64r : NegOpR_RF<Xi64>; +def NOT8r : NotOpR_R<Xi8>; +def NOT16r : NotOpR_R<Xi16>, OpSize16; +def NOT32r : NotOpR_R<Xi32>, OpSize32; +def NOT64r : NotOpR_R<Xi64>; } -// Surprisingly enough, these are not two address instructions! -// NOTE: These are order specific, we want the ri8 forms to be listed -// first so that they are slightly preferred to the ri forms. - -// Register-Integer Signed Integer Multiply -// GR16 = GR16*I8 -def IMUL16rri8 : IMulOpRRI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16; -// GR16 = GR16*I16 -def IMUL16rri : IMulOpRRI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16; -// GR32 = GR32*I8 -def IMUL32rri8 : IMulOpRRI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32; -// GR32 = GR32*I32 -def IMUL32rri : IMulOpRRI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32; -// GR64 = GR64*I8 -def IMUL64rri8 : IMulOpRRI8<0x6B, "imul", Xi64, WriteIMul64Imm>; -// GR64 = GR64*I32 -def IMUL64rri32 : IMulOpRRI<0x69, "imul", Xi64, WriteIMul64Imm>; - -// Memory-Integer Signed Integer Multiply -// GR16 = [mem16]*I8 -def IMUL16rmi8 : IMulOpRMI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16; -// GR16 = [mem16]*I16 -def IMUL16rmi : IMulOpRMI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16; -// GR32 = [mem32]*I8 -def IMUL32rmi8 : IMulOpRMI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32; -// GR32 = [mem32]*I32 -def IMUL32rmi : IMulOpRMI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32; -// GR64 = [mem64]*I8 -def IMUL64rmi8 : IMulOpRMI8<0x6B, "imul", Xi64, WriteIMul64Imm>; -// GR64 = [mem64]*I32 -def IMUL64rmi32 : IMulOpRMI<0x69, "imul", Xi64, WriteIMul64Imm>; - -// unsigned division/remainder -let hasSideEffects = 1 in { // so that we don't speculatively execute -let Defs = [AL,AH,EFLAGS], Uses = [AX] in -// AX/r8 = AL,AH -def DIV8r : MulOpR<0xF6, MRM6r, "div", Xi8, WriteDiv8, []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -// DX:AX/r16 = AX,DX -def DIV16r : MulOpR<0xF7, MRM6r, "div", Xi16, WriteDiv16, 
[]>, OpSize16; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -// EDX:EAX/r32 = EAX,EDX -def DIV32r : MulOpR<0xF7, MRM6r, "div", Xi32, WriteDiv32, []>, OpSize32; -// RDX:RAX/r64 = RAX,RDX -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in -def DIV64r : MulOpR<0xF7, MRM6r, "div", Xi64, WriteDiv64, []>; - -let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in -// AX/[mem8] = AL,AH -def DIV8m : MulOpM<0xF6, MRM6m, "div", Xi8, WriteDiv8, []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -// DX:AX/[mem16] = AX,DX -def DIV16m : MulOpM<0xF7, MRM6m, "div", Xi16, WriteDiv16, []>, OpSize16; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX -def DIV32m : MulOpM<0xF7, MRM6m, "div", Xi32, WriteDiv32, []>, OpSize32; -// RDX:RAX/[mem64] = RAX,RDX -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in -def DIV64m : MulOpM<0xF7, MRM6m, "div", Xi64, WriteDiv64, []>, - Requires<[In64BitMode]>; +let Predicates = [HasNDD, In64BitMode] in { +def NEG8r_ND : NegOpR_RF<Xi8, 1>; +def NEG16r_ND : NegOpR_RF<Xi16, 1>, PD; +def NEG32r_ND : NegOpR_RF<Xi32, 1>; +def NEG64r_ND : NegOpR_RF<Xi64, 1>; + +def NOT8r_ND : NotOpR_R<Xi8, 1>; +def NOT16r_ND : NotOpR_R<Xi16, 1>, PD; +def NOT32r_ND : NotOpR_R<Xi32, 1>; +def NOT64r_ND : NotOpR_R<Xi64, 1>; + +def NEG8r_NF_ND : NegOpR_R<Xi8, 1>, EVEX_NF; +def NEG16r_NF_ND : NegOpR_R<Xi16, 1>, EVEX_NF, PD; +def NEG32r_NF_ND : NegOpR_R<Xi32, 1>, EVEX_NF; +def NEG64r_NF_ND : NegOpR_R<Xi64, 1>, EVEX_NF; } -// Signed division/remainder. 
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in -// AX/r8 = AL,AH -def IDIV8r : MulOpR<0xF6, MRM7r, "idiv", Xi8, WriteIDiv8, []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -// DX:AX/r16 = AX,DX -def IDIV16r: MulOpR<0xF7, MRM7r, "idiv", Xi16, WriteIDiv16, []>, OpSize16; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -// EDX:EAX/r32 = EAX,EDX -def IDIV32r: MulOpR<0xF7, MRM7r, "idiv", Xi32, WriteIDiv32, []>, OpSize32; -// RDX:RAX/r64 = RAX,RDX -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in -def IDIV64r: MulOpR<0xF7, MRM7r, "idiv", Xi64, WriteIDiv64, []>; - -let Defs = [AL,AH,EFLAGS], Uses = [AX] in -// AX/[mem8] = AL,AH -def IDIV8m : MulOpM<0xF6, MRM7m, "idiv", Xi8, WriteIDiv8, []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -// DX:AX/[mem16] = AX,DX -def IDIV16m: MulOpM<0xF7, MRM7m, "idiv", Xi16, WriteIDiv16, []>, OpSize16; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -// EDX:EAX/[mem32] = EAX,EDX -def IDIV32m: MulOpM<0xF7, MRM7m, "idiv", Xi32, WriteIDiv32, []>, OpSize32; -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX -// RDX:RAX/[mem64] = RAX,RDX -def IDIV64m: MulOpM<0xF7, MRM7m, "idiv", Xi64, WriteIDiv64, []>, - Requires<[In64BitMode]>; -} // hasSideEffects = 1 - -let Constraints = "$src1 = $dst" in { -def NEG8r : NegOpR<0xF6, "neg", Xi8>; -def NEG16r : NegOpR<0xF7, "neg", Xi16>, OpSize16; -def NEG32r : NegOpR<0xF7, "neg", Xi32>, OpSize32; -def NEG64r : NegOpR<0xF7, "neg", Xi64>; +def NEG8m : NegOpM_MF<Xi8>; +def NEG16m : NegOpM_MF<Xi16>, OpSize16; +def NEG32m : NegOpM_MF<Xi32>, OpSize32; +def NEG64m : NegOpM_MF<Xi64>, Requires<[In64BitMode]>; + +let Predicates = [HasNDD, In64BitMode] in { +def NEG8m_ND : NegOpM_RF<Xi8>; +def NEG16m_ND : NegOpM_RF<Xi16>, PD; +def NEG32m_ND : NegOpM_RF<Xi32>; +def NEG64m_ND : NegOpM_RF<Xi64>; + +def NEG8m_NF_ND : NegOpM_R<Xi8>, EVEX_NF; +def NEG16m_NF_ND : NegOpM_R<Xi16>, EVEX_NF, PD; +def NEG32m_NF_ND : NegOpM_R<Xi32>, EVEX_NF; +def NEG64m_NF_ND : NegOpM_R<Xi64>, EVEX_NF; } -def NEG8m : 
NegOpM<0xF6, "neg", Xi8>; -def NEG16m : NegOpM<0xF7, "neg", Xi16>, OpSize16; -def NEG32m : NegOpM<0xF7, "neg", Xi32>, OpSize32; -def NEG64m : NegOpM<0xF7, "neg", Xi64>, Requires<[In64BitMode]>; +def NOT8m : NotOpM_M<Xi8>; +def NOT16m : NotOpM_M<Xi16>, OpSize16; +def NOT32m : NotOpM_M<Xi32>, OpSize32; +def NOT64m : NotOpM_M<Xi64>, Requires<[In64BitMode]>; -let Constraints = "$src1 = $dst" in { -def NOT8r : NotOpR<0xF6, "not", Xi8>; -def NOT16r : NotOpR<0xF7, "not", Xi16>, OpSize16; -def NOT32r : NotOpR<0xF7, "not", Xi32>, OpSize32; -def NOT64r : NotOpR<0xF7, "not", Xi64>; +let Predicates = [HasNDD, In64BitMode] in { +def NOT8m_ND : NotOpM_R<Xi8>; +def NOT16m_ND : NotOpM_R<Xi16>, PD; +def NOT32m_ND : NotOpM_R<Xi32>; +def NOT64m_ND : NotOpM_R<Xi64>; } -def NOT8m : NotOpM<0xF6, "not", Xi8>; -def NOT16m : NotOpM<0xF7, "not", Xi16>, OpSize16; -def NOT32m : NotOpM<0xF7, "not", Xi32>, OpSize32; -def NOT64m : NotOpM<0xF7, "not", Xi64>, Requires<[In64BitMode]>; +let Predicates = [In64BitMode], Pattern = [(null_frag)] in { +def NEG8r_NF : NegOpR_R<Xi8>, NF; +def NEG16r_NF : NegOpR_R<Xi16>, NF, PD; +def NEG32r_NF : NegOpR_R<Xi32>, NF; +def NEG64r_NF : NegOpR_R<Xi64>, NF; +def NEG8m_NF : NegOpM_M<Xi8>, NF; +def NEG16m_NF : NegOpM_M<Xi16>, NF, PD; +def NEG32m_NF : NegOpM_M<Xi32>, NF; +def NEG64m_NF : NegOpM_M<Xi64>, NF; + +def NEG8r_EVEX : NegOpR_RF<Xi8>, PL; +def NEG16r_EVEX : NegOpR_RF<Xi16>, PL, PD; +def NEG32r_EVEX : NegOpR_RF<Xi32>, PL; +def NEG64r_EVEX : NegOpR_RF<Xi64>, PL; + +def NOT8r_EVEX : NotOpR_R<Xi8>, PL; +def NOT16r_EVEX : NotOpR_R<Xi16>, PL, PD; +def NOT32r_EVEX : NotOpR_R<Xi32>, PL; +def NOT64r_EVEX : NotOpR_R<Xi64>, PL; + +def NEG8m_EVEX : NegOpM_MF<Xi8>, PL; +def NEG16m_EVEX : NegOpM_MF<Xi16>, PL, PD; +def NEG32m_EVEX : NegOpM_MF<Xi32>, PL; +def NEG64m_EVEX : NegOpM_MF<Xi64>, PL; + +def NOT8m_EVEX : NotOpM_M<Xi8>, PL; +def NOT16m_EVEX : NotOpM_M<Xi16>, PL, PD; +def NOT32m_EVEX : NotOpM_M<Xi32>, PL; +def NOT64m_EVEX : NotOpM_M<Xi64>, PL; +} /// ArithBinOp_RF - 
This is an arithmetic binary operator where the pattern is /// defined with "(set GPR:$dst, EFLAGS, (...". @@ -640,61 +347,204 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, SDNode opnodeflag, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress, bit ConvertibleToThreeAddressRR> { - let Constraints = "$src1 = $dst" in { - let isCommutable = CommutableRR, - isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { - def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; - def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + let isCommutable = CommutableRR, + isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { + let Predicates = [NoNDD] in { + def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; + def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; + def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; + def NAME#16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; + def NAME#32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; + def NAME#64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; + def NAME#8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; + def NAME#16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD; + def NAME#32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; + def NAME#64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; + } + let Predicates = [In64BitMode] in { + def NAME#8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; + def NAME#16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; + def NAME#32rr_NF : 
BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; + def NAME#64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; + def NAME#8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def NAME#16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def NAME#64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } + } - def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; + let Predicates = [In64BitMode] in { + def NAME#8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def NAME#16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def NAME#32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def NAME#64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def NAME#8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def NAME#16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def NAME#32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def NAME#64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def NAME#8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; + def NAME#16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; + def NAME#32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; + def NAME#64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; + def NAME#8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def NAME#16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, 
Xi16, 1>, EVEX_NF, PD; + def NAME#32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def NAME#64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + } - def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; - def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + let Predicates = [NoNDD] in { + def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; + def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; + def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; + def NAME#16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; + def NAME#32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; + def NAME#64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; + def NAME#8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def NAME#16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def NAME#32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def NAME#64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + } + let Predicates = [In64BitMode] in { + def NAME#8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; + def NAME#16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; + def NAME#32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; + def NAME#64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; + def NAME#8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; + def NAME#16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; + def NAME#64rm_EVEX : 
BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; + } let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; - // NOTE: These are order specific, we want the ri8 forms to be listed - // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; - - def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; + let Predicates = [NoNDD] in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. + def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; + def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; + def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; + def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def NAME#32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def NAME#64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def NAME#8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; + def NAME#16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; + def NAME#32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; + def NAME#64ri32_ND: BinOpRI_RF<0x81, mnemonic, 
Xi64, opnodeflag, RegMRM, 1>; + def NAME#16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def NAME#32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def NAME#64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def NAME#8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; + def NAME#16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def NAME#32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def NAME#64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + } + let Predicates = [In64BitMode] in { + def NAME#16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; + def NAME#32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; + def NAME#64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; + def NAME#8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF; + def NAME#16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; + def NAME#32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; + def NAME#64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; + def NAME#16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def NAME#32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def NAME#64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def NAME#8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; + def NAME#16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; + def NAME#32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; + def NAME#64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; + } } - } // Constraints = "$src1 = $dst" - def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; + def NAME#8mr : BinOpMR_MF<BaseOpc, 
mnemonic, Xi8 , opnode>; + def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def NAME#32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; + def NAME#8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; + def NAME#16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; + def NAME#32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; + def NAME#64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; + } + let Predicates = [In64BitMode] in { + def NAME#8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; + def NAME#16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; + def NAME#32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF; + def NAME#64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; + def NAME#8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def NAME#16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def NAME#64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. 
def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; - + def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + let Predicates = [HasNDD, In64BitMode] in { + def NAME#16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; + def NAME#32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; + def NAME#64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; + def NAME#8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def NAME#32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def NAME#16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; + def NAME#32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF; + def NAME#64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; + def NAME#8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; + def NAME#16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def NAME#32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; + def NAME#64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; + } + let Predicates = [In64BitMode] in { + def NAME#16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; + def NAME#32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; + def NAME#64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF; + def NAME#8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; + def NAME#16mi_NF : BinOpMI_M<0x81, 
mnemonic, Xi16, MemMRM>, NF, PD; + def NAME#32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; + def NAME#64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; + def NAME#16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def NAME#32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; + def NAME#64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; + def NAME#8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; + def NAME#16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; + def NAME#32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; + def NAME#64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; + } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - let Constraints = "$src1 = $dst" in def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } @@ -719,62 +569,153 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, string mnemonic, Format RegMRM, Format MemMRM, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress> { - let Constraints = "$src1 = $dst" in { - let isCommutable = CommutableRR in { - def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + let isCommutable = CommutableRR in { + let Predicates = [NoNDD] in { + def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; - } // isConvertibleToThreeAddress + def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64rr : 
BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + } + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def NAME#16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; + def NAME#32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; + def NAME#64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; + } + } } // isCommutable + let Predicates = [In64BitMode] in { + def NAME#8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def NAME#16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def NAME#64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + } + def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>; + let Predicates = [In64BitMode] in { + def NAME#8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def NAME#16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def NAME#32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def NAME#64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def NAME#8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def NAME#16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def NAME#32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def NAME#64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + } + + let Predicates = [NoNDD] in { + def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; + def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64rm : 
BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; + def NAME#16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; + def NAME#32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; + def NAME#64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; + } + let Predicates = [In64BitMode] in { + def NAME#8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; + def NAME#16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; + def NAME#32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; + def NAME#64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; + } - def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; + let Predicates = [NoNDD] in { + def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. 
+ def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; + + def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; + } + } - def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - // NOTE: These are order specific, we want the ri8 forms to be listed - // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; - - def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; + def NAME#16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def NAME#32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def NAME#64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def NAME#16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; + def NAME#32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; + def NAME#64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; } - } // Constraints = "$src1 = $dst" + } + let Predicates = [In64BitMode] in { + def NAME#8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; + def NAME#16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def NAME#32ri8_EVEX : BinOpRI8F_RF<0x83, 
mnemonic, Xi32, RegMRM>, PL; + def NAME#64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def NAME#16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; + def NAME#32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; + def NAME#64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; + } def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>; + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def NAME#32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + } + let Predicates = [In64BitMode] in { + def NAME#8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def NAME#16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def NAME#64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. 
+ def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; - - def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; + def NAME#32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; + def NAME#64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; + def NAME#16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def NAME#32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + } + let Predicates = [In64BitMode] in { + def NAME#8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; + def NAME#16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def NAME#32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL; + def NAME#64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; + def NAME#16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; + def NAME#32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; + def NAME#64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; + } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. 
let Predicates = [Not64BitMode] in { - let Constraints = "$src1 = $dst" in def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } @@ -1089,36 +1030,30 @@ def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2), //===----------------------------------------------------------------------===// // ANDN Instruction // -multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, - PatFrag ld_frag, X86FoldableSchedWrite sched> { -let Predicates = [HasBMI, NoEGPR] in { - def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, - VEX, VVVV, Sched<[sched]>; - def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - VEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; -} -let Predicates = [HasBMI, HasEGPR, In64BitMode] in { - def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, - EVEX, VVVV, Sched<[sched]>; - def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; -} +multiclass AndN<X86TypeInfo t, string suffix> { + defvar andn_rr_p = + [(set t.RegClass:$dst, EFLAGS, (X86and_flag (not t.RegClass:$src1), + t.RegClass:$src2))]; + defvar andn_rm_p = + [(set t.RegClass:$dst, EFLAGS, (X86and_flag (not t.RegClass:$src1), + (t.LoadNode addr:$src2)))]; + 
def rr#suffix : ITy<0xF2, MRMSrcReg, t, (outs t.RegClass:$dst), + (ins t.RegClass:$src1, t.RegClass:$src2), "andn", + binop_ndd_args, andn_rr_p>, VVVV, Sched<[WriteALU]>, + T8, DefEFLAGS; + def rm#suffix : ITy<0xF2, MRMSrcMem, t, (outs t.RegClass:$dst), + (ins t.RegClass:$src1, t.MemOperand:$src2), "andn", + binop_ndd_args, andn_rm_p>, VVVV, + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, + T8, DefEFLAGS; } // Complexity is reduced to give and with immediate a chance to match first. -let Defs = [EFLAGS], AddedComplexity = -6 in { - defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8; - defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8, REX_W; +let AddedComplexity = -6 in { +defm ANDN32 : AndN<Xi32, "">, VEX, Requires<[HasBMI, NoEGPR]>; +defm ANDN64 : AndN<Xi64, "">, VEX, REX_W, Requires<[HasBMI, NoEGPR]>; +defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]>; +defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>; } let Predicates = [HasBMI], AddedComplexity = -6 in { @@ -1135,78 +1070,63 @@ let Predicates = [HasBMI], AddedComplexity = -6 in { //===----------------------------------------------------------------------===// // MULX Instruction // -multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop, - X86FoldableSchedWrite sched> { -let hasSideEffects = 0 in { -let Predicates = [HasBMI2, NoEGPR] in { - def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), - !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8, XD, VEX, VVVV, Sched<[WriteIMulH, sched]>; - +multiclass MulX<X86TypeInfo t, X86FoldableSchedWrite sched> { + defvar mulx_args = "{$src, $dst2, $dst1|$dst1, $dst2, $src}"; + defvar mulx_rm_sched = + [WriteIMulHLd, sched.Folded, + // Memory operand. 
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Implicit read of EDX/RDX + sched.ReadAfterFold]; + + def rr : ITy<0xF6, MRMSrcReg, t, (outs t.RegClass:$dst1, t.RegClass:$dst2), + (ins t.RegClass:$src), "mulx", mulx_args, []>, T8, XD, VEX, + VVVV, Sched<[WriteIMulH, sched]>; let mayLoad = 1 in - def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), - !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8, XD, VEX, VVVV, - Sched<[WriteIMulHLd, sched.Folded, - // Memory operand. - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - // Implicit read of EDX/RDX - sched.ReadAfterFold]>; - + def rm : ITy<0xF6, MRMSrcMem, t, (outs t.RegClass:$dst1, t.RegClass:$dst2), + (ins t.MemOperand:$src), "mulx", mulx_args, []>, T8, XD, VEX, + VVVV, Sched<mulx_rm_sched>; + + let Predicates = [In64BitMode] in { + def rr_EVEX : ITy<0xF6, MRMSrcReg, t, + (outs t.RegClass:$dst1, t.RegClass:$dst2), + (ins t.RegClass:$src), "mulx", mulx_args, []>, T8, XD, + EVEX, VVVV, Sched<[WriteIMulH, sched]>; + let mayLoad = 1 in + def rm_EVEX : ITy<0xF6, MRMSrcMem, t, + (outs t.RegClass:$dst1, t.RegClass:$dst2), + (ins t.MemOperand:$src), "mulx", mulx_args, []>, T8, XD, + EVEX, VVVV, Sched<mulx_rm_sched>; + } // Pseudo instructions to be used when the low result isn't used. The // instruction is defined to keep the high if both destinations are the same. 
- def Hrr : PseudoI<(outs RC:$dst), (ins RC:$src), - []>, Sched<[sched]>; - + def Hrr : PseudoI<(outs t.RegClass:$dst), (ins t.RegClass:$src), []>, + Sched<[sched]>; let mayLoad = 1 in - def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src), - []>, Sched<[sched.Folded]>; -} -let Predicates = [HasBMI2, HasEGPR, In64BitMode] in - def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), - !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8, XD, EVEX, VVVV, Sched<[WriteIMulH, sched]>; -let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in - def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), - !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8, XD, EVEX, VVVV, - Sched<[WriteIMulHLd, sched.Folded, - // Memory operand. - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - // Implicit read of EDX/RDX - sched.ReadAfterFold]>; -} + def Hrm : PseudoI<(outs t.RegClass:$dst), (ins t.MemOperand:$src), []>, + Sched<[sched.Folded]>; } let Uses = [EDX] in - defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>; +defm MULX32 : MulX<Xi32, WriteMULX32>; + let Uses = [RDX] in - defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W; +defm MULX64 : MulX<Xi64, WriteMULX64>, REX_W; //===----------------------------------------------------------------------===// // ADCX and ADOX Instructions // // We don't have patterns for these as there is no advantage over ADC for // most code. 
-class ADCOXOpRR <string m, X86TypeInfo t> - : BinOpRRF_RF<0xF6, m, t, null_frag> { - let Form = MRMSrcReg; - let isCommutable = 1; +let Form = MRMSrcReg in { +def ADCX32rr : BinOpRRF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD; +def ADCX64rr : BinOpRRF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD; +def ADOX32rr : BinOpRRF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS; +def ADOX64rr : BinOpRRF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS; } - -class ADCOXOpRM <string m, X86TypeInfo t> - : BinOpRMF_RF<0xF6, m, t, null_frag> { - let Form = MRMSrcMem; -} - -let OpSize = OpSizeFixed, Constraints = "$src1 = $dst", - Predicates = [HasADX] in { -def ADCX32rr : ADCOXOpRR<"adcx", Xi32>, T8, PD; -def ADCX64rr : ADCOXOpRR<"adcx", Xi64>, T8, PD; -def ADOX32rr : ADCOXOpRR<"adox", Xi32>, T8, XS; -def ADOX64rr : ADCOXOpRR<"adox", Xi64>, T8, XS; -def ADCX32rm : ADCOXOpRM<"adcx", Xi32>, T8, PD; -def ADCX64rm : ADCOXOpRM<"adcx", Xi64>, T8, PD; -def ADOX32rm : ADCOXOpRM<"adox", Xi32>, T8, XS; -def ADOX64rm : ADCOXOpRM<"adox", Xi64>, T8, XS; +let Form = MRMSrcMem in { +def ADCX32rm : BinOpRMF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD; +def ADCX64rm : BinOpRMF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD; +def ADOX32rm : BinOpRMF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS; +def ADOX64rm : BinOpRMF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS; } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td index 07e5576960d6..6e76b44b66a3 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td @@ -256,6 +256,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasEVEX_Z = 0; // Does this inst set the EVEX_Z field? bit hasEVEX_L2 = 0; // Does this inst set the EVEX_L2 field? bit hasEVEX_B = 0; // Does this inst set the EVEX_B field? + bit hasEVEX_NF = 0; // Does this inst set the EVEX_NF field? 
bits<3> CD8_Form = 0; // Compressed disp8 form - vector-width. // Declare it int rather than bits<4> so that all bits are defined when // assigning to bits<7>. @@ -309,4 +310,5 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{48} = hasEVEX_RC; let TSFlags{49} = hasNoTrackPrefix; let TSFlags{51-50} = explicitOpPrefixBits; + let TSFlags{52} = hasEVEX_NF; } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td index 8653f15d8602..94fa6e45ded9 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td @@ -10,6 +10,8 @@ def TruePredicate : Predicate<"true">; def HasEGPR : Predicate<"Subtarget->hasEGPR()">; def NoEGPR : Predicate<"!Subtarget->hasEGPR()">; +def HasNDD : Predicate<"Subtarget->hasNDD()">; +def NoNDD : Predicate<"!Subtarget->hasNDD()">; def HasCMOV : Predicate<"Subtarget->canUseCMOV()">; def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">; def HasNOPL : Predicate<"Subtarget->hasNOPL()">; @@ -100,7 +102,6 @@ def HasIFMA : Predicate<"Subtarget->hasIFMA()">; def HasAVXIFMA : Predicate<"Subtarget->hasAVXIFMA()">; def NoVLX_Or_NoIFMA : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasIFMA()">; def HasRTM : Predicate<"Subtarget->hasRTM()">; -def HasADX : Predicate<"Subtarget->hasADX()">; def HasSHA : Predicate<"Subtarget->hasSHA()">; def HasSHA512 : Predicate<"Subtarget->hasSHA512()">; def HasSGX : Predicate<"Subtarget->hasSGX()">; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td index df1f0b5b4ca7..e8a1a2b83886 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td @@ -6655,49 +6655,51 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { // SSE4.2 - CRC Instructions 
//===----------------------------------------------------------------------===// +// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly +// controlled by the SSE42 flag. +// // No CRC instructions have AVX equivalents -// crc intrinsic instruction -// This set of instructions are only rm, the only difference is the size -// of r and m. -class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut, - RegisterClass RCIn, SDPatternOperator Int> : - CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2), - !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), - [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>, - Sched<[WriteCRC32]>; - -class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut, - X86MemOperand x86memop, SDPatternOperator Int> : - CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2), - !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), - [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>, - Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>; - -let Constraints = "$src1 = $dst" in { - def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem, - int_x86_sse42_crc32_32_8>; - def CRC32r32r8 : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8, - int_x86_sse42_crc32_32_8>; - def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem, - int_x86_sse42_crc32_32_16>, OpSize16; - def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16, - int_x86_sse42_crc32_32_16>, OpSize16; - def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem, - int_x86_sse42_crc32_32_32>, OpSize32; - def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32, - int_x86_sse42_crc32_32_32>, OpSize32; - def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem, - int_x86_sse42_crc32_64_64>, REX_W; - def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64, - int_x86_sse42_crc32_64_64>, REX_W; - let hasSideEffects = 0 in { - let mayLoad = 1 in - def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, 
i8mem, - null_frag>, REX_W; - def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8, - null_frag>, REX_W; - } +class Crc32r<X86TypeInfo t, RegisterClass rc, SDPatternOperator node> + : ITy<0xF1, MRMSrcReg, t, (outs rc:$dst), (ins rc:$src1, t.RegClass:$src2), + "crc32", binop_args, [(set rc:$dst, (node rc:$src1, t.RegClass:$src2))]>, + Sched<[WriteCRC32]>, NoCD8 { + let Constraints = "$src1 = $dst"; +} + +class Crc32m<X86TypeInfo t, RegisterClass rc, SDPatternOperator node> + : ITy<0xF1, MRMSrcMem, t, (outs rc:$dst), (ins rc:$src1, t.MemOperand:$src2), + "crc32", binop_args, [(set rc:$dst, (node rc:$src1, (load addr:$src2)))]>, + Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>, NoCD8 { + let Constraints = "$src1 = $dst"; +} + +let Predicates = [HasCRC32, NoEGPR], OpMap = T8, OpPrefix = XD in { + def CRC32r32r8 : Crc32r<Xi8, GR32, int_x86_sse42_crc32_32_8>; + def CRC32r32m8 : Crc32m<Xi8, GR32, int_x86_sse42_crc32_32_8>; + def CRC32r32r16 : Crc32r<Xi16, GR32, int_x86_sse42_crc32_32_16>, OpSize16; + def CRC32r32m16 : Crc32m<Xi16, GR32, int_x86_sse42_crc32_32_16>, OpSize16; + def CRC32r32r32 : Crc32r<Xi32, GR32, int_x86_sse42_crc32_32_32>, OpSize32; + def CRC32r32m32 : Crc32m<Xi32, GR32, int_x86_sse42_crc32_32_32>, OpSize32; + def CRC32r64r64 : Crc32r<Xi64, GR64, int_x86_sse42_crc32_64_64>; + def CRC32r64m64 : Crc32m<Xi64, GR64, int_x86_sse42_crc32_64_64>; + def CRC32r64r8 : Crc32r<Xi8, GR64, null_frag>, REX_W; + let mayLoad = 1 in + def CRC32r64m8 : Crc32m<Xi8, GR64, null_frag>, REX_W; +} + +let Predicates = [HasCRC32, HasEGPR, In64BitMode], OpMap = T_MAP4, OpEnc = EncEVEX in { + def CRC32r32r8_EVEX : Crc32r<Xi8, GR32, int_x86_sse42_crc32_32_8>; + def CRC32r32m8_EVEX : Crc32m<Xi8, GR32, int_x86_sse42_crc32_32_8>; + def CRC32r32r16_EVEX : Crc32r<Xi16, GR32, int_x86_sse42_crc32_32_16>, PD; + def CRC32r32m16_EVEX : Crc32m<Xi16, GR32, int_x86_sse42_crc32_32_16>, PD; + def CRC32r32r32_EVEX : Crc32r<Xi32, GR32, int_x86_sse42_crc32_32_32>; + def CRC32r32m32_EVEX : 
Crc32m<Xi32, GR32, int_x86_sse42_crc32_32_32>; + def CRC32r64r64_EVEX : Crc32r<Xi64, GR64, int_x86_sse42_crc32_64_64>; + def CRC32r64m64_EVEX : Crc32m<Xi64, GR64, int_x86_sse42_crc32_64_64>; + def CRC32r64r8_EVEX : Crc32r<Xi8, GR64, null_frag>, REX_W; + let mayLoad = 1 in + def CRC32r64m8_EVEX : Crc32m<Xi8, GR64, null_frag>, REX_W; } //===----------------------------------------------------------------------===// @@ -7160,6 +7162,10 @@ def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128rm addr:$src)>; } +let Predicates = [HasAVXNECONVERT, NoVLX] in + def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF128rm addr:$src)>; + //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // @@ -7905,6 +7911,9 @@ let Predicates = [HasAVX2, NoVLX] in { defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>; } +let Predicates = [HasAVXNECONVERT, NoVLX] in + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8bf16, v16bf16, loadv8bf16, loadv16bf16>; + //===----------------------------------------------------------------------===// // VEXTRACTI128 - Extract packed integer values // @@ -7927,6 +7936,9 @@ let Predicates = [HasAVX2, NoVLX] in { defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>; } +let Predicates = [HasAVXNECONVERT, NoVLX] in + defm : vextract_lowering<"VEXTRACTI128", v16bf16, v8bf16>; + //===----------------------------------------------------------------------===// // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores // diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td index efb58c6102dd..699e5847e63f 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td @@ -446,11 +446,11 @@ let Predicates = [HasUSERMSR], mayLoad = 1 in { } let 
Predicates = [HasUSERMSR], mayStore = 1 in { def UWRMSRrr : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "uwrmsr\t{$src1, $src2|$src2, $src1}", + "uwrmsr\t{$src2, $src1|$src1, $src2}", [(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8, XS; def UWRMSRir : Ii32<0xf8, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm), "uwrmsr\t{$src, $imm|$imm, $src}", - [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7, XS, VEX; + [(int_x86_uwrmsr i64immSExt32_su:$imm, GR64:$src)]>, T_MAP7, XS, VEX; } let Defs = [RAX, RDX], Uses = [ECX] in def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td index 9499753143d9..da85922a018d 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td @@ -39,17 +39,19 @@ class PS { Prefix OpPrefix = PS; } class PD { Prefix OpPrefix = PD; } class XD { Prefix OpPrefix = XD; } class XS { Prefix OpPrefix = XS; } -class VEX { Encoding OpEnc = EncVEX; } +class XOP { Encoding OpEnc = EncXOP; } +class VEX { Encoding OpEnc = EncVEX; } +class EVEX { Encoding OpEnc = EncEVEX; } class WIG { bit IgnoresW = 1; } // Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX. 
class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; } class VEX_L { bit hasVEX_L = 1; } class VEX_LIG { bit ignoresVEX_L = 1; } class VVVV { bit hasVEX_4V = 1; } -class EVEX { Encoding OpEnc = EncEVEX; } class EVEX_K { bit hasEVEX_K = 1; } class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; } class EVEX_B { bit hasEVEX_B = 1; } +class EVEX_NF { bit hasEVEX_NF = 1; } class EVEX_RC { bit hasEVEX_RC = 1; } class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; } class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; } @@ -63,7 +65,7 @@ class EVEX_CD8<int esize, CD8VForm form> { bits<3> CD8_Form = form.Value; } class NoCD8 { bits<7> CD8_Scale = 0; } -class XOP { Encoding OpEnc = EncXOP; } + class EVEX2VEXOverride<string VEXInstrName> { string EVEX2VEXOverride = VEXInstrName; } @@ -99,16 +101,24 @@ class DisassembleOnly { bit ForceDisassemble = 1; } - -// SchedModel info for instruction that loads one value and gets the second -// (and possibly third) value from a register. -// This is used for instructions that put the memory operands before other -// uses. -class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded, - // Memory operand. - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - // Register reads (implicit or explicit). 
- Sched.ReadAfterFold, Sched.ReadAfterFold]>; +defvar unaryop_args = "$src1"; +defvar unaryop_ndd_args = "{$src1, $dst|$dst, $src1}"; +defvar binop_args = "{$src2, $src1|$src1, $src2}"; +defvar binop_ndd_args = "{$src2, $src1, $dst|$dst, $src1, $src2}"; +defvar tie_dst_src1 = "$src1 = $dst"; + +// NDD - Helper for new data destination instructions +class NDD<bit ndd> { + string Constraints = !if(!eq(ndd, 0), tie_dst_src1, ""); + Encoding OpEnc = !if(!eq(ndd, 0), EncNormal, EncEVEX); + bit hasEVEX_B = ndd; + bit hasVEX_4V = ndd; + Map OpMap = !if(!eq(ndd, 0), OB, T_MAP4); +} +// NF - Helper for NF (no flags update) instructions +class NF: T_MAP4, EVEX, EVEX_NF, NoCD8; +// PL - Helper for promoted legacy instructions +class PL: T_MAP4, EVEX, NoCD8, ExplicitEVEXPrefix; //===----------------------------------------------------------------------===// // X86 Type infomation definitions @@ -723,13 +733,6 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD, Requires<[UseSSE42]>; -// CRC32I - SSE 4.2 CRC32 instructions. -// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly -// controlled by the SSE42 flag. -class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, XD, Requires<[HasCRC32]>; - // AVX Instruction Templates: // Instructions introduced in AVX (no SSE equivalent forms) // @@ -957,15 +960,380 @@ class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm, /// 2. Infers whether the instruction should have a 0x40 REX_W prefix. /// 3. Infers whether the low bit of the opcode should be 0 (for i8 operations) /// or 1 (for i16,i32,i64 operations). 
-class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, - string mnemonic, string args, list<dag> pattern> - : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4}, - opcode{3}, opcode{2}, opcode{1}, - !if(!eq(typeinfo.HasEvenOpcode, 1), 0, opcode{0})}, f, outs, ins, - !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> { - +class ITy<bits<8> o, Format f, X86TypeInfo t, dag outs, dag ins, string m, + string args, list<dag> p> + : I<{o{7}, o{6}, o{5}, o{4}, o{3}, o{2}, o{1}, + !if(!eq(t.HasEvenOpcode, 1), 0, o{0})}, f, outs, ins, + !strconcat(m, "{", t.InstrSuffix, "}\t", args), p> { let hasSideEffects = 0; - let hasREX_W = typeinfo.HasREX_W; + let hasREX_W = t.HasREX_W; } -defvar binop_args = "{$src2, $src1|$src1, $src2}"; +// BinOpRR - Instructions that read "reg, reg". +class BinOpRR<bits<8> o, string m, string args, X86TypeInfo t, dag out, list<dag> p> + : ITy<o, MRMDestReg, t, out, (ins t.RegClass:$src1, t.RegClass:$src2), m, + args, p>, Sched<[WriteALU]>; +// BinOpRR_F - Instructions that read "reg, reg" and write EFLAGS only. +class BinOpRR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpRR<o, m, binop_args, t, (outs), + [(set EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2))]>, + DefEFLAGS; +// BinOpRR_F_Rev - Reversed encoding of BinOpRR_F +class BinOpRR_F_Rev<bits<8> o, string m, X86TypeInfo t> + : BinOpRR_F<o, m, t, null_frag>, DisassembleOnly { + let Form = MRMSrcReg; +} +// BinOpRR_R - Instructions that read "reg, reg" and write "reg". +class BinOpRR_R<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, + (outs t.RegClass:$dst), []>, NDD<ndd>; +// BinOpRR_R_Rev - Reversed encoding of BinOpRR_R +class BinOpRR_R_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRR_R<o, m, t, ndd>, DisassembleOnly { + let Form = MRMSrcReg; +} +// BinOpRR_RF - Instructions that read "reg, reg", and write "reg", EFLAGS. 
+class BinOpRR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> + : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, + (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2))]>, DefEFLAGS, NDD<ndd>; +// BinOpRR_RF_Rev - Reversed encoding of BinOpRR_RF. +class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRR_RF<o, m, t, null_frag, ndd>, DisassembleOnly { + let Form = MRMSrcReg; +} +// BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write +// EFLAGS. +class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> + : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2, + EFLAGS))]>, DefEFLAGS, UseEFLAGS, NDD<ndd> { + let SchedRW = [WriteADC]; +} +// BinOpRRF_RF_Rev - Reversed encoding of BinOpRRF_RF +class BinOpRRF_RF_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRRF_RF<o, m, t, null_frag, ndd>, DisassembleOnly { + let Form = MRMSrcReg; +} + +// BinOpRM - Instructions that read "reg, [mem]". +class BinOpRM<bits<8> o, string m, string args, X86TypeInfo t, dag out, list<dag> p> + : ITy<o, MRMSrcMem, t, out, (ins t.RegClass:$src1, t.MemOperand:$src2), m, + args, p>, + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]> { + let mayLoad = 1; +} +// BinOpRM_F - Instructions that read "reg, [mem]" and write EFLAGS only. +class BinOpRM_F<bits<8> o, string m, X86TypeInfo t, SDNode node> + : BinOpRM<o, m, binop_args, t, (outs), + [(set EFLAGS, (node t.RegClass:$src1, + (t.LoadNode addr:$src2)))]>, DefEFLAGS; +// BinOpRM_R - Instructions that read "reg, [mem]", and write "reg". 
+class BinOpRM_R<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), + []>, NDD<ndd>; +// BinOpRM_RF - Instructions that read "reg, [mem]", and write "reg", EFLAGS. +class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> + : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, + (t.LoadNode addr:$src2)))]>, DefEFLAGS, NDD<ndd>; +// BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write +// EFLAGS. +class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> + : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>, + DefEFLAGS, UseEFLAGS, NDD<ndd> { + let SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold, + // base, scale, index, offset, segment. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // implicit register read. + WriteADC.ReadAfterFold]; +} + +// BinOpRI - Instructions that read "reg, imm". +class BinOpRI<bits<8> o, string m, string args, X86TypeInfo t, Format f, dag out, list<dag> p> + : ITy<o, f, t, out, (ins t.RegClass:$src1, t.ImmOperand:$src2), m, + args, p>, Sched<[WriteALU]> { + let ImmT = t.ImmEncoding; +} +// BinOpRI_F - Instructions that read "reg, imm" and write EFLAGS only. +class BinOpRI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + Format f> + : BinOpRI<o, m, binop_args, t, f, (outs), + [(set EFLAGS, (node t.RegClass:$src1, + t.ImmOperator:$src2))]>, DefEFLAGS; +// BinOpRI_R - Instructions that read "reg, imm" and write "reg". 
+class BinOpRI_R<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0> + : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst), + []>, NDD<ndd>; +// BinOpRI_RF - Instructions that read "reg, imm" and write "reg", EFLAGS. +class BinOpRI_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, Format f, bit ndd = 0> + : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.ImmOperator:$src2))]>, DefEFLAGS, NDD<ndd>; +// BinOpRIF_RF - Instructions that read "reg, imm", write "reg" and read/write +// EFLAGS. +class BinOpRIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f, bit ndd = 0> + : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.ImmOperator:$src2, + EFLAGS))]>, DefEFLAGS, UseEFLAGS, NDD<ndd> { + let SchedRW = [WriteADC]; +} +// BinOpRI8 - Instructions that read "reg, imm8". +class BinOpRI8<bits<8> o, string m, string args, X86TypeInfo t, Format f, dag out> + : ITy<o, f, t, out, (ins t.RegClass:$src1, t.Imm8Operand:$src2), m, + args, []>, Sched<[WriteALU]> { + let ImmT = Imm8; +} +// BinOpRI8_F - Instructions that read "reg, imm8" and write EFLAGS only. +class BinOpRI8_F<bits<8> o, string m, X86TypeInfo t, Format f> + : BinOpRI8<o, m, binop_args, t, f, (outs)>, DefEFLAGS; +// BinOpRI8_R - Instructions that read "reg, imm8" and write "reg". +class BinOpRI8_R<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0> + : BinOpRI8<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst)>, NDD<ndd>; +// BinOpRI8_RF - Instructions that read "reg, imm8" and write "reg", EFLAGS. 
+class BinOpRI8_RF<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0> + : BinOpRI8<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst)>, DefEFLAGS, NDD<ndd>; +// BinOpRI8F_RF - Instructions that read "reg, imm", write "reg" and read/write +// EFLAGS. +class BinOpRI8F_RF<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0> + : BinOpRI8<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst)>, DefEFLAGS, UseEFLAGS, NDD<ndd> { + let SchedRW = [WriteADC]; +} + +// BinOpMR - Instructions that read "[mem], reg". +class BinOpMR<bits<8> o, string m, string args, X86TypeInfo t, dag out, list<dag> p> + : ITy<o, MRMDestMem, t, out, (ins t.MemOperand:$src1, t.RegClass:$src2), m, + args, p> { + let mayLoad = 1; + let SchedRW = [WriteALU.Folded, WriteALU.ReadAfterFold]; +} +// BinOpMR_R - Instructions that read "[mem], reg", and write "reg". +class BinOpMR_R<bits<8> o, string m, X86TypeInfo t> + : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), []>, NDD<1>; +// BinOpMR_RF - Instructions that read "[mem], reg", and write "reg", EFLAGS. +class BinOpMR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1), + t.RegClass:$src2))]>, DefEFLAGS, NDD<1>; +// BinOpMR_F - Instructions that read "[mem], imm8" and write EFLAGS only. +class BinOpMR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpMR<o, m, binop_args, t, (outs), + [(set EFLAGS, (node (t.LoadNode addr:$src1), t.RegClass:$src2))]>, + Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>, DefEFLAGS; +// BinOpMR_M - Instructions that read "[mem], reg" and write "[mem]". 
+class BinOpMR_M<bits<8> o, string m, X86TypeInfo t> + : BinOpMR<o, m, binop_args, t, (outs), []>, + Sched<[WriteALURMW, + // base, scale, index, offset, segment + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault]> { + let mayStore = 1; +} +// BinOpMR_MF - Instructions that read "[mem], reg" and write "[mem]", EFLAGS. +class BinOpMR_MF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpMR<o, m, binop_args, t, (outs), + [(store (node (load addr:$src1), t.RegClass:$src2), addr:$src1), + (implicit EFLAGS)]>, + Sched<[WriteALURMW, + // base, scale, index, offset, segment + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + WriteALU.ReadAfterFold]>, // reg + DefEFLAGS { + let mayStore = 1; +} +// BinOpMRF_RF - Instructions that read "[mem], reg", write "reg" and +// read/write EFLAGS. +class BinOpMRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node (load addr:$src1), + t.RegClass:$src2, EFLAGS))]>, DefEFLAGS, UseEFLAGS, NDD<1>, + Sched<[WriteADC.Folded, WriteADC.ReadAfterFold]>; +// BinOpMRF_MF - Instructions that read "[mem], reg", write "[mem]" and +// read/write EFLAGS. +class BinOpMRF_MF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpMR<o, m, binop_args, t, (outs), + [(store (node (load addr:$src1), t.RegClass:$src2, EFLAGS), + addr:$src1), (implicit EFLAGS)]>, + Sched<[WriteADCRMW, + // base, scale, index, offset, segment + ReadDefault, ReadDefault, ReadDefault, + ReadDefault, ReadDefault, + WriteALU.ReadAfterFold, // reg + WriteALU.ReadAfterFold]>, // EFLAGS + DefEFLAGS, UseEFLAGS { + let mayStore = 1; +} + +// BinOpMI - Instructions that read "[mem], imm". 
+class BinOpMI<bits<8> o, string m, string args, X86TypeInfo t, Format f, dag out, list<dag> p> + : ITy<o, f, t, out, (ins t.MemOperand:$src1, t.ImmOperand:$src2), m, + args, p> { + let ImmT = t.ImmEncoding; + let mayLoad = 1; +} +// BinOpMI_F - Instructions that read "[mem], imm" and write EFLAGS only. +class BinOpMI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + Format f> + : BinOpMI<o, m, binop_args, t, f, (outs), + [(set EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>, + Sched<[WriteALU.Folded]>, DefEFLAGS; +// BinOpMI_R - Instructions that read "[mem], imm" and write "reg". +class BinOpMI_R<bits<8> o, string m, X86TypeInfo t, Format f> + : BinOpMI<o, m, binop_ndd_args, t, f, (outs t.RegClass:$dst), []>, + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>; +// BinOpMI_R - Instructions that read "[mem], imm" and write "reg", EFLAGS. +class BinOpMI_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + Format f> + : BinOpMI<o, m, binop_ndd_args, t, f, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>, + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>; +// BinOpMI_M - Instructions that read "[mem], imm" and write "[mem]". +class BinOpMI_M<bits<8> o, string m, X86TypeInfo t, Format f> + : BinOpMI<o, m, binop_args, t, f, (outs), []>, Sched<[WriteALURMW]> { + let mayStore = 1; +} +// BinOpMI_MF - Instructions that read "[mem], imm" and write "[mem]", EFLAGS. +class BinOpMI_MF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, Format f> + : BinOpMI<o, m, binop_args, t, f, (outs), + [(store (node (t.VT (load addr:$src1)), + t.ImmOperator:$src2), addr:$src1), (implicit EFLAGS)]>, + Sched<[WriteALURMW]>, DefEFLAGS { + let mayStore = 1; +} +// BinOpMIF_RF - Instructions that read "[mem], imm", write "reg" and +// read/write EFLAGS. 
+class BinOpMIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> + : BinOpMI<o, m, binop_ndd_args, t, f, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node (t.VT (load addr:$src1)), + t.ImmOperator:$src2, EFLAGS))]>, + Sched<[WriteADC.Folded, WriteADC.ReadAfterFold]>, DefEFLAGS, UseEFLAGS, NDD<1>; +// BinOpMIF_MF - Instructions that read "[mem], imm", write "[mem]" and +// read/write EFLAGS. +class BinOpMIF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> + : BinOpMI<o, m, binop_args, t, f, (outs), + [(store (node (t.VT (load addr:$src1)), + t.ImmOperator:$src2, EFLAGS), addr:$src1), (implicit EFLAGS)]>, + Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS { + let mayStore = 1; +} + +// BinOpMI8 - Instructions that read "[mem], imm8". +class BinOpMI8<string m, string args, X86TypeInfo t, Format f, dag out> + : ITy<0x83, f, t, out, (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m, + args, []> { + let ImmT = Imm8; + let mayLoad = 1; +} +// BinOpMI8_F - Instructions that read "[mem], imm8" and write EFLAGS only. +class BinOpMI8_F<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALU.Folded]>, DefEFLAGS; +// BinOpMI8_R - Instructions that read "[mem], imm8" and write "reg". +class BinOpMI8_R<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>; +// BinOpMI8_RF - Instructions that read "[mem], imm8" and write "reg"/EFLAGS. +class BinOpMI8_RF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>; +// BinOpMI8_M - Instructions that read "[mem], imm8" and write "[mem]". +class BinOpMI8_M<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALURMW]> { + let mayStore = 1; +} +// BinOpMI8_MF - Instructions that read "[mem], imm8" and write "[mem]", EFLAGS. 
+class BinOpMI8_MF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALURMW]>, DefEFLAGS { + let mayStore = 1; +} +// BinOpMI8F_RF - Instructions that read "[mem], imm8", write "reg" and +// read/write EFLAGS. +class BinOpMI8F_RF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, + Sched<[WriteADC.Folded, WriteADC.ReadAfterFold]>, DefEFLAGS, UseEFLAGS, NDD<1>; +// BinOpMI8F_MF - Instructions that read "[mem], imm8", write "[mem]" and +// read/write EFLAGS. +class BinOpMI8F_MF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS { + let mayStore = 1; +} + +// BinOpAI - Instructions that read "a-reg imm" (Accumulator register). +class BinOpAI<bits<8> o, string m, X86TypeInfo t, Register areg, string args> + : ITy<o, RawFrm, t, (outs), (ins t.ImmOperand:$src), m, args, []>, + Sched<[WriteALU]> { + let ImmT = t.ImmEncoding; + let Uses = [areg]; +} +// BinOpAI_F - Instructions that read "a-reg imm" and write EFLAGS only. +class BinOpAI_F<bits<8> o, string m, X86TypeInfo t, Register areg, string args> + : BinOpAI<o, m, t, areg, args>, DefEFLAGS; + +// BinOpAI_AF - Instructions that read "a-reg imm" and write a-reg/EFLAGS. +class BinOpAI_AF<bits<8> o, string m, X86TypeInfo t, Register areg, + string args> : BinOpAI<o, m, t, areg, args> { + let Defs = [areg, EFLAGS]; +} +// BinOpAIF_AF - Instructions that read "a-reg imm", write a-reg and read/write +// EFLAGS. +class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg, + string args> : BinOpAI<o, m, t, areg, args> { + let Uses = [areg, EFLAGS]; + let Defs = [areg, EFLAGS]; + let SchedRW = [WriteADC]; +} + +// UnaryOpR - Instructions that read "reg". 
+class UnaryOpR<bits<8> o, Format f, string m, string args, X86TypeInfo t, + dag out, list<dag> p> + : ITy<o, f, t, out, (ins t.RegClass:$src1), m, args, p>, Sched<[WriteALU]>; +// UnaryOpR_R - Instructions that read "reg" and write "reg". +class UnaryOpR_R<bits<8> o, Format f, string m, X86TypeInfo t, + SDPatternOperator node, bit ndd = 0> + : UnaryOpR<o, f, m, !if(!eq(ndd, 0), unaryop_args, unaryop_ndd_args), t, + (outs t.RegClass:$dst), + [(set t.RegClass:$dst, (node t.RegClass:$src1))]>, NDD<ndd>; +// UnaryOpR_RF - Instructions that read "reg" and write "reg"/EFLAGS. +class UnaryOpR_RF<bits<8> o, Format f, string m, X86TypeInfo t, + SDPatternOperator node, bit ndd = 0> + : UnaryOpR<o, f, m, !if(!eq(ndd, 0), unaryop_args, unaryop_ndd_args), t, + (outs t.RegClass:$dst), + [(set t.RegClass:$dst, (node t.RegClass:$src1)), + (implicit EFLAGS)]>, DefEFLAGS, NDD<ndd>; + +// UnaryOpM - Instructions that read "[mem]". +class UnaryOpM<bits<8> o, Format f, string m, string args, X86TypeInfo t, + dag out, list<dag> p> + : ITy<o, f, t, out, (ins t.MemOperand:$src1), m, args, p> { + let mayLoad = 1; +} +// UnaryOpM_R - Instructions that read "[mem]" and writes "reg". +class UnaryOpM_R<bits<8> o, Format f, string m, X86TypeInfo t, + SDPatternOperator node> + : UnaryOpM<o, f, m, unaryop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1)))]>, + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>; +// UnaryOpM_RF - Instructions that read "[mem]" and writes "reg"/EFLAGS. +class UnaryOpM_RF<bits<8> o, Format f, string m, X86TypeInfo t, + SDPatternOperator node> + : UnaryOpM<o, f, m, unaryop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1)))]>, + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>; +// UnaryOpM_M - Instructions that read "[mem]" and writes "[mem]". 
+class UnaryOpM_M<bits<8> o, Format f, string m, X86TypeInfo t, + SDPatternOperator node> + : UnaryOpM<o, f, m, unaryop_args, t, (outs), + [(store (node (t.LoadNode addr:$src1)), addr:$src1)]>, + Sched<[WriteALURMW]>{ + let mayStore = 1; +} +// UnaryOpM_MF - Instructions that read "[mem]" and writes "[mem]"/EFLAGS. +class UnaryOpM_MF<bits<8> o, Format f, string m, X86TypeInfo t, + SDPatternOperator node> + : UnaryOpM<o, f, m, unaryop_args, t, (outs), + [(store (node (t.LoadNode addr:$src1)), addr:$src1), + (implicit EFLAGS)]>, Sched<[WriteALURMW]>, DefEFLAGS { + let mayStore = 1; +} diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td index 70bd77bba03a..bbd19cf8d5b2 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td @@ -130,6 +130,9 @@ let Predicates = [HasAVX, NoVLX] in { defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, sub_xmm>; } +let Predicates = [HasAVXNECONVERT, NoVLX] in + defm : subvec_zero_lowering<"DQA", VR128, v16bf16, v8bf16, sub_xmm>; + let Predicates = [HasVLX] in { defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, sub_xmm>; defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, sub_xmm>; @@ -175,6 +178,12 @@ let Predicates = [HasFP16, HasVLX] in { defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, sub_ymm>; } +let Predicates = [HasBF16, HasVLX] in { + defm : subvec_zero_lowering<"APSZ128", VR128X, v16bf16, v8bf16, sub_xmm>; + defm : subvec_zero_lowering<"APSZ128", VR128X, v32bf16, v8bf16, sub_xmm>; + defm : subvec_zero_lowering<"APSZ256", VR256X, v32bf16, v16bf16, sub_ymm>; +} + class maskzeroupper<ValueType vt, RegisterClass RC> : PatLeaf<(vt RC:$src), [{ return isMaskZeroExtended(N); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp 
b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 8a04987e768a..49631f38017a 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1459,6 +1459,15 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( Args, CxtI); } +InstructionCost +X86TTIImpl::getAltInstrCost(VectorType *VecTy, unsigned Opcode0, + unsigned Opcode1, const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind) const { + if (isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) + return TTI::TCC_Basic; + return InstructionCost::getInvalid(); +} + InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp, ArrayRef<int> Mask, @@ -3724,10 +3733,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BITREVERSE, MVT::v8i16, { 8, 13, 10, 16 } }, { ISD::BITREVERSE, MVT::v32i8, { 13, 15, 17, 26 } }, // 2 x 128-bit Op + extract/insert { ISD::BITREVERSE, MVT::v16i8, { 7, 7, 9, 13 } }, - { ISD::BSWAP, MVT::v4i64, { 5, 7, 5, 10 } }, - { ISD::BSWAP, MVT::v2i64, { 2, 3, 1, 3 } }, - { ISD::BSWAP, MVT::v8i32, { 5, 7, 5, 10 } }, - { ISD::BSWAP, MVT::v4i32, { 2, 3, 1, 3 } }, + { ISD::BSWAP, MVT::v4i64, { 5, 6, 5, 10 } }, + { ISD::BSWAP, MVT::v2i64, { 2, 2, 1, 3 } }, + { ISD::BSWAP, MVT::v8i32, { 5, 6, 5, 10 } }, + { ISD::BSWAP, MVT::v4i32, { 2, 2, 1, 3 } }, { ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } }, { ISD::BSWAP, MVT::v8i16, { 2, 2, 1, 3 } }, { ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } }, // 2 x 128-bit Op + extract/insert @@ -3804,6 +3813,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } }, // sqrtpd }; static const CostKindTblEntry SLMCostTbl[] = { + { ISD::BSWAP, MVT::v2i64, { 5, 5, 1, 5 } }, + { ISD::BSWAP, MVT::v4i32, { 5, 5, 1, 5 } }, + { ISD::BSWAP, MVT::v8i16, { 5, 5, 1, 5 } }, { ISD::FSQRT, MVT::f32, { 20, 20, 1, 1 } }, // sqrtss { ISD::FSQRT, 
MVT::v4f32, { 40, 41, 1, 5 } }, // sqrtps { ISD::FSQRT, MVT::f64, { 35, 35, 1, 1 } }, // sqrtsd @@ -3842,9 +3854,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BITREVERSE, MVT::v4i32, { 16, 20, 11, 21 } }, { ISD::BITREVERSE, MVT::v8i16, { 16, 20, 11, 21 } }, { ISD::BITREVERSE, MVT::v16i8, { 11, 12, 10, 16 } }, - { ISD::BSWAP, MVT::v2i64, { 5, 5, 1, 5 } }, - { ISD::BSWAP, MVT::v4i32, { 5, 5, 1, 5 } }, - { ISD::BSWAP, MVT::v8i16, { 5, 5, 1, 5 } }, + { ISD::BSWAP, MVT::v2i64, { 2, 3, 1, 5 } }, + { ISD::BSWAP, MVT::v4i32, { 2, 3, 1, 5 } }, + { ISD::BSWAP, MVT::v8i16, { 2, 3, 1, 5 } }, { ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } }, { ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } }, { ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } }, diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h index 0fa0d240a548..07a3fff4f84b 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -140,6 +140,11 @@ public: TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, ArrayRef<const Value *> Args = ArrayRef<const Value *>(), const Instruction *CxtI = nullptr); + InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, + unsigned Opcode1, + const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind) const; + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 1288597fc6b0..05003ec304ad 100644 --- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -250,7 +250,7 @@ bool XCoreDAGToDAGISel::tryBRIND(SDNode *N) { SDValue Addr = 
N->getOperand(1); if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN) return false; - unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue(); + unsigned IntNo = Addr->getConstantOperandVal(1); if (IntNo != Intrinsic::xcore_checkevent) return false; SDValue nextAddr = Addr->getOperand(2); diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 7736adab19e8..18feeaadb03c 100644 --- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -767,7 +767,7 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, // An index of zero corresponds to the current function's frame address. // An index of one to the parent's frame address, and so on. // Depths > 0 not supported yet! - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) + if (Op.getConstantOperandVal(0) > 0) return SDValue(); MachineFunction &MF = DAG.getMachineFunction(); @@ -783,7 +783,7 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { // An index of zero corresponds to the current function's return address. // An index of one to the parent's return address, and so on. // Depths > 0 not supported yet! 
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) + if (Op.getConstantOperandVal(0) > 0) return SDValue(); MachineFunction &MF = DAG.getMachineFunction(); @@ -905,7 +905,7 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue XCoreTargetLowering:: LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); switch (IntNo) { case Intrinsic::xcore_crc8: EVT VT = Op.getValueType(); @@ -1497,7 +1497,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case ISD::INTRINSIC_VOID: - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { case Intrinsic::xcore_outt: case Intrinsic::xcore_outct: case Intrinsic::xcore_chkct: { @@ -1733,30 +1733,30 @@ void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - switch (IntNo) { - case Intrinsic::xcore_getts: - // High bits are known to be zero. - Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), - Known.getBitWidth() - 16); - break; - case Intrinsic::xcore_int: - case Intrinsic::xcore_inct: - // High bits are known to be zero. - Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), - Known.getBitWidth() - 8); - break; - case Intrinsic::xcore_testct: - // Result is either 0 or 1. - Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), - Known.getBitWidth() - 1); - break; - case Intrinsic::xcore_testwct: - // Result is in the range 0 - 4. - Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), - Known.getBitWidth() - 3); - break; - } + unsigned IntNo = Op.getConstantOperandVal(1); + switch (IntNo) { + case Intrinsic::xcore_getts: + // High bits are known to be zero. 
+ Known.Zero = + APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 16); + break; + case Intrinsic::xcore_int: + case Intrinsic::xcore_inct: + // High bits are known to be zero. + Known.Zero = + APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 8); + break; + case Intrinsic::xcore_testct: + // Result is either 0 or 1. + Known.Zero = + APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 1); + break; + case Intrinsic::xcore_testwct: + // Result is in the range 0 - 4. + Known.Zero = + APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 3); + break; + } } break; } diff --git a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp index 27d168020ce6..ce640f5b8d45 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp @@ -348,11 +348,7 @@ StringRef ARM::getArchExtName(uint64_t ArchExtKind) { } static bool stripNegationPrefix(StringRef &Name) { - if (Name.starts_with("no")) { - Name = Name.substr(2); - return true; - } - return false; + return Name.consume_front("no"); } StringRef ARM::getArchExtFeature(StringRef ArchExt) { diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp index 11c5000acc07..2e08c7b12d9d 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp @@ -1160,7 +1160,7 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 25: CPU = "znver3"; *Type = X86::AMDFAM19H; - if ((Model >= 0x00 && Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || + if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) || (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || (Model >= 0x50 && Model <= 0x5f)) { // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 diff --git 
a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp index d475650c2d18..e93502187b54 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp @@ -1208,8 +1208,7 @@ static VersionTuple parseVersionFromName(StringRef Name) { VersionTuple Triple::getEnvironmentVersion() const { StringRef EnvironmentName = getEnvironmentName(); StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment()); - if (EnvironmentName.starts_with(EnvironmentTypeName)) - EnvironmentName = EnvironmentName.substr(EnvironmentTypeName.size()); + EnvironmentName.consume_front(EnvironmentTypeName); return parseVersionFromName(EnvironmentName); } diff --git a/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp index 085554f18b2b..d46ff07ec734 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp @@ -162,8 +162,6 @@ constexpr FeatureBitset FeaturesAlderlake = constexpr FeatureBitset FeaturesSierraforest = FeaturesAlderlake | FeatureCMPCCXADD | FeatureAVXIFMA | FeatureUINTR | FeatureENQCMD | FeatureAVXNECONVERT | FeatureAVXVNNIINT8; -constexpr FeatureBitset FeaturesGrandridge = - FeaturesSierraforest | FeatureRAOINT; constexpr FeatureBitset FeaturesArrowlakeS = FeaturesSierraforest | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4; constexpr FeatureBitset FeaturesPantherlake = @@ -369,7 +367,7 @@ constexpr ProcInfo Processors[] = { // Sierraforest microarchitecture based processors. { {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false }, // Grandridge microarchitecture based processors. 
- { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesGrandridge, 'p', false }, + { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesSierraforest, 'p', false }, // Granite Rapids microarchitecture based processors. { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512BF16, FeaturesGraniteRapids, 'n', false }, // Granite Rapids D microarchitecture based processors. diff --git a/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp b/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp index 2f82bc03480b..aea772dbc4be 100644 --- a/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp +++ b/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp @@ -201,8 +201,9 @@ Expected<StubT> getRequiredValue( template <typename JsonT, typename StubT = JsonT> Expected<StubT> getRequiredValue( TBDKey Key, const Object *Obj, - std::function<std::optional<JsonT>(const Object *, StringRef)> GetValue, - StubT DefaultValue, std::function<std::optional<StubT>(JsonT)> Validate) { + std::function<std::optional<JsonT>(const Object *, StringRef)> const + GetValue, + StubT DefaultValue, function_ref<std::optional<StubT>(JsonT)> Validate) { std::optional<JsonT> Val = GetValue(Obj, Keys[Key]); if (!Val) return DefaultValue; @@ -215,7 +216,7 @@ Expected<StubT> getRequiredValue( } Error collectFromArray(TBDKey Key, const Object *Obj, - std::function<void(StringRef)> Append, + function_ref<void(StringRef)> Append, bool IsRequired = false) { const auto *Values = Obj->getArray(Keys[Key]); if (!Values) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index f37b4dc938d3..529f7309a1a2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2951,9 +2951,11 @@ void coro::salvageDebugInfo( // dbg.declare does. 
if (isa<DbgDeclareInst>(DVI)) { std::optional<BasicBlock::iterator> InsertPt; - if (auto *I = dyn_cast<Instruction>(Storage)) + if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); - else if (isa<Argument>(Storage)) + if (!OptimizeFrame && I->getDebugLoc()) + DVI.setDebugLoc(I->getDebugLoc()); + } else if (isa<Argument>(Storage)) InsertPt = F->getEntryBlock().begin(); if (InsertPt) DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt); diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 7c277518b21d..7ebf265e17ba 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -76,6 +76,7 @@ STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); +STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef"); STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); STATISTIC(NumNoFree, "Number of functions marked as nofree"); @@ -1279,6 +1280,45 @@ static void addNonNullAttrs(const SCCNodeSet &SCCNodes, } } +/// Deduce noundef attributes for the SCC. +static void addNoUndefAttrs(const SCCNodeSet &SCCNodes, + SmallSet<Function *, 8> &Changed) { + // Check each function in turn, determining which functions return noundef + // values. + for (Function *F : SCCNodes) { + // Already noundef. + if (F->getAttributes().hasRetAttr(Attribute::NoUndef)) + continue; + + // We can infer and propagate function attributes only when we know that the + // definition we'll get at link time is *exactly* the definition we see now. 
+ // For more details, see GlobalValue::mayBeDerefined. + if (!F->hasExactDefinition()) + return; + + // MemorySanitizer assumes that the definition and declaration of a + // function will be consistent. A function with sanitize_memory attribute + // should be skipped from inference. + if (F->hasFnAttribute(Attribute::SanitizeMemory)) + continue; + + if (F->getReturnType()->isVoidTy()) + continue; + + if (all_of(*F, [](BasicBlock &BB) { + if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { + // TODO: perform context-sensitive analysis? + return isGuaranteedNotToBeUndefOrPoison(Ret->getReturnValue()); + } + return true; + })) { + F->addRetAttr(Attribute::NoUndef); + ++NumNoUndefReturn; + Changed.insert(F); + } + } +} + namespace { /// Collects a set of attribute inference requests and performs them all in one @@ -1629,7 +1669,10 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, for (auto &I : BB.instructionsWithoutDebug()) if (auto *CB = dyn_cast<CallBase>(&I)) { Function *Callee = CB->getCalledFunction(); - if (!Callee || Callee == F || !Callee->doesNotRecurse()) + if (!Callee || Callee == F || + (!Callee->doesNotRecurse() && + !(Callee->isDeclaration() && + Callee->hasFnAttribute(Attribute::NoCallback)))) // Function calls a potentially recursive function. return; } @@ -1785,6 +1828,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter, inferConvergent(Nodes.SCCNodes, Changed); addNoReturnAttrs(Nodes.SCCNodes, Changed); addWillReturn(Nodes.SCCNodes, Changed); + addNoUndefAttrs(Nodes.SCCNodes, Changed); // If we have no external nodes participating in the SCC, we can deduce some // more precise attributes as well. 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 719a2678fc18..556fde37efeb 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1685,8 +1685,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { assert(NotLHS != nullptr && NotRHS != nullptr && "isFreeToInvert desynced with getFreelyInverted"); Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS); - return BinaryOperator::CreateSub(ConstantInt::get(RHS->getType(), -2), - LHSPlusRHS); + return BinaryOperator::CreateSub( + ConstantInt::getSigned(RHS->getType(), -2), LHSPlusRHS); } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 63b1e0f64a88..c03f50d75814 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3513,9 +3513,13 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(Op0, C); // ((B | C) & A) | B -> B | (A & C) - if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) + if (match(Op0, m_c_And(m_c_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C)); + // B | ((B | C) & A) -> B | (A & C) + if (match(Op1, m_c_And(m_c_Or(m_Specific(Op0), m_Value(C)), m_Value(A)))) + return BinaryOperator::CreateOr(Op0, Builder.CreateAnd(A, C)); + if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this)) return DeMorgan; @@ -3872,6 +3876,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { } } + // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2 + if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C1)))) && + 
match(Op1, m_APInt(C2))) { + KnownBits KnownX = computeKnownBits(X, /*Depth*/ 0, &I); + if ((KnownX.One & *C2) == *C2) + return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2)); + } + return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3b7fe7fa2266..43d4496571be 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3850,6 +3850,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { if (Callee->hasFnAttribute("thunk")) return false; + // If this is a call to a naked function, the assembly might be + // using an argument, or otherwise rely on the frame layout, + // the function prototype will mismatch. + if (Callee->hasFnAttribute(Attribute::Naked)) + return false; + // If this is a musttail call, the callee's prototype must match the caller's // prototype with the exception of pointee types. The code below doesn't // implement that, so we can't do this transform. 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 289976718e52..3875e59c3ede 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -111,8 +111,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() || - GV->getValueType() != GEP->getSourceElementType() || - !GV->isConstant() || !GV->hasDefinitiveInitializer()) + GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() || + !GV->hasDefinitiveInitializer()) return nullptr; Constant *Init = GV->getInitializer(); @@ -128,8 +128,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( // the simple index into a single-dimensional array. // // Require: GEP GV, 0, i {{, constant indices}} - if (GEP->getNumOperands() < 3 || - !isa<ConstantInt>(GEP->getOperand(1)) || + if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) || !cast<ConstantInt>(GEP->getOperand(1))->isZero() || isa<Constant>(GEP->getOperand(2))) return nullptr; @@ -142,15 +141,18 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Type *EltTy = Init->getType()->getArrayElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (!Idx) return nullptr; // Variable index. + if (!Idx) + return nullptr; // Variable index. uint64_t IdxVal = Idx->getZExtValue(); - if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. + if ((unsigned)IdxVal != IdxVal) + return nullptr; // Too large array index. 
if (StructType *STy = dyn_cast<StructType>(EltTy)) EltTy = STy->getElementType(IdxVal); else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { - if (IdxVal >= ATy->getNumElements()) return nullptr; + if (IdxVal >= ATy->getNumElements()) + return nullptr; EltTy = ATy->getElementType(); } else { return nullptr; // Unknown type. @@ -191,7 +193,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { Constant *Elt = Init->getAggregateElement(i); - if (!Elt) return nullptr; + if (!Elt) + return nullptr; // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) { @@ -214,16 +217,17 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( if (isa<UndefValue>(C)) { // Extend range state machines to cover this element in case there is an // undef in the middle of the range. - if (TrueRangeEnd == (int)i-1) + if (TrueRangeEnd == (int)i - 1) TrueRangeEnd = i; - if (FalseRangeEnd == (int)i-1) + if (FalseRangeEnd == (int)i - 1) FalseRangeEnd = i; continue; } // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. - if (!isa<ConstantInt>(C)) return nullptr; + if (!isa<ConstantInt>(C)) + return nullptr; // Otherwise, we know if the comparison is true or false for this element, // update our state machines. @@ -233,7 +237,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( if (IsTrueForElt) { // Update the TrueElement state machine. if (FirstTrueElement == Undefined) - FirstTrueElement = TrueRangeEnd = i; // First true element. + FirstTrueElement = TrueRangeEnd = i; // First true element. else { // Update double-compare state machine. if (SecondTrueElement == Undefined) @@ -242,7 +246,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( SecondTrueElement = Overdefined; // Update range state machine. 
- if (TrueRangeEnd == (int)i-1) + if (TrueRangeEnd == (int)i - 1) TrueRangeEnd = i; else TrueRangeEnd = Overdefined; @@ -259,7 +263,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( SecondFalseElement = Overdefined; // Update range state machine. - if (FalseRangeEnd == (int)i-1) + if (FalseRangeEnd == (int)i - 1) FalseRangeEnd = i; else FalseRangeEnd = Overdefined; @@ -348,7 +352,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( // False for two elements -> 'i != 47 & i != 72'. Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx); - Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); + Value *SecondFalseIdx = + ConstantInt::get(Idx->getType(), SecondFalseElement); Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } @@ -365,8 +370,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Idx = Builder.CreateAdd(Idx, Offs); } - Value *End = ConstantInt::get(Idx->getType(), - TrueRangeEnd-FirstTrueElement+1); + Value *End = + ConstantInt::get(Idx->getType(), TrueRangeEnd - FirstTrueElement + 1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } @@ -380,8 +385,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Idx = Builder.CreateAdd(Idx, Offs); } - Value *End = ConstantInt::get(Idx->getType(), - FalseRangeEnd-FirstFalseElement); + Value *End = + ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } @@ -4624,27 +4629,35 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, } bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; - if (BO0 && isa<OverflowingBinaryOperator>(BO0)) - NoOp0WrapProblem = - ICmpInst::isEquality(Pred) || - (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || - (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); - if (BO1 && isa<OverflowingBinaryOperator>(BO1)) - NoOp1WrapProblem = - ICmpInst::isEquality(Pred) || - 
(CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || - (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); - + bool Op0HasNUW = false, Op1HasNUW = false; + bool Op0HasNSW = false, Op1HasNSW = false; // Analyze the case when either Op0 or Op1 is an add instruction. // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). + auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred, + bool &HasNSW, bool &HasNUW) -> bool { + if (isa<OverflowingBinaryOperator>(BO)) { + HasNUW = BO.hasNoUnsignedWrap(); + HasNSW = BO.hasNoSignedWrap(); + return ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && HasNUW) || + (CmpInst::isSigned(Pred) && HasNSW); + } else if (BO.getOpcode() == Instruction::Or) { + HasNUW = true; + HasNSW = true; + return true; + } else { + return false; + } + }; Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; - if (BO0 && BO0->getOpcode() == Instruction::Add) { - A = BO0->getOperand(0); - B = BO0->getOperand(1); + + if (BO0) { + match(BO0, m_AddLike(m_Value(A), m_Value(B))); + NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW); } - if (BO1 && BO1->getOpcode() == Instruction::Add) { - C = BO1->getOperand(0); - D = BO1->getOperand(1); + if (BO1) { + match(BO1, m_AddLike(m_Value(C), m_Value(D))); + NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW); } // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow. 
@@ -4764,17 +4777,15 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, APInt AP2Abs = AP2->abs(); if (AP1Abs.uge(AP2Abs)) { APInt Diff = *AP1 - *AP2; - bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1); - bool HasNSW = BO0->hasNoSignedWrap(); Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff); - Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW); + Value *NewAdd = Builder.CreateAdd( + A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW); return new ICmpInst(Pred, NewAdd, C); } else { APInt Diff = *AP2 - *AP1; - bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2); - bool HasNSW = BO1->hasNoSignedWrap(); Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff); - Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW); + Value *NewAdd = Builder.CreateAdd( + C, C3, "", Op1HasNUW && Diff.ule(*AP2), Op1HasNSW); return new ICmpInst(Pred, A, NewAdd); } } @@ -4868,16 +4879,14 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); // if Z != 0 and nsw(X * Z) and nsw(Y * Z) // X * Z eq/ne Y * Z -> X eq/ne Y - if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() && - BO1->hasNoSignedWrap()) + if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW) return new ICmpInst(Pred, X, Y); } else NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); // If Z != 0 and nuw(X * Z) and nuw(Y * Z) // X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y - if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() && - BO1->hasNoUnsignedWrap()) + if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW) return new ICmpInst(Pred, X, Y); } } @@ -4966,7 +4975,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); case Instruction::SDiv: - if (!I.isEquality() || !BO0->isExact() || !BO1->isExact()) + if (!(I.isEquality() || match(BO0->getOperand(1), m_NonNegative())) || + !BO0->isExact() || 
!BO1->isExact()) break; return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); @@ -4976,8 +4986,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); case Instruction::Shl: { - bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); - bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); + bool NUW = Op0HasNUW && Op1HasNUW; + bool NSW = Op0HasNSW && Op1HasNSW; if (!NUW && !NSW) break; if (!NSW && I.isSigned()) @@ -5029,10 +5039,10 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, } /// Fold icmp Pred min|max(X, Y), Z. -Instruction * -InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I, - MinMaxIntrinsic *MinMax, Value *Z, - ICmpInst::Predicate Pred) { +Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I, + MinMaxIntrinsic *MinMax, + Value *Z, + ICmpInst::Predicate Pred) { Value *X = MinMax->getLHS(); Value *Y = MinMax->getRHS(); if (ICmpInst::isSigned(Pred) && !MinMax->isSigned()) @@ -5161,24 +5171,6 @@ InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I, return nullptr; } -Instruction *InstCombinerImpl::foldICmpWithMinMax(ICmpInst &Cmp) { - ICmpInst::Predicate Pred = Cmp.getPredicate(); - Value *Lhs = Cmp.getOperand(0); - Value *Rhs = Cmp.getOperand(1); - - if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Lhs)) { - if (Instruction *Res = foldICmpWithMinMaxImpl(Cmp, MinMax, Rhs, Pred)) - return Res; - } - - if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Rhs)) { - if (Instruction *Res = foldICmpWithMinMaxImpl( - Cmp, MinMax, Lhs, ICmpInst::getSwappedPredicate(Pred))) - return Res; - } - - return nullptr; -} // Canonicalize checking for a power-of-2-or-zero value: static Instruction *foldICmpPow2Test(ICmpInst &I, @@ -6843,6 +6835,34 @@ static Instruction *foldReductionIdiom(ICmpInst &I, return nullptr; } +// This helper will be called with icmp operands in both orders. 
+Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred, + Value *Op0, Value *Op1, + ICmpInst &CxtI) { + // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'. + if (auto *GEP = dyn_cast<GEPOperator>(Op0)) + if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI)) + return NI; + + if (auto *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI)) + return NI; + + if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0)) + if (Instruction *Res = foldICmpWithMinMax(CxtI, MinMax, Op1, Pred)) + return Res; + + { + Value *X; + const APInt *C; + // icmp X+Cst, X + if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X) + return foldICmpAddOpConst(X, *C, Pred); + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { bool Changed = false; const SimplifyQuery Q = SQ.getWithInstruction(&I); @@ -6966,20 +6986,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpInstWithConstantNotInt(I)) return Res; - // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'. - if (auto *GEP = dyn_cast<GEPOperator>(Op0)) - if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I)) - return NI; - if (auto *GEP = dyn_cast<GEPOperator>(Op1)) - if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I)) - return NI; - - if (auto *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *NI = foldSelectICmp(I.getPredicate(), SI, Op1, I)) - return NI; - if (auto *SI = dyn_cast<SelectInst>(Op1)) - if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I)) - return NI; + if (Instruction *Res = foldICmpCommutative(I.getPredicate(), Op0, Op1, I)) + return Res; + if (Instruction *Res = + foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I)) + return Res; // In case of a comparison with two select instructions having the same // condition, check whether one of the resulting branches can be simplified. 
@@ -7030,9 +7041,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *R = foldICmpWithCastOp(I)) return R; - if (Instruction *Res = foldICmpWithMinMax(I)) - return Res; - { Value *X, *Y; // Transform (X & ~Y) == 0 --> (X & Y) != 0 @@ -7134,18 +7142,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { !ACXI->isWeak()) return ExtractValueInst::Create(ACXI, 1); - { - Value *X; - const APInt *C; - // icmp X+Cst, X - if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X) - return foldICmpAddOpConst(X, *C, I.getPredicate()); - - // icmp X, X+Cst - if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X) - return foldICmpAddOpConst(X, *C, I.getSwappedPredicate()); - } - if (Instruction *Res = foldICmpWithHighBitMask(I, Builder)) return Res; diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 9e76a0cf17b1..bdaf7550b4b4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -648,9 +648,8 @@ public: Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp, const APInt &C); Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); - Instruction *foldICmpWithMinMaxImpl(Instruction &I, MinMaxIntrinsic *MinMax, - Value *Z, ICmpInst::Predicate Pred); - Instruction *foldICmpWithMinMax(ICmpInst &Cmp); + Instruction *foldICmpWithMinMax(Instruction &I, MinMaxIntrinsic *MinMax, + Value *Z, ICmpInst::Predicate Pred); Instruction *foldICmpEquality(ICmpInst &Cmp); Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); Instruction *foldSignBitTest(ICmpInst &I); @@ -708,6 +707,8 @@ public: const APInt &C); Instruction *foldICmpBitCast(ICmpInst &Cmp); Instruction *foldICmpWithTrunc(ICmpInst &Cmp); + Instruction *foldICmpCommutative(ICmpInst::Predicate Pred, Value *Op0, + Value *Op1, ICmpInst 
&CxtI); // Helpers of visitSelectInst(). Instruction *foldSelectOfBools(SelectInst &SI); diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 20bf00344b14..ab55f235920a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1171,14 +1171,15 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, return nullptr; } -static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, - InstCombinerImpl &IC) { +static Value *canonicalizeSPF(ICmpInst &Cmp, Value *TrueVal, Value *FalseVal, + InstCombinerImpl &IC) { Value *LHS, *RHS; // TODO: What to do with pointer min/max patterns? - if (!Sel.getType()->isIntOrIntVectorTy()) + if (!TrueVal->getType()->isIntOrIntVectorTy()) return nullptr; - SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor; + SelectPatternFlavor SPF = + matchDecomposedSelectPattern(&Cmp, TrueVal, FalseVal, LHS, RHS).Flavor; if (SPF == SelectPatternFlavor::SPF_ABS || SPF == SelectPatternFlavor::SPF_NABS) { if (!Cmp.hasOneUse() && !RHS->hasOneUse()) @@ -1188,13 +1189,13 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && match(RHS, m_NSWNeg(m_Specific(LHS))); Constant *IntMinIsPoisonC = - ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); + ConstantInt::get(Type::getInt1Ty(Cmp.getContext()), IntMinIsPoison); Instruction *Abs = IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); if (SPF == SelectPatternFlavor::SPF_NABS) - return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! - return IC.replaceInstUsesWith(Sel, Abs); + return IC.Builder.CreateNeg(Abs); // Always without NSW flag! 
+ return Abs; } if (SelectPatternResult::isMinOrMax(SPF)) { @@ -1215,8 +1216,7 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, default: llvm_unreachable("Unexpected SPF"); } - return IC.replaceInstUsesWith( - Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS)); + return IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS); } return nullptr; @@ -1677,8 +1677,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI)) return NewSel; - if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this)) - return NewSPF; + if (Value *V = + canonicalizeSPF(*ICI, SI.getTrueValue(), SI.getFalseValue(), *this)) + return replaceInstUsesWith(SI, V); if (Value *V = foldSelectInstWithICmpConst(SI, ICI, Builder)) return replaceInstUsesWith(SI, V); @@ -2363,6 +2364,9 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, Value *FVal = Sel.getFalseValue(); Type *SelType = Sel.getType(); + if (ICmpInst::makeCmpResultType(TVal->getType()) != Cond->getType()) + return nullptr; + // Match select ?, TC, FC where the constants are equal but negated. // TODO: Generalize to handle a negated variable operand? const APFloat *TC, *FC; @@ -3790,5 +3794,50 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *I = foldBitCeil(SI, Builder)) return I; + // Fold: + // (select A && B, T, F) -> (select A, (select B, T, F), F) + // (select A || B, T, F) -> (select A, T, (select B, T, F)) + // if (select B, T, F) is foldable. + // TODO: preserve FMF flags + auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A, + Value *B) -> Instruction * { + if (Value *V = simplifySelectInst(B, TrueVal, FalseVal, + SQ.getWithInstruction(&SI))) + return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V); + + // Is (select B, T, F) a SPF? 
+ if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) { + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(B)) + if (Value *V = canonicalizeSPF(*Cmp, TrueVal, FalseVal, *this)) + return SelectInst::Create(A, IsAnd ? V : TrueVal, + IsAnd ? FalseVal : V); + } + + return nullptr; + }; + + Value *LHS, *RHS; + if (match(CondVal, m_And(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS)) + return I; + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, RHS, LHS)) + return I; + } else if (match(CondVal, m_Or(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS)) + return I; + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, RHS, LHS)) + return I; + } else { + // We cannot swap the operands of logical and/or. + // TODO: Can we swap the operands by inserting a freeze? + if (match(CondVal, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS)) + return I; + } else if (match(CondVal, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS)) + return I; + } + } + return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 7f5a7b666903..351fc3b0174f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2469,31 +2469,43 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { DL.getIndexSizeInBits(AS)) { uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue(); - bool Matched = false; - uint64_t C; - Value *V = nullptr; if (TyAllocSize == 1) { - V = GEP.getOperand(1); - Matched = true; - } else if (match(GEP.getOperand(1), - 
m_AShr(m_Value(V), m_ConstantInt(C)))) { - if (TyAllocSize == 1ULL << C) - Matched = true; - } else if (match(GEP.getOperand(1), - m_SDiv(m_Value(V), m_ConstantInt(C)))) { - if (TyAllocSize == C) - Matched = true; + // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), + // but only if the result pointer is only used as if it were an integer, + // or both point to the same underlying object (otherwise provenance is + // not necessarily retained). + Value *X = GEP.getPointerOperand(); + Value *Y; + if (match(GEP.getOperand(1), + m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) && + GEPType == Y->getType()) { + bool HasSameUnderlyingObject = + getUnderlyingObject(X) == getUnderlyingObject(Y); + bool Changed = false; + GEP.replaceUsesWithIf(Y, [&](Use &U) { + bool ShouldReplace = HasSameUnderlyingObject || + isa<ICmpInst>(U.getUser()) || + isa<PtrToIntInst>(U.getUser()); + Changed |= ShouldReplace; + return ShouldReplace; + }); + return Changed ? &GEP : nullptr; + } + } else { + // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V) + Value *V; + if ((has_single_bit(TyAllocSize) && + match(GEP.getOperand(1), + m_Exact(m_AShr(m_Value(V), + m_SpecificInt(countr_zero(TyAllocSize)))))) || + match(GEP.getOperand(1), + m_Exact(m_SDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) { + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + Builder.getInt8Ty(), GEP.getPointerOperand(), V); + NewGEP->setIsInBounds(GEP.isInBounds()); + return NewGEP; + } } - - // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but - // only if both point to the same underlying object (otherwise provenance - // is not necessarily retained). - Value *Y; - Value *X = GEP.getOperand(0); - if (Matched && - match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) && - getUnderlyingObject(X) == getUnderlyingObject(Y)) - return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType); } } // We do not handle pointer-vector geps here. 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 899d7e0a11e6..06c87bd6dc37 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -366,6 +366,13 @@ struct Decomposition { append_range(Vars, Other.Vars); } + void sub(const Decomposition &Other) { + Decomposition Tmp = Other; + Tmp.mul(-1); + add(Tmp.Offset); + append_range(Vars, Tmp.Vars); + } + void mul(int64_t Factor) { Offset = multiplyWithOverflow(Offset, Factor); for (auto &Var : Vars) @@ -569,10 +576,12 @@ static Decomposition decompose(Value *V, return Result; } - if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) - return {-1 * CI->getSExtValue(), {{1, Op0}}}; - if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) - return {0, {{1, Op0}, {-1, Op1}}}; + if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) { + auto ResA = decompose(Op0, Preconditions, IsSigned, DL); + auto ResB = decompose(Op1, Preconditions, IsSigned, DL); + ResA.sub(ResB); + return ResA; + } return {V, IsKnownNonNegative}; } @@ -1010,22 +1019,14 @@ void State::addInfoFor(BasicBlock &BB) { continue; } - if (match(&I, m_Intrinsic<Intrinsic::ssub_with_overflow>())) { - WorkList.push_back( - FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I))); - continue; - } - - if (isa<MinMaxIntrinsic>(&I)) { - WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I)); - continue; - } - - Value *A, *B; - CmpInst::Predicate Pred; - // For now, just handle assumes with a single compare as condition. - if (match(&I, m_Intrinsic<Intrinsic::assume>( - m_ICmp(Pred, m_Value(A), m_Value(B))))) { + auto *II = dyn_cast<IntrinsicInst>(&I); + Intrinsic::ID ID = II ? 
II->getIntrinsicID() : Intrinsic::not_intrinsic; + switch (ID) { + case Intrinsic::assume: { + Value *A, *B; + CmpInst::Predicate Pred; + if (!match(I.getOperand(0), m_ICmp(Pred, m_Value(A), m_Value(B)))) + break; if (GuaranteedToExecute) { // The assume is guaranteed to execute when BB is entered, hence Cond // holds on entry to BB. @@ -1035,7 +1036,23 @@ void State::addInfoFor(BasicBlock &BB) { WorkList.emplace_back( FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I)); } + break; + } + // Enqueue ssub_with_overflow for simplification. + case Intrinsic::ssub_with_overflow: + WorkList.push_back( + FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I))); + break; + // Enqueue the intrinsics to add extra info. + case Intrinsic::abs: + case Intrinsic::umin: + case Intrinsic::umax: + case Intrinsic::smin: + case Intrinsic::smax: + WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I)); + break; } + GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I); } @@ -1693,6 +1710,13 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, ICmpInst::Predicate Pred; if (!CB.isConditionFact()) { + Value *X; + if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) { + // TODO: Add CB.Inst >= 0 fact. 
+ AddFact(CmpInst::ICMP_SGE, CB.Inst, X); + continue; + } + if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) { Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate()); AddFact(Pred, MinMax, MinMax->getLHS()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp index fb4d82885377..282c44563466 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp @@ -29,9 +29,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" @@ -66,19 +67,6 @@ class CanonicalizeFreezeInLoopsImpl { ScalarEvolution &SE; DominatorTree &DT; - struct FrozenIndPHIInfo { - // A freeze instruction that uses an induction phi - FreezeInst *FI = nullptr; - // The induction phi, step instruction, the operand idx of StepInst which is - // a step value - PHINode *PHI; - BinaryOperator *StepInst; - unsigned StepValIdx = 0; - - FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst) - : PHI(PHI), StepInst(StepInst) {} - }; - // Can freeze instruction be pushed into operands of I? // In order to do this, I should not create a poison after I's flags are // stripped. 
@@ -99,6 +87,46 @@ public: } // anonymous namespace +namespace llvm { + +struct FrozenIndPHIInfo { + // A freeze instruction that uses an induction phi + FreezeInst *FI = nullptr; + // The induction phi, step instruction, the operand idx of StepInst which is + // a step value + PHINode *PHI; + BinaryOperator *StepInst; + unsigned StepValIdx = 0; + + FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst) + : PHI(PHI), StepInst(StepInst) {} + + bool operator==(const FrozenIndPHIInfo &Other) { return FI == Other.FI; } +}; + +template <> struct DenseMapInfo<FrozenIndPHIInfo> { + static inline FrozenIndPHIInfo getEmptyKey() { + return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getEmptyKey(), + DenseMapInfo<BinaryOperator *>::getEmptyKey()); + } + + static inline FrozenIndPHIInfo getTombstoneKey() { + return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getTombstoneKey(), + DenseMapInfo<BinaryOperator *>::getTombstoneKey()); + } + + static unsigned getHashValue(const FrozenIndPHIInfo &Val) { + return DenseMapInfo<FreezeInst *>::getHashValue(Val.FI); + }; + + static bool isEqual(const FrozenIndPHIInfo &LHS, + const FrozenIndPHIInfo &RHS) { + return LHS.FI == RHS.FI; + }; +}; + +} // end namespace llvm + // Given U = (value, user), replace value with freeze(value), and let // SCEV forget user. The inserted freeze is placed in the preheader. 
void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) { @@ -126,7 +154,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() { if (!L->isLoopSimplifyForm()) return false; - SmallVector<FrozenIndPHIInfo, 4> Candidates; + SmallSetVector<FrozenIndPHIInfo, 4> Candidates; for (auto &PHI : L->getHeader()->phis()) { InductionDescriptor ID; @@ -155,7 +183,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() { if (auto *FI = dyn_cast<FreezeInst>(U)) { LLVM_DEBUG(dbgs() << "canonfr: found: " << *FI << "\n"); Info.FI = FI; - Candidates.push_back(Info); + Candidates.insert(Info); } }; for_each(PHI.users(), Visit); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index a758fb306982..c76cc9db16d7 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -3593,8 +3593,9 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C, if (isa<ConstantInt>(C)) return createIntegerExpression(C); - if (Ty.isFloatTy() || Ty.isDoubleTy()) { - const APFloat &APF = cast<ConstantFP>(&C)->getValueAPF(); + auto *FP = dyn_cast<ConstantFP>(&C); + if (FP && (Ty.isFloatTy() || Ty.isDoubleTy())) { + const APFloat &APF = FP->getValueAPF(); return DIB.createConstantValueExpression( APF.bitcastToAPInt().getZExtValue()); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f82e161fb846..8e135d80f4f2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8174,13 +8174,20 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, bool Consecutive = Reverse || Decision == LoopVectorizationCostModel::CM_Widen; + VPValue *Ptr = isa<LoadInst>(I) ? 
Operands[0] : Operands[1]; + if (Consecutive) { + auto *VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), + Reverse, I->getDebugLoc()); + Builder.getInsertBlock()->appendRecipe(VectorPtr); + Ptr = VectorPtr; + } if (LoadInst *Load = dyn_cast<LoadInst>(I)) - return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask, - Consecutive, Reverse); + return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive, + Reverse); StoreInst *Store = cast<StoreInst>(I); - return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0], - Mask, Consecutive, Reverse); + return new VPWidenMemoryInstructionRecipe(*Store, Ptr, Operands[0], Mask, + Consecutive, Reverse); } /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also @@ -9475,8 +9482,8 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF); bool isMaskRequired = getMask(); if (isMaskRequired) { - // Mask reversal is only neede for non-all-one (null) masks, as reverse of a - // null all-one mask is a null mask. + // Mask reversal is only needed for non-all-one (null) masks, as reverse of + // a null all-one mask is a null mask. for (unsigned Part = 0; Part < State.UF; ++Part) { Value *Mask = State.get(getMask(), Part); if (isReverse()) @@ -9485,44 +9492,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { } } - const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * { - // Calculate the pointer for the specific unroll-part. - Value *PartPtr = nullptr; - - // Use i32 for the gep index type when the value is constant, - // or query DataLayout for a more suitable index type otherwise. - const DataLayout &DL = - Builder.GetInsertBlock()->getModule()->getDataLayout(); - Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0) - ? 
DL.getIndexType(PointerType::getUnqual( - ScalarDataTy->getContext())) - : Builder.getInt32Ty(); - bool InBounds = false; - if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) - InBounds = gep->isInBounds(); - if (isReverse()) { - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - // RunTimeVF = VScale * VF.getKnownMinValue() - // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() - Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); - // NumElt = -Part * RunTimeVF - Value *NumElt = - Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); - // LastLane = 1 - RunTimeVF - Value *LastLane = - Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); - PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds); - PartPtr = - Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds); - } else { - Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); - PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds); - } - - return PartPtr; - }; - // Handle Stores: if (SI) { State.setDebugLocFrom(SI->getDebugLoc()); @@ -9543,8 +9512,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { // We don't want to update the value in the map as it might be used in // another expression. So don't call resetVectorValue(StoredVal). 
} - auto *VecPtr = - CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); + auto *VecPtr = State.get(getAddr(), Part); if (isMaskRequired) NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, BlockInMaskParts[Part]); @@ -9568,8 +9536,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { nullptr, "wide.masked.gather"); State.addMetadata(NewLI, LI); } else { - auto *VecPtr = - CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); + auto *VecPtr = State.get(getAddr(), Part); if (isMaskRequired) NewLI = Builder.CreateMaskedLoad( DataTy, VecPtr, Alignment, BlockInMaskParts[Part], diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 32913b3f5569..304991526064 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4925,36 +4925,34 @@ void BoUpSLP::buildExternalUses( LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); Instruction *UserInst = dyn_cast<Instruction>(U); - if (!UserInst) + if (!UserInst || isDeleted(UserInst)) continue; - if (isDeleted(UserInst)) + // Ignore users in the user ignore list. + if (UserIgnoreList && UserIgnoreList->contains(UserInst)) continue; // Skip in-tree scalars that become vectors if (TreeEntry *UseEntry = getTreeEntry(U)) { - Value *UseScalar = UseEntry->Scalars[0]; // Some in-tree scalars will remain as scalar in vectorized - // instructions. If that is the case, the one in Lane 0 will + // instructions. If that is the case, the one in FoundLane will // be used. 
- if (UseScalar != U || - UseEntry->State == TreeEntry::ScatterVectorize || + if (UseEntry->State == TreeEntry::ScatterVectorize || UseEntry->State == TreeEntry::PossibleStridedVectorize || - !doesInTreeUserNeedToExtract(Scalar, UserInst, TLI)) { + !doesInTreeUserNeedToExtract( + Scalar, cast<Instruction>(UseEntry->Scalars.front()), TLI)) { LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U << ".\n"); assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state"); continue; } + U = nullptr; } - // Ignore users in the user ignore list. - if (UserIgnoreList && UserIgnoreList->contains(UserInst)) - continue; - - LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " - << Lane << " from " << *Scalar << ".\n"); - ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane)); + LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst + << " from lane " << Lane << " from " << *Scalar + << ".\n"); + ExternalUses.emplace_back(Scalar, U, FoundLane); } } } @@ -8384,6 +8382,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, (void)E; return TTI->getInstructionCost(VI, CostKind); }; + // FIXME: Workaround for syntax error reported by MSVC buildbots. + TargetTransformInfo &TTIRef = *TTI; // Need to clear CommonCost since the final shuffle cost is included into // vector cost. auto GetVectorCost = [&](InstructionCost) { @@ -8398,14 +8398,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, // No need to add new vector costs here since we're going to reuse // same main/alternate vector ops, just do different shuffling. 
} else if (Instruction::isBinaryOp(E->getOpcode())) { - VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); + VecCost = + TTIRef.getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); VecCost += - TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); + TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) { auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size()); - VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, - CI0->getPredicate(), CostKind, VL0); - VecCost += TTI->getCmpSelInstrCost( + VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, + CI0->getPredicate(), CostKind, VL0); + VecCost += TTIRef.getCmpSelInstrCost( E->getOpcode(), VecTy, MaskTy, cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind, E->getAltOp()); @@ -8414,10 +8415,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType(); auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size()); auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size()); - VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, - TTI::CastContextHint::None, CostKind); - VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, - TTI::CastContextHint::None, CostKind); + VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, + TTI::CastContextHint::None, CostKind); + VecCost += + TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, + TTI::CastContextHint::None, CostKind); } SmallVector<int> Mask; E->buildAltOpShuffleMask( @@ -8426,8 +8428,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, return I->getOpcode() == E->getAltOpcode(); }, Mask); - VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - FinalVecTy, Mask); + VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, + FinalVecTy, Mask); + // Patterns like 
[fadd,fsub] can be combined into a single instruction + // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we + // need to take into account their order when looking for the most used + // order. + unsigned Opcode0 = E->getOpcode(); + unsigned Opcode1 = E->getAltOpcode(); + // The opcode mask selects between the two opcodes. + SmallBitVector OpcodeMask(E->Scalars.size(), false); + for (unsigned Lane : seq<unsigned>(0, E->Scalars.size())) + if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1) + OpcodeMask.set(Lane); + // If this pattern is supported by the target then we consider the + // order. + if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) { + InstructionCost AltVecCost = TTIRef.getAltInstrCost( + VecTy, Opcode0, Opcode1, OpcodeMask, CostKind); + return AltVecCost < VecCost ? AltVecCost : VecCost; + } + // TODO: Check the reverse order too. return VecCost; }; return GetCostDiff(GetScalarCost, GetVectorCost); @@ -11493,17 +11514,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *PO = LI->getPointerOperand(); if (E->State == TreeEntry::Vectorize) { NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign()); - - // The pointer operand uses an in-tree scalar so we add the new - // LoadInst to ExternalUses list to make sure that an extract will - // be generated in the future. - if (isa<Instruction>(PO)) { - if (TreeEntry *Entry = getTreeEntry(PO)) { - // Find which lane we need to extract. 
- unsigned FoundLane = Entry->findLaneForValue(PO); - ExternalUses.emplace_back(PO, NewLI, FoundLane); - } - } } else { assert((E->State == TreeEntry::ScatterVectorize || E->State == TreeEntry::PossibleStridedVectorize) && @@ -11539,17 +11549,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { StoreInst *ST = Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign()); - // The pointer operand uses an in-tree scalar, so add the new StoreInst to - // ExternalUses to make sure that an extract will be generated in the - // future. - if (isa<Instruction>(Ptr)) { - if (TreeEntry *Entry = getTreeEntry(Ptr)) { - // Find which lane we need to extract. - unsigned FoundLane = Entry->findLaneForValue(Ptr); - ExternalUses.push_back(ExternalUser(Ptr, ST, FoundLane)); - } - } - Value *V = propagateMetadata(ST, E->Scalars); E->VectorizedValue = V; @@ -11597,10 +11596,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { CallInst *CI = cast<CallInst>(VL0); setInsertPointAfterBundle(E); - Intrinsic::ID IID = Intrinsic::not_intrinsic; - if (Function *FI = CI->getCalledFunction()) - IID = FI->getIntrinsicID(); - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI); @@ -11611,18 +11606,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { SmallVector<Value *> OpVecs; SmallVector<Type *, 2> TysForDecl; // Add return type if intrinsic is overloaded on it. - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1)) + if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); for (unsigned I : seq<unsigned>(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. 
- if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, I)) { + if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { CallInst *CEI = cast<CallInst>(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I)) + if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) TysForDecl.push_back(ScalarArg->getType()); continue; } @@ -11634,7 +11629,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I)) + if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) TysForDecl.push_back(OpVec->getType()); } @@ -11654,18 +11649,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { CI->getOperandBundlesAsDefs(OpBundles); Value *V = Builder.CreateCall(CF, OpVecs, OpBundles); - // The scalar argument uses an in-tree scalar so we add the new vectorized - // call to ExternalUses list to make sure that an extract will be - // generated in the future. - if (isa_and_present<Instruction>(ScalarArg)) { - if (TreeEntry *Entry = getTreeEntry(ScalarArg)) { - // Find which lane we need to extract. - unsigned FoundLane = Entry->findLaneForValue(ScalarArg); - ExternalUses.push_back( - ExternalUser(ScalarArg, cast<User>(V), FoundLane)); - } - } - propagateIRFlags(V, E->Scalars, VL0); V = FinalShuffle(V, E, VecTy, IsSigned); @@ -11877,6 +11860,7 @@ Value *BoUpSLP::vectorizeTree( DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs; SmallDenseSet<Value *, 4> UsedInserts; DenseMap<Value *, Value *> VectorCasts; + SmallDenseSet<Value *, 4> ScalarsWithNullptrUser; // Extract all of the elements with the external uses. 
for (const auto &ExternalUse : ExternalUses) { Value *Scalar = ExternalUse.Scalar; @@ -11947,13 +11931,27 @@ Value *BoUpSLP::vectorizeTree( VectorToInsertElement.try_emplace(Vec, IE); return Vec; }; - // If User == nullptr, the Scalar is used as extra arg. Generate - // ExtractElement instruction and update the record for this scalar in - // ExternallyUsedValues. + // If User == nullptr, the Scalar remains as scalar in vectorized + // instructions or is used as extra arg. Generate ExtractElement instruction + // and update the record for this scalar in ExternallyUsedValues. if (!User) { - assert(ExternallyUsedValues.count(Scalar) && - "Scalar with nullptr as an external user must be registered in " - "ExternallyUsedValues map"); + if (!ScalarsWithNullptrUser.insert(Scalar).second) + continue; + assert((ExternallyUsedValues.count(Scalar) || + any_of(Scalar->users(), + [&](llvm::User *U) { + TreeEntry *UseEntry = getTreeEntry(U); + return UseEntry && + UseEntry->State == TreeEntry::Vectorize && + E->State == TreeEntry::Vectorize && + doesInTreeUserNeedToExtract( + Scalar, + cast<Instruction>(UseEntry->Scalars.front()), + TLI); + })) && + "Scalar with nullptr User must be registered in " + "ExternallyUsedValues map or remain as scalar in vectorized " + "instructions"); if (auto *VecI = dyn_cast<Instruction>(Vec)) { if (auto *PHI = dyn_cast<PHINode>(VecI)) Builder.SetInsertPoint(PHI->getParent(), @@ -16222,7 +16220,7 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { for (auto *V : Candidates) { auto *GEP = cast<GetElementPtrInst>(V); auto *GEPIdx = GEP->idx_begin()->get(); - assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx)); + assert(GEP->getNumIndices() == 1 && !isa<Constant>(GEPIdx)); Bundle[BundleIndex++] = GEPIdx; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h index 94cb76889813..7d33baac52c9 100644 --- 
a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1357,6 +1357,36 @@ public: #endif }; +/// A recipe to compute the pointers for widened memory accesses of IndexTy for +/// all parts. If IsReverse is true, compute pointers for accessing the input in +/// reverse order per part. +class VPVectorPointerRecipe : public VPRecipeBase, public VPValue { + Type *IndexedTy; + bool IsReverse; + +public: + VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, + DebugLoc DL) + : VPRecipeBase(VPDef::VPVectorPointerSC, {Ptr}, DL), VPValue(this), + IndexedTy(IndexedTy), IsReverse(IsReverse) {} + + VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) + + void execute(VPTransformState &State) override; + + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A pure virtual base class for all recipes modeling header phis, including /// phis for first order recurrences, pointer inductions and reductions. 
The /// start value is the first operand of the recipe and the incoming value from diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 02e400d590be..76961629aece 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -498,16 +498,17 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { "DbgInfoIntrinsic should have been dropped during VPlan construction"); State.setDebugLocFrom(CI.getDebugLoc()); + bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic; FunctionType *VFTy = nullptr; if (Variant) VFTy = Variant->getFunctionType(); for (unsigned Part = 0; Part < State.UF; ++Part) { SmallVector<Type *, 2> TysForDecl; // Add return type if intrinsic is overloaded on it. - if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) { + if (UseIntrinsic && + isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) TysForDecl.push_back( VectorType::get(CI.getType()->getScalarType(), State.VF)); - } SmallVector<Value *, 4> Args; for (const auto &I : enumerate(operands())) { // Some intrinsics have a scalar argument - don't replace it with a @@ -516,18 +517,19 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { // e.g. linear parameters for pointers. 
Value *Arg; if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) || - (VectorIntrinsicID != Intrinsic::not_intrinsic && + (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))) Arg = State.get(I.value(), VPIteration(0, 0)); else Arg = State.get(I.value(), Part); - if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) + if (UseIntrinsic && + isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) TysForDecl.push_back(Arg->getType()); Args.push_back(Arg); } Function *VectorF; - if (VectorIntrinsicID != Intrinsic::not_intrinsic) { + if (UseIntrinsic) { // Use vector version of the intrinsic. Module *M = State.Builder.GetInsertBlock()->getModule(); VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); @@ -1209,6 +1211,59 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +void VPVectorPointerRecipe ::execute(VPTransformState &State) { + auto &Builder = State.Builder; + State.setDebugLocFrom(getDebugLoc()); + for (unsigned Part = 0; Part < State.UF; ++Part) { + // Calculate the pointer for the specific unroll-part. + Value *PartPtr = nullptr; + // Use i32 for the gep index type when the value is constant, + // or query DataLayout for a more suitable index type otherwise. + const DataLayout &DL = + Builder.GetInsertBlock()->getModule()->getDataLayout(); + Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0) + ? DL.getIndexType(IndexedTy->getPointerTo()) + : Builder.getInt32Ty(); + Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); + bool InBounds = false; + if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) + InBounds = GEP->isInBounds(); + if (IsReverse) { + // If the address is consecutive but reversed, then the + // wide store needs to start at the last vector element. 
+ // RunTimeVF = VScale * VF.getKnownMinValue() + // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() + Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); + // NumElt = -Part * RunTimeVF + Value *NumElt = Builder.CreateMul( + ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); + // LastLane = 1 - RunTimeVF + Value *LastLane = + Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); + PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); + PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds); + } else { + Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); + PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); + } + + State.set(this, PartPtr, Part); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = vector-pointer "; + if (IsReverse) + O << "(reverse) "; + + printOperands(O, SlotTracker); +} +#endif + void VPBlendRecipe::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); // We know that all PHIs in non-header blocks are converted into diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h index 116acad8e8f3..8cc98f4abf93 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -351,6 +351,7 @@ public: VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, + VPVectorPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, diff --git a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp index fcb6392a1d95..299b7856ec0b 100644 --- a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp +++ 
b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp @@ -1287,8 +1287,7 @@ static const char *matchFlagWithArg(StringRef Expected, ArrayRef<const char *> Args) { StringRef Arg = *ArgIt; - if (Arg.starts_with("--")) - Arg = Arg.substr(2); + Arg.consume_front("--"); size_t len = Expected.size(); if (Arg == Expected) { diff --git a/contrib/llvm-project/llvm/tools/llvm-diff/llvm-diff.cpp b/contrib/llvm-project/llvm/tools/llvm-diff/llvm-diff.cpp index 6fe18a51c9f5..3e77b1ed89b0 100644 --- a/contrib/llvm-project/llvm/tools/llvm-diff/llvm-diff.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-diff/llvm-diff.cpp @@ -42,8 +42,7 @@ static std::unique_ptr<Module> readModule(LLVMContext &Context, static void diffGlobal(DifferenceEngine &Engine, Module &L, Module &R, StringRef Name) { // Drop leading sigils from the global name. - if (Name.starts_with("@")) - Name = Name.substr(1); + Name.consume_front("@"); Function *LFn = L.getFunction(Name); Function *RFn = R.getFunction(Name); diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp index 06e7ec3b9230..9ee1472bdf5c 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp @@ -563,6 +563,13 @@ static inline bool inheritsFrom(InstructionContext child, case IC_EVEX_L2_W_XD_KZ_B: case IC_EVEX_L2_W_OPSIZE_KZ_B: return false; + case IC_EVEX_NF: + case IC_EVEX_B_NF: + case IC_EVEX_OPSIZE_NF: + case IC_EVEX_OPSIZE_B_NF: + case IC_EVEX_W_NF: + case IC_EVEX_W_B_NF: + return false; default: errs() << "Unknown instruction class: " << stringForContext((InstructionContext)parent) << "\n"; @@ -889,7 +896,19 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { if ((index & ATTR_EVEX) && (index & ATTR_OPSIZE) && (index & ATTR_ADSIZE)) o << "IC_EVEX_OPSIZE_ADSIZE"; - else if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & 
ATTR_VEXL)) { + else if (index & ATTR_EVEXNF) { + o << "IC_EVEX"; + if (index & ATTR_REXW) + o << "_W"; + else if (index & ATTR_OPSIZE) + o << "_OPSIZE"; + + if (index & ATTR_EVEXB) + o << "_B"; + + o << "_NF"; + } else if ((index & ATTR_EVEX) || (index & ATTR_VEX) || + (index & ATTR_VEXL)) { if (index & ATTR_EVEX) o << "IC_EVEX"; else diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index d3299e281031..101b75e2f087 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -348,7 +348,9 @@ public: // memory form: broadcast if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B)) return false; - if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B)) + // EVEX_B indicates NDD for MAP4 instructions + if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) && + RegRI.OpMap != X86Local::T_MAP4) return false; if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form)) @@ -369,7 +371,8 @@ public: RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L, RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, - RegRI.HasEVEX_L2, RegRec->getValueAsBit("hasEVEX_RC"), + RegRI.HasEVEX_L2, RegRI.HasEVEX_NF, + RegRec->getValueAsBit("hasEVEX_RC"), RegRec->getValueAsBit("hasLockPrefix"), RegRec->getValueAsBit("hasNoTrackPrefix"), RegRec->getValueAsBit("EVEX_W1_VEX_W0")) != @@ -377,7 +380,8 @@ public: MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L, MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, - MemRI.HasEVEX_L2, MemRec->getValueAsBit("hasEVEX_RC"), + MemRI.HasEVEX_L2, MemRI.HasEVEX_NF, + MemRec->getValueAsBit("hasEVEX_RC"), MemRec->getValueAsBit("hasLockPrefix"), MemRec->getValueAsBit("hasNoTrackPrefix"), MemRec->getValueAsBit("EVEX_W1_VEX_W0"))) @@ -668,6 +672,14 @@ void 
X86FoldTablesEmitter::run(raw_ostream &O) { if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end()) continue; + // Promoted legacy instruction is in EVEX space, and has REX2-encoding + // alternative. It's added due to HW design and never emitted by compiler. + if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) == + X86Local::T_MAP4 && + byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) == + X86Local::ExplicitEVEX) + continue; + // - Instructions including RST register class operands are not relevant // for memory folding (for further details check the explanation in // lib/Target/X86/X86InstrFPStack.td file). diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp index 47ee9544f323..fb430676c504 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -125,6 +125,7 @@ RecognizableInstrBase::RecognizableInstrBase(const CodeGenInstruction &insn) { HasEVEX_K = Rec->getValueAsBit("hasEVEX_K"); HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z"); HasEVEX_B = Rec->getValueAsBit("hasEVEX_B"); + HasEVEX_NF = Rec->getValueAsBit("hasEVEX_NF"); IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly"); IsAsmParserOnly = Rec->getValueAsBit("isAsmParserOnly"); ForceDisassemble = Rec->getValueAsBit("ForceDisassemble"); @@ -185,6 +186,9 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables, : (HasEVEX_KZ ? n##_KZ \ : (HasEVEX_K ? n##_K : (HasEVEX_B ? n##_B : n))))) +#define EVEX_NF(n) (HasEVEX_NF ? n##_NF : n) +#define EVEX_B_NF(n) (HasEVEX_B ? 
EVEX_NF(n##_B) : EVEX_NF(n)) + InstructionContext RecognizableInstr::insnContext() const { InstructionContext insnContext; @@ -193,8 +197,15 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Don't support VEX.L if EVEX_L2 is enabled: " << Name << "\n"; llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled"); } - // VEX_L & VEX_W - if (!EncodeRC && HasVEX_L && HasREX_W) { + if (HasEVEX_NF) { + if (OpPrefix == X86Local::PD) + insnContext = EVEX_B_NF(IC_EVEX_OPSIZE); + else if (HasREX_W) + insnContext = EVEX_B_NF(IC_EVEX_W); + else + insnContext = EVEX_B_NF(IC_EVEX); + } else if (!EncodeRC && HasVEX_L && HasREX_W) { + // VEX_L & VEX_W if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE); else if (OpPrefix == X86Local::XS) @@ -486,6 +497,7 @@ void RecognizableInstr::emitInstructionSpecifier() { ++additionalOperands; #endif + bool IsND = OpMap == X86Local::T_MAP4 && HasEVEX_B && HasVEX_4V; switch (Form) { default: llvm_unreachable("Unhandled form"); @@ -536,11 +548,14 @@ void RecognizableInstr::emitInstructionSpecifier() { numPhysicalOperands <= 3 + additionalOperands && "Unexpected number of operands for MRMDestReg"); + if (IsND) + HANDLE_OPERAND(vvvvRegister) + HANDLE_OPERAND(rmRegister) if (HasEVEX_K) HANDLE_OPERAND(writemaskRegister) - if (HasVEX_4V) + if (!IsND && HasVEX_4V) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) @@ -570,12 +585,15 @@ void RecognizableInstr::emitInstructionSpecifier() { numPhysicalOperands <= 3 + additionalOperands && "Unexpected number of operands for MRMDestMemFrm with VEX_4V"); + if (IsND) + HANDLE_OPERAND(vvvvRegister) + HANDLE_OPERAND(memory) if (HasEVEX_K) HANDLE_OPERAND(writemaskRegister) - if (HasVEX_4V) + if (!IsND && HasVEX_4V) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) 
@@ -594,12 +612,15 @@ void RecognizableInstr::emitInstructionSpecifier() { numPhysicalOperands <= 4 + additionalOperands && "Unexpected number of operands for MRMSrcRegFrm"); + if (IsND) + HANDLE_OPERAND(vvvvRegister) + HANDLE_OPERAND(roRegister) if (HasEVEX_K) HANDLE_OPERAND(writemaskRegister) - if (HasVEX_4V) + if (!IsND && HasVEX_4V) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) @@ -641,13 +662,15 @@ void RecognizableInstr::emitInstructionSpecifier() { assert(numPhysicalOperands >= 2 + additionalOperands && numPhysicalOperands <= 4 + additionalOperands && "Unexpected number of operands for MRMSrcMemFrm"); + if (IsND) + HANDLE_OPERAND(vvvvRegister) HANDLE_OPERAND(roRegister) if (HasEVEX_K) HANDLE_OPERAND(writemaskRegister) - if (HasVEX_4V) + if (!IsND && HasVEX_4V) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) @@ -1216,6 +1239,8 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s, OperandEncoding RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s, uint8_t OpSize) { + ENCODING("GR8", ENCODING_VVVV) + ENCODING("GR16", ENCODING_VVVV) ENCODING("GR32", ENCODING_VVVV) ENCODING("GR64", ENCODING_VVVV) ENCODING("FR32", ENCODING_VVVV) diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.h b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.h index 61ad5e32b3fb..007c700cdfaf 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.h +++ b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.h @@ -172,7 +172,7 @@ enum { PD = 1, XS = 2, XD = 3, PS = 4 }; enum { VEX = 1, XOP = 2, EVEX = 3 }; enum { OpSize16 = 1, OpSize32 = 2 }; enum { AdSize16 = 1, AdSize32 = 2, AdSize64 = 3 }; -enum { ExplicitREX2 = 1 }; +enum { ExplicitREX2 = 1, ExplicitEVEX = 3 }; } // namespace 
X86Local namespace X86Disassembler { @@ -212,6 +212,8 @@ struct RecognizableInstrBase { bool HasEVEX_KZ; /// The hasEVEX_B field from the record bool HasEVEX_B; + /// The hasEVEX_NF field from the record + bool HasEVEX_NF; /// Indicates that the instruction uses the L and L' fields for RC. bool EncodeRC; /// The isCodeGenOnly field from the record |