Diffstat (limited to 'contrib/llvm-project/llvm'): 53 files changed, 2201 insertions, 701 deletions
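One of the changes below adds a ValueTracking helper, getMinMaxLimit(SelectPatternFlavor, unsigned BitWidth), and uses it in InstructionSimplify to fold selects such as "X > MIN_INT ? X : MIN_INT" down to plain X. The standalone C++ sketch that follows is not part of the commit; only the semantics of getMinMaxLimit and the folded patterns are taken from the diff, while the function and constant names (smaxWithSMinLimit, sminWithSMaxLimit, kSMin, kSMax) are hypothetical illustrations of the identity that makes the fold valid.

#include <cassert>
#include <cstdint>
#include <limits>

// Limits corresponding to the SPF_SMIN / SPF_SMAX flavors for a 64-bit type.
// The new llvm::getMinMaxLimit() returns these as an APInt for any bit width.
constexpr int64_t kSMin = std::numeric_limits<int64_t>::min();
constexpr int64_t kSMax = std::numeric_limits<int64_t>::max();

// "X > MIN_INT ? X : MIN_INT" is a signed-max pattern whose second operand is
// already the limit of the inverse (signed-min) flavor, so the result is X.
int64_t smaxWithSMinLimit(int64_t x) { return x > kSMin ? x : kSMin; }

// Likewise "X < MAX_INT ? X : MAX_INT" is a signed-min pattern and equals X.
int64_t sminWithSMaxLimit(int64_t x) { return x < kSMax ? x : kSMax; }

int main() {
  for (int64_t x : {int64_t{0}, int64_t{-42}, kSMin, kSMax}) {
    assert(smaxWithSMinLimit(x) == x);
    assert(sminWithSMaxLimit(x) == x);
  }
  return 0;
}

In the change itself, matchDecomposedSelectPattern() identifies the min/max flavor of the select and m_SpecificInt() checks that the other operand is the limit constant of the inverse flavor before returning X.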
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h index 90ec742f18e6..f46e66641c08 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h @@ -744,6 +744,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// minimum/maximum flavor. CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF); + /// Return the minimum or maximum constant value for the specified integer + /// min/max flavor and type. + APInt getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth); + /// Check if the values in \p VL are select instructions that can be converted /// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a /// conversion is possible, together with a bool indicating whether all select diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h index 692dc4d7d4cf..a4955e2a973a 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1396,6 +1396,11 @@ public: return NVT; } + virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const { + return getValueType(DL, Ty, AllowUnknown); + } + /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM /// operations except for the pointer size. If AllowUnknown is true, this /// will return MVT::Other for types with no EVT counterpart (e.g. structs), diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td index 0e88e705e16b..8bacf687ac76 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td @@ -216,6 +216,7 @@ def untyped : ValueType<8, 174>; // Produces an untyped value def funcref : ValueType<0, 175>; // WebAssembly's funcref type def externref : ValueType<0, 176>; // WebAssembly's externref type def x86amx : ValueType<8192, 177>; // X86 AMX value +def i64x8 : ValueType<512, 178>; // 8 Consecutive GPRs (AArch64) def token : ValueType<0, 248>; // TokenTy diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Module.h b/contrib/llvm-project/llvm/include/llvm/IR/Module.h index 81e29d9b86e8..97aea5aedf22 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/Module.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/Module.h @@ -324,6 +324,9 @@ public: /// name is not found. GlobalValue *getNamedValue(StringRef Name) const; + /// Return the number of global values in the module. + unsigned getNumNamedValues() const; + /// Return a unique non-zero ID for the specified metadata kind. This ID is /// uniqued across modules in the current LLVMContext. unsigned getMDKindID(StringRef Name) const; diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h index 08a934e6985f..c0cedb23bdcf 100644 --- a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h +++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h @@ -1104,6 +1104,7 @@ namespace RawInstrProf { // Version 5: Bit 60 of FuncHash is reserved for the flag for the context // sensitive records. // Version 6: Added binary id. +// Version 7: Reorder binary id and include version in signature. 
const uint64_t Version = INSTR_PROF_RAW_VERSION; template <class IntPtrT> inline uint64_t getMagic(); diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc index 08a642469627..7d2097cfc297 100644 --- a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -129,6 +129,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) @@ -137,7 +138,6 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) -INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) #undef INSTR_PROF_RAW_HEADER /* INSTR_PROF_RAW_HEADER end */ @@ -646,7 +646,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 6 +#define INSTR_PROF_RAW_VERSION 7 /* Indexed profile format version (start from 1). */ #define INSTR_PROF_INDEX_VERSION 7 /* Coverage mapping format version (start from 0). */ diff --git a/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h b/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h index 31f2d5a48183..5c73cece85c3 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h @@ -270,9 +270,10 @@ namespace llvm { funcref = 175, // WebAssembly's funcref type externref = 176, // WebAssembly's externref type x86amx = 177, // This is an X86 AMX value + i64x8 = 178, // 8 Consecutive GPRs (AArch64) FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = x86amx, // This always remains at the end of the list. + LAST_VALUETYPE = i64x8, // This always remains at the end of the list. VALUETYPE_SIZE = LAST_VALUETYPE + 1, // This is the current maximum for LAST_VALUETYPE. @@ -987,6 +988,7 @@ namespace llvm { case nxv16f16: case nxv8f32: case nxv4f64: return TypeSize::Scalable(256); + case i64x8: case v512i1: case v64i8: case v32i16: diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h index c93b8adcc890..c3c12fd23746 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1855,6 +1855,10 @@ public: /// static void createShallowWrapper(Function &F); + /// Returns true if the function \p F can be internalized. i.e. it has a + /// compatible linkage. + static bool isInternalizable(Function &F); + /// Make another copy of the function \p F such that the copied version has /// internal linkage afterwards and can be analysed. 
Then we replace all uses /// of the original function to the copied one @@ -1870,6 +1874,22 @@ public: /// null pointer. static Function *internalizeFunction(Function &F, bool Force = false); + /// Make copies of each function in the set \p FnSet such that the copied + /// version has internal linkage afterwards and can be analysed. Then we + /// replace all uses of the original function to the copied one. The map + /// \p FnMap contains a mapping of functions to their internalized versions. + /// + /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` + /// linkage can be internalized because these linkages guarantee that other + /// definitions with the same name have the same semantics as this one. + /// + /// This version will internalize all the functions in the set \p FnSet at + /// once and then replace the uses. This prevents internalized functions being + /// called by external functions when there is an internalized version in the + /// module. + static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet, + DenseMap<Function *, Function *> &FnMap); + /// Return the data layout associated with the anchor scope. const DataLayout &getDataLayout() const { return InfoCache.DL; } diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index c4030735d965..c922476ac79d 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -51,11 +51,13 @@ #define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" namespace llvm { @@ -176,7 +178,7 @@ public: class PredicateInfo { public: PredicateInfo(Function &, DominatorTree &, AssumptionCache &); - ~PredicateInfo() = default; + ~PredicateInfo(); void verifyPredicateInfo() const; @@ -203,6 +205,8 @@ private: // the Predicate Info, they belong to the ValueInfo structs in the ValueInfos // vector. DenseMap<const Value *, const PredicateBase *> PredicateMap; + // The set of ssa_copy declarations we created with our custom mangling. + SmallSet<AssertingVH<Function>, 20> CreatedDeclarations; }; // This pass does eager building and then printing of PredicateInfo. It is used diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp index 23083bc8178e..69ab0052b0a7 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4080,6 +4080,22 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, std::swap(TrueVal, FalseVal); } + // Check for integer min/max with a limit constant: + // X > MIN_INT ? X : MIN_INT --> X + // X < MAX_INT ? 
X : MAX_INT --> X + if (TrueVal->getType()->isIntOrIntVectorTy()) { + Value *X, *Y; + SelectPatternFlavor SPF = + matchDecomposedSelectPattern(cast<ICmpInst>(CondVal), TrueVal, FalseVal, + X, Y).Flavor; + if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) { + APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF), + X->getType()->getScalarSizeInBits()); + if (match(Y, m_SpecificInt(LimitC))) + return X; + } + } + if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) { Value *X; const APInt *Y; diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp index 4a8818f2e2a8..c3a609ee4fe1 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -893,9 +893,10 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getReturnType()->isIntegerTy(32); case LibFunc_snprintf: - return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && - FTy.getParamType(2)->isPointerTy() && - FTy.getReturnType()->isIntegerTy(32)); + return NumParams == 3 && FTy.getParamType(0)->isPointerTy() && + IsSizeTTy(FTy.getParamType(1)) && + FTy.getParamType(2)->isPointerTy() && + FTy.getReturnType()->isIntegerTy(32); case LibFunc_snprintf_chk: return NumParams == 5 && FTy.getParamType(0)->isPointerTy() && diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp index 522d21812c6a..6e3ca5c4e08a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp @@ -6253,6 +6253,16 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) { return getMinMaxPred(getInverseMinMaxFlavor(SPF)); } +APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) { + switch (SPF) { + case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth); + case SPF_SMIN: return APInt::getSignedMinValue(BitWidth); + case SPF_UMAX: return APInt::getMaxValue(BitWidth); + case SPF_UMIN: return APInt::getMinValue(BitWidth); + default: llvm_unreachable("Unexpected flavor"); + } +} + std::pair<Intrinsic::ID, bool> llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) { // Check if VL contains select instructions that can be folded into a min/max diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index bb4d41cfd69f..4ae427484945 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -325,7 +325,8 @@ bool InlineAsmLowering::lowerInlineAsm( return false; } - OpInfo.ConstraintVT = TLI->getValueType(DL, OpTy, true).getSimpleVT(); + OpInfo.ConstraintVT = + TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT(); } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); @@ -334,13 +335,17 @@ bool InlineAsmLowering::lowerInlineAsm( TLI->getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = TLI->getSimpleValueType(DL, Call.getType()); + OpInfo.ConstraintVT = + TLI->getAsmOperandValueType(DL, Call.getType()).getSimpleVT(); } ++ResNo; } else { OpInfo.ConstraintVT = MVT::Other; } + if (OpInfo.ConstraintVT == MVT::i64x8) + 
return false; + // Compute the constraint code and ConstraintType to use. computeConstraintToUse(TLI, OpInfo); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b104e995019f..4f730b2cf372 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2439,9 +2439,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { N0.getOperand(0)); // fold (add (add (xor a, -1), b), 1) -> (sub b, a) - if (N0.getOpcode() == ISD::ADD || - N0.getOpcode() == ISD::UADDO || - N0.getOpcode() == ISD::SADDO) { + if (N0.getOpcode() == ISD::ADD) { SDValue A, Xor; if (isBitwiseNot(N0.getOperand(0))) { @@ -20562,8 +20560,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // otherwise => (extract_subvec V1, ExtIdx) uint64_t InsIdx = V.getConstantOperandVal(2); if (InsIdx * SmallVT.getScalarSizeInBits() == - ExtIdx * NVT.getScalarSizeInBits()) + ExtIdx * NVT.getScalarSizeInBits()) { + if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT)) + return SDValue(); + return DAG.getBitcast(NVT, V.getOperand(1)); + } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d56d4bcc9169..a08548393979 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8176,7 +8176,7 @@ public: } } - return TLI.getValueType(DL, OpTy, true); + return TLI.getAsmOperandValueType(DL, OpTy, true); } }; @@ -8479,8 +8479,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, DAG.getDataLayout(), STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = - TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType()); + OpInfo.ConstraintVT = TLI.getAsmOperandValueType( + DAG.getDataLayout(), Call.getType()).getSimpleVT(); } ++ResNo; } else { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1415cce3b1df..09627ee6a164 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1660,7 +1660,7 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // physical registers if there is debug info associated with the terminator // of our mbb. We want to include said debug info in our terminator // sequence, so we return true in that case. 
- return MI.isDebugValue(); + return MI.isDebugInstr(); // We have left the terminator sequence if we are not doing one of the // following: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1c1dae8f953f..5e1786958b6f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4687,7 +4687,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType()); + OpInfo.ConstraintVT = + getAsmOperandValueType(DL, Call.getType()).getSimpleVT(); } ++ResNo; break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index add34eccc1f3..de096f95afcb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -677,8 +677,9 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName, } if (Retain) { - if (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) + if ((Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && + !TM.getTargetTriple().isOSSolaris()) Flags |= ELF::SHF_GNU_RETAIN; return NextUniqueID++; } @@ -855,8 +856,10 @@ static MCSection *selectELFSectionForGlobal( EmitUniqueSection = true; Flags |= ELF::SHF_LINK_ORDER; } - if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) { + if (Retain && + (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && + !TM.getTargetTriple().isOSSolaris()) { EmitUniqueSection = true; Flags |= ELF::SHF_GNU_RETAIN; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp index 9daebfd9e63d..4876b9e23717 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp @@ -167,6 +167,7 @@ std::string EVT::getEVTString() const { case MVT::Glue: return "glue"; case MVT::x86mmx: return "x86mmx"; case MVT::x86amx: return "x86amx"; + case MVT::i64x8: return "i64x8"; case MVT::Metadata: return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::funcref: return "funcref"; @@ -198,6 +199,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); case MVT::x86amx: return Type::getX86_AMXTy(Context); + case MVT::i64x8: return IntegerType::get(Context, 512); case MVT::externref: return PointerType::get(StructType::create(Context), 10); case MVT::funcref: diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp index dcea8f83d333..513fe0fdd608 100644 --- a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp +++ b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp @@ -349,200 +349,6 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, } } -/// Wrapper around getFoldedSizeOfImpl() that adds caching. 
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded, - DenseMap<Type *, Constant *> &Cache); - -/// Return a ConstantExpr with type DestTy for sizeof on Ty, with any known -/// factors factored out. If Folded is false, return null if no factoring was -/// possible, to avoid endlessly bouncing an unfoldable expression back into the -/// top-level folder. -static Constant *getFoldedSizeOfImpl(Type *Ty, Type *DestTy, bool Folded, - DenseMap<Type *, Constant *> &Cache) { - // This is the actual implementation of getFoldedSizeOf(). To get the caching - // behavior, we need to call getFoldedSizeOf() when we recurse. - - if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - Constant *N = ConstantInt::get(DestTy, ATy->getNumElements()); - Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true, Cache); - return ConstantExpr::getNUWMul(E, N); - } - - if (StructType *STy = dyn_cast<StructType>(Ty)) - if (!STy->isPacked()) { - unsigned NumElems = STy->getNumElements(); - // An empty struct has size zero. - if (NumElems == 0) - return ConstantExpr::getNullValue(DestTy); - // Check for a struct with all members having the same size. - Constant *MemberSize = - getFoldedSizeOf(STy->getElementType(0), DestTy, true, Cache); - bool AllSame = true; - for (unsigned i = 1; i != NumElems; ++i) - if (MemberSize != - getFoldedSizeOf(STy->getElementType(i), DestTy, true, Cache)) { - AllSame = false; - break; - } - if (AllSame) { - Constant *N = ConstantInt::get(DestTy, NumElems); - return ConstantExpr::getNUWMul(MemberSize, N); - } - } - - // Pointer size doesn't depend on the pointee type, so canonicalize them - // to an arbitrary pointee. - if (PointerType *PTy = dyn_cast<PointerType>(Ty)) - if (!PTy->getElementType()->isIntegerTy(1)) - return getFoldedSizeOf( - PointerType::get(IntegerType::get(PTy->getContext(), 1), - PTy->getAddressSpace()), - DestTy, true, Cache); - - // If there's no interesting folding happening, bail so that we don't create - // a constant that looks like it needs folding but really doesn't. - if (!Folded) - return nullptr; - - // Base case: Get a regular sizeof expression. - Constant *C = ConstantExpr::getSizeOf(Ty); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, false), - C, DestTy); - return C; -} - -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded, - DenseMap<Type *, Constant *> &Cache) { - // Check for previously generated folded size constant. - auto It = Cache.find(Ty); - if (It != Cache.end()) - return It->second; - return Cache[Ty] = getFoldedSizeOfImpl(Ty, DestTy, Folded, Cache); -} - -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) { - DenseMap<Type *, Constant *> Cache; - return getFoldedSizeOf(Ty, DestTy, Folded, Cache); -} - -/// Return a ConstantExpr with type DestTy for alignof on Ty, with any known -/// factors factored out. If Folded is false, return null if no factoring was -/// possible, to avoid endlessly bouncing an unfoldable expression back into the -/// top-level folder. -static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) { - // The alignment of an array is equal to the alignment of the - // array element. Note that this is not always true for vectors. 
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - Constant *C = ConstantExpr::getAlignOf(ATy->getElementType()); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, - false), - C, DestTy); - return C; - } - - if (StructType *STy = dyn_cast<StructType>(Ty)) { - // Packed structs always have an alignment of 1. - if (STy->isPacked()) - return ConstantInt::get(DestTy, 1); - - // Otherwise, struct alignment is the maximum alignment of any member. - // Without target data, we can't compare much, but we can check to see - // if all the members have the same alignment. - unsigned NumElems = STy->getNumElements(); - // An empty struct has minimal alignment. - if (NumElems == 0) - return ConstantInt::get(DestTy, 1); - // Check for a struct with all members having the same alignment. - Constant *MemberAlign = - getFoldedAlignOf(STy->getElementType(0), DestTy, true); - bool AllSame = true; - for (unsigned i = 1; i != NumElems; ++i) - if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) { - AllSame = false; - break; - } - if (AllSame) - return MemberAlign; - } - - // Pointer alignment doesn't depend on the pointee type, so canonicalize them - // to an arbitrary pointee. - if (PointerType *PTy = dyn_cast<PointerType>(Ty)) - if (!PTy->getElementType()->isIntegerTy(1)) - return - getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(), - 1), - PTy->getAddressSpace()), - DestTy, true); - - // If there's no interesting folding happening, bail so that we don't create - // a constant that looks like it needs folding but really doesn't. - if (!Folded) - return nullptr; - - // Base case: Get a regular alignof expression. - Constant *C = ConstantExpr::getAlignOf(Ty); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, false), - C, DestTy); - return C; -} - -/// Return a ConstantExpr with type DestTy for offsetof on Ty and FieldNo, with -/// any known factors factored out. If Folded is false, return null if no -/// factoring was possible, to avoid endlessly bouncing an unfoldable expression -/// back into the top-level folder. -static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy, - bool Folded) { - if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, - DestTy, false), - FieldNo, DestTy); - Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); - return ConstantExpr::getNUWMul(E, N); - } - - if (StructType *STy = dyn_cast<StructType>(Ty)) - if (!STy->isPacked()) { - unsigned NumElems = STy->getNumElements(); - // An empty struct has no members. - if (NumElems == 0) - return nullptr; - // Check for a struct with all members having the same size. - Constant *MemberSize = - getFoldedSizeOf(STy->getElementType(0), DestTy, true); - bool AllSame = true; - for (unsigned i = 1; i != NumElems; ++i) - if (MemberSize != - getFoldedSizeOf(STy->getElementType(i), DestTy, true)) { - AllSame = false; - break; - } - if (AllSame) { - Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, - false, - DestTy, - false), - FieldNo, DestTy); - return ConstantExpr::getNUWMul(MemberSize, N); - } - } - - // If there's no interesting folding happening, bail so that we don't create - // a constant that looks like it needs folding but really doesn't. - if (!Folded) - return nullptr; - - // Base case: Get a regular offsetof expression. 
- Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, false), - C, DestTy); - return C; -} - Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, Type *DestTy) { if (isa<PoisonValue>(V)) @@ -666,53 +472,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, // Is it a null pointer value? if (V->isNullValue()) return ConstantInt::get(DestTy, 0); - // If this is a sizeof-like expression, pull out multiplications by - // known factors to expose them to subsequent folding. If it's an - // alignof-like expression, factor out known factors. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::GetElementPtr && - CE->getOperand(0)->isNullValue()) { - // FIXME: Looks like getFoldedSizeOf(), getFoldedOffsetOf() and - // getFoldedAlignOf() don't handle the case when DestTy is a vector of - // pointers yet. We end up in asserts in CastInst::getCastOpcode (see - // test/Analysis/ConstantFolding/cast-vector.ll). I've only seen this - // happen in one "real" C-code test case, so it does not seem to be an - // important optimization to handle vectors here. For now, simply bail - // out. - if (DestTy->isVectorTy()) - return nullptr; - GEPOperator *GEPO = cast<GEPOperator>(CE); - Type *Ty = GEPO->getSourceElementType(); - if (CE->getNumOperands() == 2) { - // Handle a sizeof-like expression. - Constant *Idx = CE->getOperand(1); - bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne(); - if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) { - Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true, - DestTy, false), - Idx, DestTy); - return ConstantExpr::getMul(C, Idx); - } - } else if (CE->getNumOperands() == 3 && - CE->getOperand(1)->isNullValue()) { - // Handle an alignof-like expression. - if (StructType *STy = dyn_cast<StructType>(Ty)) - if (!STy->isPacked()) { - ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2)); - if (CI->isOne() && - STy->getNumElements() == 2 && - STy->getElementType(0)->isIntegerTy(1)) { - return getFoldedAlignOf(STy->getElementType(1), DestTy, false); - } - } - // Handle an offsetof-like expression. - if (Ty->isStructTy() || Ty->isArrayTy()) { - if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2), - DestTy, false)) - return C; - } - } - } // Other pointer types cannot be casted return nullptr; case Instruction::UIToFP: diff --git a/contrib/llvm-project/llvm/lib/IR/Module.cpp b/contrib/llvm-project/llvm/lib/IR/Module.cpp index 7c18dc0ed299..63ea41fba89a 100644 --- a/contrib/llvm-project/llvm/lib/IR/Module.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Module.cpp @@ -114,6 +114,10 @@ GlobalValue *Module::getNamedValue(StringRef Name) const { return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name)); } +unsigned Module::getNumNamedValues() const { + return getValueSymbolTable().size(); +} + /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. /// This ID is uniqued across modules in the current LLVMContext. 
unsigned Module::getMDKindID(StringRef Name) const { diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp index 8a4470ae207d..a0460062f307 100644 --- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp @@ -366,6 +366,7 @@ Error RawInstrProfReader<IntPtrT>::readHeader( if (GET_VERSION(Version) != RawInstrProf::Version) return error(instrprof_error::unsupported_version); + BinaryIdsSize = swap(Header.BinaryIdsSize); CountersDelta = swap(Header.CountersDelta); NamesDelta = swap(Header.NamesDelta); auto DataSize = swap(Header.DataSize); @@ -374,7 +375,6 @@ Error RawInstrProfReader<IntPtrT>::readHeader( auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); NamesSize = swap(Header.NamesSize); ValueKindLast = swap(Header.ValueKindLast); - BinaryIdsSize = swap(Header.BinaryIdsSize); auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>); auto PaddingSize = getNumPaddingBytes(NamesSize); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index adefe3b37ee0..3ab9b250749a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -653,6 +653,9 @@ bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, case 'x': Reg = getXRegFromWReg(Reg); break; + case 't': + Reg = getXRegFromXRegTuple(Reg); + break; } O << AArch64InstPrinter::getRegisterName(Reg); @@ -749,6 +752,10 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, AArch64::GPR64allRegClass.contains(Reg)) return printAsmMRegister(MO, 'x', O); + // If this is an x register tuple, print an x register. 
+ if (AArch64::GPR64x8ClassRegClass.contains(Reg)) + return printAsmMRegister(MO, 't', O); + unsigned AltName = AArch64::NoRegAltName; const TargetRegisterClass *RegClass; if (AArch64::ZPRRegClass.contains(Reg)) { diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e7282aad05e2..b27a02b8c182 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -246,6 +246,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass); addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass); + if (Subtarget->hasLS64()) { + addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass); + setOperationAction(ISD::LOAD, MVT::i64x8, Custom); + setOperationAction(ISD::STORE, MVT::i64x8, Custom); + } + if (Subtarget->hasFPARMv8()) { addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass); @@ -2023,6 +2029,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::LASTA) MAKE_CASE(AArch64ISD::LASTB) MAKE_CASE(AArch64ISD::REINTERPRET_CAST) + MAKE_CASE(AArch64ISD::LS64_BUILD) + MAKE_CASE(AArch64ISD::LS64_EXTRACT) MAKE_CASE(AArch64ISD::TBL) MAKE_CASE(AArch64ISD::FADD_PRED) MAKE_CASE(AArch64ISD::FADDA_PRED) @@ -4345,9 +4353,17 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, if (IsFixedLength) { assert(Subtarget->useSVEForFixedLengthVectors() && "Cannot lower when not using SVE for fixed vectors"); - IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); - MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) { + IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); + MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + } else { + MemVT = getContainerForFixedLengthVector(DAG, MemVT); + IndexVT = MemVT.changeTypeToInteger(); + } InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); + Mask = DAG.getNode( + ISD::ZERO_EXTEND, DL, + VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask); } if (PassThru->isUndef() || isZerosVector(PassThru.getNode())) @@ -4442,8 +4458,13 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, if (IsFixedLength) { assert(Subtarget->useSVEForFixedLengthVectors() && "Cannot lower when not using SVE for fixed vectors"); - IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); - MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) { + IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); + MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + } else { + MemVT = getContainerForFixedLengthVector(DAG, MemVT); + IndexVT = MemVT.changeTypeToInteger(); + } InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); StoreVal = @@ -4452,6 +4473,9 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, ISD::ANY_EXTEND, DL, VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal); StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal); + Mask = DAG.getNode( + ISD::ZERO_EXTEND, DL, + VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask); } else if (VT.isFloatingPoint()) { // Handle FP data by casting the data so an integer scatter can be used. 
EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount()); @@ -4605,17 +4629,51 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()}, StoreNode->getMemoryVT(), StoreNode->getMemOperand()); return Result; + } else if (MemVT == MVT::i64x8) { + SDValue Value = StoreNode->getValue(); + assert(Value->getValueType(0) == MVT::i64x8); + SDValue Chain = StoreNode->getChain(); + SDValue Base = StoreNode->getBasePtr(); + EVT PtrVT = Base.getValueType(); + for (unsigned i = 0; i < 8; i++) { + SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64, + Value, DAG.getConstant(i, Dl, MVT::i32)); + SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base, + DAG.getConstant(i * 8, Dl, PtrVT)); + Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(), + StoreNode->getOriginalAlign()); + } + return Chain; } return SDValue(); } -// Custom lowering for extending v4i8 vector loads. SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); LoadSDNode *LoadNode = cast<LoadSDNode>(Op); assert(LoadNode && "Expected custom lowering of a load node"); + + if (LoadNode->getMemoryVT() == MVT::i64x8) { + SmallVector<SDValue, 8> Ops; + SDValue Base = LoadNode->getBasePtr(); + SDValue Chain = LoadNode->getChain(); + EVT PtrVT = Base.getValueType(); + for (unsigned i = 0; i < 8; i++) { + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base, + DAG.getConstant(i * 8, DL, PtrVT)); + SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr, + LoadNode->getPointerInfo(), + LoadNode->getOriginalAlign()); + Ops.push_back(Part); + Chain = SDValue(Part.getNode(), 1); + } + SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops); + return DAG.getMergeValues({Loaded, Chain}, DL); + } + + // Custom lowering for extending v4i8 vector loads. EVT VT = Op->getValueType(0); assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32"); @@ -8173,6 +8231,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( case 'r': if (VT.isScalableVector()) return std::make_pair(0U, nullptr); + if (Subtarget->hasLS64() && VT.getSizeInBits() == 512) + return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass); if (VT.getFixedSizeInBits() == 64) return std::make_pair(0U, &AArch64::GPR64commonRegClass); return std::make_pair(0U, &AArch64::GPR32commonRegClass); @@ -8260,6 +8320,15 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( return Res; } +EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL, + llvm::Type *Ty, + bool AllowUnknown) const { + if (Subtarget->hasLS64() && Ty->isIntegerTy(512)) + return EVT(MVT::i64x8); + + return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown); +} + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void AArch64TargetLowering::LowerAsmOperandForConstraint( diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 386e1c2d8400..2b337255fc27 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -330,6 +330,10 @@ enum NodeType : unsigned { // Cast between vectors of the same element type but differ in length. 
REINTERPRET_CAST, + // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa + LS64_BUILD, + LS64_EXTRACT, + LD1_MERGE_ZERO, LD1S_MERGE_ZERO, LDNF1_MERGE_ZERO, @@ -824,6 +828,9 @@ public: bool isAllActivePredicate(SDValue N) const; EVT getPromotedVTForPredicate(EVT VT) const; + EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const override; + private: /// Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index b03d421d3e6d..091a62aa4ada 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, if (!MI.getOperand(1).isReg()) return false; + auto NormalizeCmpValue = [](int64_t Value) -> int { + // Comparison immediates may be 64-bit, but CmpValue is only an int. + // Normalize to 0/1/2 return value, where 2 indicates any value apart from + // 0 or 1. + // TODO: Switch CmpValue to int64_t in the API to avoid this. + if (Value == 0 || Value == 1) + return Value; + return 2; + }; + switch (MI.getOpcode()) { default: break; @@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - // FIXME: In order to convert CmpValue to 0 or 1 - CmpValue = MI.getOperand(2).getImm() != 0; + CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm()); return true; case AArch64::ANDSWri: case AArch64::ANDSXri: @@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - // FIXME:The return val type of decodeLogicalImmediate is uint64_t, - // while the type of CmpValue is int. When converting uint64_t to int, - // the high 32 bits of uint64_t will be lost. - // In fact it causes a bug in spec2006-483.xalancbmk - // CmpValue is only used to compare with zero in OptimizeCompareInstr - CmpValue = AArch64_AM::decodeLogicalImmediate( + CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate( MI.getOperand(2).getImm(), - MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0; + MI.getOpcode() == AArch64::ANDSWri ? 32 : 64)); return true; } @@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (CmpInstr.getOpcode() == AArch64::PTEST_PP) return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); - // Continue only if we have a "ri" where immediate is zero. - // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare - // function. - assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); + // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1. 
+ assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) && + "CmpValue must be 0, 1, or 2!"); if (SrcReg2 != 0) return false; @@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) return false; - if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI)) + if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI)) return true; - return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI); + return (CmpValue == 0 || CmpValue == 1) && + removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI); } /// Get opcode of S version of Instr. diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 682cec361728..12744e4de09b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8092,6 +8092,20 @@ let AddedComplexity = 10 in { // FIXME: add SVE dot-product patterns. } +// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs, +// so that it can be used as input to inline asm, and vice versa. +def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>; +def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>; +def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, + GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)), + (REG_SEQUENCE GPR64x8Class, + $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, + $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>; +foreach i = 0-7 in { + def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))), + (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>; +} + let Predicates = [HasLS64] in { def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), (outs GPR64x8:$Rt)>; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 07dee3ce1fbc..67680e356683 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -732,7 +732,9 @@ def Tuples8X : RegisterTuples< !foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)), !foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>; -def GPR64x8Class : RegisterClass<"AArch64", [i64], 64, (trunc Tuples8X, 12)>; +def GPR64x8Class : RegisterClass<"AArch64", [i64x8], 512, (trunc Tuples8X, 12)> { + let Size = 512; +} def GPR64x8AsmOp : AsmOperandClass { let Name = "GPR64x8"; let ParserMethod = "tryParseGPR64x8"; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index ce6866154242..d168c2a84bbe 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -106,6 +106,25 @@ inline static unsigned getXRegFromWReg(unsigned Reg) { return Reg; } +inline static unsigned getXRegFromXRegTuple(unsigned RegTuple) { + switch (RegTuple) { + case AArch64::X0_X1_X2_X3_X4_X5_X6_X7: return AArch64::X0; + case AArch64::X2_X3_X4_X5_X6_X7_X8_X9: return AArch64::X2; + case AArch64::X4_X5_X6_X7_X8_X9_X10_X11: return AArch64::X4; + case AArch64::X6_X7_X8_X9_X10_X11_X12_X13: return AArch64::X6; + case AArch64::X8_X9_X10_X11_X12_X13_X14_X15: return AArch64::X8; + case 
AArch64::X10_X11_X12_X13_X14_X15_X16_X17: return AArch64::X10; + case AArch64::X12_X13_X14_X15_X16_X17_X18_X19: return AArch64::X12; + case AArch64::X14_X15_X16_X17_X18_X19_X20_X21: return AArch64::X14; + case AArch64::X16_X17_X18_X19_X20_X21_X22_X23: return AArch64::X16; + case AArch64::X18_X19_X20_X21_X22_X23_X24_X25: return AArch64::X18; + case AArch64::X20_X21_X22_X23_X24_X25_X26_X27: return AArch64::X20; + case AArch64::X22_X23_X24_X25_X26_X27_X28_FP: return AArch64::X22; + } + // For anything else, return it unchanged. + return RegTuple; +} + static inline unsigned getBRegFromDReg(unsigned Reg) { switch (Reg) { case AArch64::D0: return AArch64::B0; diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2167ad5d7467..e68a3aa8bf47 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1647,7 +1647,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, "CMP_SWAP not expected to be custom expanded for Thumb1"); assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) && "ARMv8-M.baseline does not have t2UXTB/t2UXTH"); - assert(ARM::tGPRRegClass.contains(DesiredReg) && + assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) && "DesiredReg used for UXT op must be tGPR"); } diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetTransformInfo.h index 3bc5556a62f4..417e8b6ffec3 100644 --- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -54,6 +54,24 @@ public: return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } + + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + ArrayRef<const Value *> Args = ArrayRef<const Value *>(), + const Instruction *CxtI = nullptr) { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput) + return SCEVCheapExpansionBudget.getValue() + 1; + + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, + Opd2PropInfo); + } }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index d5a7873bd056..abf5b213bbac 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -485,6 +485,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, case Intrinsic::experimental_constrained_sin: case Intrinsic::experimental_constrained_cos: return true; + // There is no corresponding FMA instruction for PPC double double. + // Thus, we need to disable CTR loop generation for this type. 
+ case Intrinsic::fmuladd: case Intrinsic::copysign: if (CI->getArgOperand(0)->getType()->getScalarType()-> isPPC_FP128Ty()) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d37ed584d9d2..294532011650 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5814,6 +5814,13 @@ static SDValue performANY_EXTENDCombine(SDNode *N, break; } + // Only handle cases where the result is used by a CopyToReg that likely + // means the value is a liveout of the basic block. This helps prevent + // infinite combine loops like PR51206. + if (none_of(N->uses(), + [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; })) + return SDValue(); + SmallVector<SDNode *, 4> SetCCs; for (SDNode::use_iterator UI = Src.getNode()->use_begin(), UE = Src.getNode()->use_end(); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 342497150d49..8af3c8f5cfdb 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -78,6 +78,39 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT, } //===----------------------------------------------------------------------===// +// Scheduling definitions. +//===----------------------------------------------------------------------===// + +class VMVRSched<int n>: Sched <[!cast<SchedReadWrite>("WriteVMov" # n # "V"), + !cast<SchedReadWrite>("ReadVMov" # n # "V")]>; + +class VLESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDE" # n), + ReadVLDX, ReadVMask]>; + +class VSESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTE" # n), + !cast<SchedReadWrite>("ReadVSTE" # n # "V"), + ReadVSTX, ReadVMask]>; + +class VLSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDS" # n), + ReadVLDX, ReadVLDSX, ReadVMask]>; + +class VSSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTS" # n), + !cast<SchedReadWrite>("ReadVSTS" # n # "V"), + ReadVSTX, ReadVSTSX, ReadVMask]>; + +class VLXSched<int n, string o> : + Sched <[!cast<SchedReadWrite>("WriteVLD" # o # "X" # n), + ReadVLDX, !cast<SchedReadWrite>("ReadVLD" # o # "XV"), ReadVMask]>; + +class VSXSched<int n, string o> : + Sched <[!cast<SchedReadWrite>("WriteVST" # o # "X" # n), + !cast<SchedReadWrite>("ReadVST" # o # "X" # n), + ReadVSTX, !cast<SchedReadWrite>("ReadVST" # o # "XV"), ReadVMask]>; + +class VLFSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDFF" # n), + ReadVLDX, ReadVMask]>; + +//===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -328,106 +361,417 @@ class VAMONoWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> // Use these multiclasses to define instructions more easily. //===----------------------------------------------------------------------===// multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { - def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">; - def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">; - def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>; + def V : VALUVV<funct6, OPIVV, opcodestr # "." 
# vw # "v">, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; } multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">; - def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">; + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; } -multiclass VALUr_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUrVV<funct6, OPIVV, opcodestr # "." # vw # "v">; - def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">; +multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; } -multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { - def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">; - def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>; +multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>; } -multiclass VALU_IV_V<string opcodestr, bits<6> funct6> { - def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">; +multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>; + def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>; } -multiclass VALUr_IV_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">; +multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>; + def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; } -multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">; - def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">; +multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def X : VALUrVX<funct6, OPMVX, opcodestr # "." 
# vw # "x">, + Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; +} + +multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; +} + +multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> { + def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; + def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; + def IM : VALUmVI<funct6, opcodestr # ".vim">, + Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>; } -multiclass VALU_MV_V<string opcodestr, bits<6> funct6> { - def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">; +multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> { + def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">, + Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; + def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">, + Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; + def IM : VALUmVI<funct6, opcodestr # ".vim">, + Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; } -multiclass VALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> { - def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">; +multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> { + def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; + def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; } -multiclass VALU_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">; +multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> { + def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>; + def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>; + def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>, + Sched<[WriteVICALUI, ReadVIALUCV]>; } -multiclass VALUr_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">; - def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">; +multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> { + def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>; + def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>; } -multiclass VALUr_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">; +multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>; } -multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { - def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>; +multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def F : VALUVF<funct6, OPFVF, opcodestr # "." 
# vw # "f">, + Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>; } -multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> { - def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">; - def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">; - def IM : VALUmVI<funct6, opcodestr # ".vim">; +multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>; } -multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> { - def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">; - def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">; +multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>; } -multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> { - def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">; - def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">; - def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>; +multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>; } -multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> { - def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">; - def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">; +multiclass VRDIV_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>; } -multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">; - def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">; +multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>; } -multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { - def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">; +multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>; + def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>; +} + +multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>; + def F : VALUrVF<funct6, OPFVF, opcodestr # "." 
# vw # "f">, + Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>; +} + +multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; +} + +multiclass VRCP_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; +} + +multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>; +} + +multiclass VCMP_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>; +} + +multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>; +} + +multiclass VCLS_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; +} + +multiclass VCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>; +} + +multiclass VCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>; +} + +multiclass VWCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>; +} + +multiclass VWCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>; +} + +multiclass VWCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>; +} + +multiclass VNCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>; +} + +multiclass VNCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>; +} + +multiclass VNCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>; +} + +multiclass VRED_MV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">, + Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV0, ReadVMask]>; +} + +multiclass VWRED_IV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">, + Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV0, ReadVMask]>; +} + +multiclass VRED_FV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, + Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV0, ReadVMask]>; +} + +multiclass VREDO_FV_V<string opcodestr, 
bits<6> funct6> { + def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, + Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV0, ReadVMask]>; +} + +multiclass VWRED_FV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, + Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV0, ReadVMask]>; +} + +multiclass VWREDO_FV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, + Sched<[WriteVFWRedOV, ReadVFWRedOV, ReadVFWRedOV0, ReadVMask]>; } -multiclass VALUr_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">; - def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">; +multiclass VMALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> { + def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">, + Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>; } -multiclass VALU_FV_V<string opcodestr, bits<6> funct6> { - def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">; +multiclass VMSFS_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>, + Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>; } -multiclass VALU_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { - def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>; +multiclass VMIOT_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>, + Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; +} + +multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>; +} + +multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>; +} + +multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; +} + +multiclass VCMP_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; +} + +multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." 
# vw # "x">, + Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; +} + +multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>; +} + +multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>; +} + +multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>; +} + +multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVSALUI, ReadVSALUV, ReadVMask]>; +} + +multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>; +} + +multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVAALUV, ReadVAALUV, ReadVAALUV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVAALUX, ReadVAALUV, ReadVAALUX, ReadVMask]>; +} + +multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVSMulV, ReadVSMulV, ReadVSMulV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVSMulX, ReadVSMulV, ReadVSMulX, ReadVMask]>; +} + +multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVSShiftV, ReadVSShiftV, ReadVSShiftV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVSShiftX, ReadVSShiftV, ReadVSShiftX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVSShiftI, ReadVSShiftV, ReadVMask]>; +} + +multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVNClipV, ReadVNClipV, ReadVNClipV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVNClipX, ReadVNClipV, ReadVNClipX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." 
# vw # "i", optype>, + Sched<[WriteVNClipI, ReadVNClipV, ReadVMask]>; +} + +multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVISlideI, ReadVISlideV, ReadVMask]>; +} + +multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>; +} + +multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>; +} + +multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVGatherX, ReadVGatherV, ReadVGatherX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVGatherI, ReadVGatherV, ReadVMask]>; +} + +multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> { + def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">, + Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>; } multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> { @@ -435,11 +779,48 @@ multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> { def _UNWD : VAMONoWd<amoop, width, opcodestr>; } -multiclass VWholeLoad<bits<3> nf, string opcodestr, RegisterClass VRC> { - def E8_V : VWholeLoad<nf, LSWidth8, opcodestr # "e8.v", VRC>; - def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v", VRC>; - def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v", VRC>; - def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>; +multiclass VWholeLoad1<string opcodestr, RegisterClass VRC> { + def E8_V : VWholeLoad<0, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD1R8, ReadVLDX]>; + def E16_V : VWholeLoad<0, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD1R16, ReadVLDX]>; + def E32_V : VWholeLoad<0, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD1R32, ReadVLDX]>; + def E64_V : VWholeLoad<0, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD1R64, ReadVLDX]>; +} + +multiclass VWholeLoad2<string opcodestr, RegisterClass VRC> { + def E8_V : VWholeLoad<1, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD2R8, ReadVLDX]>; + def E16_V : VWholeLoad<1, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD2R16, ReadVLDX]>; + def E32_V : VWholeLoad<1, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD2R32, ReadVLDX]>; + def E64_V : VWholeLoad<1, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD2R64, ReadVLDX]>; +} + +multiclass VWholeLoad4<string opcodestr, RegisterClass VRC> { + def E8_V : VWholeLoad<3, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD4R8, ReadVLDX]>; + def E16_V : VWholeLoad<3, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD4R16, ReadVLDX]>; + def E32_V : VWholeLoad<3, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD4R32, ReadVLDX]>; + def E64_V : VWholeLoad<3, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD1R64, ReadVLDX]>; +} + +multiclass VWholeLoad8<string opcodestr, RegisterClass VRC> { + def E8_V : 
VWholeLoad<7, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD8R8, ReadVLDX]>; + def E16_V : VWholeLoad<7, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD8R16, ReadVLDX]>; + def E32_V : VWholeLoad<7, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD8R32, ReadVLDX]>; + def E64_V : VWholeLoad<7, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD8R64, ReadVLDX]>; } //===----------------------------------------------------------------------===// @@ -459,69 +840,94 @@ def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 // Vector Unit-Stride Instructions -def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">; -def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">; -def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">; -def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">; - -def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">; -def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">; -def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">; -def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">; - -def VLE1_V : VUnitStrideLoadMask<"vle1.v">; -def VSE1_V : VUnitStrideStoreMask<"vse1.v">; - -def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">; -def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">; -def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">; -def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">; +def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">, + VLESched<8>; +def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">, + VLESched<16>; +def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">, + VLESched<32>; +def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">, + VLESched<64>; + +def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">, + VLFSched<8>; +def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">, + VLFSched<16>; +def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">, + VLFSched<32>; +def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">, + VLFSched<64>; + +def VLE1_V : VUnitStrideLoadMask<"vle1.v">, + Sched<[WriteVLDM, ReadVLDX]>; +def VSE1_V : VUnitStrideStoreMask<"vse1.v">, + Sched<[WriteVSTM, ReadVSTM, ReadVSTX]>; + +def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">, + VSESched<8>; +def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">, + VSESched<16>; +def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">, + VSESched<32>; +def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">, + VSESched<64>; // Vector Strided Instructions -def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">; -def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">; -def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">; -def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">; - -def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">; -def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">; -def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">; -def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">; +def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">, + VLSSched<8>; +def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">, + VLSSched<16>; +def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">, + VLSSched<32>; +def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">, 
+ VLSSched<32>; + +def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">, + VSSSched<8>; +def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">, + VSSSched<16>; +def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">, + VSSSched<32>; +def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">, + VSSSched<64>; // Vector Indexed Instructions -def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">; -def VLUXEI16_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth16, "vluxei16.v">; -def VLUXEI32_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth32, "vluxei32.v">; -def VLUXEI64_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth64, "vluxei64.v">; - -def VLOXEI8_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth8, "vloxei8.v">; -def VLOXEI16_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth16, "vloxei16.v">; -def VLOXEI32_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth32, "vloxei32.v">; -def VLOXEI64_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth64, "vloxei64.v">; - -def VSUXEI8_V : VIndexedStore<MOPSTIndexedUnord, LSWidth8, "vsuxei8.v">; -def VSUXEI16_V : VIndexedStore<MOPSTIndexedUnord, LSWidth16, "vsuxei16.v">; -def VSUXEI32_V : VIndexedStore<MOPSTIndexedUnord, LSWidth32, "vsuxei32.v">; -def VSUXEI64_V : VIndexedStore<MOPSTIndexedUnord, LSWidth64, "vsuxei64.v">; - -def VSOXEI8_V : VIndexedStore<MOPSTIndexedOrder, LSWidth8, "vsoxei8.v">; -def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">; -def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">; -def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">; - -defm VL1R : VWholeLoad<0, "vl1r", VR>; -defm VL2R : VWholeLoad<1, "vl2r", VRM2>; -defm VL4R : VWholeLoad<3, "vl4r", VRM4>; -defm VL8R : VWholeLoad<7, "vl8r", VRM8>; +foreach n = [8, 16, 32, 64] in { +defvar w = !cast<RISCVWidth>("LSWidth" # n); + +def VLUXEI # n # _V : + VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # n # ".v">, + VLXSched<n, "U">; +def VLOXEI # n # _V : + VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # n # ".v">, + VLXSched<n, "O">; + +def VSUXEI # n # _V : + VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # n # ".v">, + VSXSched<n, "U">; +def VSOXEI # n # _V : + VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # n # ".v">, + VSXSched<n, "O">; +} + +defm VL1R : VWholeLoad1<"vl1r", VR>; +defm VL2R : VWholeLoad2<"vl2r", VRM2>; +defm VL4R : VWholeLoad4<"vl4r", VRM4>; +defm VL8R : VWholeLoad8<"vl8r", VRM8>; + def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>; def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>; def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>; -def VS1R_V : VWholeStore<0, "vs1r.v", VR>; -def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>; -def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>; -def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>; +def VS1R_V : VWholeStore<0, "vs1r.v", VR>, + Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>; +def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>, + Sched<[WriteVST2R, ReadVST2R, ReadVSTX]>; +def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>, + Sched<[WriteVST4R, ReadVST4R, ReadVSTX]>; +def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>, + Sched<[WriteVST8R, ReadVST8R, ReadVSTX]>; // Vector Single-Width Integer Add and Subtract defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>; @@ -588,9 +994,9 @@ def : InstAlias<"vnot.v $vd, $vs$vm", (VXOR_VI VR:$vd, VR:$vs, -1, VMaskOp:$vm)>; // Vector Single-Width Bit Shift Instructions -defm VSLL_V : VALU_IV_V_X_I<"vsll", 0b100101, uimm5>; -defm VSRL_V : VALU_IV_V_X_I<"vsrl", 0b101000, uimm5>; -defm 
VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>; +defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101, uimm5>; +defm VSRL_V : VSHT_IV_V_X_I<"vsrl", 0b101000, uimm5>; +defm VSRA_V : VSHT_IV_V_X_I<"vsra", 0b101001, uimm5>; // Vector Narrowing Integer Right Shift Instructions // Refer to 11.3. Narrowing Vector Arithmetic Instructions @@ -598,8 +1004,8 @@ defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>; // vector register group (specified by vs2). The destination vector register // group cannot overlap the mask register if used, unless LMUL=1. let Constraints = "@earlyclobber $vd" in { -defm VNSRL_W : VALU_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">; -defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">; +defm VNSRL_W : VNSHT_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">; +defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">; } // Constraints = "@earlyclobber $vd" def : InstAlias<"vncvt.x.x.w $vd, $vs$vm", @@ -607,14 +1013,14 @@ def : InstAlias<"vncvt.x.x.w $vd, $vs$vm", // Vector Integer Comparison Instructions let RVVConstraint = NoConstraint in { -defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>; -defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>; -defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>; -defm VMSLT_V : VALU_IV_V_X<"vmslt", 0b011011>; -defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>; -defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>; -defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>; -defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>; +defm VMSEQ_V : VCMP_IV_V_X_I<"vmseq", 0b011000>; +defm VMSNE_V : VCMP_IV_V_X_I<"vmsne", 0b011001>; +defm VMSLTU_V : VCMP_IV_V_X<"vmsltu", 0b011010>; +defm VMSLT_V : VCMP_IV_V_X<"vmslt", 0b011011>; +defm VMSLEU_V : VCMP_IV_V_X_I<"vmsleu", 0b011100>; +defm VMSLE_V : VCMP_IV_V_X_I<"vmsle", 0b011101>; +defm VMSGTU_V : VCMP_IV_X_I<"vmsgtu", 0b011110>; +defm VMSGT_V : VCMP_IV_X_I<"vmsgt", 0b011111>; } // RVVConstraint = NoConstraint def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm", @@ -672,84 +1078,87 @@ def PseudoVMSGE_VX_M_T : Pseudo<(outs VR:$vd, VRNoV0:$scratch), } // Vector Integer Min/Max Instructions -defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>; -defm VMIN_V : VALU_IV_V_X<"vmin", 0b000101>; -defm VMAXU_V : VALU_IV_V_X<"vmaxu", 0b000110>; -defm VMAX_V : VALU_IV_V_X<"vmax", 0b000111>; +defm VMINU_V : VCMP_IV_V_X<"vminu", 0b000100>; +defm VMIN_V : VCMP_IV_V_X<"vmin", 0b000101>; +defm VMAXU_V : VCMP_IV_V_X<"vmaxu", 0b000110>; +defm VMAX_V : VCMP_IV_V_X<"vmax", 0b000111>; // Vector Single-Width Integer Multiply Instructions -defm VMUL_V : VALU_MV_V_X<"vmul", 0b100101>; -defm VMULH_V : VALU_MV_V_X<"vmulh", 0b100111>; -defm VMULHU_V : VALU_MV_V_X<"vmulhu", 0b100100>; -defm VMULHSU_V : VALU_MV_V_X<"vmulhsu", 0b100110>; +defm VMUL_V : VMUL_MV_V_X<"vmul", 0b100101>; +defm VMULH_V : VMUL_MV_V_X<"vmulh", 0b100111>; +defm VMULHU_V : VMUL_MV_V_X<"vmulhu", 0b100100>; +defm VMULHSU_V : VMUL_MV_V_X<"vmulhsu", 0b100110>; // Vector Integer Divide Instructions -defm VDIVU_V : VALU_MV_V_X<"vdivu", 0b100000>; -defm VDIV_V : VALU_MV_V_X<"vdiv", 0b100001>; -defm VREMU_V : VALU_MV_V_X<"vremu", 0b100010>; -defm VREM_V : VALU_MV_V_X<"vrem", 0b100011>; +defm VDIVU_V : VDIV_MV_V_X<"vdivu", 0b100000>; +defm VDIV_V : VDIV_MV_V_X<"vdiv", 0b100001>; +defm VREMU_V : VDIV_MV_V_X<"vremu", 0b100010>; +defm VREM_V : VDIV_MV_V_X<"vrem", 0b100011>; // Vector Widening Integer Multiply Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VWMUL_V : VALU_MV_V_X<"vwmul", 0b111011>; -defm VWMULU_V : VALU_MV_V_X<"vwmulu", 0b111000>; -defm VWMULSU_V : 
VALU_MV_V_X<"vwmulsu", 0b111010>; +defm VWMUL_V : VWMUL_MV_V_X<"vwmul", 0b111011>; +defm VWMULU_V : VWMUL_MV_V_X<"vwmulu", 0b111000>; +defm VWMULSU_V : VWMUL_MV_V_X<"vwmulsu", 0b111010>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Single-Width Integer Multiply-Add Instructions -defm VMACC_V : VALUr_MV_V_X<"vmacc", 0b101101>; -defm VNMSAC_V : VALUr_MV_V_X<"vnmsac", 0b101111>; -defm VMADD_V : VALUr_MV_V_X<"vmadd", 0b101001>; -defm VNMSUB_V : VALUr_MV_V_X<"vnmsub", 0b101011>; +defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>; +defm VNMSAC_V : VMAC_MV_V_X<"vnmsac", 0b101111>; +defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>; +defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>; // Vector Widening Integer Multiply-Add Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VWMACCU_V : VALUr_MV_V_X<"vwmaccu", 0b111100>; -defm VWMACC_V : VALUr_MV_V_X<"vwmacc", 0b111101>; -defm VWMACCSU_V : VALUr_MV_V_X<"vwmaccsu", 0b111111>; -defm VWMACCUS_V : VALUr_MV_X<"vwmaccus", 0b111110>; +defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>; +defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>; +defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>; +defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Integer Merge Instructions -defm VMERGE_V : VALUm_IV_V_X_I<"vmerge", 0b010111>; +defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>; // Vector Integer Move Instructions let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1, RVVConstraint = NoConstraint in { // op vd, vs1 def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd), - (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">; + (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">, + Sched<[WriteVIMovV, ReadVIMovV]>; // op vd, rs1 def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd), - (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">; + (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">, + Sched<[WriteVIMovX, ReadVIMovX]>; // op vd, imm def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd), - (ins simm5:$imm), "vmv.v.i", "$vd, $imm">; + (ins simm5:$imm), "vmv.v.i", "$vd, $imm">, + Sched<[WriteVIMovI]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Vector Fixed-Point Arithmetic Instructions -defm VSADDU_V : VALU_IV_V_X_I<"vsaddu", 0b100000>; -defm VSADD_V : VALU_IV_V_X_I<"vsadd", 0b100001>; -defm VSSUBU_V : VALU_IV_V_X<"vssubu", 0b100010>; -defm VSSUB_V : VALU_IV_V_X<"vssub", 0b100011>; +defm VSADDU_V : VSALU_IV_V_X_I<"vsaddu", 0b100000>; +defm VSADD_V : VSALU_IV_V_X_I<"vsadd", 0b100001>; +defm VSSUBU_V : VSALU_IV_V_X<"vssubu", 0b100010>; +defm VSSUB_V : VSALU_IV_V_X<"vssub", 0b100011>; // Vector Single-Width Averaging Add and Subtract -defm VAADDU_V : VALU_MV_V_X<"vaaddu", 0b001000>; -defm VAADD_V : VALU_MV_V_X<"vaadd", 0b001001>; -defm VASUBU_V : VALU_MV_V_X<"vasubu", 0b001010>; -defm VASUB_V : VALU_MV_V_X<"vasub", 0b001011>; +defm VAADDU_V : VAALU_MV_V_X<"vaaddu", 0b001000>; +defm VAADD_V : VAALU_MV_V_X<"vaadd", 0b001001>; +defm VASUBU_V : VAALU_MV_V_X<"vasubu", 0b001010>; +defm VASUB_V : VAALU_MV_V_X<"vasub", 0b001011>; // Vector Single-Width Fractional Multiply with Rounding and Saturation -defm VSMUL_V : VALU_IV_V_X<"vsmul", 0b100111>; +defm VSMUL_V : VSMUL_IV_V_X<"vsmul", 0b100111>; // Vector Single-Width Scaling Shift Instructions -defm VSSRL_V : VALU_IV_V_X_I<"vssrl", 0b101010, uimm5>; -defm VSSRA_V : VALU_IV_V_X_I<"vssra", 0b101011, uimm5>; +defm VSSRL_V : VSSHF_IV_V_X_I<"vssrl", 0b101010, uimm5>; +defm VSSRA_V : VSSHF_IV_V_X_I<"vssra", 0b101011, uimm5>; // Vector 
Narrowing Fixed-Point Clip Instructions let Constraints = "@earlyclobber $vd" in { -defm VNCLIPU_W : VALU_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">; -defm VNCLIP_W : VALU_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">; +defm VNCLIPU_W : VNCLP_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">; +defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">; } // Constraints = "@earlyclobber $vd" } // Predicates = [HasStdExtV] @@ -762,60 +1171,60 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>; // Vector Widening Floating-Point Add/Subtract Instructions let Constraints = "@earlyclobber $vd" in { let RVVConstraint = WidenV in { -defm VFWADD_V : VALU_FV_V_F<"vfwadd", 0b110000>; -defm VFWSUB_V : VALU_FV_V_F<"vfwsub", 0b110010>; +defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000>; +defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010>; } // RVVConstraint = WidenV // Set earlyclobber for following instructions for second and mask operands. // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. let RVVConstraint = WidenW in { -defm VFWADD_W : VALU_FV_V_F<"vfwadd", 0b110100, "w">; -defm VFWSUB_W : VALU_FV_V_F<"vfwsub", 0b110110, "w">; +defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">; +defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">; } // RVVConstraint = WidenW } // Constraints = "@earlyclobber $vd" // Vector Single-Width Floating-Point Multiply/Divide Instructions -defm VFMUL_V : VALU_FV_V_F<"vfmul", 0b100100>; -defm VFDIV_V : VALU_FV_V_F<"vfdiv", 0b100000>; -defm VFRDIV_V : VALU_FV_F<"vfrdiv", 0b100001>; +defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>; +defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>; +defm VFRDIV_V : VRDIV_FV_F<"vfrdiv", 0b100001>; // Vector Widening Floating-Point Multiply let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VFWMUL_V : VALU_FV_V_F<"vfwmul", 0b111000>; +defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Single-Width Floating-Point Fused Multiply-Add Instructions -defm VFMACC_V : VALUr_FV_V_F<"vfmacc", 0b101100>; -defm VFNMACC_V : VALUr_FV_V_F<"vfnmacc", 0b101101>; -defm VFMSAC_V : VALUr_FV_V_F<"vfmsac", 0b101110>; -defm VFNMSAC_V : VALUr_FV_V_F<"vfnmsac", 0b101111>; -defm VFMADD_V : VALUr_FV_V_F<"vfmadd", 0b101000>; -defm VFNMADD_V : VALUr_FV_V_F<"vfnmadd", 0b101001>; -defm VFMSUB_V : VALUr_FV_V_F<"vfmsub", 0b101010>; -defm VFNMSUB_V : VALUr_FV_V_F<"vfnmsub", 0b101011>; +defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>; +defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>; +defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>; +defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>; +defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>; +defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>; +defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>; +defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>; // Vector Widening Floating-Point Fused Multiply-Add Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VFWMACC_V : VALUr_FV_V_F<"vfwmacc", 0b111100>; -defm VFWNMACC_V : VALUr_FV_V_F<"vfwnmacc", 0b111101>; -defm VFWMSAC_V : VALUr_FV_V_F<"vfwmsac", 0b111110>; -defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>; +defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>; +defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>; +defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>; +defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>; } // Constraints = 
"@earlyclobber $vd", RVVConstraint = WidenV // Vector Floating-Point Square-Root Instruction -defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; -defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; -defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; +defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; +defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; +defm VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; // Vector Floating-Point MIN/MAX Instructions -defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>; -defm VFMAX_V : VALU_FV_V_F<"vfmax", 0b000110>; +defm VFMIN_V : VCMP_FV_V_F<"vfmin", 0b000100>; +defm VFMAX_V : VCMP_FV_V_F<"vfmax", 0b000110>; // Vector Floating-Point Sign-Injection Instructions -defm VFSGNJ_V : VALU_FV_V_F<"vfsgnj", 0b001000>; -defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>; -defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>; +defm VFSGNJ_V : VSGNJ_FV_V_F<"vfsgnj", 0b001000>; +defm VFSGNJN_V : VSGNJ_FV_V_F<"vfsgnjn", 0b001001>; +defm VFSGNJX_V : VSGNJ_FV_V_F<"vfsgnjx", 0b001010>; def : InstAlias<"vfneg.v $vd, $vs$vm", (VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>; @@ -824,12 +1233,12 @@ def : InstAlias<"vfabs.v $vd, $vs$vm", // Vector Floating-Point Compare Instructions let RVVConstraint = NoConstraint in { -defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>; -defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>; -defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>; -defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>; -defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>; -defm VMFGE_V : VALU_FV_F<"vmfge", 0b011111>; +defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>; +defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>; +defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>; +defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>; +defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>; +defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>; } // RVVConstraint = NoConstraint def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm", @@ -838,68 +1247,70 @@ def : InstAlias<"vmfge.vv $vd, $va, $vb$vm", (VMFLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; // Vector Floating-Point Classify Instruction -defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b010011, 0b10000>; +defm VFCLASS_V : VCLS_FV_VS2<"vfclass.v", 0b010011, 0b10000>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { + // Vector Floating-Point Merge Instruction +let vm = 0 in def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd), (ins VR:$vs2, FPR32:$rs1, VMV0:$v0), - "vfmerge.vfm", "$vd, $vs2, $rs1, v0"> { - let vm = 0; -} + "vfmerge.vfm", "$vd, $vs2, $rs1, v0">, + Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; // Vector Floating-Point Move Instruction let RVVConstraint = NoConstraint in +let vm = 1, vs2 = 0 in def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd), - (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1"> { - let vs2 = 0; - let vm = 1; -} + (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">, + Sched<[WriteVFMovV, ReadVFMovF]>; + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Single-Width Floating-Point/Integer Type-Convert Instructions -defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; -defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; -defm VFCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; -defm VFCVT_RTZ_X_F_V : VALU_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; -defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; -defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; +defm VFCVT_XU_F_V : VCVTI_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; 
+defm VFCVT_X_F_V : VCVTI_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; +defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; +defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; +defm VFCVT_F_XU_V : VCVTF_IV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; +defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; // Widening Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in { -defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; -defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; -defm VFWCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>; -defm VFWCVT_RTZ_X_F_V : VALU_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>; -defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>; -defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>; -defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; +defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; +defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; +defm VFWCVT_RTZ_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>; +defm VFWCVT_RTZ_X_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>; +defm VFWCVT_F_XU_V : VWCVTF_IV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>; +defm VFWCVT_F_X_V : VWCVTF_IV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>; +defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt // Narrowing Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd" in { -defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; -defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; -defm VFNCVT_RTZ_XU_F_W : VALU_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; -defm VFNCVT_RTZ_X_F_W : VALU_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; -defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; -defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; -defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; -defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>; +defm VFNCVT_XU_F_W : VNCVTI_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; +defm VFNCVT_X_F_W : VNCVTI_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; +defm VFNCVT_RTZ_XU_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; +defm VFNCVT_RTZ_X_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; +defm VFNCVT_F_XU_W : VNCVTF_IV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; +defm VFNCVT_F_X_W : VNCVTF_IV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; +defm VFNCVT_F_F_W : VNCVTF_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; +defm VFNCVT_ROD_F_F_W : VNCVTF_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>; } // Constraints = "@earlyclobber $vd" } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { + // Vector Single-Width Integer Reduction Instructions let RVVConstraint = NoConstraint in { -defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>; -defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>; -defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>; -defm VREDMINU : VALU_MV_V<"vredminu", 0b000100>; -defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>; -defm VREDAND : VALU_MV_V<"vredand", 0b000001>; -defm VREDOR : VALU_MV_V<"vredor", 0b000010>; -defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>; +defm VREDSUM : VRED_MV_V<"vredsum", 0b000000>; +defm VREDMAXU : VRED_MV_V<"vredmaxu", 
0b000110>; +defm VREDMAX : VRED_MV_V<"vredmax", 0b000111>; +defm VREDMINU : VRED_MV_V<"vredminu", 0b000100>; +defm VREDMIN : VRED_MV_V<"vredmin", 0b000101>; +defm VREDAND : VRED_MV_V<"vredand", 0b000001>; +defm VREDOR : VRED_MV_V<"vredor", 0b000010>; +defm VREDXOR : VRED_MV_V<"vredxor", 0b000011>; } // RVVConstraint = NoConstraint // Vector Widening Integer Reduction Instructions @@ -908,18 +1319,19 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. -defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>; -defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>; +defm VWREDSUMU : VWRED_IV_V<"vwredsumu", 0b110000>; +defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>; } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint + } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { // Vector Single-Width Floating-Point Reduction Instructions let RVVConstraint = NoConstraint in { -defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>; -defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>; -defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>; -defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>; +defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>; +defm VFREDSUM : VRED_FV_V<"vfredsum", 0b000001>; +defm VFREDMAX : VRED_FV_V<"vfredmax", 0b000111>; +defm VFREDMIN : VRED_FV_V<"vfredmin", 0b000101>; } // RVVConstraint = NoConstraint // Vector Widening Floating-Point Reduction Instructions @@ -928,22 +1340,22 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. 
-defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>; -defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>; +defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>; +defm VFWREDSUM : VWRED_FV_V<"vfwredsum", 0b110001>; } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { // Vector Mask-Register Logical Instructions let RVVConstraint = NoConstraint in { -defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">; -defm VMNAND_M : VALU_MV_Mask<"vmnand", 0b011101, "m">; -defm VMANDNOT_M : VALU_MV_Mask<"vmandnot", 0b011000, "m">; -defm VMXOR_M : VALU_MV_Mask<"vmxor", 0b011011, "m">; -defm VMOR_M : VALU_MV_Mask<"vmor", 0b011010, "m">; -defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">; -defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">; -defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">; +defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">; +defm VMNAND_M : VMALU_MV_Mask<"vmnand", 0b011101, "m">; +defm VMANDNOT_M : VMALU_MV_Mask<"vmandnot", 0b011000, "m">; +defm VMXOR_M : VMALU_MV_Mask<"vmxor", 0b011011, "m">; +defm VMOR_M : VMALU_MV_Mask<"vmor", 0b011010, "m">; +defm VMNOR_M : VMALU_MV_Mask<"vmnor", 0b011110, "m">; +defm VMORNOT_M : VMALU_MV_Mask<"vmornot", 0b011100, "m">; +defm VMXNOR_M : VMALU_MV_Mask<"vmxnor", 0b011111, "m">; } def : InstAlias<"vmmv.m $vd, $vs", @@ -957,98 +1369,113 @@ def : InstAlias<"vmnot.m $vd, $vs", let hasSideEffects = 0, mayLoad = 0, mayStore = 0, RVVConstraint = NoConstraint in { + // Vector mask population count vpopc def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd), - (ins VR:$vs2, VMaskOp:$vm), - "vpopc.m", "$vd, $vs2$vm">; + (ins VR:$vs2, VMaskOp:$vm), + "vpopc.m", "$vd, $vs2$vm">, + Sched<[WriteVMPopV, ReadVMPopV, ReadVMask]>; // vfirst find-first-set mask bit def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd), - (ins VR:$vs2, VMaskOp:$vm), - "vfirst.m", "$vd, $vs2$vm">; + (ins VR:$vs2, VMaskOp:$vm), + "vfirst.m", "$vd, $vs2$vm">, + Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMask]>; + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in { + // vmsbf.m set-before-first mask bit -defm VMSBF_M : VALU_MV_VS2<"vmsbf.m", 0b010100, 0b00001>; +defm VMSBF_M : VMSFS_MV_V<"vmsbf.m", 0b010100, 0b00001>; // vmsif.m set-including-first mask bit -defm VMSIF_M : VALU_MV_VS2<"vmsif.m", 0b010100, 0b00011>; +defm VMSIF_M : VMSFS_MV_V<"vmsif.m", 0b010100, 0b00011>; // vmsof.m set-only-first mask bit -defm VMSOF_M : VALU_MV_VS2<"vmsof.m", 0b010100, 0b00010>; +defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>; // Vector Iota Instruction -defm VIOTA_M : VALU_MV_VS2<"viota.m", 0b010100, 0b10000>; +defm VIOTA_M : VMIOT_MV_V<"viota.m", 0b010100, 0b10000>; + } // Constraints = "@earlyclobber $vd", RVVConstraint = Iota // Vector Element Index Instruction let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { + +let vs2 = 0 in def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd), - (ins VMaskOp:$vm), "vid.v", "$vd$vm"> { - let vs2 = 0; -} + (ins VMaskOp:$vm), "vid.v", "$vd$vm">, + Sched<[WriteVMIdxV, ReadVMask]>; // Integer Scalar Move Instructions let vm = 1, RVVConstraint = NoConstraint in { def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd), - (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">; + (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">, + Sched<[WriteVIMovVX, ReadVIMovVX]>; let Constraints = "$vd = $vd_wb" in def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb), - (ins VR:$vd, GPR:$rs1), 
"vmv.s.x", "$vd, $rs1">; - + (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">, + Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>; } + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 + } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1, RVVConstraint = NoConstraint in { // Floating-Point Scalar Move Instructions def VFMV_F_S : RVInstV<0b010000, 0b00000, OPFVV, (outs FPR32:$vd), - (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">; + (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">, + Sched<[WriteVFMovVF, ReadVFMovVF]>; let Constraints = "$vd = $vd_wb" in def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb), - (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">; + (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">, + Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1 + } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { // Vector Slide Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { -defm VSLIDEUP_V : VALU_IV_X_I<"vslideup", 0b001110, uimm5>; -defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>; +defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, uimm5>; +defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp -defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>; -defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>; +defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, uimm5>; +defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>; } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { -defm VFSLIDE1UP_V : VALU_FV_F<"vfslide1up", 0b001110>; +defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp -defm VFSLIDE1DOWN_V : VALU_FV_F<"vfslide1down", 0b001111>; +defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>; } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { // Vector Register Gather Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in { -defm VRGATHER_V : VALU_IV_V_X_I<"vrgather", 0b001100, uimm5>; -def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">; +defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100, uimm5>; +def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">, + Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV]>; } // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather // Vector Compress Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in { -defm VCOMPRESS_V : VALU_MV_Mask<"vcompress", 0b010111>; +defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>; } // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress let hasSideEffects = 0, mayLoad = 0, mayStore = 0, RVVConstraint = NoConstraint in { -foreach nf = [1, 2, 4, 8] in { - def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VR:$vd), - (ins VR:$vs2), "vmv" # nf # "r.v", - "$vd, $vs2"> { - let Uses = []; - let vm = 1; - } +foreach n = [1, 2, 4, 8] in { + def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs VR:$vd), + (ins VR:$vs2), "vmv" # n # "r.v", "$vd, $vs2">, + VMVRSched<n> { + let Uses = []; + let vm = 1; +} } } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 } // Predicates = [HasStdExtV] diff --git 
a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td index ed26a5026114..14f59152ed42 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -231,6 +231,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>; def : ReadAdvance<ReadFClass32, 0>; def : ReadAdvance<ReadFClass64, 0>; +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; defm : UnsupportedSchedZba; defm : UnsupportedSchedZbb; defm : UnsupportedSchedZfh; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 314af180aca1..75ca6ca861be 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -219,6 +219,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>; def : ReadAdvance<ReadFClass32, 0>; def : ReadAdvance<ReadFClass64, 0>; +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; defm : UnsupportedSchedZba; defm : UnsupportedSchedZbb; defm : UnsupportedSchedZfh; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td index f31e4af46c1b..4971ca1d4e3e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -230,3 +230,4 @@ def : ReadAdvance<ReadFSqrt16, 0>; // Include the scheduler resources for other instruction extensions. include "RISCVScheduleB.td" +include "RISCVScheduleV.td" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td new file mode 100644 index 000000000000..43af1802d706 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -0,0 +1,820 @@ +//===-- RISCVScheduleV.td - RISCV Scheduling Definitions V -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +/// Define scheduler resources associated with def operands. + +// 7. Vector Loads and Stores +// 7.4. Vector Unit-Stride Instructions +def WriteVLDE8 : SchedWrite; +def WriteVLDE16 : SchedWrite; +def WriteVLDE32 : SchedWrite; +def WriteVLDE64 : SchedWrite; +def WriteVSTE8 : SchedWrite; +def WriteVSTE16 : SchedWrite; +def WriteVSTE32 : SchedWrite; +def WriteVSTE64 : SchedWrite; +// 7.4.1. Vector Unit-Strided Mask +def WriteVLDM : SchedWrite; +def WriteVSTM : SchedWrite; +// 7.5. Vector Strided Instructions +def WriteVLDS8 : SchedWrite; +def WriteVLDS16 : SchedWrite; +def WriteVLDS32 : SchedWrite; +def WriteVLDS64 : SchedWrite; +def WriteVSTS8 : SchedWrite; +def WriteVSTS16 : SchedWrite; +def WriteVSTS32 : SchedWrite; +def WriteVSTS64 : SchedWrite; +// 7.6. 
Vector Indexed Instructions +def WriteVLDUX8 : SchedWrite; +def WriteVLDUX16 : SchedWrite; +def WriteVLDUX32 : SchedWrite; +def WriteVLDUX64 : SchedWrite; +def WriteVLDOX8 : SchedWrite; +def WriteVLDOX16 : SchedWrite; +def WriteVLDOX32 : SchedWrite; +def WriteVLDOX64 : SchedWrite; +def WriteVSTUX8 : SchedWrite; +def WriteVSTUX16 : SchedWrite; +def WriteVSTUX32 : SchedWrite; +def WriteVSTUX64 : SchedWrite; +def WriteVSTOX8 : SchedWrite; +def WriteVSTOX16 : SchedWrite; +def WriteVSTOX32 : SchedWrite; +def WriteVSTOX64 : SchedWrite; +// 7.7. Vector Unit-stride Fault-Only-First Loads +def WriteVLDFF8 : SchedWrite; +def WriteVLDFF16 : SchedWrite; +def WriteVLDFF32 : SchedWrite; +def WriteVLDFF64 : SchedWrite; +// 7.9. Vector Whole Register Instructions +def WriteVLD1R8 : SchedWrite; +def WriteVLD1R16 : SchedWrite; +def WriteVLD1R32 : SchedWrite; +def WriteVLD1R64 : SchedWrite; +def WriteVLD2R8 : SchedWrite; +def WriteVLD2R16 : SchedWrite; +def WriteVLD2R32 : SchedWrite; +def WriteVLD2R64 : SchedWrite; +def WriteVLD4R8 : SchedWrite; +def WriteVLD4R16 : SchedWrite; +def WriteVLD4R32 : SchedWrite; +def WriteVLD4R64 : SchedWrite; +def WriteVLD8R8 : SchedWrite; +def WriteVLD8R16 : SchedWrite; +def WriteVLD8R32 : SchedWrite; +def WriteVLD8R64 : SchedWrite; +def WriteVST1R : SchedWrite; +def WriteVST2R : SchedWrite; +def WriteVST4R : SchedWrite; +def WriteVST8R : SchedWrite; + +// 11. Vector Integer Arithmetic Instructions +// 11.1. Vector Single-Width Integer Add and Subtract +// 11.5. Vector Bitwise Logical Instructions +def WriteVIALUV : SchedWrite; +def WriteVIALUX : SchedWrite; +def WriteVIALUI : SchedWrite; +// 11.2. Vector Widening Integer Add/Subtract +def WriteVIWALUV : SchedWrite; +def WriteVIWALUX : SchedWrite; +def WriteVIWALUI : SchedWrite; +// 11.3. Vector Integer Extension +def WriteVExtV : SchedWrite; +// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions +def WriteVICALUV : SchedWrite; +def WriteVICALUX : SchedWrite; +def WriteVICALUI : SchedWrite; +// 11.6. Vector Single-Width Bit Shift Instructions +def WriteVShiftV : SchedWrite; +def WriteVShiftX : SchedWrite; +def WriteVShiftI : SchedWrite; +// 11.7. Vector Narrowing Integer Right Shift Instructions +def WriteVNShiftV : SchedWrite; +def WriteVNShiftX : SchedWrite; +def WriteVNShiftI : SchedWrite; +// 11.8. Vector Integer Comparison Instructions +// 11.9. Vector Integer Min/Max Instructions +def WriteVICmpV : SchedWrite; +def WriteVICmpX : SchedWrite; +def WriteVICmpI : SchedWrite; +// 11.10. Vector Single-Width Integer Multiply Instructions +def WriteVIMulV : SchedWrite; +def WriteVIMulX : SchedWrite; +// 11.11. Vector Integer Divide Instructions +def WriteVIDivV : SchedWrite; +def WriteVIDivX : SchedWrite; +// 11.12. Vector Widening Integer Multiply Instructions +def WriteVIWMulV : SchedWrite; +def WriteVIWMulX : SchedWrite; +// 11.13. Vector Single-Width Integer Multiply-Add Instructions +def WriteVIMulAddV : SchedWrite; +def WriteVIMulAddX : SchedWrite; +// 11.14. Vector Widening Integer Multiply-Add Instructions +def WriteVIWMulAddV : SchedWrite; +def WriteVIWMulAddX : SchedWrite; +// 11.15. Vector Integer Merge Instructions +def WriteVIMergeV : SchedWrite; +def WriteVIMergeX : SchedWrite; +def WriteVIMergeI : SchedWrite; +// 11.16. Vector Integer Move Instructions +def WriteVIMovV : SchedWrite; +def WriteVIMovX : SchedWrite; +def WriteVIMovI : SchedWrite; + +// 12. Vector Fixed-Point Arithmetic Instructions +// 12.1. 
Vector Single-Width Saturating Add and Subtract +def WriteVSALUV : SchedWrite; +def WriteVSALUX : SchedWrite; +def WriteVSALUI : SchedWrite; +// 12.2. Vector Single-Width Averaging Add and Subtract +def WriteVAALUV : SchedWrite; +def WriteVAALUX : SchedWrite; +// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation +def WriteVSMulV : SchedWrite; +def WriteVSMulX : SchedWrite; +// 12.4. Vector Single-Width Scaling Shift Instructions +def WriteVSShiftV : SchedWrite; +def WriteVSShiftX : SchedWrite; +def WriteVSShiftI : SchedWrite; +// 12.5. Vector Narrowing Fixed-Point Clip Instructions +def WriteVNClipV : SchedWrite; +def WriteVNClipX : SchedWrite; +def WriteVNClipI : SchedWrite; + +// 13. Vector Floating-Point Instructions +// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions +def WriteVFALUV : SchedWrite; +def WriteVFALUF : SchedWrite; +// 13.3. Vector Widening Floating-Point Add/Subtract Instructions +def WriteVFWALUV : SchedWrite; +def WriteVFWALUF : SchedWrite; +// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions +def WriteVFMulV : SchedWrite; +def WriteVFMulF : SchedWrite; +def WriteVFDivV : SchedWrite; +def WriteVFDivF : SchedWrite; +// 13.5. Vector Widening Floating-Point Multiply +def WriteVFWMulV : SchedWrite; +def WriteVFWMulF : SchedWrite; +// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +def WriteVFMulAddV : SchedWrite; +def WriteVFMulAddF : SchedWrite; +// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +def WriteVFWMulAddV : SchedWrite; +def WriteVFWMulAddF : SchedWrite; +// 13.8. Vector Floating-Point Square-Root Instruction +def WriteVFSqrtV : SchedWrite; +// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction +// 13.10. Vector Floating-Point Reciprocal Estimate Instruction +def WriteVFRecpV : SchedWrite; +// 13.11. Vector Floating-Point MIN/MAX Instructions +// 13.13. Vector Floating-Point Compare Instructions +def WriteVFCmpV : SchedWrite; +def WriteVFCmpF : SchedWrite; +// 13.12. Vector Floating-Point Sign-Injection Instructions +def WriteVFSgnjV : SchedWrite; +def WriteVFSgnjF : SchedWrite; +// 13.14. Vector Floating-Point Classify Instruction +def WriteVFClassV : SchedWrite; +// 13.15. Vector Floating-Point Merge Instruction +def WriteVFMergeV : SchedWrite; +// 13.16. Vector Floating-Point Move Instruction +def WriteVFMovV : SchedWrite; +// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions +def WriteVFCvtIToFV : SchedWrite; +def WriteVFCvtFToIV : SchedWrite; +def WriteVFCvtFToFV : SchedWrite; +// 13.18. Widening Floating-Point/Integer Type-Convert Instructions +def WriteVFWCvtIToFV : SchedWrite; +def WriteVFWCvtFToIV : SchedWrite; +def WriteVFWCvtFToFV : SchedWrite; +// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions +def WriteVFNCvtIToFV : SchedWrite; +def WriteVFNCvtFToIV : SchedWrite; +def WriteVFNCvtFToFV : SchedWrite; + +// 14. Vector Reduction Operations +// 14.1. Vector Single-Width Integer Reduction Instructions +def WriteVIRedV : SchedWrite; +// 14.2. Vector Widening Integer Reduction Instructions +def WriteVIWRedV : SchedWrite; +// 14.3. Vector Single-Width Floating-Point Reduction Instructions +def WriteVFRedV : SchedWrite; +def WriteVFRedOV : SchedWrite; +// 14.4. Vector Widening Floating-Point Reduction Instructions +def WriteVFWRedV : SchedWrite; +def WriteVFWRedOV : SchedWrite; + +// 15. Vector Mask Instructions +// 15.1. 
Vector Mask-Register Logical Instructions +def WriteVMALUV : SchedWrite; +// 15.2. Vector Mask Population Count +def WriteVMPopV : SchedWrite; +// 15.3. Vector Find-First-Set Mask Bit +def WriteVMFFSV : SchedWrite; +// 15.4. Vector Set-Before-First Mask Bit +// 15.5. Vector Set-Including-First Mask Bit +// 15.6. Vector Set-only-First Mask Bit +def WriteVMSFSV : SchedWrite; +// 15.8. Vector Iota Instruction +def WriteVMIotV : SchedWrite; +// 15.9. Vector Element Index Instruction +def WriteVMIdxV : SchedWrite; + +// 16. Vector Permutation Instructions +// 16.1. Integer Scalar Move Instructions +def WriteVIMovVX : SchedWrite; +def WriteVIMovXV : SchedWrite; +// 16.2. Floating-Point Scalar Move Instructions +def WriteVFMovVF : SchedWrite; +def WriteVFMovFV : SchedWrite; +// 16.3. Vector Slide Instructions +def WriteVISlideX : SchedWrite; +def WriteVISlideI : SchedWrite; +def WriteVISlide1X : SchedWrite; +def WriteVFSlide1F : SchedWrite; +// 16.4. Vector Register Gather Instructions +def WriteVGatherV : SchedWrite; +def WriteVGatherX : SchedWrite; +def WriteVGatherI : SchedWrite; +// 16.5. Vector Compress Instruction +def WriteVCompressV : SchedWrite; +// 16.6. Whole Vector Register Move +def WriteVMov1V : SchedWrite; +def WriteVMov2V : SchedWrite; +def WriteVMov4V : SchedWrite; +def WriteVMov8V : SchedWrite; + +//===----------------------------------------------------------------------===// +/// Define scheduler resources associated with use operands. + +// 7. Vector Loads and Stores +def ReadVLDX : SchedRead; +def ReadVSTX : SchedRead; +// 7.4. Vector Unit-Stride Instructions +def ReadVSTE8V : SchedRead; +def ReadVSTE16V : SchedRead; +def ReadVSTE32V : SchedRead; +def ReadVSTE64V : SchedRead; +// 7.4.1. Vector Unit-Strided Mask +def ReadVSTM : SchedRead; +// 7.5. Vector Strided Instructions +def ReadVLDSX : SchedRead; +def ReadVSTSX : SchedRead; +def ReadVSTS8V : SchedRead; +def ReadVSTS16V : SchedRead; +def ReadVSTS32V : SchedRead; +def ReadVSTS64V : SchedRead; +// 7.6. Vector Indexed Instructions +def ReadVLDUXV : SchedRead; +def ReadVLDOXV : SchedRead; +def ReadVSTUX8 : SchedRead; +def ReadVSTUX16 : SchedRead; +def ReadVSTUX32 : SchedRead; +def ReadVSTUX64 : SchedRead; +def ReadVSTUXV : SchedRead; +def ReadVSTUX8V : SchedRead; +def ReadVSTUX16V : SchedRead; +def ReadVSTUX32V : SchedRead; +def ReadVSTUX64V : SchedRead; +def ReadVSTOX8 : SchedRead; +def ReadVSTOX16 : SchedRead; +def ReadVSTOX32 : SchedRead; +def ReadVSTOX64 : SchedRead; +def ReadVSTOXV : SchedRead; +def ReadVSTOX8V : SchedRead; +def ReadVSTOX16V : SchedRead; +def ReadVSTOX32V : SchedRead; +def ReadVSTOX64V : SchedRead; +// 7.9. Vector Whole Register Instructions +def ReadVST1R : SchedRead; +def ReadVST2R : SchedRead; +def ReadVST4R : SchedRead; +def ReadVST8R : SchedRead; + +// 11. Vector Integer Arithmetic Instructions +// 11.1. Vector Single-Width Integer Add and Subtract +// 11.5. Vector Bitwise Logical Instructions +def ReadVIALUV : SchedRead; +def ReadVIALUX : SchedRead; +// 11.2. Vector Widening Integer Add/Subtract +def ReadVIWALUV : SchedRead; +def ReadVIWALUX : SchedRead; +// 11.3. Vector Integer Extension +def ReadVExtV : SchedRead; +// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions +def ReadVIALUCV : SchedRead; +def ReadVIALUCX : SchedRead; +// 11.6. Vector Single-Width Bit Shift Instructions +def ReadVShiftV : SchedRead; +def ReadVShiftX : SchedRead; +// 11.7. Vector Narrowing Integer Right Shift Instructions +def ReadVNShiftV : SchedRead; +def ReadVNShiftX : SchedRead; +// 11.8. 
Vector Integer Comparison Instructions +// 11.9. Vector Integer Min/Max Instructions +def ReadVICmpV : SchedRead; +def ReadVICmpX : SchedRead; +// 11.10. Vector Single-Width Integer Multiply Instructions +def ReadVIMulV : SchedRead; +def ReadVIMulX : SchedRead; +// 11.11. Vector Integer Divide Instructions +def ReadVIDivV : SchedRead; +def ReadVIDivX : SchedRead; +// 11.12. Vector Widening Integer Multiply Instructions +def ReadVIWMulV : SchedRead; +def ReadVIWMulX : SchedRead; +// 11.13. Vector Single-Width Integer Multiply-Add Instructions +def ReadVIMulAddV : SchedRead; +def ReadVIMulAddX : SchedRead; +// 11.14. Vector Widening Integer Multiply-Add Instructions +def ReadVIWMulAddV : SchedRead; +def ReadVIWMulAddX : SchedRead; +// 11.15. Vector Integer Merge Instructions +def ReadVIMergeV : SchedRead; +def ReadVIMergeX : SchedRead; +// 11.16. Vector Integer Move Instructions +def ReadVIMovV : SchedRead; +def ReadVIMovX : SchedRead; + +// 12. Vector Fixed-Point Arithmetic Instructions +// 12.1. Vector Single-Width Saturating Add and Subtract +def ReadVSALUV : SchedRead; +def ReadVSALUX : SchedRead; +// 12.2. Vector Single-Width Averaging Add and Subtract +def ReadVAALUV : SchedRead; +def ReadVAALUX : SchedRead; +// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation +def ReadVSMulV : SchedRead; +def ReadVSMulX : SchedRead; +// 12.4. Vector Single-Width Scaling Shift Instructions +def ReadVSShiftV : SchedRead; +def ReadVSShiftX : SchedRead; +// 12.5. Vector Narrowing Fixed-Point Clip Instructions +def ReadVNClipV : SchedRead; +def ReadVNClipX : SchedRead; + +// 13. Vector Floating-Point Instructions +// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions +def ReadVFALUV : SchedRead; +def ReadVFALUF : SchedRead; +// 13.3. Vector Widening Floating-Point Add/Subtract Instructions +def ReadVFWALUV : SchedRead; +def ReadVFWALUF : SchedRead; +// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions +def ReadVFMulV : SchedRead; +def ReadVFMulF : SchedRead; +def ReadVFDivV : SchedRead; +def ReadVFDivF : SchedRead; +// 13.5. Vector Widening Floating-Point Multiply +def ReadVFWMulV : SchedRead; +def ReadVFWMulF : SchedRead; +// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +def ReadVFMulAddV : SchedRead; +def ReadVFMulAddF : SchedRead; +// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +def ReadVFWMulAddV : SchedRead; +def ReadVFWMulAddF : SchedRead; +// 13.8. Vector Floating-Point Square-Root Instruction +def ReadVFSqrtV : SchedRead; +// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction +// 13.10. Vector Floating-Point Reciprocal Estimate Instruction +def ReadVFRecpV : SchedRead; +// 13.11. Vector Floating-Point MIN/MAX Instructions +// 13.13. Vector Floating-Point Compare Instructions +def ReadVFCmpV : SchedRead; +def ReadVFCmpF : SchedRead; +// 13.12. Vector Floating-Point Sign-Injection Instructions +def ReadVFSgnjV : SchedRead; +def ReadVFSgnjF : SchedRead; +// 13.14. Vector Floating-Point Classify Instruction +def ReadVFClassV : SchedRead; +// 13.15. Vector Floating-Point Merge Instruction +def ReadVFMergeV : SchedRead; +def ReadVFMergeF : SchedRead; +// 13.16. Vector Floating-Point Move Instruction +def ReadVFMovF : SchedRead; +// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions +def ReadVFCvtIToFV : SchedRead; +def ReadVFCvtFToIV : SchedRead; +// 13.18. 
Widening Floating-Point/Integer Type-Convert Instructions +def ReadVFWCvtIToFV : SchedRead; +def ReadVFWCvtFToIV : SchedRead; +def ReadVFWCvtFToFV : SchedRead; +// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions +def ReadVFNCvtIToFV : SchedRead; +def ReadVFNCvtFToIV : SchedRead; +def ReadVFNCvtFToFV : SchedRead; + +// 14. Vector Reduction Operations +// 14.1. Vector Single-Width Integer Reduction Instructions +def ReadVIRedV : SchedRead; +def ReadVIRedV0 : SchedRead; +// 14.2. Vector Widening Integer Reduction Instructions +def ReadVIWRedV : SchedRead; +def ReadVIWRedV0 : SchedRead; +// 14.3. Vector Single-Width Floating-Point Reduction Instructions +def ReadVFRedV : SchedRead; +def ReadVFRedV0 : SchedRead; +def ReadVFRedOV : SchedRead; +def ReadVFRedOV0 : SchedRead; +// 14.4. Vector Widening Floating-Point Reduction Instructions +def ReadVFWRedV : SchedRead; +def ReadVFWRedV0 : SchedRead; +def ReadVFWRedOV : SchedRead; +def ReadVFWRedOV0 : SchedRead; + +// 15. Vector Mask Instructions +// 15.1. Vector Mask-Register Logical Instructions +def ReadVMALUV : SchedRead; +// 15.2. Vector Mask Population Count +def ReadVMPopV : SchedRead; +// 15.3. Vector Find-First-Set Mask Bit +def ReadVMFFSV : SchedRead; +// 15.4. Vector Set-Before-First Mask Bit +// 15.5. Vector Set-Including-First Mask Bit +// 15.6. Vector Set-only-First Mask Bit +def ReadVMSFSV : SchedRead; +// 15.8. Vector Iota Instruction +def ReadVMIotV : SchedRead; + +// 16. Vector Permutation Instructions +// 16.1. Integer Scalar Move Instructions +def ReadVIMovVX : SchedRead; +def ReadVIMovXV : SchedRead; +def ReadVIMovXX : SchedRead; +// 16.2. Floating-Point Scalar Move Instructions +def ReadVFMovVF : SchedRead; +def ReadVFMovFV : SchedRead; +def ReadVFMovFX : SchedRead; +// 16.3. Vector Slide Instructions +def ReadVISlideV : SchedRead; +def ReadVISlideX : SchedRead; +def ReadVFSlideV : SchedRead; +def ReadVFSlideF : SchedRead; +// 16.4. Vector Register Gather Instructions +def ReadVGatherV : SchedRead; +def ReadVGatherX : SchedRead; +// 16.5. Vector Compress Instruction +def ReadVCompressV : SchedRead; +// 16.6. Whole Vector Register Move +def ReadVMov1V : SchedRead; +def ReadVMov2V : SchedRead; +def ReadVMov4V : SchedRead; +def ReadVMov8V : SchedRead; + +// Others +def ReadVMask : SchedRead; + +//===----------------------------------------------------------------------===// +/// Define default scheduler resources for V. + +multiclass UnsupportedSchedV { +let Unsupported = true in { + +// 7. 
Vector Loads and Stores +def : WriteRes<WriteVLDE8, []>; +def : WriteRes<WriteVLDE16, []>; +def : WriteRes<WriteVLDE32, []>; +def : WriteRes<WriteVLDE64, []>; +def : WriteRes<WriteVSTE8, []>; +def : WriteRes<WriteVSTE16, []>; +def : WriteRes<WriteVSTE32, []>; +def : WriteRes<WriteVSTE64, []>; +def : WriteRes<WriteVLDM, []>; +def : WriteRes<WriteVSTM, []>; +def : WriteRes<WriteVLDS8, []>; +def : WriteRes<WriteVLDS16, []>; +def : WriteRes<WriteVLDS32, []>; +def : WriteRes<WriteVLDS64, []>; +def : WriteRes<WriteVSTS8, []>; +def : WriteRes<WriteVSTS16, []>; +def : WriteRes<WriteVSTS32, []>; +def : WriteRes<WriteVSTS64, []>; +def : WriteRes<WriteVLDUX8, []>; +def : WriteRes<WriteVLDUX16, []>; +def : WriteRes<WriteVLDUX32, []>; +def : WriteRes<WriteVLDUX64, []>; +def : WriteRes<WriteVLDOX8, []>; +def : WriteRes<WriteVLDOX16, []>; +def : WriteRes<WriteVLDOX32, []>; +def : WriteRes<WriteVLDOX64, []>; +def : WriteRes<WriteVSTUX8, []>; +def : WriteRes<WriteVSTUX16, []>; +def : WriteRes<WriteVSTUX32, []>; +def : WriteRes<WriteVSTUX64, []>; +def : WriteRes<WriteVSTOX8, []>; +def : WriteRes<WriteVSTOX16, []>; +def : WriteRes<WriteVSTOX32, []>; +def : WriteRes<WriteVSTOX64, []>; +def : WriteRes<WriteVLDFF8, []>; +def : WriteRes<WriteVLDFF16, []>; +def : WriteRes<WriteVLDFF32, []>; +def : WriteRes<WriteVLDFF64, []>; +def : WriteRes<WriteVLD1R8, []>; +def : WriteRes<WriteVLD1R16, []>; +def : WriteRes<WriteVLD1R32, []>; +def : WriteRes<WriteVLD1R64, []>; +def : WriteRes<WriteVLD2R8, []>; +def : WriteRes<WriteVLD2R16, []>; +def : WriteRes<WriteVLD2R32, []>; +def : WriteRes<WriteVLD2R64, []>; +def : WriteRes<WriteVLD4R8, []>; +def : WriteRes<WriteVLD4R16, []>; +def : WriteRes<WriteVLD4R32, []>; +def : WriteRes<WriteVLD4R64, []>; +def : WriteRes<WriteVLD8R8, []>; +def : WriteRes<WriteVLD8R16, []>; +def : WriteRes<WriteVLD8R32, []>; +def : WriteRes<WriteVLD8R64, []>; +def : WriteRes<WriteVST1R, []>; +def : WriteRes<WriteVST2R, []>; +def : WriteRes<WriteVST4R, []>; +def : WriteRes<WriteVST8R, []>; + +// 12. Vector Integer Arithmetic Instructions +def : WriteRes<WriteVIALUV, []>; +def : WriteRes<WriteVIALUX, []>; +def : WriteRes<WriteVIALUI, []>; +def : WriteRes<WriteVIWALUV, []>; +def : WriteRes<WriteVIWALUX, []>; +def : WriteRes<WriteVIWALUI, []>; +def : WriteRes<WriteVExtV, []>; +def : WriteRes<WriteVICALUV, []>; +def : WriteRes<WriteVICALUX, []>; +def : WriteRes<WriteVICALUI, []>; +def : WriteRes<WriteVShiftV, []>; +def : WriteRes<WriteVShiftX, []>; +def : WriteRes<WriteVShiftI, []>; +def : WriteRes<WriteVNShiftV, []>; +def : WriteRes<WriteVNShiftX, []>; +def : WriteRes<WriteVNShiftI, []>; +def : WriteRes<WriteVICmpV, []>; +def : WriteRes<WriteVICmpX, []>; +def : WriteRes<WriteVICmpI, []>; +def : WriteRes<WriteVIMulV, []>; +def : WriteRes<WriteVIMulX, []>; +def : WriteRes<WriteVIDivV, []>; +def : WriteRes<WriteVIDivX, []>; +def : WriteRes<WriteVIWMulV, []>; +def : WriteRes<WriteVIWMulX, []>; +def : WriteRes<WriteVIMulAddV, []>; +def : WriteRes<WriteVIMulAddX, []>; +def : WriteRes<WriteVIWMulAddV, []>; +def : WriteRes<WriteVIWMulAddX, []>; +def : WriteRes<WriteVIMergeV, []>; +def : WriteRes<WriteVIMergeX, []>; +def : WriteRes<WriteVIMergeI, []>; +def : WriteRes<WriteVIMovV, []>; +def : WriteRes<WriteVIMovX, []>; +def : WriteRes<WriteVIMovI, []>; + +// 13. 
Vector Fixed-Point Arithmetic Instructions +def : WriteRes<WriteVSALUV, []>; +def : WriteRes<WriteVSALUX, []>; +def : WriteRes<WriteVSALUI, []>; +def : WriteRes<WriteVAALUV, []>; +def : WriteRes<WriteVAALUX, []>; +def : WriteRes<WriteVSMulV, []>; +def : WriteRes<WriteVSMulX, []>; +def : WriteRes<WriteVSShiftV, []>; +def : WriteRes<WriteVSShiftX, []>; +def : WriteRes<WriteVSShiftI, []>; +def : WriteRes<WriteVNClipV, []>; +def : WriteRes<WriteVNClipX, []>; +def : WriteRes<WriteVNClipI, []>; + +// 14. Vector Floating-Point Instructions +def : WriteRes<WriteVFALUV, []>; +def : WriteRes<WriteVFALUF, []>; +def : WriteRes<WriteVFWALUV, []>; +def : WriteRes<WriteVFWALUF, []>; +def : WriteRes<WriteVFMulV, []>; +def : WriteRes<WriteVFMulF, []>; +def : WriteRes<WriteVFDivV, []>; +def : WriteRes<WriteVFDivF, []>; +def : WriteRes<WriteVFWMulV, []>; +def : WriteRes<WriteVFWMulF, []>; +def : WriteRes<WriteVFMulAddV, []>; +def : WriteRes<WriteVFMulAddF, []>; +def : WriteRes<WriteVFWMulAddV, []>; +def : WriteRes<WriteVFWMulAddF, []>; +def : WriteRes<WriteVFSqrtV, []>; +def : WriteRes<WriteVFRecpV, []>; +def : WriteRes<WriteVFCmpV, []>; +def : WriteRes<WriteVFCmpF, []>; +def : WriteRes<WriteVFSgnjV, []>; +def : WriteRes<WriteVFSgnjF, []>; +def : WriteRes<WriteVFClassV, []>; +def : WriteRes<WriteVFMergeV, []>; +def : WriteRes<WriteVFMovV, []>; +def : WriteRes<WriteVFCvtIToFV, []>; +def : WriteRes<WriteVFCvtFToIV, []>; +def : WriteRes<WriteVFCvtFToFV, []>; +def : WriteRes<WriteVFWCvtIToFV, []>; +def : WriteRes<WriteVFWCvtFToIV, []>; +def : WriteRes<WriteVFWCvtFToFV, []>; +def : WriteRes<WriteVFNCvtIToFV, []>; +def : WriteRes<WriteVFNCvtFToIV, []>; +def : WriteRes<WriteVFNCvtFToFV, []>; + +// 15. Vector Reduction Operations +def : WriteRes<WriteVIRedV, []>; +def : WriteRes<WriteVIWRedV, []>; +def : WriteRes<WriteVFRedV, []>; +def : WriteRes<WriteVFRedOV, []>; +def : WriteRes<WriteVFWRedV, []>; +def : WriteRes<WriteVFWRedOV, []>; + +// 16. Vector Mask Instructions +def : WriteRes<WriteVMALUV, []>; +def : WriteRes<WriteVMPopV, []>; +def : WriteRes<WriteVMFFSV, []>; +def : WriteRes<WriteVMSFSV, []>; +def : WriteRes<WriteVMIotV, []>; +def : WriteRes<WriteVMIdxV, []>; + +// 17. Vector Permutation Instructions +def : WriteRes<WriteVIMovVX, []>; +def : WriteRes<WriteVIMovXV, []>; +def : WriteRes<WriteVFMovVF, []>; +def : WriteRes<WriteVFMovFV, []>; +def : WriteRes<WriteVISlideX, []>; +def : WriteRes<WriteVISlideI, []>; +def : WriteRes<WriteVISlide1X, []>; +def : WriteRes<WriteVFSlide1F, []>; +def : WriteRes<WriteVGatherV, []>; +def : WriteRes<WriteVGatherX, []>; +def : WriteRes<WriteVGatherI, []>; +def : WriteRes<WriteVCompressV, []>; +def : WriteRes<WriteVMov1V, []>; +def : WriteRes<WriteVMov2V, []>; +def : WriteRes<WriteVMov4V, []>; +def : WriteRes<WriteVMov8V, []>; + +// 7. 
Vector Loads and Stores +def : ReadAdvance<ReadVLDX, 0>; +def : ReadAdvance<ReadVSTX, 0>; +def : ReadAdvance<ReadVSTE8V, 0>; +def : ReadAdvance<ReadVSTE16V, 0>; +def : ReadAdvance<ReadVSTE32V, 0>; +def : ReadAdvance<ReadVSTE64V, 0>; +def : ReadAdvance<ReadVSTM, 0>; +def : ReadAdvance<ReadVLDSX, 0>; +def : ReadAdvance<ReadVSTSX, 0>; +def : ReadAdvance<ReadVSTS8V, 0>; +def : ReadAdvance<ReadVSTS16V, 0>; +def : ReadAdvance<ReadVSTS32V, 0>; +def : ReadAdvance<ReadVSTS64V, 0>; +def : ReadAdvance<ReadVLDUXV, 0>; +def : ReadAdvance<ReadVLDOXV, 0>; +def : ReadAdvance<ReadVSTUXV, 0>; +def : ReadAdvance<ReadVSTUX8, 0>; +def : ReadAdvance<ReadVSTUX16, 0>; +def : ReadAdvance<ReadVSTUX32, 0>; +def : ReadAdvance<ReadVSTUX64, 0>; +def : ReadAdvance<ReadVSTUX8V, 0>; +def : ReadAdvance<ReadVSTUX16V, 0>; +def : ReadAdvance<ReadVSTUX32V, 0>; +def : ReadAdvance<ReadVSTUX64V, 0>; +def : ReadAdvance<ReadVSTOX8, 0>; +def : ReadAdvance<ReadVSTOX16, 0>; +def : ReadAdvance<ReadVSTOX32, 0>; +def : ReadAdvance<ReadVSTOX64, 0>; +def : ReadAdvance<ReadVSTOXV, 0>; +def : ReadAdvance<ReadVSTOX8V, 0>; +def : ReadAdvance<ReadVSTOX16V, 0>; +def : ReadAdvance<ReadVSTOX32V, 0>; +def : ReadAdvance<ReadVSTOX64V, 0>; +def : ReadAdvance<ReadVST1R, 0>; +def : ReadAdvance<ReadVST2R, 0>; +def : ReadAdvance<ReadVST4R, 0>; +def : ReadAdvance<ReadVST8R, 0>; + +// 12. Vector Integer Arithmetic Instructions +def : ReadAdvance<ReadVIALUV, 0>; +def : ReadAdvance<ReadVIALUX, 0>; +def : ReadAdvance<ReadVIWALUV, 0>; +def : ReadAdvance<ReadVIWALUX, 0>; +def : ReadAdvance<ReadVExtV, 0>; +def : ReadAdvance<ReadVIALUCV, 0>; +def : ReadAdvance<ReadVIALUCX, 0>; +def : ReadAdvance<ReadVShiftV, 0>; +def : ReadAdvance<ReadVShiftX, 0>; +def : ReadAdvance<ReadVNShiftV, 0>; +def : ReadAdvance<ReadVNShiftX, 0>; +def : ReadAdvance<ReadVICmpV, 0>; +def : ReadAdvance<ReadVICmpX, 0>; +def : ReadAdvance<ReadVIMulV, 0>; +def : ReadAdvance<ReadVIMulX, 0>; +def : ReadAdvance<ReadVIDivV, 0>; +def : ReadAdvance<ReadVIDivX, 0>; +def : ReadAdvance<ReadVIWMulV, 0>; +def : ReadAdvance<ReadVIWMulX, 0>; +def : ReadAdvance<ReadVIMulAddV, 0>; +def : ReadAdvance<ReadVIMulAddX, 0>; +def : ReadAdvance<ReadVIWMulAddV, 0>; +def : ReadAdvance<ReadVIWMulAddX, 0>; +def : ReadAdvance<ReadVIMergeV, 0>; +def : ReadAdvance<ReadVIMergeX, 0>; +def : ReadAdvance<ReadVIMovV, 0>; +def : ReadAdvance<ReadVIMovX, 0>; + +// 13. Vector Fixed-Point Arithmetic Instructions +def : ReadAdvance<ReadVSALUV, 0>; +def : ReadAdvance<ReadVSALUX, 0>; +def : ReadAdvance<ReadVAALUV, 0>; +def : ReadAdvance<ReadVAALUX, 0>; +def : ReadAdvance<ReadVSMulV, 0>; +def : ReadAdvance<ReadVSMulX, 0>; +def : ReadAdvance<ReadVSShiftV, 0>; +def : ReadAdvance<ReadVSShiftX, 0>; +def : ReadAdvance<ReadVNClipV, 0>; +def : ReadAdvance<ReadVNClipX, 0>; + +// 14. 
Vector Floating-Point Instructions +def : ReadAdvance<ReadVFALUV, 0>; +def : ReadAdvance<ReadVFALUF, 0>; +def : ReadAdvance<ReadVFWALUV, 0>; +def : ReadAdvance<ReadVFWALUF, 0>; +def : ReadAdvance<ReadVFMulV, 0>; +def : ReadAdvance<ReadVFMulF, 0>; +def : ReadAdvance<ReadVFDivV, 0>; +def : ReadAdvance<ReadVFDivF, 0>; +def : ReadAdvance<ReadVFWMulV, 0>; +def : ReadAdvance<ReadVFWMulF, 0>; +def : ReadAdvance<ReadVFMulAddV, 0>; +def : ReadAdvance<ReadVFMulAddF, 0>; +def : ReadAdvance<ReadVFWMulAddV, 0>; +def : ReadAdvance<ReadVFWMulAddF, 0>; +def : ReadAdvance<ReadVFSqrtV, 0>; +def : ReadAdvance<ReadVFRecpV, 0>; +def : ReadAdvance<ReadVFCmpV, 0>; +def : ReadAdvance<ReadVFCmpF, 0>; +def : ReadAdvance<ReadVFSgnjV, 0>; +def : ReadAdvance<ReadVFSgnjF, 0>; +def : ReadAdvance<ReadVFClassV, 0>; +def : ReadAdvance<ReadVFMergeV, 0>; +def : ReadAdvance<ReadVFMergeF, 0>; +def : ReadAdvance<ReadVFMovF, 0>; +def : ReadAdvance<ReadVFCvtIToFV, 0>; +def : ReadAdvance<ReadVFCvtFToIV, 0>; +def : ReadAdvance<ReadVFWCvtIToFV, 0>; +def : ReadAdvance<ReadVFWCvtFToIV, 0>; +def : ReadAdvance<ReadVFWCvtFToFV, 0>; +def : ReadAdvance<ReadVFNCvtIToFV, 0>; +def : ReadAdvance<ReadVFNCvtFToIV, 0>; +def : ReadAdvance<ReadVFNCvtFToFV, 0>; + +// 15. Vector Reduction Operations +def : ReadAdvance<ReadVIRedV, 0>; +def : ReadAdvance<ReadVIRedV0, 0>; +def : ReadAdvance<ReadVIWRedV, 0>; +def : ReadAdvance<ReadVIWRedV0, 0>; +def : ReadAdvance<ReadVFRedV, 0>; +def : ReadAdvance<ReadVFRedV0, 0>; +def : ReadAdvance<ReadVFRedOV, 0>; +def : ReadAdvance<ReadVFRedOV0, 0>; +def : ReadAdvance<ReadVFWRedV, 0>; +def : ReadAdvance<ReadVFWRedV0, 0>; +def : ReadAdvance<ReadVFWRedOV, 0>; +def : ReadAdvance<ReadVFWRedOV0, 0>; + +// 16. Vector Mask Instructions +def : ReadAdvance<ReadVMALUV, 0>; +def : ReadAdvance<ReadVMPopV, 0>; +def : ReadAdvance<ReadVMFFSV, 0>; +def : ReadAdvance<ReadVMSFSV, 0>; +def : ReadAdvance<ReadVMIotV, 0>; + +// 17. Vector Permutation Instructions +def : ReadAdvance<ReadVIMovVX, 0>; +def : ReadAdvance<ReadVIMovXV, 0>; +def : ReadAdvance<ReadVIMovXX, 0>; +def : ReadAdvance<ReadVFMovVF, 0>; +def : ReadAdvance<ReadVFMovFV, 0>; +def : ReadAdvance<ReadVFMovFX, 0>; +def : ReadAdvance<ReadVISlideV, 0>; +def : ReadAdvance<ReadVISlideX, 0>; +def : ReadAdvance<ReadVFSlideV, 0>; +def : ReadAdvance<ReadVFSlideF, 0>; +def : ReadAdvance<ReadVGatherV, 0>; +def : ReadAdvance<ReadVGatherX, 0>; +def : ReadAdvance<ReadVCompressV, 0>; +def : ReadAdvance<ReadVMov1V, 0>; +def : ReadAdvance<ReadVMov2V, 0>; +def : ReadAdvance<ReadVMov4V, 0>; +def : ReadAdvance<ReadVMov8V, 0>; + +// Others +def : ReadAdvance<ReadVMask, 0>; + +} // Unsupported +} // UnsupportedSchedV diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index 3a64b3460030..a69850896436 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6704,17 +6704,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) { auto *MemIntr = cast<MemIntrinsicSDNode>(Op); SDValue Ptr = MemIntr->getBasePtr(); + // The source constant may be larger than the subvector broadcast, + // ensure we extract the correct subvector constants. 
if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) { Type *CstTy = Cst->getType(); unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); - if (!CstTy->isVectorTy() || (SizeInBits % CstSizeInBits) != 0) + unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits(); + if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 || + (SizeInBits % SubVecSizeInBits) != 0) return false; - unsigned SubEltSizeInBits = CstTy->getScalarSizeInBits(); - unsigned NumSubElts = CstSizeInBits / SubEltSizeInBits; - unsigned NumSubVecs = SizeInBits / CstSizeInBits; + unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); + unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits; + unsigned NumSubVecs = SizeInBits / SubVecSizeInBits; APInt UndefSubElts(NumSubElts, 0); SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs, - APInt(SubEltSizeInBits, 0)); + APInt(CstEltSizeInBits, 0)); for (unsigned i = 0; i != NumSubElts; ++i) { if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i], UndefSubElts, i)) diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td index e83e1e74ff52..ba00e7da81f9 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -708,6 +708,19 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, mnemonic, "{$src2, $src1|$src1, $src2}", pattern>, Sched<[sched.Folded, sched.ReadAfterFold]>; +// BinOpRM - Instructions like "adc reg, reg, [mem]". +// There is an implicit register read at the end of the operand sequence. +class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + dag outlist, X86FoldableSchedWrite sched, list<dag> pattern> + : ITy<opcode, MRMSrcMem, typeinfo, outlist, + (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", pattern>, + Sched<[sched.Folded, sched.ReadAfterFold, + // base, scale, index, offset, segment. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // implicit register read. + sched.ReadAfterFold]>; + // BinOpRM_F - Instructions like "cmp reg, [mem]". class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> @@ -725,7 +738,7 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, // BinOpRM_RFF - Instructions like "adc reg, reg, [mem]". class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> - : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC, + : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC, [(set typeinfo.RegClass:$dst, EFLAGS, (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2), EFLAGS))]>; @@ -805,7 +818,11 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpMR<opcode, mnemonic, typeinfo, [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst), - (implicit EFLAGS)]>, Sched<[WriteALURMW]>; + (implicit EFLAGS)]>, Sched<[WriteALURMW, + // base, scale, index, offset, segment + ReadDefault, ReadDefault, ReadDefault, + ReadDefault, ReadDefault, + WriteALU.ReadAfterFold]>; // reg // BinOpMR_RMW_FF - Instructions like "adc [mem], reg". 
class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -813,7 +830,12 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpMR<opcode, mnemonic, typeinfo, [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS), addr:$dst), - (implicit EFLAGS)]>, Sched<[WriteADCRMW]>; + (implicit EFLAGS)]>, Sched<[WriteADCRMW, + // base, scale, index, offset, segment + ReadDefault, ReadDefault, ReadDefault, + ReadDefault, ReadDefault, + WriteALU.ReadAfterFold, // reg + WriteALU.ReadAfterFold]>; // EFLAGS // BinOpMR_F - Instructions like "cmp [mem], reg". class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp index 762317425026..91b16ec66ee3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/ValueHandle.h" @@ -250,10 +251,12 @@ Value *AA::getWithType(Value &V, Type &Ty) { return Constant::getNullValue(&Ty); if (C->getType()->isPointerTy() && Ty.isPointerTy()) return ConstantExpr::getPointerCast(C, &Ty); - if (C->getType()->isIntegerTy() && Ty.isIntegerTy()) - return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true); - if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy()) - return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true); + if (C->getType()->getPrimitiveSizeInBits() >= Ty.getPrimitiveSizeInBits()) { + if (C->getType()->isIntegerTy() && Ty.isIntegerTy()) + return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true); + if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy()) + return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true); + } } return nullptr; } @@ -1023,7 +1026,7 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred, while (!Worklist.empty()) { const Use *U = Worklist.pop_back_val(); - if (!Visited.insert(U).second) + if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second) continue; LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in " << *U->getUser() << "\n"); @@ -1925,49 +1928,85 @@ void Attributor::createShallowWrapper(Function &F) { NumFnShallowWrappersCreated++; } +bool Attributor::isInternalizable(Function &F) { + if (F.isDeclaration() || F.hasLocalLinkage() || + GlobalValue::isInterposableLinkage(F.getLinkage())) + return false; + return true; +} + Function *Attributor::internalizeFunction(Function &F, bool Force) { if (!AllowDeepWrapper && !Force) return nullptr; - if (F.isDeclaration() || F.hasLocalLinkage() || - GlobalValue::isInterposableLinkage(F.getLinkage())) + if (!isInternalizable(F)) return nullptr; - Module &M = *F.getParent(); - FunctionType *FnTy = F.getFunctionType(); - - // create a copy of the current function - Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(), - F.getName() + ".internalized"); - ValueToValueMapTy VMap; - auto *NewFArgIt = Copied->arg_begin(); - for (auto &Arg : F.args()) { - auto ArgName = Arg.getName(); - NewFArgIt->setName(ArgName); - VMap[&Arg] = &(*NewFArgIt++); - } - SmallVector<ReturnInst *, 8> Returns; - - // Copy the body of the original function to the new one - CloneFunctionInto(Copied, 
&F, VMap, CloneFunctionChangeType::LocalChangesOnly, - Returns); + SmallPtrSet<Function *, 2> FnSet = {&F}; + DenseMap<Function *, Function *> InternalizedFns; + internalizeFunctions(FnSet, InternalizedFns); - // Set the linakage and visibility late as CloneFunctionInto has some implicit - // requirements. - Copied->setVisibility(GlobalValue::DefaultVisibility); - Copied->setLinkage(GlobalValue::PrivateLinkage); + return InternalizedFns[&F]; +} - // Copy metadata - SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - F.getAllMetadata(MDs); - for (auto MDIt : MDs) - if (!Copied->hasMetadata()) - Copied->addMetadata(MDIt.first, *MDIt.second); +bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet, + DenseMap<Function *, Function *> &FnMap) { + for (Function *F : FnSet) + if (!Attributor::isInternalizable(*F)) + return false; - M.getFunctionList().insert(F.getIterator(), Copied); - F.replaceAllUsesWith(Copied); - Copied->setDSOLocal(true); + FnMap.clear(); + // Generate the internalized version of each function. + for (Function *F : FnSet) { + Module &M = *F->getParent(); + FunctionType *FnTy = F->getFunctionType(); + + // Create a copy of the current function + Function *Copied = + Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(), + F->getName() + ".internalized"); + ValueToValueMapTy VMap; + auto *NewFArgIt = Copied->arg_begin(); + for (auto &Arg : F->args()) { + auto ArgName = Arg.getName(); + NewFArgIt->setName(ArgName); + VMap[&Arg] = &(*NewFArgIt++); + } + SmallVector<ReturnInst *, 8> Returns; + + // Copy the body of the original function to the new one + CloneFunctionInto(Copied, F, VMap, + CloneFunctionChangeType::LocalChangesOnly, Returns); + + // Set the linakage and visibility late as CloneFunctionInto has some + // implicit requirements. + Copied->setVisibility(GlobalValue::DefaultVisibility); + Copied->setLinkage(GlobalValue::PrivateLinkage); + + // Copy metadata + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + F->getAllMetadata(MDs); + for (auto MDIt : MDs) + if (!Copied->hasMetadata()) + Copied->addMetadata(MDIt.first, *MDIt.second); + + M.getFunctionList().insert(F->getIterator(), Copied); + Copied->setDSOLocal(true); + FnMap[F] = Copied; + } + + // Replace all uses of the old function with the new internalized function + // unless the caller is a function that was just internalized. + for (Function *F : FnSet) { + auto &InternalizedFn = FnMap[F]; + auto IsNotInternalized = [&](Use &U) -> bool { + if (auto *CB = dyn_cast<CallBase>(U.getUser())) + return !FnMap.lookup(CB->getCaller()); + return false; + }; + F->replaceUsesWithIf(InternalizedFn, IsNotInternalized); + } - return Copied; + return true; } bool Attributor::isValidFunctionSignatureRewrite( diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 98ce286d5139..3529923a9082 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1149,19 +1149,23 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { return true; }; + /// Helper struct, will support ranges eventually. + struct OffsetInfo { + int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown; + + bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; } + }; + /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { using namespace AA::PointerInfo; State S = getState(); ChangeStatus Changed = ChangeStatus::UNCHANGED; Value &AssociatedValue = getAssociatedValue(); - struct OffsetInfo { - int64_t Offset = 0; - }; const DataLayout &DL = A.getDataLayout(); DenseMap<Value *, OffsetInfo> OffsetInfoMap; - OffsetInfoMap[&AssociatedValue] = {}; + OffsetInfoMap[&AssociatedValue] = OffsetInfo{0}; auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI, bool &Follow) { @@ -1219,8 +1223,48 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { Follow = true; return true; } - if (isa<CastInst>(Usr) || isa<PHINode>(Usr) || isa<SelectInst>(Usr)) + if (isa<CastInst>(Usr) || isa<SelectInst>(Usr)) return HandlePassthroughUser(Usr, PtrOI, Follow); + + // For PHIs we need to take care of the recurrence explicitly as the value + // might change while we iterate through a loop. For now, we give up if + // the PHI is not invariant. + if (isa<PHINode>(Usr)) { + // Check if the PHI is invariant (so far). + OffsetInfo &UsrOI = OffsetInfoMap[Usr]; + if (UsrOI == PtrOI) + return true; + + // Check if the PHI operand has already an unknown offset as we can't + // improve on that anymore. + if (PtrOI.Offset == OffsetAndSize::Unknown) { + UsrOI = PtrOI; + Follow = true; + return true; + } + + // Check if the PHI operand is not dependent on the PHI itself. + APInt Offset(DL.getIndexTypeSizeInBits(AssociatedValue.getType()), 0); + if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)) { + if (Offset != PtrOI.Offset) { + LLVM_DEBUG(dbgs() + << "[AAPointerInfo] PHI operand pointer offset mismatch " + << *CurPtr << " in " << *Usr << "\n"); + return false; + } + return HandlePassthroughUser(Usr, PtrOI, Follow); + } + + // TODO: Approximate in case we know the direction of the recurrence. + LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " + << *CurPtr << " in " << *Usr << "\n"); + UsrOI = PtrOI; + UsrOI.Offset = OffsetAndSize::Unknown; + Follow = true; + return true; + } + if (auto *LoadI = dyn_cast<LoadInst>(Usr)) return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, AccessKind::AK_READ, PtrOI.Offset, Changed, diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index b80349352719..d6b97915ede6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4176,28 +4176,32 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { ORE.emit([&]() { OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F); return ORA << "Could not internalize function. " - << "Some optimizations may not be possible."; + << "Some optimizations may not be possible. [OMP140]"; }); }; // Create internal copies of each function if this is a kernel Module. This // allows iterprocedural passes to see every call edge. 
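(Aside on the internalization rework in this hunk and in the Attributor.cpp hunk above: both switch from a wholesale replaceAllUsesWith to a selective Value::replaceUsesWithIf, so only the intended call edges are redirected to the ".internalized" copy. The following is a self-contained toy sketch of that API in use; the function names and the predicate are illustrative only and are not the patch's exact logic.)

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);

  // The original symbol and a private "internalized" copy of it.
  Function *Orig = Function::Create(FnTy, Function::ExternalLinkage, "callee", &M);
  Function *Copy =
      Function::Create(FnTy, Function::PrivateLinkage, "callee.internalized", &M);

  // Two callers, both initially calling the original.
  for (const char *Name : {"keep_caller", "redirect_caller"}) {
    Function *Caller = Function::Create(FnTy, Function::ExternalLinkage, Name, &M);
    IRBuilder<> B(BasicBlock::Create(Ctx, "entry", Caller));
    B.CreateCall(Orig);
    B.CreateRetVoid();
  }

  // Redirect every use of the original except the call inside "keep_caller".
  Orig->replaceUsesWithIf(Copy, [](Use &U) {
    if (auto *CB = dyn_cast<CallBase>(U.getUser()))
      return CB->getCaller()->getName() != "keep_caller";
    return true;
  });

  M.print(outs(), nullptr);
  return 0;
}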
- DenseSet<const Function *> InternalizedFuncs; - if (isOpenMPDevice(M)) + DenseMap<Function *, Function *> InternalizedMap; + if (isOpenMPDevice(M)) { + SmallPtrSet<Function *, 16> InternalizeFns; for (Function &F : M) if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) && !DisableInternalization) { - if (Attributor::internalizeFunction(F, /* Force */ true)) { - InternalizedFuncs.insert(&F); + if (Attributor::isInternalizable(F)) { + InternalizeFns.insert(&F); } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { EmitRemark(F); } } + Attributor::internalizeFunctions(InternalizeFns, InternalizedMap); + } + // Look at every function in the Module unless it was internalized. SmallVector<Function *, 16> SCC; for (Function &F : M) - if (!F.isDeclaration() && !InternalizedFuncs.contains(&F)) + if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) SCC.push_back(&F); if (SCC.empty()) diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2b0ef0c5f2cc..c5e14ebf3ae3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5158,6 +5158,83 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { if (!isa<Constant>(Op1) && Op1Min == Op1Max) return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min)); + // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a + // min/max canonical compare with some other compare. That could lead to + // conflict with select canonicalization and infinite looping. + // FIXME: This constraint may go away if min/max intrinsics are canonical. + auto isMinMaxCmp = [&](Instruction &Cmp) { + if (!Cmp.hasOneUse()) + return false; + Value *A, *B; + SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor; + if (!SelectPatternResult::isMinOrMax(SPF)) + return false; + return match(Op0, m_MaxOrMin(m_Value(), m_Value())) || + match(Op1, m_MaxOrMin(m_Value(), m_Value())); + }; + if (!isMinMaxCmp(I)) { + switch (Pred) { + default: + break; + case ICmpInst::ICMP_ULT: { + if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + const APInt *CmpC; + if (match(Op1, m_APInt(CmpC))) { + // A <u C -> A == C-1 if min(A)+1 == C + if (*CmpC == Op0Min + 1) + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(Op1->getType(), *CmpC - 1)); + // X <u C --> X == 0, if the number of zero bits in the bottom of X + // exceeds the log2 of C. + if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2()) + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + Constant::getNullValue(Op1->getType())); + } + break; + } + case ICmpInst::ICMP_UGT: { + if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + const APInt *CmpC; + if (match(Op1, m_APInt(CmpC))) { + // A >u C -> A == C+1 if max(a)-1 == C + if (*CmpC == Op0Max - 1) + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(Op1->getType(), *CmpC + 1)); + // X >u C --> X != 0, if the number of zero bits in the bottom of X + // exceeds the log2 of C. 
+ if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits()) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, + Constant::getNullValue(Op1->getType())); + } + break; + } + case ICmpInst::ICMP_SLT: { + if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + const APInt *CmpC; + if (match(Op1, m_APInt(CmpC))) { + if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(Op1->getType(), *CmpC - 1)); + } + break; + } + case ICmpInst::ICMP_SGT: { + if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + const APInt *CmpC; + if (match(Op1, m_APInt(CmpC))) { + if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(Op1->getType(), *CmpC + 1)); + } + break; + } + } + } + // Based on the range information we know about the LHS, see if we can // simplify this comparison. For example, (x&4) < 8 is always true. switch (Pred) { @@ -5219,21 +5296,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - - const APInt *CmpC; - if (match(Op1, m_APInt(CmpC))) { - // A <u C -> A == C-1 if min(A)+1 == C - if (*CmpC == Op0Min + 1) - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - ConstantInt::get(Op1->getType(), *CmpC - 1)); - // X <u C --> X == 0, if the number of zero bits in the bottom of X - // exceeds the log2 of C. - if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2()) - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - Constant::getNullValue(Op1->getType())); - } break; } case ICmpInst::ICMP_UGT: { @@ -5241,21 +5303,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - - const APInt *CmpC; - if (match(Op1, m_APInt(CmpC))) { - // A >u C -> A == C+1 if max(a)-1 == C - if (*CmpC == Op0Max - 1) - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - ConstantInt::get(Op1->getType(), *CmpC + 1)); - // X >u C --> X != 0, if the number of zero bits in the bottom of X - // exceeds the log2 of C. 
- if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits()) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, - Constant::getNullValue(Op1->getType())); - } break; } case ICmpInst::ICMP_SLT: { @@ -5263,14 +5310,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - const APInt *CmpC; - if (match(Op1, m_APInt(CmpC))) { - if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - ConstantInt::get(Op1->getType(), *CmpC - 1)); - } break; } case ICmpInst::ICMP_SGT: { @@ -5278,14 +5317,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - const APInt *CmpC; - if (match(Op1, m_APInt(CmpC))) { - if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - ConstantInt::get(Op1->getType(), *CmpC + 1)); - } break; } case ICmpInst::ICMP_SGE: diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index a8474e27383d..80abc775299a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -261,8 +261,8 @@ private: bool PointerReplacer::collectUsers(Instruction &I) { for (auto U : I.users()) { - Instruction *Inst = cast<Instruction>(&*U); - if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + auto *Inst = cast<Instruction>(&*U); + if (auto *Load = dyn_cast<LoadInst>(Inst)) { if (Load->isVolatile()) return false; Worklist.insert(Load); @@ -270,7 +270,9 @@ bool PointerReplacer::collectUsers(Instruction &I) { Worklist.insert(Inst); if (!collectUsers(*Inst)) return false; - } else if (isa<MemTransferInst>(Inst)) { + } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) { + if (MI->isVolatile()) + return false; Worklist.insert(Inst); } else if (Inst->isLifetimeStartOrEnd()) { continue; diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index ce2b913dba61..5bbc3c87ca4f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3230,7 +3230,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *Mask; if (match(TrueVal, m_Zero()) && match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask), - m_CombineOr(m_Undef(), m_Zero())))) { + m_CombineOr(m_Undef(), m_Zero()))) && + (CondVal->getType() == Mask->getType())) { // We can remove the select by ensuring the load zeros all lanes the // select would have. We determine this by proving there is no overlap // between the load and select masks. 
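(Note on the InstCombineCompares.cpp hunks above: the known-bits compare folds are moved behind the new isMinMaxCmp guard rather than changed; what they compute is the same. The folds rest on a small arithmetic fact about trailing zero bits, checked below in plain standalone C++. The names are illustrative and deliberately avoid LLVM's KnownBits/APInt API.)

#include <cassert>
#include <cstdint>

// X is known to be a multiple of 2^TrailZeros (its low TrailZeros bits are 0).
// Then:
//   - if TrailZeros >= ceil(log2(C)):   X <u C  holds exactly when X == 0
//   - if TrailZeros >= activeBits(C):   X >u C  holds exactly when X != 0
// because any nonzero such X is at least 2^TrailZeros, which is >= C in the
// first case and strictly > C in the second.
static void checkFolds(uint64_t X, unsigned TrailZeros, uint64_t C,
                       bool CheckULT, bool CheckUGT) {
  assert((X & ((1ULL << TrailZeros) - 1)) == 0 && "X must be a multiple");
  if (CheckULT)
    assert((X < C) == (X == 0));
  if (CheckUGT)
    assert((X > C) == (X != 0));
}

int main() {
  // TrailZeros = 3, so X ranges over multiples of 8.
  for (uint64_t X = 0; X <= 64; X += 8) {
    checkFolds(X, 3, /*C=*/6, /*CheckULT=*/true, /*CheckUGT=*/false); // ceil(log2(6)) == 3
    checkFolds(X, 3, /*C=*/7, /*CheckULT=*/false, /*CheckUGT=*/true); // activeBits(7) == 3
  }
  return 0;
}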
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp index c77769368ede..66c9d9f0902a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp @@ -272,9 +272,10 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, if (PredBB && IsSafeToHoist(RemInst, RemBB) && IsSafeToHoist(DivInst, DivBB) && - llvm::all_of(successors(PredBB), [&](BasicBlock *BB) { - return BB == DivBB || BB == RemBB; - })) { + all_of(successors(PredBB), + [&](BasicBlock *BB) { return BB == DivBB || BB == RemBB; }) && + all_of(predecessors(DivBB), + [&](BasicBlock *BB) { return BB == RemBB || BB == PredBB; })) { DivDominates = true; DivInst->moveBefore(PredBB->getTerminator()); Changed = true; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 5f210380ae5a..404852f1dd4d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -5906,9 +5906,12 @@ struct SCEVDbgValueBuilder { pushValue(V); } - void pushConst(const SCEVConstant *C) { + bool pushConst(const SCEVConstant *C) { + if (C->getAPInt().getMinSignedBits() > 64) + return false; Expr.push_back(llvm::dwarf::DW_OP_consts); Expr.push_back(C->getAPInt().getSExtValue()); + return true; } /// Several SCEV types are sequences of the same arithmetic operator applied @@ -5947,10 +5950,10 @@ struct SCEVDbgValueBuilder { bool pushSCEV(const llvm::SCEV *S) { bool Success = true; if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) { - pushConst(StartInt); + Success &= pushConst(StartInt); } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { - if(!U->getValue()) + if (!U->getValue()) return false; pushValue(U->getValue()); @@ -6033,6 +6036,8 @@ struct SCEVDbgValueBuilder { /// SCEV constant value is an identity function. bool isIdentityFunction(uint64_t Op, const SCEV *S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + if (C->getAPInt().getMinSignedBits() > 64) + return false; int64_t I = C->getAPInt().getSExtValue(); switch (Op) { case llvm::dwarf::DW_OP_plus: @@ -6162,6 +6167,9 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, bool Changed = false; if (const SCEVAddRecExpr *IVAddRec = dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) { + if (!IVAddRec->isAffine()) + return false; + SCEVDbgValueBuilder IterCountExpr; IterCountExpr.pushValue(LSRInductionVar); if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE)) @@ -6180,6 +6188,8 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, // supported by SCEV salvaging. But, we can attempt a salvage by restoring // the pre-LSR single-op expression. 
if (DVIRec.DVI->hasArgList()) { + if (!DVIRec.DVI->getVariableLocationOp(0)) + continue; llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType(); DVIRec.DVI->setRawLocation( llvm::ValueAsMetadata::get(UndefValue::get(Ty))); @@ -6207,7 +6217,8 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, if (DVI->hasArgList()) continue; - if (!SE.isSCEVable(DVI->getVariableLocationOp(0)->getType())) + if (!DVI->getVariableLocationOp(0) || + !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType())) continue; SalvageableDVISCEVs.push_back( @@ -6232,9 +6243,8 @@ static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE, assert(isa<PHINode>(&*IV) && "Expected PhI node."); if (SE.isSCEVable((*IV).getType())) { PHINode *Phi = dyn_cast<PHINode>(&*IV); - LLVM_DEBUG(const llvm::SCEV *S = SE.getSCEV(Phi); - dbgs() << "scev-salvage: IV : " << *IV << "with SCEV: " << *S - << "\n"); + LLVM_DEBUG(dbgs() << "scev-salvage: IV : " << *IV + << "with SCEV: " << *SE.getSCEV(Phi) << "\n"); return Phi; } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp index 5ec01454e5b2..fe160d5415bd 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2811,10 +2811,11 @@ private: if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) return false; + // Length must be in range for FixedVectorType. auto *C = cast<ConstantInt>(II.getLength()); - if (C->getBitWidth() > 64) + const uint64_t Len = C->getLimitedValue(); + if (Len > std::numeric_limits<unsigned>::max()) return false; - const auto Len = C->getZExtValue(); auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext()); auto *SrcTy = FixedVectorType::get(Int8Ty, Len); return canConvertValue(DL, SrcTy, AllocaTy) && diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 91280762aaa7..bd2b6fafdf2e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -23,6 +24,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -566,10 +568,18 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, // to ensure we dominate all of our uses. Always insert right before the // relevant instruction (terminator, assume), so that we insert in proper // order in the case of multiple predicateinfo in the same block. + // The number of named values is used to detect if a new declaration was + // added. If so, that declaration is tracked so that it can be removed when + // the analysis is done. The corner case were a new declaration results in + // a name clash and the old name being renamed is not considered as that + // represents an invalid module. 
if (isa<PredicateWithEdge>(ValInfo)) { IRBuilder<> B(getBranchTerminator(ValInfo)); + auto NumDecls = F.getParent()->getNumNamedValues(); Function *IF = Intrinsic::getDeclaration( F.getParent(), Intrinsic::ssa_copy, Op->getType()); + if (NumDecls != F.getParent()->getNumNamedValues()) + PI.CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++)); PI.PredicateMap.insert({PIC, ValInfo}); @@ -581,8 +591,11 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, // Insert the predicate directly after the assume. While it also holds // directly before it, assume(i1 true) is not a useful fact. IRBuilder<> B(PAssume->AssumeInst->getNextNode()); + auto NumDecls = F.getParent()->getNumNamedValues(); Function *IF = Intrinsic::getDeclaration( F.getParent(), Intrinsic::ssa_copy, Op->getType()); + if (NumDecls != F.getParent()->getNumNamedValues()) + PI.CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op); PI.PredicateMap.insert({PIC, ValInfo}); Result.Def = PIC; @@ -761,6 +774,23 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT, Builder.buildPredicateInfo(); } +// Remove all declarations we created . The PredicateInfo consumers are +// responsible for remove the ssa_copy calls created. +PredicateInfo::~PredicateInfo() { + // Collect function pointers in set first, as SmallSet uses a SmallVector + // internally and we have to remove the asserting value handles first. + SmallPtrSet<Function *, 20> FunctionPtrs; + for (auto &F : CreatedDeclarations) + FunctionPtrs.insert(&*F); + CreatedDeclarations.clear(); + + for (Function *F : FunctionPtrs) { + assert(F->user_begin() == F->user_end() && + "PredicateInfo consumer did not remove all SSA copies."); + F->eraseFromParent(); + } +} + Optional<PredicateConstraint> PredicateBase::getConstraint() const { switch (Type) { case PT_Assume: @@ -827,6 +857,19 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); } +// Replace ssa_copy calls created by PredicateInfo with their operand. 
+static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { + for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) { + const auto *PI = PredInfo.getPredicateInfoFor(&Inst); + auto *II = dyn_cast<IntrinsicInst>(&Inst); + if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy) + continue; + + Inst.replaceAllUsesWith(II->getOperand(0)); + Inst.eraseFromParent(); + } +} + bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); @@ -834,6 +877,8 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { PredInfo->print(dbgs()); if (VerifyPredicateInfo) PredInfo->verifyPredicateInfo(); + + replaceCreatedSSACopys(*PredInfo, F); return false; } @@ -845,6 +890,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC); PredInfo->print(OS); + replaceCreatedSSACopys(*PredInfo, F); return PreservedAnalyses::all(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f24ae6b100d5..671bc6b5212b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5433,6 +5433,21 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { // lane 0 demanded or b) are uses which demand only lane 0 of their operand. for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::sideeffect: + case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::assume: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + if (TheLoop->hasLoopInvariantOperands(&I)) + addToWorklistIfAllowed(&I); + break; + default: + break; + } + } + // If there's no pointer operand, there's nothing to do. auto *Ptr = getLoadStorePointerOperand(&I); if (!Ptr) @@ -8916,6 +8931,37 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( [&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range); + // Even if the instruction is not marked as uniform, there are certain + // intrinsic calls that can be effectively treated as such, so we check for + // them here. Conservatively, we only do this for scalable vectors, since + // for fixed-width VFs we can always fall back on full scalarization. + if (!IsUniform && Range.Start.isScalable() && isa<IntrinsicInst>(I)) { + switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + case Intrinsic::assume: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // For scalable vectors if one of the operands is variant then we still + // want to mark as uniform, which will generate one instruction for just + // the first lane of the vector. We can't scalarize the call in the same + // way as for fixed-width vectors because we don't know how many lanes + // there are. + // + // The reasons for doing it this way for scalable vectors are: + // 1. For the assume intrinsic generating the instruction for the first + // lane is still be better than not generating any at all. For + // example, the input may be a splat across all lanes. + // 2. 
For the lifetime start/end intrinsics the pointer operand only + // does anything useful when the input comes from a stack object, + // which suggests it should always be uniform. For non-stack objects + // the effect is to poison the object, which still allows us to + // remove the call. + IsUniform = true; + break; + default: + break; + } + } + auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), IsUniform, IsPredicated); setRecipe(I, Recipe); diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp index 9a949761bb75..4ecc3015529c 100644 --- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp @@ -145,10 +145,11 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, double AverageTime1, AverageTime2, AverageTime3; AverageTime1 = - (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions; - AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions; - AverageTime3 = - (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions; + (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions; + AverageTime2 = + (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions; + AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) / + CumulativeExecutions; OS << Executions; OS.PadToColumn(13); @@ -157,18 +158,18 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, if (!PrintingTotals) tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, BufferSize); - OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); + OS << format("%.1f", floor(AverageTime1 + 0.5) / 10); OS.PadToColumn(20); if (!PrintingTotals) tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, BufferSize); - OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); + OS << format("%.1f", floor(AverageTime2 + 0.5) / 10); OS.PadToColumn(27); if (!PrintingTotals) tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, CumulativeExecutions, getSubTargetInfo().getSchedModel().MicroOpBufferSize); - OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); + OS << format("%.1f", floor(AverageTime3 + 0.5) / 10); if (OS.has_colors()) OS.resetColor(); diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp index 7311819f77ff..137f99078faf 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp @@ -77,6 +77,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::ppcf128: return "MVT::ppcf128"; case MVT::x86mmx: return "MVT::x86mmx"; case MVT::x86amx: return "MVT::x86amx"; + case MVT::i64x8: return "MVT::i64x8"; case MVT::Glue: return "MVT::Glue"; case MVT::isVoid: return "MVT::isVoid"; case MVT::v1i1: return "MVT::v1i1"; |
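(Note on the TimelineView.cpp hunk above: the factor of ten is folded into the average itself, but the print-with-one-decimal rounding, floor(scaled + 0.5) / 10, is unchanged. A small standalone sketch of that rounding scheme follows; the helper name is illustrative and not part of the mca classes.)

#include <cmath>
#include <cstdio>

// Average Cycles over Executions and round to one decimal digit, mirroring
// the "scale by ten, add 0.5, floor, divide by ten" pattern in the printer.
static double averageToTenth(unsigned Cycles, unsigned Executions) {
  double Scaled = (double)(Cycles * 10) / Executions; // factor of 10 applied first
  return std::floor(Scaled + 0.5) / 10.0;
}

int main() {
  std::printf("%.1f\n", averageToTenth(7, 3)); // 70/3 = 23.33..., prints 2.3
  std::printf("%.1f\n", averageToTenth(9, 2)); // 90/2 = 45.0,     prints 4.5
  return 0;
}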