author | Dimitry Andric <dim@FreeBSD.org> | 2023-04-14 21:41:27 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-07-23 17:38:08 +0000 |
commit | 320d4fb58b6b1c6a0c7ffeab3d4672d1479d5e17 (patch) | |
tree | 4b5e279a6f091bb6bdc639752cf4139dfd7053a4 /contrib/llvm-project/llvm/include/llvm/CodeGen | |
parent | 814cfa6ad43c73de9b8030f241f516dad3f669ef (diff) | |
download | src-320d4fb58b6b1c6a0c7ffeab3d4672d1479d5e17.tar.gz src-320d4fb58b6b1c6a0c7ffeab3d4672d1479d5e17.zip |
Merge llvm-project main llvmorg-16-init-18548-gb0daacf58f41
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-16-init-18548-gb0daacf58f41.
PR: 271047
MFC after: 1 month
(cherry picked from commit bdd1243df58e60e85101c09001d9812a789b6bc4)
Diffstat (limited to 'contrib/llvm-project/llvm/include/llvm/CodeGen')
82 files changed, 2887 insertions, 1188 deletions
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/AccelTable.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/AccelTable.h index c0e976317aef..be7ed03deb27 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/AccelTable.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/AccelTable.h @@ -300,7 +300,7 @@ void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents, template <typename DataT> void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents, StringRef Prefix, const MCSymbol *SecBegin) { - static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value, ""); + static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value); emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms); } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h index 5e900e9162d8..33fda248120b 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -15,12 +15,15 @@ #ifndef LLVM_CODEGEN_ASMPRINTER_H #define LLVM_CODEGEN_ASMPRINTER_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinterHandler.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/InlineAsm.h" #include "llvm/Support/ErrorHandling.h" #include <cstdint> @@ -68,7 +71,6 @@ class MDNode; class Module; class PseudoProbeHandler; class raw_ostream; -class StackMaps; class StringRef; class TargetLoweringObjectFile; class TargetMachine; @@ -180,8 +182,8 @@ private: /// block's address of label. std::unique_ptr<AddrLabelMap> AddrLabelSymbols; - // The garbage collection metadata printer table. - void *GCMetadataPrinters = nullptr; // Really a DenseMap. + /// The garbage collection metadata printer table. + DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>> GCMetadataPrinters; /// Emit comments in assembly output if this is true. bool VerboseAsm; @@ -189,16 +191,24 @@ private: /// Output stream for the stack usage file (i.e., .su file). std::unique_ptr<raw_fd_ostream> StackUsageStream; + /// List of symbols to be inserted into PC sections. + DenseMap<const MDNode *, SmallVector<const MCSymbol *>> PCSectionsSymbols; + static char ID; protected: MCSymbol *CurrentFnBegin = nullptr; + /// For dso_local functions, the current $local alias for the function. + MCSymbol *CurrentFnBeginLocal = nullptr; + /// A vector of all debug/EH info emitters we should use. This vector /// maintains ownership of the emitters. std::vector<HandlerInfo> Handlers; size_t NumUserHandlers = 0; + StackMaps SM; + private: /// If generated on the fly this own the instance. std::unique_ptr<MachineDominatorTree> OwnedMDT; @@ -317,6 +327,14 @@ public: /// definition in the same module. MCSymbol *getSymbolPreferLocal(const GlobalValue &GV) const; + bool doesDwarfUseRelocationsAcrossSections() const { + return DwarfUsesRelocationsAcrossSections; + } + + void setDwarfUsesRelocationsAcrossSections(bool Enable) { + DwarfUsesRelocationsAcrossSections = Enable; + } + //===------------------------------------------------------------------===// // XRay instrumentation implementation. 
//===------------------------------------------------------------------===// @@ -401,10 +419,19 @@ public: void emitBBAddrMapSection(const MachineFunction &MF); + void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol); + virtual void emitKCFITypeId(const MachineFunction &MF); + void emitPseudoProbe(const MachineInstr &MI); void emitRemarksSection(remarks::RemarkStreamer &RS); + /// Emits a label as reference for PC sections. + void emitPCSectionsLabel(const MachineFunction &MF, const MDNode &MD); + + /// Emits the PC sections collected from instructions. + void emitPCSections(const MachineFunction &MF); + /// Get the CFISection type for a function. CFISection getFunctionCFISectionType(const Function &F) const; @@ -497,7 +524,7 @@ public: void emitGlobalGOTEquivs(); /// Emit the stack maps. - void emitStackMaps(StackMaps &SM); + void emitStackMaps(); //===------------------------------------------------------------------===// // Overridable Hooks @@ -801,6 +828,8 @@ private: mutable unsigned LastFn = 0; mutable unsigned Counter = ~0U; + bool DwarfUsesRelocationsAcrossSections = false; + /// This method emits the header for the current function. virtual void emitFunctionHeader(); @@ -835,7 +864,7 @@ private: /// Emit bytes for llvm.commandline metadata. void emitModuleCommandLines(Module &M); - GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S); + GCMetadataPrinter *getOrCreateGCPrinter(GCStrategy &S); void emitGlobalAlias(Module &M, const GlobalAlias &GA); void emitGlobalIFunc(Module &M, const GlobalIFunc &GI); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinterHandler.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinterHandler.h index dc81a3040097..5c06645f767e 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinterHandler.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinterHandler.h @@ -53,13 +53,20 @@ public: virtual void markFunctionEnd(); /// Gather post-function debug information. - /// Please note that some AsmPrinter implementations may not call - /// beginFunction at all. virtual void endFunction(const MachineFunction *MF) = 0; - virtual void beginFragment(const MachineBasicBlock *MBB, - ExceptionSymbolProvider ESP) {} - virtual void endFragment() {} + /// Process the beginning of a new basic-block-section within a + /// function. Always called immediately after beginFunction for the first + /// basic-block. When basic-block-sections are enabled, called before the + /// first block of each such section. + virtual void beginBasicBlockSection(const MachineBasicBlock &MBB) {} + + /// Process the end of a basic-block-section within a function. When + /// basic-block-sections are enabled, called after the last block in each such + /// section (including the last section in the function). When + /// basic-block-sections are disabled, called at the end of a function, + /// immediately prior to markFunctionEnd. + virtual void endBasicBlockSection(const MachineBasicBlock &MBB) {} /// Emit target-specific EH funclet machinery. virtual void beginFunclet(const MachineBasicBlock &MBB, @@ -71,12 +78,6 @@ public: /// Process end of an instruction. virtual void endInstruction() = 0; - - /// Process beginning of a basic block during basic block sections. - virtual void beginBasicBlock(const MachineBasicBlock &MBB) {} - - /// Process end of a basic block during basic block sections. 
- virtual void endBasicBlock(const MachineBasicBlock &MBB) {} }; } // End of namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h new file mode 100644 index 000000000000..6e82b2b1c158 --- /dev/null +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h @@ -0,0 +1,117 @@ +#ifndef LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H +#define LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H + +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" + +namespace llvm { +class Function; +class Instruction; +class Value; +class raw_ostream; +} // namespace llvm +class FunctionVarLocsBuilder; + +namespace llvm { +/// Type wrapper for integer ID for Variables. 0 is reserved. +enum class VariableID : unsigned { Reserved = 0 }; +/// Variable location definition used by FunctionVarLocs. +struct VarLocInfo { + llvm::VariableID VariableID; + DIExpression *Expr = nullptr; + DebugLoc DL; + Value *V = nullptr; // TODO: Needs to be value_s_ for variadic expressions. +}; + +/// Data structure describing the variable locations in a function. Used as the +/// result of the AssignmentTrackingAnalysis pass. Essentially read-only +/// outside of AssignmentTrackingAnalysis where it is built. +class FunctionVarLocs { + /// Maps VarLocInfo.VariableID to a DebugVariable for VarLocRecords. + SmallVector<DebugVariable> Variables; + /// List of variable location changes grouped by the instruction the + /// change occurs before (see VarLocsBeforeInst). The elements from + /// zero to SingleVarLocEnd represent variables with a single location. + SmallVector<VarLocInfo> VarLocRecords; + /// End of range of VarLocRecords that represent variables with a single + /// location that is valid for the entire scope. Range starts at 0. + unsigned SingleVarLocEnd = 0; + /// Maps an instruction to a range of VarLocs that start just before it. + DenseMap<const Instruction *, std::pair<unsigned, unsigned>> + VarLocsBeforeInst; + +public: + /// Return the DILocalVariable for the location definition represented by \p + /// ID. + DILocalVariable *getDILocalVariable(const VarLocInfo *Loc) const { + VariableID VarID = Loc->VariableID; + return getDILocalVariable(VarID); + } + /// Return the DILocalVariable of the variable represented by \p ID. + DILocalVariable *getDILocalVariable(VariableID ID) const { + return const_cast<DILocalVariable *>(getVariable(ID).getVariable()); + } + /// Return the DebugVariable represented by \p ID. + const DebugVariable &getVariable(VariableID ID) const { + return Variables[static_cast<unsigned>(ID)]; + } + + ///@name iterators + ///@{ + /// First single-location variable location definition. + const VarLocInfo *single_locs_begin() const { return VarLocRecords.begin(); } + /// One past the last single-location variable location definition. + const VarLocInfo *single_locs_end() const { + const auto *It = VarLocRecords.begin(); + std::advance(It, SingleVarLocEnd); + return It; + } + /// First variable location definition that comes before \p Before. + const VarLocInfo *locs_begin(const Instruction *Before) const { + auto Span = VarLocsBeforeInst.lookup(Before); + const auto *It = VarLocRecords.begin(); + std::advance(It, Span.first); + return It; + } + /// One past the last variable location definition that comes before \p + /// Before. 
+ const VarLocInfo *locs_end(const Instruction *Before) const { + auto Span = VarLocsBeforeInst.lookup(Before); + const auto *It = VarLocRecords.begin(); + std::advance(It, Span.second); + return It; + } + ///@} + + void print(raw_ostream &OS, const Function &Fn) const; + + ///@{ + /// Non-const methods used by AssignmentTrackingAnalysis (which invalidate + /// analysis results if called incorrectly). + void init(FunctionVarLocsBuilder &Builder); + void clear(); + ///@} +}; + +class AssignmentTrackingAnalysis : public FunctionPass { + std::unique_ptr<FunctionVarLocs> Results; + +public: + static char ID; + + AssignmentTrackingAnalysis(); + + bool runOnFunction(Function &F) override; + + static bool isRequired() { return true; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + const FunctionVarLocs *getResults() { return Results.get(); } +}; + +} // end namespace llvm +#endif // LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/AtomicExpandUtils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/AtomicExpandUtils.h index 7615ddb0ab3d..1cb410a0c31c 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/AtomicExpandUtils.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/AtomicExpandUtils.h @@ -22,7 +22,7 @@ class Value; /// (the builder, %addr, %loaded, %new_val, ordering, /// /* OUT */ %success, /* OUT */ %new_loaded) using CreateCmpXchgInstFun = - function_ref<void(IRBuilder<> &, Value *, Value *, Value *, Align, + function_ref<void(IRBuilderBase &, Value *, Value *, Value *, Align, AtomicOrdering, SyncScope::ID, Value *&, Value *&)>; /// Expand an atomic RMW instruction into a loop utilizing diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 557339548581..b944c6edde55 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -15,7 +15,6 @@ #ifndef LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H #define LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -32,8 +31,8 @@ namespace llvm { // The cluster information for a machine basic block. struct BBClusterInfo { - // MachineBasicBlock ID. - unsigned MBBNumber; + // Unique ID for this basic block. + unsigned BBID; // Cluster ID this basic block belongs to. unsigned ClusterID; // Position of basic block within the cluster. diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h index c35a9e878613..77dd3157d070 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -47,10 +47,12 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include <algorithm> #include <cassert> #include <cstdint> #include <limits> +#include <optional> #include <utility> namespace llvm { @@ -84,21 +86,25 @@ private: /// Estimate a cost of Broadcast as an extract and sequence of insert /// operations. 
- InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) { + InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy, + TTI::TargetCostKind CostKind) { InstructionCost Cost = 0; // Broadcast cost is equal to the cost of extracting the zero'th element // plus the cost of inserting it into every element of the result vector. - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0); + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, + CostKind, 0, nullptr, nullptr); for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { - Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); + Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, + CostKind, i, nullptr, nullptr); } return Cost; } /// Estimate a cost of shuffle as a sequence of extract and insert /// operations. - InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy) { + InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy, + TTI::TargetCostKind CostKind) { InstructionCost Cost = 0; // Shuffle cost is equal to the cost of extracting element from its argument // plus the cost of inserting them onto the result vector. @@ -108,16 +114,20 @@ private: // vector and finally index 3 of second vector and insert them at index // <0,1,2,3> of result vector. for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { - Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i); + Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, + CostKind, i, nullptr, nullptr); + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, + CostKind, i, nullptr, nullptr); } return Cost; } /// Estimate a cost of subvector extraction as a sequence of extract and /// insert operations. - InstructionCost getExtractSubvectorOverhead(VectorType *VTy, int Index, - FixedVectorType *SubVTy) { + InstructionCost getExtractSubvectorOverhead(VectorType *VTy, + TTI::TargetCostKind CostKind, + int Index, + FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only extract subvectors from vectors"); int NumSubElts = SubVTy->getNumElements(); @@ -131,18 +141,21 @@ private: // the source type plus the cost of inserting them into the result vector // type. for (int i = 0; i != NumSubElts; ++i) { - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, - i + Index); Cost += - thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i); + thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, + CostKind, i + Index, nullptr, nullptr); + Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, + CostKind, i, nullptr, nullptr); } return Cost; } /// Estimate a cost of subvector insertion as a sequence of extract and /// insert operations. - InstructionCost getInsertSubvectorOverhead(VectorType *VTy, int Index, - FixedVectorType *SubVTy) { + InstructionCost getInsertSubvectorOverhead(VectorType *VTy, + TTI::TargetCostKind CostKind, + int Index, + FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only insert subvectors into vectors"); int NumSubElts = SubVTy->getNumElements(); @@ -156,10 +169,11 @@ private: // the source type plus the cost of inserting them into the result vector // type. 
for (int i = 0; i != NumSubElts; ++i) { + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, + CostKind, i, nullptr, nullptr); Cost += - thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i); - Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, - i + Index); + thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, CostKind, + i + Index, nullptr, nullptr); } return Cost; } @@ -210,7 +224,7 @@ private: FixedVectorType::get( PointerType::get(VT->getElementType(), 0), VT->getNumElements()), - -1) + CostKind, -1, nullptr, nullptr) : 0; InstructionCost LoadCost = VT->getNumElements() * @@ -218,8 +232,9 @@ private: getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind)); // Next, compute the cost of packing the result in a vector. - InstructionCost PackingCost = getScalarizationOverhead( - VT, Opcode != Instruction::Store, Opcode == Instruction::Store); + InstructionCost PackingCost = + getScalarizationOverhead(VT, Opcode != Instruction::Store, + Opcode == Instruction::Store, CostKind); InstructionCost ConditionalCost = 0; if (VariableMask) { @@ -235,7 +250,7 @@ private: Instruction::ExtractElement, FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()), VT->getNumElements()), - -1) + + CostKind, -1, nullptr, nullptr) + getCFInstrCost(Instruction::Br, CostKind) + getCFInstrCost(Instruction::PHI, CostKind)); } @@ -255,7 +270,7 @@ public: /// @{ bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, - bool *Fast) const { + unsigned *Fast) const { EVT E = EVT::getIntegerVT(Context, BitWidth); return getTLI()->allowsMisalignedMemoryAccesses( E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); @@ -287,6 +302,11 @@ public: return getTLI()->getTargetMachine().getAssumedAddrSpace(V); } + bool isSingleThreaded() const { + return getTLI()->getTargetMachine().Options.ThreadModel == + ThreadModel::Single; + } + std::pair<const Value *, unsigned> getPredicatedAddrSpace(const Value *V) const { return getTLI()->getTargetMachine().getPredicatedAddrSpace(V); @@ -368,7 +388,9 @@ public: AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); + if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace)) + return 0; + return -1; } bool isTruncateFree(Type *Ty1, Type *Ty2) { @@ -603,29 +625,29 @@ public: bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - LoopVectorizationLegality *LVL) { - return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL); + LoopVectorizationLegality *LVL, + InterleavedAccessInfo *IAI) { + return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI); } PredicationStyle emitGetActiveLaneMask() { return BaseT::emitGetActiveLaneMask(); } - Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, + std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) { return BaseT::instCombineIntrinsic(IC, II); } - Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, - IntrinsicInst &II, - APInt DemandedMask, - KnownBits &Known, - bool &KnownBitsComputed) { + std::optional<Value *> + simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, + APInt DemandedMask, KnownBits &Known, + bool &KnownBitsComputed) { return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, 
KnownBitsComputed); } - Optional<Value *> simplifyDemandedVectorEltsIntrinsic( + std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function<void(Instruction *, unsigned, APInt, APInt &)> @@ -635,22 +657,15 @@ public: SimplifyAndSetOp); } - InstructionCost getInstructionLatency(const Instruction *I) { - if (isa<LoadInst>(I)) - return getST()->getSchedModel().DefaultLoadLatency; - - return BaseT::getInstructionLatency(I); - } - - virtual Optional<unsigned> + virtual std::optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const { - return Optional<unsigned>( - getST()->getCacheSize(static_cast<unsigned>(Level))); + return std::optional<unsigned>( + getST()->getCacheSize(static_cast<unsigned>(Level))); } - virtual Optional<unsigned> + virtual std::optional<unsigned> getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { - Optional<unsigned> TargetResult = + std::optional<unsigned> TargetResult = getST()->getCacheAssociativity(static_cast<unsigned>(Level)); if (TargetResult) @@ -683,6 +698,10 @@ public: return getST()->enableWritePrefetching(); } + virtual bool shouldPrefetchAddressSpace(unsigned AS) const { + return getST()->shouldPrefetchAddressSpace(AS); + } + /// @} /// \name Vector TTI Implementations @@ -692,15 +711,16 @@ public: return TypeSize::getFixed(32); } - Optional<unsigned> getMaxVScale() const { return None; } - Optional<unsigned> getVScaleForTuning() const { return None; } + std::optional<unsigned> getMaxVScale() const { return std::nullopt; } + std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or /// extracted from vectors. InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, - bool Insert, bool Extract) { + bool Insert, bool Extract, + TTI::TargetCostKind CostKind) { /// FIXME: a bitfield is not a reasonable abstraction for talking about /// which elements are needed from a scalable vector if (isa<ScalableVectorType>(InTy)) @@ -716,9 +736,11 @@ public: if (!DemandedElts[i]) continue; if (Insert) - Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i); + Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, + CostKind, i, nullptr, nullptr); if (Extract) - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, + CostKind, i, nullptr, nullptr); } return Cost; @@ -726,20 +748,24 @@ public: /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead. InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, - bool Extract) { + bool Extract, + TTI::TargetCostKind CostKind) { if (isa<ScalableVectorType>(InTy)) return InstructionCost::getInvalid(); auto *Ty = cast<FixedVectorType>(InTy); APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements()); - return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); + return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract, + CostKind); } /// Estimate the overhead of scalarizing an instructions unique /// non-constant operands. The (potentially vector) types to use for each of /// argument are passes via Tys. 
- InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, - ArrayRef<Type *> Tys) { + InstructionCost + getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) { assert(Args.size() == Tys.size() && "Expected matching Args and Tys"); InstructionCost Cost = 0; @@ -754,7 +780,8 @@ public: if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { if (auto *VecTy = dyn_cast<VectorType>(Ty)) - Cost += getScalarizationOverhead(VecTy, false, true); + Cost += getScalarizationOverhead(VecTy, /*Insert*/ false, + /*Extract*/ true, CostKind); } } @@ -767,26 +794,62 @@ public: /// added as a heuristic. InstructionCost getScalarizationOverhead(VectorType *RetTy, ArrayRef<const Value *> Args, - ArrayRef<Type *> Tys) { - InstructionCost Cost = getScalarizationOverhead(RetTy, true, false); + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) { + InstructionCost Cost = getScalarizationOverhead( + RetTy, /*Insert*/ true, /*Extract*/ false, CostKind); if (!Args.empty()) - Cost += getOperandsScalarizationOverhead(Args, Tys); + Cost += getOperandsScalarizationOverhead(Args, Tys, CostKind); else // When no information on arguments is provided, we add the cost // associated with one argument as a heuristic. - Cost += getScalarizationOverhead(RetTy, false, true); + Cost += getScalarizationOverhead(RetTy, /*Insert*/ false, + /*Extract*/ true, CostKind); return Cost; } + /// Estimate the cost of type-legalization and the legalized type. + std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const { + LLVMContext &C = Ty->getContext(); + EVT MTy = getTLI()->getValueType(DL, Ty); + + InstructionCost Cost = 1; + // We keep legalizing the type until we find a legal kind. We assume that + // the only operation that costs anything is the split. After splitting + // we need to handle two types. + while (true) { + TargetLoweringBase::LegalizeKind LK = getTLI()->getTypeConversion(C, MTy); + + if (LK.first == TargetLoweringBase::TypeScalarizeScalableVector) { + // Ensure we return a sensible simple VT here, since many callers of + // this function require it. + MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64; + return std::make_pair(InstructionCost::getInvalid(), VT); + } + + if (LK.first == TargetLoweringBase::TypeLegal) + return std::make_pair(Cost, MTy.getSimpleVT()); + + if (LK.first == TargetLoweringBase::TypeSplitVector || + LK.first == TargetLoweringBase::TypeExpandInteger) + Cost *= 2; + + // Do not loop with f128 type. + if (MTy == LK.second) + return std::make_pair(Cost, MTy.getSimpleVT()); + + // Keep legalizing the type. + MTy = LK.second; + } + } + unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, - TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, - TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, - TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, - TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None}, ArrayRef<const Value *> Args = ArrayRef<const Value *>(), const Instruction *CxtI = nullptr) { // Check if any of the operands are vector operands. 
@@ -798,10 +861,9 @@ public: if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args, CxtI); - std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty); bool IsFloat = Ty->isFPOrFPVectorTy(); // Assume that floating point arithmetic operations cost twice as much as @@ -831,8 +893,7 @@ public: LT.second)) { unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv; InstructionCost DivCost = thisT()->getArithmeticInstrCost( - DivOpc, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, - Opd2PropInfo); + DivOpc, Ty, CostKind, Opd1Info, Opd2Info); InstructionCost MulCost = thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind); InstructionCost SubCost = @@ -851,11 +912,11 @@ public: if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) { InstructionCost Cost = thisT()->getArithmeticInstrCost( Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args, CxtI); + Args, CxtI); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. SmallVector<Type *> Tys(Args.size(), Ty); - return getScalarizationOverhead(VTy, Args, Tys) + + return getScalarizationOverhead(VTy, Args, Tys, CostKind) + VTy->getNumElements() * Cost; } @@ -871,6 +932,7 @@ public: // ShuffleVectorInst::isSingleSourceMask). any_of(Mask, [Limit](int I) { return I >= Limit; })) return Kind; + int Index; switch (Kind) { case TTI::SK_PermuteSingleSrc: if (ShuffleVectorInst::isReverseMask(Mask)) @@ -883,6 +945,8 @@ public: return TTI::SK_Select; if (ShuffleVectorInst::isTransposeMask(Mask)) return TTI::SK_Transpose; + if (ShuffleVectorInst::isSpliceMask(Mask, Index)) + return TTI::SK_Splice; break; case TTI::SK_Select: case TTI::SK_Reverse: @@ -897,14 +961,15 @@ public: } InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, - ArrayRef<int> Mask, int Index, + ArrayRef<int> Mask, + TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef<const Value *> Args = None) { + ArrayRef<const Value *> Args = std::nullopt) { switch (improveShuffleKindFromMask(Kind, Mask)) { case TTI::SK_Broadcast: if (auto *FVT = dyn_cast<FixedVectorType>(Tp)) - return getBroadcastShuffleOverhead(FVT); + return getBroadcastShuffleOverhead(FVT, CostKind); return InstructionCost::getInvalid(); case TTI::SK_Select: case TTI::SK_Splice: @@ -913,13 +978,13 @@ public: case TTI::SK_PermuteSingleSrc: case TTI::SK_PermuteTwoSrc: if (auto *FVT = dyn_cast<FixedVectorType>(Tp)) - return getPermuteShuffleOverhead(FVT); + return getPermuteShuffleOverhead(FVT, CostKind); return InstructionCost::getInvalid(); case TTI::SK_ExtractSubvector: - return getExtractSubvectorOverhead(Tp, Index, + return getExtractSubvectorOverhead(Tp, CostKind, Index, cast<FixedVectorType>(SubTp)); case TTI::SK_InsertSubvector: - return getInsertSubvectorOverhead(Tp, Index, + return getInsertSubvectorOverhead(Tp, CostKind, Index, cast<FixedVectorType>(SubTp)); } llvm_unreachable("Unknown TTI::ShuffleKind"); @@ -935,10 +1000,8 @@ public: const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair<InstructionCost, MVT> SrcLT = - TLI->getTypeLegalizationCost(DL, Src); - std::pair<InstructionCost, MVT> DstLT = - TLI->getTypeLegalizationCost(DL, Dst); + std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src); + 
std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst); TypeSize SrcSize = SrcLT.second.getSizeInBits(); TypeSize DstSize = DstLT.second.getSizeInBits(); @@ -952,7 +1015,7 @@ public: // Check for NOOP conversions. if (TLI->isTruncateFree(SrcLT.second, DstLT.second)) return 0; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Instruction::BitCast: // Bitcast between types that are legalized to the same type are free and // assume int to/from ptr of the same size is also free. @@ -967,7 +1030,7 @@ public: case Instruction::ZExt: if (TLI->isZExtFree(SrcLT.second, DstLT.second)) return 0; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Instruction::SExt: if (I && getTLI()->isExtFree(I)) return 0; @@ -1033,7 +1096,7 @@ public: // If we are legalizing by splitting, query the concrete TTI for the cost // of casting the original vector twice. We also need to factor in the // cost of the split itself. Count that as 1, to be consistent with - // TLI->getTypeLegalizationCost(). + // getTypeLegalizationCost(). bool SplitSrc = TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == TargetLowering::TypeSplitVector; @@ -1065,7 +1128,9 @@ public: // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. - return getScalarizationOverhead(DstVTy, true, true) + Num * Cost; + return getScalarizationOverhead(DstVTy, /*Insert*/ true, /*Extract*/ true, + CostKind) + + Num * Cost; } // We already handled vector-to-vector and scalar-to-scalar conversions. @@ -1074,8 +1139,12 @@ public: // that the conversion is scalarized in one way or another. if (Opcode == Instruction::BitCast) { // Illegal bitcasts are done by storing and loading from a stack slot. - return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) + - (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0); + return (SrcVTy ? getScalarizationOverhead(SrcVTy, /*Insert*/ false, + /*Extract*/ true, CostKind) + : 0) + + (DstVTy ? getScalarizationOverhead(DstVTy, /*Insert*/ true, + /*Extract*/ false, CostKind) + : 0); } llvm_unreachable("Unhandled cast"); @@ -1083,11 +1152,11 @@ public: InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) { + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy, - Index) + + CostKind, Index, nullptr, nullptr) + thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(), - TTI::CastContextHint::None, - TTI::TCK_RecipThroughput); + TTI::CastContextHint::None, CostKind); } InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, @@ -1114,8 +1183,7 @@ public: if (CondTy->isVectorTy()) ISD = ISD::VSELECT; } - std::pair<InstructionCost, MVT> LT = - TLI->getTypeLegalizationCost(DL, ValTy); + std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy); if (!(ValTy->isVectorTy() && !LT.second.isVector()) && !TLI->isOperationExpand(ISD, LT.second)) { @@ -1139,7 +1207,9 @@ public: // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. - return getScalarizationOverhead(ValVTy, true, false) + Num * Cost; + return getScalarizationOverhead(ValVTy, /*Insert*/ true, + /*Extract*/ false, CostKind) + + Num * Cost; } // Unknown scalar opcode. 
@@ -1147,11 +1217,22 @@ public: } InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { - std::pair<InstructionCost, MVT> LT = - getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); + TTI::TargetCostKind CostKind, + unsigned Index, Value *Op0, Value *Op1) { + return getRegUsageForType(Val->getScalarType()); + } - return LT.first; + InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, + TTI::TargetCostKind CostKind, + unsigned Index) { + Value *Op0 = nullptr; + Value *Op1 = nullptr; + if (auto *IE = dyn_cast<InsertElementInst>(&I)) { + Op0 = IE->getOperand(0); + Op1 = IE->getOperand(1); + } + return thisT()->getVectorInstrCost(I.getOpcode(), Val, CostKind, Index, Op0, + Op1); } InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, @@ -1179,24 +1260,24 @@ public: APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF); Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts, /*Insert*/ false, - /*Extract*/ true); - Cost += - thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts, - /*Insert*/ true, /*Extract*/ false); + /*Extract*/ true, CostKind); + Cost += thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts, + /*Insert*/ true, + /*Extract*/ false, CostKind); return Cost; } - InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, - MaybeAlign Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) { + InstructionCost + getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, + unsigned AddressSpace, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr) { assert(!Src->isVoidTy() && "Invalid type"); // Assume types, such as structs, are expensive. if (getTLI()->getValueType(DL, Src, true) == MVT::Other) return 4; - std::pair<InstructionCost, MVT> LT = - getTLI()->getTypeLegalizationCost(DL, Src); + std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src); // Assuming that all loads of legal types cost 1. InstructionCost Cost = LT.first; @@ -1223,9 +1304,9 @@ public: if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { // This is a vector load/store for some illegal type that is scalarized. // We must account for the cost of building or decomposing the vector. - Cost += getScalarizationOverhead(cast<VectorType>(Src), - Opcode != Instruction::Store, - Opcode == Instruction::Store); + Cost += getScalarizationOverhead( + cast<VectorType>(Src), Opcode != Instruction::Store, + Opcode == Instruction::Store, CostKind); } } @@ -1276,7 +1357,7 @@ public: // Legalize the vector type, and get the legalized and unlegalized type // sizes. - MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; + MVT VecTyLT = getTypeLegalizationCost(VecTy).second; unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy); unsigned VecTyLTSize = VecTyLT.getStoreSize(); @@ -1337,13 +1418,13 @@ public: // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 // The cost is estimated as extract elements at 0, 2, 4, 6 from the // <8 x i32> vector and insert them into a <4 x i32> vector. 
- InstructionCost InsSubCost = - thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts, - /*Insert*/ true, /*Extract*/ false); + InstructionCost InsSubCost = thisT()->getScalarizationOverhead( + SubVT, DemandedAllSubElts, + /*Insert*/ true, /*Extract*/ false, CostKind); Cost += Indices.size() * InsSubCost; - Cost += - thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts, - /*Insert*/ false, /*Extract*/ true); + Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts, + /*Insert*/ false, + /*Extract*/ true, CostKind); } else { // The interleave cost is extract elements from sub vectors, and // insert them into the wide vector. @@ -1358,13 +1439,13 @@ public: // The cost is estimated as extract all elements (of actual members, // excluding gaps) from both <4 x i32> vectors and insert into the <12 x // i32> vector. - InstructionCost ExtSubCost = - thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts, - /*Insert*/ false, /*Extract*/ true); + InstructionCost ExtSubCost = thisT()->getScalarizationOverhead( + SubVT, DemandedAllSubElts, + /*Insert*/ false, /*Extract*/ true, CostKind); Cost += ExtSubCost * Indices.size(); Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts, /*Insert*/ true, - /*Extract*/ false); + /*Extract*/ false, CostKind); } if (!UseMaskForCond) @@ -1440,13 +1521,13 @@ public: break; case Intrinsic::cttz: // FIXME: If necessary, this should go in target-specific overrides. - if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz()) + if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz(RetTy)) return TargetTransformInfo::TCC_Basic; break; case Intrinsic::ctlz: // FIXME: If necessary, this should go in target-specific overrides. - if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz()) + if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz(RetTy)) return TargetTransformInfo::TCC_Basic; break; @@ -1480,9 +1561,9 @@ public: if (isa<ScalableVectorType>(RetTy)) return BaseT::getIntrinsicInstrCost(ICA, CostKind); unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue(); - return thisT()->getShuffleCost(TTI::SK_ExtractSubvector, - cast<VectorType>(Args[0]->getType()), None, - Index, cast<VectorType>(RetTy)); + return thisT()->getShuffleCost( + TTI::SK_ExtractSubvector, cast<VectorType>(Args[0]->getType()), + std::nullopt, CostKind, Index, cast<VectorType>(RetTy)); } case Intrinsic::vector_insert: { // FIXME: Handle case where a scalable vector is inserted into a scalable @@ -1491,19 +1572,19 @@ public: return BaseT::getIntrinsicInstrCost(ICA, CostKind); unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue(); return thisT()->getShuffleCost( - TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), None, - Index, cast<VectorType>(Args[1]->getType())); + TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), + std::nullopt, CostKind, Index, cast<VectorType>(Args[1]->getType())); } case Intrinsic::experimental_vector_reverse: { - return thisT()->getShuffleCost(TTI::SK_Reverse, - cast<VectorType>(Args[0]->getType()), None, - 0, cast<VectorType>(RetTy)); + return thisT()->getShuffleCost( + TTI::SK_Reverse, cast<VectorType>(Args[0]->getType()), std::nullopt, + CostKind, 0, cast<VectorType>(RetTy)); } case Intrinsic::experimental_vector_splice: { unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue(); - return thisT()->getShuffleCost(TTI::SK_Splice, - cast<VectorType>(Args[0]->getType()), None, - Index, cast<VectorType>(RetTy)); + return thisT()->getShuffleCost( + TTI::SK_Splice, 
cast<VectorType>(Args[0]->getType()), std::nullopt, + CostKind, Index, cast<VectorType>(RetTy)); } case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: @@ -1530,13 +1611,14 @@ public: const Value *X = Args[0]; const Value *Y = Args[1]; const Value *Z = Args[2]; - TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; - TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); - TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); - TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); - TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; - OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 - : TTI::OP_None; + const TTI::OperandValueInfo OpInfoX = TTI::getOperandInfo(X); + const TTI::OperandValueInfo OpInfoY = TTI::getOperandInfo(Y); + const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z); + const TTI::OperandValueInfo OpInfoBW = + {TTI::OK_UniformConstantValue, + isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 + : TTI::OP_None}; + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) InstructionCost Cost = 0; @@ -1545,15 +1627,15 @@ public: Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind); Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX); + BinaryOperator::Shl, RetTy, CostKind, OpInfoX, + {OpInfoZ.Kind, TTI::OP_None}); Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY); + BinaryOperator::LShr, RetTy, CostKind, OpInfoY, + {OpInfoZ.Kind, TTI::OP_None}); // Non-constant shift amounts requires a modulo. - if (OpKindZ != TTI::OK_UniformConstantValue && - OpKindZ != TTI::OK_NonUniformConstantValue) + if (!OpInfoZ.isConstant()) Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy, - CostKind, OpKindZ, OpKindBW, - OpPropsZ, OpPropsBW); + CostKind, OpInfoZ, OpInfoBW); // For non-rotates (X != Y) we must add shift-by-zero handling costs. if (X != Y) { Type *CondTy = RetTy->getWithNewBitWidth(1); @@ -1573,9 +1655,7 @@ public: // If we're not expanding the intrinsic then we assume this is cheap // to implement. 
if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) { - std::pair<InstructionCost, MVT> LT = - getTLI()->getTypeLegalizationCost(DL, RetTy); - return LT.first; + return getTypeLegalizationCost(RetTy).first; } // Create the expanded types that will be used to calculate the uadd_sat @@ -1598,10 +1678,11 @@ public: if (RetVF.isVector() && !RetVF.isScalable()) { ScalarizationCost = 0; if (!RetTy->isVoidTy()) - ScalarizationCost += - getScalarizationOverhead(cast<VectorType>(RetTy), true, false); + ScalarizationCost += getScalarizationOverhead( + cast<VectorType>(RetTy), + /*Insert*/ true, /*Extract*/ false, CostKind); ScalarizationCost += - getOperandsScalarizationOverhead(Args, ICA.getArgTypes()); + getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind); } IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I, @@ -1653,7 +1734,8 @@ public: Type *ScalarRetTy = RetTy; if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) { if (!SkipScalarizationCost) - ScalarizationCost = getScalarizationOverhead(RetVTy, true, false); + ScalarizationCost = getScalarizationOverhead( + RetVTy, /*Insert*/ true, /*Extract*/ false, CostKind); ScalarCalls = std::max(ScalarCalls, cast<FixedVectorType>(RetVTy)->getNumElements()); ScalarRetTy = RetTy->getScalarType(); @@ -1663,7 +1745,8 @@ public: Type *Ty = Tys[i]; if (auto *VTy = dyn_cast<VectorType>(Ty)) { if (!SkipScalarizationCost) - ScalarizationCost += getScalarizationOverhead(VTy, false, true); + ScalarizationCost += getScalarizationOverhead( + VTy, /*Insert*/ false, /*Extract*/ true, CostKind); ScalarCalls = std::max(ScalarCalls, cast<FixedVectorType>(VTy)->getNumElements()); Ty = Ty->getScalarType(); @@ -1780,19 +1863,19 @@ public: } case Intrinsic::vector_reduce_add: return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy, - None, CostKind); + std::nullopt, CostKind); case Intrinsic::vector_reduce_mul: return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy, - None, CostKind); + std::nullopt, CostKind); case Intrinsic::vector_reduce_and: return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy, - None, CostKind); + std::nullopt, CostKind); case Intrinsic::vector_reduce_or: - return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, None, - CostKind); + return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, + std::nullopt, CostKind); case Intrinsic::vector_reduce_xor: return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy, - None, CostKind); + std::nullopt, CostKind); case Intrinsic::vector_reduce_fadd: return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy, FMF, CostKind); @@ -1822,7 +1905,7 @@ public: Pred, CostKind); // TODO: Should we add an OperandValueProperties::OP_Zero property? 
Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue); + BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); return Cost; } case Intrinsic::smax: @@ -1897,11 +1980,12 @@ public: Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, CCH, CostKind); Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, TTI::OK_AnyValue, - TTI::OK_UniformConstantValue); + CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - TTI::OK_AnyValue, - TTI::OK_UniformConstantValue); + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } @@ -1962,13 +2046,15 @@ public: Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, CCH, CostKind); Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, TTI::OK_AnyValue, - TTI::OK_UniformConstantValue); + CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); if (IsSigned) Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, - CostKind, TTI::OK_AnyValue, - TTI::OK_UniformConstantValue); + CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); Cost += thisT()->getCmpSelInstrCost( BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); @@ -2021,8 +2107,7 @@ public: } const TargetLoweringBase *TLI = getTLI(); - std::pair<InstructionCost, MVT> LT = - TLI->getTypeLegalizationCost(DL, RetTy); + std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy); if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && @@ -2071,8 +2156,10 @@ public: return InstructionCost::getInvalid(); InstructionCost ScalarizationCost = - SkipScalarizationCost ? ScalarizationCostPassed - : getScalarizationOverhead(RetVTy, true, false); + SkipScalarizationCost + ? ScalarizationCostPassed + : getScalarizationOverhead(RetVTy, /*Insert*/ true, + /*Extract*/ false, CostKind); unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements(); SmallVector<Type *, 4> ScalarTys; @@ -2088,7 +2175,8 @@ public: for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { if (auto *VTy = dyn_cast<VectorType>(Tys[i])) { if (!ICA.skipScalarizationCost()) - ScalarizationCost += getScalarizationOverhead(VTy, false, true); + ScalarizationCost += getScalarizationOverhead( + VTy, /*Insert*/ false, /*Extract*/ true, CostKind); ScalarCalls = std::max(ScalarCalls, cast<FixedVectorType>(VTy)->getNumElements()); } @@ -2118,8 +2206,7 @@ public: } unsigned getNumberOfParts(Type *Tp) { - std::pair<InstructionCost, MVT> LT = - getTLI()->getTypeLegalizationCost(DL, Tp); + std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp); return LT.first.isValid() ? *LT.first.getValue() : 0; } @@ -2177,16 +2264,16 @@ public: unsigned NumReduxLevels = Log2_32(NumVecElts); InstructionCost ArithCost = 0; InstructionCost ShuffleCost = 0; - std::pair<InstructionCost, MVT> LT = - thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); + std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty); unsigned LongVectorCount = 0; unsigned MVTLen = LT.second.isVector() ? 
LT.second.getVectorNumElements() : 1; while (NumVecElts > MVTLen) { NumVecElts /= 2; VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); - ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, - NumVecElts, SubTy); + ShuffleCost += + thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, + CostKind, NumVecElts, SubTy); ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind); Ty = SubTy; ++LongVectorCount; @@ -2200,12 +2287,14 @@ public: // architecture-dependent length. // By default reductions need one shuffle per reduction level. - ShuffleCost += NumReduxLevels * thisT()->getShuffleCost( - TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty); + ShuffleCost += + NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, + std::nullopt, CostKind, 0, Ty); ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind); return ShuffleCost + ArithCost + - thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); + thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, + CostKind, 0, nullptr, nullptr); } /// Try to calculate the cost of performing strict (in-order) reductions, @@ -2232,8 +2321,8 @@ public: return InstructionCost::getInvalid(); auto *VTy = cast<FixedVectorType>(Ty); - InstructionCost ExtractCost = - getScalarizationOverhead(VTy, /*Insert=*/false, /*Extract=*/true); + InstructionCost ExtractCost = getScalarizationOverhead( + VTy, /*Insert=*/false, /*Extract=*/true, CostKind); InstructionCost ArithCost = thisT()->getArithmeticInstrCost( Opcode, VTy->getElementType(), CostKind); ArithCost *= VTy->getNumElements(); @@ -2242,7 +2331,7 @@ public: } InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - Optional<FastMathFlags> FMF, + std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) { if (TTI::requiresOrderedReduction(FMF)) return getOrderedReductionCost(Opcode, Ty, CostKind); @@ -2273,8 +2362,7 @@ public: } InstructionCost MinMaxCost = 0; InstructionCost ShuffleCost = 0; - std::pair<InstructionCost, MVT> LT = - thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); + std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty); unsigned LongVectorCount = 0; unsigned MVTLen = LT.second.isVector() ? LT.second.getVectorNumElements() : 1; @@ -2283,8 +2371,9 @@ public: auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts); - ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, - NumVecElts, SubTy); + ShuffleCost += + thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, + CostKind, NumVecElts, SubTy); MinMaxCost += thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CmpInst::BAD_ICMP_PREDICATE, CostKind) + @@ -2300,8 +2389,9 @@ public: // operations performed on the current platform. That's why several final // reduction opertions are perfomed on the vectors with the same // architecture-dependent length. - ShuffleCost += NumReduxLevels * thisT()->getShuffleCost( - TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty); + ShuffleCost += + NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, + std::nullopt, CostKind, 0, Ty); MinMaxCost += NumReduxLevels * (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, @@ -2311,28 +2401,43 @@ public: // The last min/max should be in vector registers and we counted it above. // So just need a single extractelement. 
return ShuffleCost + MinMaxCost + - thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); + thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, + CostKind, 0, nullptr, nullptr); } - InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, - Type *ResTy, VectorType *Ty, - TTI::TargetCostKind CostKind) { + InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, + Type *ResTy, VectorType *Ty, + std::optional<FastMathFlags> FMF, + TTI::TargetCostKind CostKind) { + // Without any native support, this is equivalent to the cost of + // vecreduce.opcode(ext(Ty A)). + VectorType *ExtTy = VectorType::get(ResTy, Ty); + InstructionCost RedCost = + thisT()->getArithmeticReductionCost(Opcode, ExtTy, FMF, CostKind); + InstructionCost ExtCost = thisT()->getCastInstrCost( + IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty, + TTI::CastContextHint::None, CostKind); + + return RedCost + ExtCost; + } + + InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, + VectorType *Ty, + TTI::TargetCostKind CostKind) { // Without any native support, this is equivalent to the cost of - // vecreduce.add(ext) or if IsMLA vecreduce.add(mul(ext, ext)) + // vecreduce.add(mul(ext(Ty A), ext(Ty B))) or + // vecreduce.add(mul(A, B)). VectorType *ExtTy = VectorType::get(ResTy, Ty); InstructionCost RedCost = thisT()->getArithmeticReductionCost( - Instruction::Add, ExtTy, None, CostKind); - InstructionCost MulCost = 0; + Instruction::Add, ExtTy, std::nullopt, CostKind); InstructionCost ExtCost = thisT()->getCastInstrCost( IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty, TTI::CastContextHint::None, CostKind); - if (IsMLA) { - MulCost = - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - ExtCost *= 2; - } - return RedCost + MulCost + ExtCost; + InstructionCost MulCost = + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + + return RedCost + MulCost + 2 * ExtCost; } InstructionCost getVectorSplitCost() { return 1; } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h index 5c3776e972c0..005cfd269e3c 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -52,15 +52,16 @@ public: }; private: + // Holds one of: + // - the register that the value is assigned to; + // - the memory offset at which the value resides; + // - additional information about pending location; the exact interpretation + // of the data is target-dependent. + std::variant<Register, int64_t, unsigned> Data; + /// ValNo - This is the value number being assigned (e.g. an argument number). unsigned ValNo; - /// Loc is either a stack offset or a register number. - unsigned Loc; - - /// isMem - True if this is a memory loc, false if it is a register loc. - unsigned isMem : 1; - /// isCustom - True if this arg/retval requires special handling. unsigned isCustom : 1; @@ -72,82 +73,60 @@ private: /// LocVT - The type of the location being assigned to. 
MVT LocVT; -public: - static CCValAssign getReg(unsigned ValNo, MVT ValVT, - unsigned RegNo, MVT LocVT, - LocInfo HTP) { - CCValAssign Ret; - Ret.ValNo = ValNo; - Ret.Loc = RegNo; - Ret.isMem = false; - Ret.isCustom = false; - Ret.HTP = HTP; - Ret.ValVT = ValVT; - Ret.LocVT = LocVT; - return Ret; + CCValAssign(LocInfo HTP, unsigned ValNo, MVT ValVT, MVT LocVT, bool IsCustom) + : ValNo(ValNo), isCustom(IsCustom), HTP(HTP), ValVT(ValVT), LocVT(LocVT) { } - static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, - unsigned RegNo, MVT LocVT, - LocInfo HTP) { - CCValAssign Ret; - Ret = getReg(ValNo, ValVT, RegNo, LocVT, HTP); - Ret.isCustom = true; +public: + static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, + MVT LocVT, LocInfo HTP, bool IsCustom = false) { + CCValAssign Ret(HTP, ValNo, ValVT, LocVT, IsCustom); + Ret.Data = Register(RegNo); return Ret; } - static CCValAssign getMem(unsigned ValNo, MVT ValVT, - unsigned Offset, MVT LocVT, - LocInfo HTP) { - CCValAssign Ret; - Ret.ValNo = ValNo; - Ret.Loc = Offset; - Ret.isMem = true; - Ret.isCustom = false; - Ret.HTP = HTP; - Ret.ValVT = ValVT; - Ret.LocVT = LocVT; - return Ret; + static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, + MVT LocVT, LocInfo HTP) { + return getReg(ValNo, ValVT, RegNo, LocVT, HTP, /*IsCustom=*/true); } - static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, - unsigned Offset, MVT LocVT, - LocInfo HTP) { - CCValAssign Ret; - Ret = getMem(ValNo, ValVT, Offset, LocVT, HTP); - Ret.isCustom = true; + static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, + MVT LocVT, LocInfo HTP, bool IsCustom = false) { + CCValAssign Ret(HTP, ValNo, ValVT, LocVT, IsCustom); + Ret.Data = int64_t(Offset); return Ret; } - // There is no need to differentiate between a pending CCValAssign and other - // kinds, as they are stored in a different list. 
+ static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, + MVT LocVT, LocInfo HTP) { + return getMem(ValNo, ValVT, Offset, LocVT, HTP, /*IsCustom=*/true); + } + static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo = 0) { - return getReg(ValNo, ValVT, ExtraInfo, LocVT, HTP); + CCValAssign Ret(HTP, ValNo, ValVT, LocVT, false); + Ret.Data = ExtraInfo; + return Ret; } - void convertToReg(unsigned RegNo) { - Loc = RegNo; - isMem = false; - } + void convertToReg(unsigned RegNo) { Data = Register(RegNo); } - void convertToMem(unsigned Offset) { - Loc = Offset; - isMem = true; - } + void convertToMem(unsigned Offset) { Data = int64_t(Offset); } unsigned getValNo() const { return ValNo; } MVT getValVT() const { return ValVT; } - bool isRegLoc() const { return !isMem; } - bool isMemLoc() const { return isMem; } + bool isRegLoc() const { return std::holds_alternative<Register>(Data); } + bool isMemLoc() const { return std::holds_alternative<int64_t>(Data); } + bool isPendingLoc() const { return std::holds_alternative<unsigned>(Data); } bool needsCustom() const { return isCustom; } - Register getLocReg() const { assert(isRegLoc()); return Loc; } - unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; } - unsigned getExtraInfo() const { return Loc; } + Register getLocReg() const { return std::get<Register>(Data); } + unsigned getLocMemOffset() const { return std::get<int64_t>(Data); } + unsigned getExtraInfo() const { return std::get<unsigned>(Data); } + MVT getLocVT() const { return LocVT; } LocInfo getLocInfo() const { return HTP; } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenCommonISel.h index ce278468dffc..3b11c840256d 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenCommonISel.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenCommonISel.h @@ -220,6 +220,12 @@ findSplitPointForStackProtector(MachineBasicBlock *BB, /// test. unsigned getInvertedFPClassTest(unsigned Test); +/// Assuming the instruction \p MI is going to be deleted, attempt to salvage +/// debug users of \p MI by writing the effect of \p MI in a DIExpression. 
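The CallingConvLower.h hunk above replaces CCValAssign's raw Loc field plus isMem flag with a single std::variant, so the alternative currently held encodes the location kind. A standalone sketch of that pattern follows (simplified stand-in types, not the real class).

#include <cstdint>
#include <iostream>
#include <variant>

// Thin stand-in for llvm::Register so the three alternatives stay distinct
// types, mirroring std::variant<Register, int64_t, unsigned> in the patch.
struct RegNo { unsigned Id; };

struct LocSketch {
  std::variant<RegNo, int64_t, unsigned> Data; // register | mem offset | pending info

  bool isRegLoc() const { return std::holds_alternative<RegNo>(Data); }
  bool isMemLoc() const { return std::holds_alternative<int64_t>(Data); }
  bool isPendingLoc() const { return std::holds_alternative<unsigned>(Data); }

  // std::get throws std::bad_variant_access if the wrong kind is held,
  // taking over from the explicit assert(isRegLoc()) in the old accessors.
  unsigned getLocReg() const { return std::get<RegNo>(Data).Id; }
  int64_t getLocMemOffset() const { return std::get<int64_t>(Data); }
};

int main() {
  LocSketch L;
  L.Data = RegNo{5};
  std::cout << L.isRegLoc() << ' ' << L.getLocReg() << '\n';        // 1 5
  L.Data = int64_t{16};
  std::cout << L.isMemLoc() << ' ' << L.getLocMemOffset() << '\n';  // 1 16
}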
+void salvageDebugInfoForDbgValue(const MachineRegisterInfo &MRI, + MachineInstr &MI, + ArrayRef<MachineOperand *> DbgUsers); + } // namespace llvm #endif // LLVM_CODEGEN_CODEGENCOMMONISEL_H diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index f4b1980b9ede..9ac51ed9f6fa 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -19,8 +19,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CFLAndersAliasAnalysis.h" -#include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" @@ -29,9 +27,9 @@ #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/UnreachableBlockElim.h" -#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Verifier.h" +#include "llvm/IRPrinter/IRPrintingPasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/CodeGen.h" @@ -191,7 +189,7 @@ protected: // This special-casing introduces less adaptor passes. If we have the need // of adding module passes after function passes, we could change the // implementation to accommodate that. - Optional<bool> AddingFunctionPasses; + std::optional<bool> AddingFunctionPasses; }; // Function object to maintain state while adding codegen machine passes. @@ -483,27 +481,12 @@ Error CodeGenPassBuilder<Derived>::buildPipeline( return Error::success(); } -static inline AAManager registerAAAnalyses(CFLAAType UseCFLAA) { +static inline AAManager registerAAAnalyses() { AAManager AA; // The order in which these are registered determines their priority when // being queried. - switch (UseCFLAA) { - case CFLAAType::Steensgaard: - AA.registerFunctionAnalysis<CFLSteensAA>(); - break; - case CFLAAType::Andersen: - AA.registerFunctionAnalysis<CFLAndersAA>(); - break; - case CFLAAType::Both: - AA.registerFunctionAnalysis<CFLAndersAA>(); - AA.registerFunctionAnalysis<CFLSteensAA>(); - break; - default: - break; - } - // Basic AliasAnalysis support. // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help @@ -527,7 +510,7 @@ void CodeGenPassBuilder<Derived>::registerModuleAnalyses( template <typename Derived> void CodeGenPassBuilder<Derived>::registerFunctionAnalyses( FunctionAnalysisManager &FAM) const { - FAM.registerPass([this] { return registerAAAnalyses(this->Opt.UseCFLAA); }); + FAM.registerPass([this] { return registerAAAnalyses(); }); #define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \ FAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; }); @@ -685,7 +668,7 @@ void CodeGenPassBuilder<Derived>::addPassesToHandleExceptions( // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. 
addPass(SjLjEHPreparePass()); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: case ExceptionHandling::AIX: @@ -926,6 +909,7 @@ Error CodeGenPassBuilder<Derived>::addMachinePasses( addPass(StackMapLivenessPass()); addPass(LiveDebugValuesPass()); + addPass(MachineSanitizerBinaryMetadata()); if (TM.Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None && Opt.EnableMachineOutliner != RunOutliner::NeverOutline) { @@ -1130,6 +1114,9 @@ void CodeGenPassBuilder<Derived>::addMachineLateOptimization( if (!TM.requiresStructuredCFG()) addPass(TailDuplicatePass()); + // Cleanup of redundant (identical) address/immediate loads. + addPass(MachineLateInstrsCleanupPass()); + // Copy propagation. addPass(MachineCopyPropagationPass()); } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.h index 9281ed723854..7b1ef60912f1 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.h @@ -18,6 +18,7 @@ #include "llvm/ADT/FloatingPointMode.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" +#include <optional> #include <string> #include <vector> @@ -37,16 +38,16 @@ std::string getMCPU(); std::vector<std::string> getMAttrs(); Reloc::Model getRelocModel(); -Optional<Reloc::Model> getExplicitRelocModel(); +std::optional<Reloc::Model> getExplicitRelocModel(); ThreadModel::Model getThreadModel(); CodeModel::Model getCodeModel(); -Optional<CodeModel::Model> getExplicitCodeModel(); +std::optional<CodeModel::Model> getExplicitCodeModel(); llvm::ExceptionHandling getExceptionModel(); -Optional<CodeGenFileType> getExplicitFileType(); +std::optional<CodeGenFileType> getExplicitFileType(); CodeGenFileType getFileType(); @@ -98,10 +99,10 @@ bool getLowerGlobalDtorsViaCxaAtExit(); bool getRelaxELFRelocations(); bool getDataSections(); -Optional<bool> getExplicitDataSections(); +std::optional<bool> getExplicitDataSections(); bool getFunctionSections(); -Optional<bool> getExplicitFunctionSections(); +std::optional<bool> getExplicitFunctionSections(); bool getIgnoreXCOFFVisibility(); @@ -132,7 +133,7 @@ bool getEnableMachineFunctionSplitter(); bool getEnableDebugEntryValues(); bool getValueTrackingVariableLocations(); -Optional<bool> getExplicitValueTrackingVariableLocations(); +std::optional<bool> getExplicitValueTrackingVariableLocations(); bool getForceDwarfFrameSection(); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h new file mode 100644 index 000000000000..99df6e5ad1d7 --- /dev/null +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h @@ -0,0 +1,53 @@ +//===- ComplexDeinterleavingPass.h - Complex Deinterleaving Pass *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements generation of target-specific intrinsics to support +// handling of complex number arithmetic and deinterleaving. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_COMPLEXDEINTERLEAVING_H +#define LLVM_CODEGEN_COMPLEXDEINTERLEAVING_H + +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" + +namespace llvm { + +class Function; +class TargetMachine; + +struct ComplexDeinterleavingPass + : public PassInfoMixin<ComplexDeinterleavingPass> { +private: + TargetMachine *TM; + +public: + ComplexDeinterleavingPass(TargetMachine *TM) : TM(TM) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +enum class ComplexDeinterleavingOperation { + CAdd, + CMulPartial, + // The following 'operations' are used to represent internal states. Backends + // are not expected to try and support these in any capacity. + Shuffle +}; + +enum class ComplexDeinterleavingRotation { + Rotation_0 = 0, + Rotation_90 = 1, + Rotation_180 = 2, + Rotation_270 = 3, +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_COMPLEXDEINTERLEAVING_H diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CostTable.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CostTable.h index fc4648c25224..d23f79d94a7a 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CostTable.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CostTable.h @@ -47,7 +47,7 @@ template <size_t N, class CostType> inline const CostTblEntryT<CostType> * CostTableLookup(const CostTblEntryT<CostType> (&Table)[N], int ISD, MVT Ty) { // Wrapper to fix template argument deduction failures. - return CostTableLookup<CostType>(makeArrayRef(Table), ISD, Ty); + return CostTableLookup<CostType>(Table, ISD, Ty); } /// Type Conversion Cost Table @@ -81,7 +81,7 @@ inline const TypeConversionCostTblEntryT<CostType> * ConvertCostTableLookup(const TypeConversionCostTblEntryT<CostType> (&Table)[N], int ISD, MVT Dst, MVT Src) { // Wrapper to fix template argument deduction failures. 
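Referring back to the ComplexDeinterleavingRotation values in the new header above: a common reading of these rotations (for example in partial complex-multiply instructions such as Arm's FCMLA) is that rotating an operand by k*90 degrees multiplies it by i^k in the complex plane. A standalone sketch of that arithmetic, not LLVM code:

#include <complex>
#include <iostream>

// Rotation_0/90/180/270 read as multiplication by i^0..i^3; this is the
// operand transformation applied before a partial multiply-accumulate.
std::complex<double> rotate(std::complex<double> Z, int Quarter) {
  switch (Quarter & 3) {
  case 0:  return Z;                        // Rotation_0
  case 1:  return {-Z.imag(), Z.real()};    // Rotation_90:  Z * i
  case 2:  return {-Z.real(), -Z.imag()};   // Rotation_180: Z * -1
  default: return {Z.imag(), -Z.real()};    // Rotation_270: Z * -i
  }
}

int main() {
  std::cout << rotate({1.0, 2.0}, 1) << '\n'; // (-2,1)
}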
- return ConvertCostTableLookup<CostType>(makeArrayRef(Table), ISD, Dst, Src); + return ConvertCostTableLookup<CostType>(Table, ISD, Dst, Src); } } // namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h index 465829159e42..0cfe04af6f9e 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h @@ -12,6 +12,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineInstr.h" #include <utility> namespace llvm { @@ -20,7 +21,6 @@ class DILocation; class LexicalScopes; class DINode; class MachineFunction; -class MachineInstr; class TargetRegisterInfo; /// Record instruction ordering so we can query their relative positions within diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/DebugHandlerBase.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/DebugHandlerBase.h index 45823b2ba349..1b7355f5d9fb 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/DebugHandlerBase.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/DebugHandlerBase.h @@ -14,12 +14,12 @@ #ifndef LLVM_CODEGEN_DEBUGHANDLERBASE_H #define LLVM_CODEGEN_DEBUGHANDLERBASE_H -#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/AsmPrinterHandler.h" #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" +#include <optional> namespace llvm { @@ -37,14 +37,14 @@ struct DbgVariableLocation { SmallVector<int64_t, 1> LoadChain; /// Present if the location is part of a larger variable. - llvm::Optional<llvm::DIExpression::FragmentInfo> FragmentInfo; + std::optional<llvm::DIExpression::FragmentInfo> FragmentInfo; /// Extract a VariableLocation from a MachineInstr. /// This will only work if Instruction is a debug value instruction /// and the associated DIExpression is in one of the supported forms. /// If these requirements are not met, the returned Optional will not /// have a value. - static Optional<DbgVariableLocation> + static std::optional<DbgVariableLocation> extractFromMachineInstruction(const MachineInstr &Instruction); }; @@ -72,6 +72,9 @@ protected: /// function body. DebugLoc PrologEndLoc; + /// This block includes epilogue instructions. + const MachineBasicBlock *EpilogBeginBlock; + /// If nonnull, stores the current machine instruction we're processing. const MachineInstr *CurMI = nullptr; @@ -123,8 +126,8 @@ public: void beginFunction(const MachineFunction *MF) override; void endFunction(const MachineFunction *MF) override; - void beginBasicBlock(const MachineBasicBlock &MBB) override; - void endBasicBlock(const MachineBasicBlock &MBB) override; + void beginBasicBlockSection(const MachineBasicBlock &MBB) override; + void endBasicBlockSection(const MachineBasicBlock &MBB) override; /// Return Label preceding the instruction. 
MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h index 8be97d2c2095..d25776e5eff1 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" @@ -204,7 +205,7 @@ protected: MachineRegisterInfo &MRI; MachineFrameInfo &MFI; MachineConstantPool &MCP; - DebugLoc DbgLoc; + MIMetadata MIMD; const TargetMachine &TM; const DataLayout &DL; const TargetInstrInfo &TII; @@ -212,7 +213,6 @@ protected: const TargetRegisterInfo &TRI; const TargetLibraryInfo *LibInfo; bool SkipTargetIndependentISel; - bool UseInstrRefDebugInfo = false; /// The position of the last instruction for materializing constants /// for use in the current block. It resets to EmitStartPt when it makes sense @@ -247,7 +247,7 @@ public: void finishBasicBlock(); /// Return current debug location information. - DebugLoc getCurDebugLoc() const { return DbgLoc; } + DebugLoc getCurDebugLoc() const { return MIMD.getDL(); } /// Do "fast" instruction selection for function arguments and append /// the machine instructions to the current block. Returns true when @@ -319,12 +319,6 @@ public: /// Reset InsertPt to the given old insert position. void leaveLocalValueArea(SavePoint Old); - /// Signal whether instruction referencing variable locations are desired for - /// this function's debug-info. - void useInstrRefDebugInfo(bool Flag) { - UseInstrRefDebugInfo = Flag; - } - protected: explicit FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h index 4d9694347f17..488df12f13f8 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h @@ -70,7 +70,7 @@ class CSEMIRBuilder : public MachineIRBuilder { void profileMBBOpcode(GISelInstProfileBuilder &B, unsigned Opc) const; void profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps, - ArrayRef<SrcOp> SrcOps, Optional<unsigned> Flags, + ArrayRef<SrcOp> SrcOps, std::optional<unsigned> Flags, GISelInstProfileBuilder &B) const; // Takes a MachineInstrBuilder and inserts it into the CSEMap using the @@ -91,9 +91,9 @@ public: // Pull in base class constructors. using MachineIRBuilder::MachineIRBuilder; // Unhide buildInstr - MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, - ArrayRef<SrcOp> SrcOps, - Optional<unsigned> Flag = None) override; + MachineInstrBuilder + buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, ArrayRef<SrcOp> SrcOps, + std::optional<unsigned> Flag = std::nullopt) override; // Bring in the other overload from the base class. 
using MachineIRBuilder::buildConstant; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 9bf1c134618c..de2d6876e6b7 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -144,6 +144,9 @@ public: /// The stack index for sret demotion. int DemoteStackIndex; + + /// Expected type identifier for indirect calls with a CFI check. + const ConstantInt *CFIType = nullptr; }; /// Argument handling is mostly uniform between the four places that @@ -349,6 +352,9 @@ protected: ISD::ArgFlagsTy getAttributesForArgIdx(const CallBase &Call, unsigned ArgIdx) const; + /// \returns Flags corresponding to the attributes on the return from \p Call. + ISD::ArgFlagsTy getAttributesForReturn(const CallBase &Call) const; + /// Adds flags to \p Flags based off of the attributes in \p Attrs. /// \p OpIdx is the index in \p Attrs to add flags from. void addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, @@ -389,21 +395,20 @@ protected: /// \p Handler to move them to the assigned locations. /// /// \return True if everything has succeeded, false otherwise. - bool - determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner, - SmallVectorImpl<ArgInfo> &Args, - MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, bool IsVarArg, - ArrayRef<Register> ThisReturnRegs = None) const; + bool determineAndHandleAssignments( + ValueHandler &Handler, ValueAssigner &Assigner, + SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder, + CallingConv::ID CallConv, bool IsVarArg, + ArrayRef<Register> ThisReturnRegs = std::nullopt) const; /// Use \p Handler to insert code to handle the argument/return values /// represented by \p Args. It's expected determineAssignments previously /// processed these arguments to populate \p CCState and \p ArgLocs. - bool handleAssignments(ValueHandler &Handler, SmallVectorImpl<ArgInfo> &Args, - CCState &CCState, - SmallVectorImpl<CCValAssign> &ArgLocs, - MachineIRBuilder &MIRBuilder, - ArrayRef<Register> ThisReturnRegs = None) const; + bool + handleAssignments(ValueHandler &Handler, SmallVectorImpl<ArgInfo> &Args, + CCState &CCState, SmallVectorImpl<CCValAssign> &ArgLocs, + MachineIRBuilder &MIRBuilder, + ArrayRef<Register> ThisReturnRegs = std::nullopt) const; /// Check whether parameters to a call that are passed in callee saved /// registers are the same as from the calling function. 
This needs to be diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 73edc3c37970..9dc1abbcfa40 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Register.h" #include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/IR/InstrTypes.h" #include <functional> namespace llvm { @@ -112,12 +113,14 @@ protected: GISelChangeObserver &Observer; GISelKnownBits *KB; MachineDominatorTree *MDT; + bool IsPreLegalize; const LegalizerInfo *LI; const RegisterBankInfo *RBI; const TargetRegisterInfo *TRI; public: CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, + bool IsPreLegalize, GISelKnownBits *KB = nullptr, MachineDominatorTree *MDT = nullptr, const LegalizerInfo *LI = nullptr); @@ -126,6 +129,10 @@ public: return KB; } + MachineIRBuilder &getBuilder() const { + return Builder; + } + const TargetLowering &getTargetLowering() const; /// \returns true if the combiner is running pre-legalization. @@ -352,16 +359,15 @@ public: /// Transform fp_instr(cst) to constant result of the fp operation. bool matchCombineConstantFoldFpUnary(MachineInstr &MI, - Optional<APFloat> &Cst); + std::optional<APFloat> &Cst); void applyCombineConstantFoldFpUnary(MachineInstr &MI, - Optional<APFloat> &Cst); + std::optional<APFloat> &Cst); /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space. bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg); void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); /// Transform PtrToInt(IntToPtr(x)) to x. - bool matchCombineP2IToI2P(MachineInstr &MI, Register &Reg); void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg); /// Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) @@ -388,11 +394,7 @@ public: void applyCombineExtOfExt(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo); - /// Transform fneg(fneg(x)) to x. - bool matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg); - - /// Match fabs(fabs(x)) to fabs(x). - bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); + /// Transform fabs(fabs(x)) to fabs(x). void applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); /// Transform fabs(fneg(x)) to fabs(x). @@ -404,12 +406,17 @@ public: void applyCombineTruncOfExt(MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo); - /// Transform trunc (shl x, K) to shl (trunc x), - /// K => K < VT.getScalarSizeInBits(). - bool matchCombineTruncOfShl(MachineInstr &MI, - std::pair<Register, Register> &MatchInfo); - void applyCombineTruncOfShl(MachineInstr &MI, - std::pair<Register, Register> &MatchInfo); + /// Transform trunc (shl x, K) to shl (trunc x), K + /// if K < VT.getScalarSizeInBits(). + /// + /// Transforms trunc ([al]shr x, K) to (trunc ([al]shr (MidVT (trunc x)), K)) + /// if K <= (MidVT.getScalarSizeInBits() - VT.getScalarSizeInBits()) + /// MidVT is obtained by finding a legal type between the trunc's src and dst + /// types. 
+ bool matchCombineTruncOfShift(MachineInstr &MI, + std::pair<MachineInstr *, LLT> &MatchInfo); + void applyCombineTruncOfShift(MachineInstr &MI, + std::pair<MachineInstr *, LLT> &MatchInfo); /// Transform G_MUL(x, -1) to G_SUB(0, x) void applyCombineMulByNegativeOne(MachineInstr &MI); @@ -431,6 +438,9 @@ public: /// Return true if a G_SELECT instruction \p MI has an undef comparison. bool matchUndefSelectCmp(MachineInstr &MI); + /// Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index. + bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI); + /// Return true if a G_SELECT instruction \p MI has a constant comparison. If /// true, \p OpIdx will store the operand index of the known selected value. bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx); @@ -647,6 +657,13 @@ public: bool matchUDivByConst(MachineInstr &MI); void applyUDivByConst(MachineInstr &MI); + /// Given an G_SDIV \p MI expressing a signed divide by constant, return an + /// expression that implements it by multiplying by a magic number. + /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". + MachineInstr *buildSDivUsingMul(MachineInstr &MI); + bool matchSDivByConst(MachineInstr &MI); + void applySDivByConst(MachineInstr &MI); + // G_UMULH x, (1 << c)) -> x >> (bitwidth - c) bool matchUMulHToLShr(MachineInstr &MI); void applyUMulHToLShr(MachineInstr &MI); @@ -674,6 +691,11 @@ public: /// (G_*ADDO x, 0) -> x + no carry out bool matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Match: + /// (G_*ADDE x, y, 0) -> (G_*ADDO x, y) + /// (G_*SUBE x, y, 0) -> (G_*SUBO x, y) + bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Transform (fadd x, fneg(y)) -> (fsub x, y) /// (fadd fneg(x), y) -> (fsub y, x) /// (fsub x, fneg(y)) -> (fadd x, y) @@ -683,6 +705,9 @@ public: /// (fma fneg(x), fneg(y), z) -> (fma x, y, z) bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo); + void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo); + bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate = false); @@ -740,6 +765,30 @@ public: /// Transform G_ADD(G_SUB(y, x), x) to y. bool matchAddSubSameReg(MachineInstr &MI, Register &Src); + bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo); + bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo); + bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo); + + /// Transform: + /// (x + y) - y -> x + /// (x + y) - x -> y + /// x - (y + x) -> 0 - y + /// x - (x + z) -> 0 - z + bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// \returns true if it is possible to simplify a select instruction \p MI + /// to a min/max instruction of some sort. + bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Transform: + /// (X + Y) == X -> Y == 0 + /// (X - Y) == X -> Y == 0 + /// (X ^ Y) == X -> Y == 0 + /// (X + Y) != X -> Y != 0 + /// (X - Y) != X -> Y != 0 + /// (X ^ Y) != X -> Y != 0 + bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo); + private: /// Given a non-indexed load or store instruction \p MI, find an offset that /// can be usefully and legally folded into it as a post-indexing operation. @@ -761,7 +810,7 @@ private: /// \param [in] Root - The search root. /// /// \returns The Registers found during the search. 
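The new matchSDivByConst/applySDivByConst hooks above cite the classic magic-number technique from Hacker's Delight: a signed divide by a constant becomes a widening multiply, a shift, and a sign correction. A standalone illustration follows (plain C++, hard-coded for division by 3; the real expansion computes the magic constant and shift amount per divisor).

#include <cassert>
#include <cstdint>

// Divide a 32-bit signed int by 3 without a division instruction: multiply
// by the magic constant 0x55555556 (approximately 2^32/3), keep the high 32
// bits of the 64-bit product, then add 1 for negative inputs so the result
// truncates toward zero like C's '/'.
int32_t divideBy3(int32_t N) {
  int64_t Prod = int64_t{0x55555556} * N;
  int32_t Q = static_cast<int32_t>(Prod >> 32); // high half of the product
  Q += static_cast<uint32_t>(N) >> 31;          // +1 when N is negative
  return Q;
}

int main() {
  for (int32_t N = -100; N <= 100; ++N)
    assert(divideBy3(N) == N / 3);
}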
- Optional<SmallVector<Register, 8>> + std::optional<SmallVector<Register, 8>> findCandidatesForLoadOrCombine(const MachineInstr *Root) const; /// Helper function for matchLoadOrCombine. @@ -775,7 +824,7 @@ private: /// /// \returns On success, a 3-tuple containing lowest-index load found, the /// lowest index, and the last load in the sequence. - Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>> + std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>> findLoadOffsetsForLoadOrCombine( SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, const SmallVector<Register, 8> &RegsToVisit, @@ -785,6 +834,49 @@ private: /// a re-association of its operands would break an existing legal addressing /// mode that the address computation currently represents. bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd); + + /// Behavior when a floating point min/max is given one NaN and one + /// non-NaN as input. + enum class SelectPatternNaNBehaviour { + NOT_APPLICABLE = 0, /// NaN behavior not applicable. + RETURNS_NAN, /// Given one NaN input, returns the NaN. + RETURNS_OTHER, /// Given one NaN input, returns the non-NaN. + RETURNS_ANY /// Given one NaN input, can return either (or both operands are + /// known non-NaN.) + }; + + /// \returns which of \p LHS and \p RHS would be the result of a non-equality + /// floating point comparison where one of \p LHS and \p RHS may be NaN. + /// + /// If both \p LHS and \p RHS may be NaN, returns + /// SelectPatternNaNBehaviour::NOT_APPLICABLE. + SelectPatternNaNBehaviour + computeRetValAgainstNaN(Register LHS, Register RHS, + bool IsOrderedComparison) const; + + /// Determines the floating point min/max opcode which should be used for + /// a G_SELECT fed by a G_FCMP with predicate \p Pred. + /// + /// \returns 0 if this G_SELECT should not be combined to a floating point + /// min or max. If it should be combined, returns one of + /// + /// * G_FMAXNUM + /// * G_FMAXIMUM + /// * G_FMINNUM + /// * G_FMINIMUM + /// + /// Helper function for matchFPSelectToMinMax. + unsigned getFPMinMaxOpcForSelect(CmpInst::Predicate Pred, LLT DstTy, + SelectPatternNaNBehaviour VsNaNRetVal) const; + + /// Handle floating point cases for matchSimplifySelectToMinMax. + /// + /// E.g. + /// + /// select (fcmp uge x, 1.0) x, 1.0 -> fmax x, 1.0 + /// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0 + bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal, + Register FalseVal, BuildFnTy &MatchInfo); }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h index 70945fcecfe5..b42deb01f8d0 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h @@ -76,7 +76,7 @@ public: // Removes an observer from the list and does nothing if observer is not // present. 
void removeObserver(GISelChangeObserver *O) { - auto It = std::find(Observers.begin(), Observers.end(), O); + auto It = llvm::find(Observers, O); if (It != Observers.end()) Observers.erase(It); } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 58fe48200e73..049efa672f5b 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -153,7 +153,7 @@ public: /// Represents G_BUILD_VECTOR, G_CONCAT_VECTORS or G_MERGE_VALUES. /// All these have the common property of generating a single value from /// multiple sources. -class GMergeLikeOp : public GenericMachineInstr { +class GMergeLikeInstr : public GenericMachineInstr { public: /// Returns the number of source registers. unsigned getNumSources() const { return getNumOperands() - 1; } @@ -173,7 +173,7 @@ public: }; /// Represents a G_MERGE_VALUES. -class GMerge : public GMergeLikeOp { +class GMerge : public GMergeLikeInstr { public: static bool classof(const MachineInstr *MI) { return MI->getOpcode() == TargetOpcode::G_MERGE_VALUES; @@ -181,7 +181,7 @@ public: }; /// Represents a G_CONCAT_VECTORS. -class GConcatVectors : public GMergeLikeOp { +class GConcatVectors : public GMergeLikeInstr { public: static bool classof(const MachineInstr *MI) { return MI->getOpcode() == TargetOpcode::G_CONCAT_VECTORS; @@ -189,7 +189,7 @@ public: }; /// Represents a G_BUILD_VECTOR. -class GBuildVector : public GMergeLikeOp { +class GBuildVector : public GMergeLikeInstr { public: static bool classof(const MachineInstr *MI) { return MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index e40f00433870..67e884038b47 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -34,6 +34,7 @@ namespace llvm { class AllocaInst; +class AssumptionCache; class BasicBlock; class CallInst; class CallLowering; @@ -47,6 +48,7 @@ class MachineInstr; class MachineRegisterInfo; class OptimizationRemarkEmitter; class PHINode; +class TargetLibraryInfo; class TargetPassConfig; class User; class Value; @@ -570,6 +572,8 @@ private: std::unique_ptr<OptimizationRemarkEmitter> ORE; AAResults *AA; + AssumptionCache *AC; + const TargetLibraryInfo *LibInfo; FunctionLoweringInfo FuncInfo; // True when either the Target Machine specifies no optimizations or the diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 44ba81223ec3..db1a5473e45a 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -16,7 +16,6 @@ #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,6 +26,7 @@ #include <cstdint> #include <functional> #include <initializer_list> +#include <optional> #include <vector> namespace llvm { @@ -468,7 +468,7 @@ public: protected: using 
ComplexRendererFns = - Optional<SmallVector<std::function<void(MachineInstrBuilder &)>, 4>>; + std::optional<SmallVector<std::function<void(MachineInstrBuilder &)>, 4>>; using RecordedMIVector = SmallVector<MachineInstr *, 4>; using NewMIVector = SmallVector<MachineInstrBuilder, 4>; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index 1229dfcb2c31..fc4e94929d41 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -118,7 +118,7 @@ bool InstructionSelector::executeMatchTable( return false; break; } - if (Register::isPhysicalRegister(MO.getReg())) { + if (MO.getReg().isPhysical()) { DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": Is a physical register\n"); if (handleReject() == RejectAndGiveUp) @@ -694,7 +694,7 @@ bool InstructionSelector::executeMatchTable( (ISel.*ISelInfo.ComplexPredicates[ComplexPredicateID])( State.MIs[InsnID]->getOperand(OpIdx)); if (Renderer) - State.Renderers[RendererID] = Renderer.value(); + State.Renderers[RendererID] = *Renderer; else if (handleReject() == RejectAndGiveUp) return false; @@ -865,7 +865,7 @@ bool InstructionSelector::executeMatchTable( OutMIs.resize(NewInsnID + 1); OutMIs[NewInsnID] = BuildMI(*State.MIs[0]->getParent(), State.MIs[0], - State.MIs[0]->getDebugLoc(), TII.get(Opcode)); + MIMetadata(*State.MIs[0]), TII.get(Opcode)); DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": GIR_BuildMI(OutMIs[" << NewInsnID << "], " << Opcode << ")\n"); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 6802591b6350..0f8f5662926d 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -300,7 +300,7 @@ public: for (unsigned i = 0; i < NumSrcs; ++i) SrcRegs[i] = SrcMerge->getSourceReg(i); - Builder.buildMerge(DstReg, SrcRegs); + Builder.buildMergeValues(DstReg, SrcRegs); UpdatedDefs.push_back(DstReg); } else { // Unable to combine @@ -724,7 +724,10 @@ public: /// and its callees rely upon. Register findValueFromDefImpl(Register DefReg, unsigned StartBit, unsigned Size) { - MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI); + std::optional<DefinitionAndSourceRegister> DefSrcReg = + getDefSrcRegIgnoringCopies(DefReg, MRI); + MachineInstr *Def = DefSrcReg->MI; + DefReg = DefSrcReg->Reg; // If the instruction has a single def, then simply delegate the search. // For unmerge however with multiple defs, we need to compute the offset // into the source of the unmerge. @@ -808,6 +811,134 @@ public: } return DeadDefs.all(); } + + GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size, + unsigned &DefOperandIdx) { + if (Register Def = findValueFromDefImpl(Reg, 0, Size)) { + if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) { + DefOperandIdx = Unmerge->findRegisterDefOperandIdx(Def); + return Unmerge; + } + } + return nullptr; + } + + // Check if sequence of elements from merge-like instruction is defined by + // another sequence of elements defined by unmerge. Most often this is the + // same sequence. 
Search for elements using findValueFromDefImpl. + bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx, + GUnmerge *Unmerge, unsigned UnmergeIdxStart, + unsigned NumElts, unsigned EltSize) { + assert(MergeStartIdx + NumElts <= MI.getNumSources()); + for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) { + unsigned EltUnmergeIdx; + GUnmerge *EltUnmerge = findUnmergeThatDefinesReg( + MI.getSourceReg(i), EltSize, EltUnmergeIdx); + // Check if source i comes from the same Unmerge. + if (!EltUnmerge || EltUnmerge != Unmerge) + return false; + // Check that source i's def has same index in sequence in Unmerge. + if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart) + return false; + } + return true; + } + + bool tryCombineMergeLike(GMergeLikeInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs, + GISelChangeObserver &Observer) { + Register Elt0 = MI.getSourceReg(0); + LLT EltTy = MRI.getType(Elt0); + unsigned EltSize = EltTy.getSizeInBits(); + + unsigned Elt0UnmergeIdx; + // Search for unmerge that will be candidate for combine. + auto *Unmerge = findUnmergeThatDefinesReg(Elt0, EltSize, Elt0UnmergeIdx); + if (!Unmerge) + return false; + + unsigned NumMIElts = MI.getNumSources(); + Register Dst = MI.getReg(0); + LLT DstTy = MRI.getType(Dst); + Register UnmergeSrc = Unmerge->getSourceReg(); + LLT UnmergeSrcTy = MRI.getType(UnmergeSrc); + + // Recognize copy of UnmergeSrc to Dst. + // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst. + // + // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty) + // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ... + // + // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty) + if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) { + if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize)) + return false; + replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer); + DeadInsts.push_back(&MI); + return true; + } + + // Recognize UnmergeSrc that can be unmerged to DstTy directly. + // Types have to be either both vector or both non-vector types. + // Merge-like opcodes are combined one at the time. First one creates new + // unmerge, following should use the same unmerge (builder performs CSE). + // + // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy) + // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1 + // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3 + // + // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc + if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && + (Elt0UnmergeIdx % NumMIElts == 0) && + getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) { + if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts, + EltSize)) + return false; + MIB.setInstrAndDebugLoc(MI); + auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg()); + unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits(); + replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB, + UpdatedDefs, Observer); + DeadInsts.push_back(&MI); + return true; + } + + // Recognize when multiple unmerged sources with UnmergeSrcTy type + // can be merged into Dst with DstTy type directly. + // Types have to be either both vector or both non-vector types. 
+ + // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy) + // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy) + // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3 + // + // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc + + if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && + getCoverTy(DstTy, UnmergeSrcTy) == DstTy) { + SmallVector<Register, 4> ConcatSources; + unsigned NumElts = Unmerge->getNumDefs(); + for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) { + unsigned EltUnmergeIdx; + auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i), + EltSize, EltUnmergeIdx); + // All unmerges have to be the same size. + if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) || + (EltUnmergeIdx != 0)) + return false; + if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize)) + return false; + ConcatSources.push_back(UnmergeI->getSourceReg()); + } + + MIB.setInstrAndDebugLoc(MI); + MIB.buildMergeLikeInstr(Dst, ConcatSources); + DeadInsts.push_back(&MI); + return true; + } + + return false; + } }; bool tryCombineUnmergeValues(GUnmerge &MI, @@ -968,7 +1099,7 @@ public: Regs.push_back(MergeI->getOperand(Idx).getReg()); Register DefReg = MI.getReg(DefIdx); - Builder.buildMerge(DefReg, Regs); + Builder.buildMergeLikeInstr(DefReg, Regs); UpdatedDefs.push_back(DefReg); } @@ -1031,7 +1162,7 @@ public: Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); MachineInstr *MergeI = MRI.getVRegDef(SrcReg); - if (!MergeI || !isa<GMergeLikeOp>(MergeI)) + if (!MergeI || !isa<GMergeLikeInstr>(MergeI)) return false; Register DstReg = MI.getOperand(0).getReg(); @@ -1068,6 +1199,8 @@ public: bool tryCombineInstruction(MachineInstr &MI, SmallVectorImpl<MachineInstr *> &DeadInsts, GISelObserverWrapper &WrapperObserver) { + ArtifactValueFinder Finder(MRI, Builder, LI); + // This might be a recursive call, and we might have DeadInsts already // populated. To avoid bad things happening later with multiple vreg defs // etc, process the dead instructions now if any. @@ -1108,6 +1241,8 @@ public: break; } } + Changed = Finder.tryCombineMergeLike(cast<GMergeLikeInstr>(MI), DeadInsts, + UpdatedDefs, WrapperObserver); break; case TargetOpcode::G_EXTRACT: Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs); @@ -1139,6 +1274,7 @@ public: case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_EXTRACT: case TargetOpcode::G_TRUNC: + case TargetOpcode::G_BUILD_VECTOR: // Adding Use to ArtifactList. 
WrapperObserver.changedInstr(Use); break; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index caa6346a40db..a019bc9876bd 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -379,6 +379,8 @@ public: LegalizeResult lowerFPTRUNC(MachineInstr &MI); LegalizeResult lowerFPOWI(MachineInstr &MI); + LegalizeResult lowerISFPCLASS(MachineInstr &MI); + LegalizeResult lowerMinMax(MachineInstr &MI); LegalizeResult lowerFCopySign(MachineInstr &MI); LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index c0cad8ff675d..50b11a4920bb 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -950,6 +950,22 @@ public: changeTo(typeIdx(TypeIdx), Ty)); } + /// Ensure the scalar is at least as wide as Ty if condition is met. + LegalizeRuleSet &minScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, + const LLT Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf( + LegalizeAction::WidenScalar, + [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.isScalar() && + QueryTy.getSizeInBits() < Ty.getSizeInBits() && + Predicate(Query); + }, + changeTo(typeIdx(TypeIdx), Ty)); + } + /// Ensure the scalar is at most as wide as Ty. LegalizeRuleSet &maxScalarOrElt(unsigned TypeIdx, const LLT Ty) { using namespace LegalityPredicates; @@ -1040,6 +1056,8 @@ public: }, [=](const LegalityQuery &Query) { LLT T = Query.Types[LargeTypeIdx]; + if (T.isVector() && T.getElementType().isPointer()) + T = T.changeElementType(LLT::scalar(T.getScalarSizeInBits())); return std::make_pair(TypeIdx, T); }); } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 1cacf96620f0..ea6ed322e9b1 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -22,12 +22,14 @@ namespace llvm { namespace MIPatternMatch { template <typename Reg, typename Pattern> -bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P) { +[[nodiscard]] bool mi_match(Reg R, const MachineRegisterInfo &MRI, + Pattern &&P) { return P.match(MRI, R); } template <typename Pattern> -bool mi_match(MachineInstr &MI, const MachineRegisterInfo &MRI, Pattern &&P) { +[[nodiscard]] bool mi_match(MachineInstr &MI, const MachineRegisterInfo &MRI, + Pattern &&P) { return P.match(MRI, &MI); } @@ -61,17 +63,18 @@ inline OneNonDBGUse_match<SubPat> m_OneNonDBGUse(const SubPat &SP) { } template <typename ConstT> -inline Optional<ConstT> matchConstant(Register, const MachineRegisterInfo &); +inline std::optional<ConstT> matchConstant(Register, + const MachineRegisterInfo &); template <> -inline Optional<APInt> matchConstant(Register Reg, - const MachineRegisterInfo &MRI) { +inline std::optional<APInt> matchConstant(Register Reg, + const MachineRegisterInfo &MRI) { return getIConstantVRegVal(Reg, MRI); } template <> -inline Optional<int64_t> matchConstant(Register 
Reg, - const MachineRegisterInfo &MRI) { +inline std::optional<int64_t> matchConstant(Register Reg, + const MachineRegisterInfo &MRI) { return getIConstantVRegSExtVal(Reg, MRI); } @@ -95,18 +98,18 @@ inline ConstantMatch<int64_t> m_ICst(int64_t &Cst) { } template <typename ConstT> -inline Optional<ConstT> matchConstantSplat(Register, - const MachineRegisterInfo &); +inline std::optional<ConstT> matchConstantSplat(Register, + const MachineRegisterInfo &); template <> -inline Optional<APInt> matchConstantSplat(Register Reg, - const MachineRegisterInfo &MRI) { +inline std::optional<APInt> matchConstantSplat(Register Reg, + const MachineRegisterInfo &MRI) { return getIConstantSplatVal(Reg, MRI); } template <> -inline Optional<int64_t> matchConstantSplat(Register Reg, - const MachineRegisterInfo &MRI) { +inline std::optional<int64_t> +matchConstantSplat(Register Reg, const MachineRegisterInfo &MRI) { return getIConstantSplatSExtVal(Reg, MRI); } @@ -137,34 +140,35 @@ inline ICstOrSplatMatch<int64_t> m_ICstOrSplat(int64_t &Cst) { } struct GCstAndRegMatch { - Optional<ValueAndVReg> &ValReg; - GCstAndRegMatch(Optional<ValueAndVReg> &ValReg) : ValReg(ValReg) {} + std::optional<ValueAndVReg> &ValReg; + GCstAndRegMatch(std::optional<ValueAndVReg> &ValReg) : ValReg(ValReg) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { ValReg = getIConstantVRegValWithLookThrough(Reg, MRI); return ValReg ? true : false; } }; -inline GCstAndRegMatch m_GCst(Optional<ValueAndVReg> &ValReg) { +inline GCstAndRegMatch m_GCst(std::optional<ValueAndVReg> &ValReg) { return GCstAndRegMatch(ValReg); } struct GFCstAndRegMatch { - Optional<FPValueAndVReg> &FPValReg; - GFCstAndRegMatch(Optional<FPValueAndVReg> &FPValReg) : FPValReg(FPValReg) {} + std::optional<FPValueAndVReg> &FPValReg; + GFCstAndRegMatch(std::optional<FPValueAndVReg> &FPValReg) + : FPValReg(FPValReg) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { FPValReg = getFConstantVRegValWithLookThrough(Reg, MRI); return FPValReg ? true : false; } }; -inline GFCstAndRegMatch m_GFCst(Optional<FPValueAndVReg> &FPValReg) { +inline GFCstAndRegMatch m_GFCst(std::optional<FPValueAndVReg> &FPValReg) { return GFCstAndRegMatch(FPValReg); } struct GFCstOrSplatGFCstMatch { - Optional<FPValueAndVReg> &FPValReg; - GFCstOrSplatGFCstMatch(Optional<FPValueAndVReg> &FPValReg) + std::optional<FPValueAndVReg> &FPValReg; + GFCstOrSplatGFCstMatch(std::optional<FPValueAndVReg> &FPValReg) : FPValReg(FPValReg) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { return (FPValReg = getFConstantSplat(Reg, MRI)) || @@ -173,7 +177,7 @@ struct GFCstOrSplatGFCstMatch { }; inline GFCstOrSplatGFCstMatch -m_GFCstOrSplat(Optional<FPValueAndVReg> &FPValReg) { +m_GFCstOrSplat(std::optional<FPValueAndVReg> &FPValReg) { return GFCstOrSplatGFCstMatch(FPValReg); } @@ -237,6 +241,20 @@ inline SpecificConstantMatch m_AllOnesInt() { } ///} +/// Matcher for a specific register. +struct SpecificRegisterMatch { + Register RequestedReg; + SpecificRegisterMatch(Register RequestedReg) : RequestedReg(RequestedReg) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + return Reg == RequestedReg; + } +}; + +/// Matches a register only if it is equal to \p RequestedReg. +inline SpecificRegisterMatch m_SpecificReg(Register RequestedReg) { + return SpecificRegisterMatch(RequestedReg); +} + // TODO: Rework this for different kinds of MachineOperand. // Currently assumes the Src for a match is a register. 
// We might want to support taking in some MachineOperands and call getReg on @@ -354,6 +372,17 @@ inline bind_ty<LLT> m_Type(LLT Ty) { return Ty; } inline bind_ty<CmpInst::Predicate> m_Pred(CmpInst::Predicate &P) { return P; } inline operand_type_match m_Pred() { return operand_type_match(); } +struct ImplicitDefMatch { + bool match(const MachineRegisterInfo &MRI, Register Reg) { + MachineInstr *TmpMI; + if (mi_match(Reg, MRI, m_MInstr(TmpMI))) + return TmpMI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; + return false; + } +}; + +inline ImplicitDefMatch m_GImplicitDef() { return ImplicitDefMatch(); } + // Helper for matching G_FCONSTANT inline bind_ty<const ConstantFP *> m_GFCst(const ConstantFP *&C) { return C; } @@ -424,6 +453,19 @@ m_GAdd(const LHS &L, const RHS &R) { } template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false> +m_GBuildVector(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false> +m_GBuildVectorTrunc(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false>(L, + R); +} + +template <typename LHS, typename RHS> inline BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, false> m_GPtrAdd(const LHS &L, const RHS &R) { return BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, false>(L, R); @@ -596,7 +638,8 @@ inline UnaryOp_match<SrcTy, TargetOpcode::G_FSQRT> m_GFSqrt(const SrcTy &Src) { // General helper for generic MI compares, i.e. G_ICMP and G_FCMP // TODO: Allow checking a specific predicate. -template <typename Pred_P, typename LHS_P, typename RHS_P, unsigned Opcode> +template <typename Pred_P, typename LHS_P, typename RHS_P, unsigned Opcode, + bool Commutable = false> struct CompareOp_match { Pred_P P; LHS_P L; @@ -615,9 +658,14 @@ struct CompareOp_match { static_cast<CmpInst::Predicate>(TmpMI->getOperand(1).getPredicate()); if (!P.match(MRI, TmpPred)) return false; - - return L.match(MRI, TmpMI->getOperand(2).getReg()) && - R.match(MRI, TmpMI->getOperand(3).getReg()); + Register LHS = TmpMI->getOperand(2).getReg(); + Register RHS = TmpMI->getOperand(3).getReg(); + if (L.match(MRI, LHS) && R.match(MRI, RHS)) + return true; + if (Commutable && L.match(MRI, RHS) && R.match(MRI, LHS) && + P.match(MRI, CmpInst::getSwappedPredicate(TmpPred))) + return true; + return false; } }; @@ -633,6 +681,36 @@ m_GFCmp(const Pred &P, const LHS &L, const RHS &R) { return CompareOp_match<Pred, LHS, RHS, TargetOpcode::G_FCMP>(P, L, R); } +/// G_ICMP matcher that also matches commuted compares. +/// E.g. +/// +/// m_c_GICmp(m_Pred(...), m_GAdd(...), m_GSub(...)) +/// +/// Could match both of: +/// +/// icmp ugt (add x, y) (sub a, b) +/// icmp ult (sub a, b) (add x, y) +template <typename Pred, typename LHS, typename RHS> +inline CompareOp_match<Pred, LHS, RHS, TargetOpcode::G_ICMP, true> +m_c_GICmp(const Pred &P, const LHS &L, const RHS &R) { + return CompareOp_match<Pred, LHS, RHS, TargetOpcode::G_ICMP, true>(P, L, R); +} + +/// G_FCMP matcher that also matches commuted compares. +/// E.g. 
+/// +/// m_c_GFCmp(m_Pred(...), m_FAdd(...), m_GFMul(...)) +/// +/// Could match both of: +/// +/// fcmp ogt (fadd x, y) (fmul a, b) +/// fcmp olt (fmul a, b) (fadd x, y) +template <typename Pred, typename LHS, typename RHS> +inline CompareOp_match<Pred, LHS, RHS, TargetOpcode::G_FCMP, true> +m_c_GFCmp(const Pred &P, const LHS &L, const RHS &R) { + return CompareOp_match<Pred, LHS, RHS, TargetOpcode::G_FCMP, true>(P, L, R); +} + // Helper for checking if a Reg is of specific type. struct CheckType { LLT Ty; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 01fd5d94d371..e5b48d9d52c0 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -50,6 +50,8 @@ struct MachineIRBuilderState { MachineRegisterInfo *MRI = nullptr; /// Debug location to be set to any instruction we create. DebugLoc DL; + /// PC sections metadata to be set to any instruction we create. + MDNode *PCSections = nullptr; /// \name Fields describing the insertion point. /// @{ @@ -215,11 +217,13 @@ private: /// Helper class to build MachineInstr. /// It keeps internally the insertion point and debug location for all /// the new instructions we want to create. -/// This information can be modify via the related setters. +/// This information can be modified via the related setters. class MachineIRBuilder { MachineIRBuilderState State; + unsigned getOpcodeForMerge(const DstOp &DstOp, ArrayRef<SrcOp> SrcOps) const; + protected: void validateTruncExt(const LLT Dst, const LLT Src, bool IsExtend); @@ -341,6 +345,7 @@ public: assert(MI.getParent() && "Instruction is not part of a basic block"); setMBB(*MI.getParent()); State.II = MI.getIterator(); + setPCSections(MI.getPCSections()); } /// @} @@ -364,6 +369,12 @@ public: /// Get the current instruction's debug location. const DebugLoc &getDebugLoc() { return State.DL; } + /// Set the PC sections metadata to \p MD for all the next build instructions. + void setPCSections(MDNode *MD) { State.PCSections = MD; } + + /// Get the current instruction's PC sections metadata. + MDNode *getPCSections() { return State.PCSections; } + /// Build and insert <empty> = \p Opcode <empty>. /// The insertion point is the one set by the last call of either /// setBasicBlock or setMI. @@ -478,9 +489,10 @@ public: /// type as \p Op0 or \p Op0 itself. /// /// \return a MachineInstrBuilder for the newly created instruction. - Optional<MachineInstrBuilder> materializePtrAdd(Register &Res, Register Op0, - const LLT ValueTy, - uint64_t Value); + std::optional<MachineInstrBuilder> materializePtrAdd(Register &Res, + Register Op0, + const LLT ValueTy, + uint64_t Value); /// Build and insert \p Res = G_PTRMASK \p Op0, \p Op1 MachineInstrBuilder buildPtrMask(const DstOp &Res, const SrcOp &Op0, @@ -649,11 +661,10 @@ public: /// Build and insert \p Res = G_FPEXT \p Op MachineInstrBuilder buildFPExt(const DstOp &Res, const SrcOp &Op, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FPEXT, {Res}, {Op}, Flags); } - /// Build and insert a G_PTRTOINT instruction. 
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src) { return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src}); @@ -854,8 +865,8 @@ public: /// Build and insert G_ASSERT_SEXT, G_ASSERT_ZEXT, or G_ASSERT_ALIGN /// /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildAssertOp(unsigned Opc, const DstOp &Res, const SrcOp &Op, - unsigned Val) { + MachineInstrBuilder buildAssertInstr(unsigned Opc, const DstOp &Res, + const SrcOp &Op, unsigned Val) { return buildInstr(Opc, Res, Op).addImm(Val); } @@ -864,7 +875,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size) { - return buildAssertOp(TargetOpcode::G_ASSERT_ZEXT, Res, Op, Size); + return buildAssertInstr(TargetOpcode::G_ASSERT_ZEXT, Res, Op, Size); } /// Build and insert \p Res = G_ASSERT_SEXT Op, Size @@ -872,7 +883,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildAssertSExt(const DstOp &Res, const SrcOp &Op, unsigned Size) { - return buildAssertOp(TargetOpcode::G_ASSERT_SEXT, Res, Op, Size); + return buildAssertInstr(TargetOpcode::G_ASSERT_SEXT, Res, Op, Size); } /// Build and insert \p Res = G_ASSERT_ALIGN Op, AlignVal @@ -880,7 +891,8 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildAssertAlign(const DstOp &Res, const SrcOp &Op, Align AlignVal) { - return buildAssertOp(TargetOpcode::G_ASSERT_ALIGN, Res, Op, AlignVal.value()); + return buildAssertInstr(TargetOpcode::G_ASSERT_ALIGN, Res, Op, + AlignVal.value()); } /// Build and insert `Res = G_LOAD Addr, MMO`. @@ -959,7 +971,8 @@ public: /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ... /// /// G_MERGE_VALUES combines the input elements contiguously into a larger - /// register. + /// register. It should only be used when the destination register is not a + /// vector. /// /// \pre setBasicBlock or setMI must have been called. /// \pre The entire register \p Res (and no more) must be covered by the input @@ -967,9 +980,30 @@ public: /// \pre The type of all \p Ops registers must be identical. /// /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef<Register> Ops); - MachineInstrBuilder buildMerge(const DstOp &Res, - std::initializer_list<SrcOp> Ops); + MachineInstrBuilder buildMergeValues(const DstOp &Res, + ArrayRef<Register> Ops); + + /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ... + /// or \p Res = G_BUILD_VECTOR \p Op0, ... + /// or \p Res = G_CONCAT_VECTORS \p Op0, ... + /// + /// G_MERGE_VALUES combines the input elements contiguously into a larger + /// register. It is used when the destination register is not a vector. + /// G_BUILD_VECTOR combines scalar inputs into a vector register. + /// G_CONCAT_VECTORS combines vector inputs into a vector register. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre The entire register \p Res (and no more) must be covered by the input + /// registers. + /// \pre The type of all \p Ops registers must be identical. + /// + /// \return a MachineInstrBuilder for the newly created instruction. The + /// opcode of the new instruction will depend on the types of both + /// the destination and the sources. 
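// A minimal sketch (not part of the patch) of the new PCSections state shown
// above; `MIRBuilder`, `MI`, `DstReg` and `SrcReg` are assumed to come from a
// legalizer or combiner context.
MIRBuilder.setInstrAndDebugLoc(MI);   // setInstr now also copies MI's
                                      // !pcsections metadata into the builder
auto Cpy = MIRBuilder.buildCopy(DstReg, SrcReg); // carries the same metadata
MIRBuilder.setPCSections(nullptr);    // stop annotating later instructions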
+ MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, + ArrayRef<Register> Ops); + MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, + std::initializer_list<SrcOp> Ops); /// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op /// @@ -1084,8 +1118,9 @@ public: /// \pre \p Res must be smaller than \p Op /// /// \return The newly created instruction. - MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, - Optional<unsigned> Flags = None); + MachineInstrBuilder + buildFPTrunc(const DstOp &Res, const SrcOp &Op, + std::optional<unsigned> Flags = std::nullopt); /// Build and insert \p Res = G_TRUNC \p Op /// @@ -1129,7 +1164,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, - Optional<unsigned> Flags = None); + std::optional<unsigned> Flags = std::nullopt); /// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1 /// @@ -1143,7 +1178,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, - Optional<unsigned> Flags = None); + std::optional<unsigned> Flags = std::nullopt); /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, /// \p Elt, \p Idx @@ -1165,6 +1200,20 @@ public: /// \pre setBasicBlock or setMI must have been called. /// \pre \p Res must be a generic virtual register with scalar type. /// \pre \p Val must be a generic virtual register with vector type. + /// + /// \return The newly created instruction. + MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, + const SrcOp &Val, + const int Idx) { + return buildExtractVectorElement(Res, Val, + buildConstant(LLT::scalar(64), Idx)); + } + + /// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with scalar type. + /// \pre \p Val must be a generic virtual register with vector type. /// \pre \p Idx must be a generic virtual register with scalar type. /// /// \return The newly created instruction. @@ -1473,7 +1522,7 @@ public: MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_ADD, {Dst}, {Src0, Src1}, Flags); } @@ -1490,7 +1539,7 @@ public: MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_SUB, {Dst}, {Src0, Src1}, Flags); } @@ -1506,74 +1555,74 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. 
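// A minimal sketch (not part of the patch): the "merge-like" builder picks
// its opcode from the destination and source types. `MIRBuilder` and `MRI`
// are assumptions; buildMergeValues is documented for scalar destinations.
LLT S32 = LLT::scalar(32);
Register Lo = MRI.createGenericVirtualRegister(S32);
Register Hi = MRI.createGenericVirtualRegister(S32);
SmallVector<Register, 2> Parts = {Lo, Hi};

// s64 = G_MERGE_VALUES s32, s32
auto Wide = MIRBuilder.buildMergeLikeInstr(LLT::scalar(64), Parts);
// <2 x s32> = G_BUILD_VECTOR s32, s32
auto Vec = MIRBuilder.buildMergeLikeInstr(LLT::fixed_vector(2, 32), Parts);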
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_MUL, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_UMULH, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildSMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_UREM \p Op0, \p Op1 MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_UREM, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMUL, {Dst}, {Src0, Src1}, Flags); } - MachineInstrBuilder buildFMinNum(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1, - Optional<unsigned> Flags = None) { + MachineInstrBuilder + buildFMinNum(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMINNUM, {Dst}, {Src0, Src1}, Flags); } - MachineInstrBuilder buildFMaxNum(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1, - Optional<unsigned> Flags = None) { + MachineInstrBuilder + buildFMaxNum(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMAXNUM, {Dst}, {Src0, Src1}, Flags); } - MachineInstrBuilder buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1, - Optional<unsigned> Flags = None) { + MachineInstrBuilder + buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMINNUM_IEEE, {Dst}, {Src0, Src1}, Flags); } - MachineInstrBuilder buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1, - Optional<unsigned> Flags = None) { + MachineInstrBuilder + buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMAXNUM_IEEE, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_SHL, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_LSHR, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_ASHR, {Dst}, {Src0, Src1}, Flags); } @@ -1605,7 +1654,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. 
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_OR, {Dst}, {Src0, Src1}, Flags); } @@ -1664,90 +1713,100 @@ public: /// Build and insert \p Res = G_FADD \p Op0, \p Op1 MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}, Flags); } + /// Build and insert \p Res = G_STRICT_FADD \p Op0, \p Op1 + MachineInstrBuilder + buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, + std::optional<unsigned> Flags = std::nullopt) { + return buildInstr(TargetOpcode::G_STRICT_FADD, {Dst}, {Src0, Src1}, Flags); + } + /// Build and insert \p Res = G_FSUB \p Op0, \p Op1 MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FSUB, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_FDIV \p Op0, \p Op1 MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FDIV, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_FMA \p Op0, \p Op1, \p Op2 MachineInstrBuilder buildFMA(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, const SrcOp &Src2, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2}, Flags); } /// Build and insert \p Res = G_FMAD \p Op0, \p Op1, \p Op2 MachineInstrBuilder buildFMAD(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, const SrcOp &Src2, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMAD, {Dst}, {Src0, Src1, Src2}, Flags); } /// Build and insert \p Res = G_FNEG \p Op0 MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags); } /// Build and insert \p Res = G_FABS \p Op0 MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}, Flags); } /// Build and insert \p Dst = G_FCANONICALIZE \p Src0 - MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, - Optional<unsigned> Flags = None) { + MachineInstrBuilder + buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FCANONICALIZE, {Dst}, {Src0}, Flags); } /// Build and insert \p Dst = G_INTRINSIC_TRUNC \p Src0 - MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, - Optional<unsigned> Flags = None) { + MachineInstrBuilder + buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {Dst}, {Src0}, Flags); } /// Build and insert \p Res = GFFLOOR \p Op0, \p Op1 - MachineInstrBuilder buildFFloor(const DstOp &Dst, const SrcOp &Src0, - Optional<unsigned> Flags = None) { + 
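// A minimal sketch (not part of the patch): the Flags parameter is now
// std::optional<unsigned>. MachineInstr::MIFlag values still pass through
// unchanged, and std::nullopt replaces the old None default. DstReg, DstReg2,
// LHS and RHS are assumptions.
auto Sum  = MIRBuilder.buildFAdd(DstReg, LHS, RHS,
                                 MachineInstr::FmNsz | MachineInstr::FmContract);
auto Prod = MIRBuilder.buildFMul(DstReg2, LHS, RHS, std::nullopt); // no flags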
MachineInstrBuilder + buildFFloor(const DstOp &Dst, const SrcOp &Src0, + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FFLOOR, {Dst}, {Src0}, Flags); } /// Build and insert \p Dst = G_FLOG \p Src MachineInstrBuilder buildFLog(const DstOp &Dst, const SrcOp &Src, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FLOG, {Dst}, {Src}, Flags); } /// Build and insert \p Dst = G_FLOG2 \p Src MachineInstrBuilder buildFLog2(const DstOp &Dst, const SrcOp &Src, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FLOG2, {Dst}, {Src}, Flags); } /// Build and insert \p Dst = G_FEXP2 \p Src MachineInstrBuilder buildFExp2(const DstOp &Dst, const SrcOp &Src, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FEXP2, {Dst}, {Src}, Flags); } /// Build and insert \p Dst = G_FPOW \p Src0, \p Src1 MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional<unsigned> Flags = None) { + std::optional<unsigned> Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags); } @@ -1959,9 +2018,9 @@ public: return buildInstr(TargetOpcode::G_BITREVERSE, {Dst}, {Src}); } - virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, - ArrayRef<SrcOp> SrcOps, - Optional<unsigned> Flags = None); + virtual MachineInstrBuilder + buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, ArrayRef<SrcOp> SrcOps, + std::optional<unsigned> Flags = std::nullopt); }; } // End namespace llvm. diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h index d0918485249d..8ca15bdae1de 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -407,7 +407,7 @@ public: } }; -private: +protected: /// Helper class used to represent the cost for mapping an instruction. /// When mapping an instruction, we may introduce some repairing code. /// In most cases, the repairing code is local to the instruction, @@ -639,6 +639,12 @@ public: .set(MachineFunctionProperties::Property::NoPHIs); } + /// Check that our input is fully legal: we require the function to have the + /// Legalized property, so it should be. + /// + /// FIXME: This should be in the MachineVerifier. + bool checkFunctionIsLegal(MachineFunction &MF) const; + /// Walk through \p MF and assign a register bank to every virtual register /// that are still mapped to nothing. 
/// The target needs to provide a RegisterBankInfo and in particular @@ -662,6 +668,8 @@ public: /// MIRBuilder.buildInstr(COPY, Tmp, ArgReg) /// inst.getOperand(argument.getOperandNo()).setReg(Tmp) /// \endcode + bool assignRegisterBanks(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; }; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 31f3d5d84186..c9941afc8013 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -166,12 +166,12 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, MachineOptimizationRemarkMissed &R); /// If \p VReg is defined by a G_CONSTANT, return the corresponding value. -Optional<APInt> getIConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI); +std::optional<APInt> getIConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI); /// If \p VReg is defined by a G_CONSTANT fits in int64_t returns it. -Optional<int64_t> getIConstantVRegSExtVal(Register VReg, - const MachineRegisterInfo &MRI); +std::optional<int64_t> getIConstantVRegSExtVal(Register VReg, + const MachineRegisterInfo &MRI); /// Simple struct used to hold a constant integer value and a virtual /// register. @@ -182,14 +182,14 @@ struct ValueAndVReg { /// If \p VReg is defined by a statically evaluable chain of instructions rooted /// on a G_CONSTANT returns its APInt value and def register. -Optional<ValueAndVReg> +std::optional<ValueAndVReg> getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs = true); /// If \p VReg is defined by a statically evaluable chain of instructions rooted /// on a G_CONSTANT or G_FCONSTANT returns its value as APInt and def register. -Optional<ValueAndVReg> getAnyConstantVRegValWithLookThrough( +std::optional<ValueAndVReg> getAnyConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs = true, bool LookThroughAnyExt = false); @@ -200,7 +200,7 @@ struct FPValueAndVReg { /// If \p VReg is defined by a statically evaluable chain of instructions rooted /// on a G_FCONSTANT returns its APFloat value and def register. -Optional<FPValueAndVReg> +std::optional<FPValueAndVReg> getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs = true); @@ -225,7 +225,7 @@ struct DefinitionAndSourceRegister { /// away any copies. /// /// Also walks through hints such as G_ASSERT_ZEXT. -Optional<DefinitionAndSourceRegister> +std::optional<DefinitionAndSourceRegister> getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI); /// Find the def instruction for \p Reg, folding away any trivial copies. May @@ -260,12 +260,12 @@ APFloat getAPFloatFromSize(double Val, unsigned Size); /// fallback. 
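// A minimal sketch (not part of the patch) of the std::optional-returning
// constant helpers above; `Reg` and `MRI` are assumed from a combine.
if (std::optional<ValueAndVReg> C =
        getIConstantVRegValWithLookThrough(Reg, MRI)) {
  const APInt &Val = C->Value; // constant value found through copies/exts
  Register Def = C->VReg;      // the underlying G_CONSTANT's def register
  (void)Val;
  (void)Def;
}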
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU); -Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI); -Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI); +std::optional<APInt> ConstantFoldBinOp(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI); +std::optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI); /// Tries to constant fold a vector binop with sources \p Op1 and \p Op2. /// Returns an empty vector on failure. @@ -273,16 +273,17 @@ SmallVector<APInt> ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI); -Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1, - uint64_t Imm, const MachineRegisterInfo &MRI); +std::optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1, + uint64_t Imm, + const MachineRegisterInfo &MRI); -Optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, - Register Src, - const MachineRegisterInfo &MRI); +std::optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, + Register Src, + const MachineRegisterInfo &MRI); /// Tries to constant fold a G_CTLZ operation on \p Src. If \p Src is a vector /// then it tries to do an element-wise constant fold. -Optional<SmallVector<unsigned>> +std::optional<SmallVector<unsigned>> ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI); /// Test if the given value is known to have exactly one bit set. This differs @@ -369,32 +370,32 @@ public: }; /// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat. -/// If \p MI is not a splat, returns None. -Optional<int> getSplatIndex(MachineInstr &MI); +/// If \p MI is not a splat, returns std::nullopt. +std::optional<int> getSplatIndex(MachineInstr &MI); /// \returns the scalar integral splat value of \p Reg if possible. -Optional<APInt> getIConstantSplatVal(const Register Reg, - const MachineRegisterInfo &MRI); +std::optional<APInt> getIConstantSplatVal(const Register Reg, + const MachineRegisterInfo &MRI); /// \returns the scalar integral splat value defined by \p MI if possible. -Optional<APInt> getIConstantSplatVal(const MachineInstr &MI, - const MachineRegisterInfo &MRI); +std::optional<APInt> getIConstantSplatVal(const MachineInstr &MI, + const MachineRegisterInfo &MRI); /// \returns the scalar sign extended integral splat value of \p Reg if /// possible. -Optional<int64_t> getIConstantSplatSExtVal(const Register Reg, - const MachineRegisterInfo &MRI); +std::optional<int64_t> getIConstantSplatSExtVal(const Register Reg, + const MachineRegisterInfo &MRI); /// \returns the scalar sign extended integral splat value defined by \p MI if /// possible. -Optional<int64_t> getIConstantSplatSExtVal(const MachineInstr &MI, - const MachineRegisterInfo &MRI); +std::optional<int64_t> getIConstantSplatSExtVal(const MachineInstr &MI, + const MachineRegisterInfo &MRI); /// Returns a floating point scalar constant of a build vector splat if it /// exists. When \p AllowUndef == true some elements can be undef but not all. 
-Optional<FPValueAndVReg> getFConstantSplat(Register VReg, - const MachineRegisterInfo &MRI, - bool AllowUndef = true); +std::optional<FPValueAndVReg> getFConstantSplat(Register VReg, + const MachineRegisterInfo &MRI, + bool AllowUndef = true); /// Return true if the specified register is defined by G_BUILD_VECTOR or /// G_BUILD_VECTOR_TRUNC where all of the elements are \p SplatValue or undef. @@ -463,8 +464,8 @@ bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, /// \endcode /// /// In the above case, this will return a RegOrConstant containing 4. -Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI); +std::optional<RegOrConstant> getVectorSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI); /// Determines if \p MI defines a constant integer or a build vector of /// constant integers. Treats undef values as constants. @@ -473,9 +474,10 @@ bool isConstantOrConstantVector(MachineInstr &MI, /// Determines if \p MI defines a constant integer or a splat vector of /// constant integers. -/// \returns the scalar constant or None. -Optional<APInt> isConstantOrConstantSplatVector(MachineInstr &MI, - const MachineRegisterInfo &MRI); +/// \returns the scalar constant or std::nullopt. +std::optional<APInt> +isConstantOrConstantSplatVector(MachineInstr &MI, + const MachineRegisterInfo &MRI); /// Attempt to match a unary predicate against a scalar/splat constant or every /// element of a constant G_BUILD_VECTOR. If \p ConstVal is null, the source @@ -488,6 +490,10 @@ bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, /// the value \p Val contains a true value. bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP); +/// \returns true if given the TargetLowering's boolean contents information, +/// the value \p Val contains a false value. +bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, + bool IsFP); /// Returns an integer representing true, as defined by the /// TargetBooleanContents. @@ -506,5 +512,9 @@ void eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs, MachineRegisterInfo &MRI, void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver = nullptr); +/// Assuming the instruction \p MI is going to be deleted, attempt to salvage +/// debug users of \p MI by writing the effect of \p MI in a DIExpression. +void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI); + } // End namespace llvm. #endif diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h index b7f6de40266e..157247dfba98 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -859,12 +859,11 @@ enum NodeType { /// 3 Round to -inf /// 4 Round to nearest, ties to zero /// Result is rounding mode and chain. Input is a chain. - /// TODO: Rename this node to GET_ROUNDING. - FLT_ROUNDS_, + GET_ROUNDING, /// Set rounding mode. /// The first operand is a chain pointer. The second specifies the required - /// rounding mode, encoded in the same way as used in '``FLT_ROUNDS_``'. + /// rounding mode, encoded in the same way as used in '``GET_ROUNDING``'. SET_ROUNDING, /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. @@ -1149,6 +1148,9 @@ enum NodeType { /// operand and output are the same floating type. ARITH_FENCE, + /// MEMBARRIER - Compiler barrier only; generate a no-op. 
+ MEMBARRIER, + /// OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) /// This corresponds to the fence instruction. It takes an input chain, and /// two integer constants: an AtomicOrdering and a SynchronizationScope. @@ -1197,6 +1199,8 @@ enum NodeType { ATOMIC_LOAD_FSUB, ATOMIC_LOAD_FMAX, ATOMIC_LOAD_FMIN, + ATOMIC_LOAD_UINC_WRAP, + ATOMIC_LOAD_UDEC_WRAP, // Masked load and store - consecutive vector load and store operations // with additional mask operand that prevents memory accesses to the @@ -1337,10 +1341,10 @@ bool isVPBinaryOp(unsigned Opcode); bool isVPReduction(unsigned Opcode); /// The operand position of the vector mask. -Optional<unsigned> getVPMaskIdx(unsigned Opcode); +std::optional<unsigned> getVPMaskIdx(unsigned Opcode); /// The operand position of the explicit vector length parameter. -Optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode); +std::optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode); //===--------------------------------------------------------------------===// /// MemIndexedMode enum - This enum defines the load / store indexed diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/IndirectThunks.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/IndirectThunks.h index a2cdd0a9e965..6da60fb658ae 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/IndirectThunks.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/IndirectThunks.h @@ -21,27 +21,32 @@ namespace llvm { -template <typename Derived> class ThunkInserter { +template <typename Derived, typename InsertedThunksTy = bool> +class ThunkInserter { Derived &getDerived() { return *static_cast<Derived *>(this); } protected: - bool InsertedThunks; + // A variable used to track whether (and possible which) thunks have been + // inserted so far. InsertedThunksTy is usually a bool, but can be other types + // to represent more than one type of thunk. Requires an |= operator to + // accumulate results. + InsertedThunksTy InsertedThunks; void doInitialization(Module &M) {} void createThunkFunction(MachineModuleInfo &MMI, StringRef Name, bool Comdat = true); public: void init(Module &M) { - InsertedThunks = false; + InsertedThunks = InsertedThunksTy{}; getDerived().doInitialization(M); } // return `true` if `MMI` or `MF` was modified bool run(MachineModuleInfo &MMI, MachineFunction &MF); }; -template <typename Derived> -void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI, - StringRef Name, bool Comdat) { +template <typename Derived, typename InsertedThunksTy> +void ThunkInserter<Derived, InsertedThunksTy>::createThunkFunction( + MachineModuleInfo &MMI, StringRef Name, bool Comdat) { assert(Name.startswith(getDerived().getThunkPrefix()) && "Created a thunk with an unexpected prefix!"); @@ -82,26 +87,24 @@ void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI, MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); } -template <typename Derived> -bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) { +template <typename Derived, typename InsertedThunksTy> +bool ThunkInserter<Derived, InsertedThunksTy>::run(MachineModuleInfo &MMI, + MachineFunction &MF) { // If MF is not a thunk, check to see if we need to insert a thunk. if (!MF.getName().startswith(getDerived().getThunkPrefix())) { - // If we've already inserted a thunk, nothing else to do. 
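// A minimal sketch (not part of the patch) of a target using a non-bool
// InsertedThunksTy: a small bitmask enum records which thunk flavours have
// already been emitted, and run() accumulates insertThunks() results with |=.
// All names below (MyThunks, MyThunkInserter, ...) are illustrative only.
enum MyThunks : unsigned { NoThunk = 0, CallThunk = 1, RetThunk = 2 };
inline MyThunks &operator|=(MyThunks &A, MyThunks B) {
  return A = static_cast<MyThunks>(unsigned(A) | unsigned(B));
}

struct MyThunkInserter : ThunkInserter<MyThunkInserter, MyThunks> {
  const char *getThunkPrefix() { return "__my_thunk_"; }
  bool mayUseThunk(const MachineFunction &MF, MyThunks Inserted) {
    // Only keep visiting functions while some thunk flavour is still missing.
    return Inserted != MyThunks(CallThunk | RetThunk);
  }
  MyThunks insertThunks(MachineModuleInfo &MMI, MachineFunction &MF) {
    // Emit whatever MF requires and report it; the base class ORs the result
    // into InsertedThunks so later functions can skip already-emitted thunks.
    return CallThunk;
  }
  void populateThunk(MachineFunction &MF) { /* fill in the thunk body */ }
};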
- if (InsertedThunks) - return false; - // Only add a thunk if one of the functions has the corresponding feature - // enabled in its subtarget, and doesn't enable external thunks. + // enabled in its subtarget, and doesn't enable external thunks. The target + // can use InsertedThunks to detect whether relevant thunks have already + // been inserted. // FIXME: Conditionalize on indirect calls so we don't emit a thunk when // nothing will end up calling it. // FIXME: It's a little silly to look at every function just to enumerate // the subtargets, but eventually we'll want to look at them for indirect // calls, so maybe this is OK. - if (!getDerived().mayUseThunk(MF)) + if (!getDerived().mayUseThunk(MF, InsertedThunks)) return false; - getDerived().insertThunks(MMI); - InsertedThunks = true; + InsertedThunks |= getDerived().insertThunks(MMI, MF); return true; } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h index d615a5db4504..565d1c36d300 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -14,7 +14,6 @@ #ifndef LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H #define LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H -#include "llvm/IR/BuiltinGCs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/Target/TargetMachine.h" @@ -38,8 +37,6 @@ namespace { (void) llvm::createGreedyRegisterAllocator(); (void) llvm::createDefaultPBQPRegisterAllocator(); - llvm::linkAllBuiltinGCs(); - (void) llvm::createBURRListDAGScheduler(nullptr, llvm::CodeGenOpt::Default); (void) llvm::createSourceListDAGScheduler(nullptr, diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h index 3b61563cb598..507258fe665a 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -18,7 +18,6 @@ #define LLVM_CODEGEN_LIVERANGEEDIT_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -134,7 +133,7 @@ public: : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis), VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate), FirstNew(newRegs.size()), DeadRemats(deadRemats) { - MRI.setDelegate(this); + MRI.addDelegate(this); } ~LiveRangeEdit() override { MRI.resetDelegate(this); } @@ -164,9 +163,7 @@ public: /// we want to drop it from the NewRegs set. void pop_back() { NewRegs.pop_back(); } - ArrayRef<Register> regs() const { - return makeArrayRef(NewRegs).slice(FirstNew); - } + ArrayRef<Register> regs() const { return ArrayRef(NewRegs).slice(FirstNew); } /// createFrom - Create a new virtual register based on OldReg. Register createFrom(Register OldReg); @@ -210,12 +207,14 @@ public: /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an /// instruction into MBB before MI. The new instruction is mapped, but - /// liveness is not updated. + /// liveness is not updated. If ReplaceIndexMI is not null it will be replaced + /// by new MI in the index map. /// Return the SlotIndex of the new instruction. 
SlotIndex rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, const Remat &RM, const TargetRegisterInfo &, - bool Late = false); + bool Late = false, unsigned SubIdx = 0, + MachineInstr *ReplaceIndexMI = nullptr); /// markRematerialized - explicitly mark a value as rematerialized after doing /// it manually. @@ -239,7 +238,7 @@ public: /// allocator. These registers should not be split into new intervals /// as currently those new intervals are not guaranteed to spill. void eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, - ArrayRef<Register> RegsBeingSpilled = None); + ArrayRef<Register> RegsBeingSpilled = std::nullopt); /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h index 39a1ec461ef6..a5a8fc6d92a3 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h @@ -166,8 +166,8 @@ inline iterator_range<filter_iterator< phys_regs_and_masks(const MachineInstr &MI) { std::function<bool(const MachineOperand &)> Pred = [](const MachineOperand &MOP) { - return MOP.isRegMask() || (MOP.isReg() && !MOP.isDebug() && - Register::isPhysicalRegister(MOP.getReg())); + return MOP.isRegMask() || + (MOP.isReg() && !MOP.isDebug() && MOP.getReg().isPhysical()); }; return make_filter_range(const_mi_bundle_ops(MI), Pred); } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MBFIWrapper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MBFIWrapper.h index 7c0a4c6e0252..714ecc5d4334 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MBFIWrapper.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MBFIWrapper.h @@ -15,8 +15,9 @@ #define LLVM_CODEGEN_MBFIWRAPPER_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/raw_ostream.h" +#include <optional> namespace llvm { @@ -29,7 +30,8 @@ class MBFIWrapper { BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); - Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const; + std::optional<uint64_t> + getBlockProfileCount(const MachineBasicBlock *MBB) const; raw_ostream &printBlockFreq(raw_ostream &OS, const MachineBasicBlock *MBB) const; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h index fb276ff117af..a039da1bd5b3 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h @@ -13,10 +13,10 @@ #ifndef LLVM_CODEGEN_MIRFORMATTER_H #define LLVM_CODEGEN_MIRFORMATTER_H -#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/raw_ostream.h" #include <cstdint> +#include <optional> namespace llvm { @@ -37,7 +37,7 @@ public: /// that we can have more meaningful mnemonic than a 64-bit integer. Passing /// None to OpIdx means the index is unknown. 
virtual void printImm(raw_ostream &OS, const MachineInstr &MI, - Optional<unsigned> OpIdx, int64_t Imm) const { + std::optional<unsigned> OpIdx, int64_t Imm) const { OS << Imm; } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h index aa9891a80a32..e1606e7c0ea7 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -17,13 +17,11 @@ #ifndef LLVM_CODEGEN_MIRPARSER_MIRPARSER_H #define LLVM_CODEGEN_MIRPARSER_MIRPARSER_H -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include <functional> #include <memory> +#include <optional> namespace llvm { @@ -36,7 +34,7 @@ class MachineModuleInfo; class SMDiagnostic; class StringRef; -typedef llvm::function_ref<Optional<std::string>(StringRef)> +typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)> DataLayoutCallbackTy; /// This class initializes machine functions by applying the state loaded from @@ -53,8 +51,9 @@ public: /// /// A new, empty module is created if the LLVM IR isn't present. /// \returns nullptr if a parsing error occurred. - std::unique_ptr<Module> parseIRModule( - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + std::unique_ptr<Module> + parseIRModule(DataLayoutCallbackTy DataLayoutCallback = + [](StringRef, StringRef) { return std::nullopt; }); /// Parses MachineFunctions in the MIR file and add them to the given /// MachineModuleInfo \p MMI. diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRYamlMapping.h index 25247437b641..62911c2bd741 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -14,7 +14,6 @@ #ifndef LLVM_CODEGEN_MIRYAMLMAPPING_H #define LLVM_CODEGEN_MIRYAMLMAPPING_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" @@ -23,6 +22,7 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cstdint> +#include <optional> #include <string> #include <vector> @@ -245,11 +245,11 @@ struct MachineStackObject { ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; TargetStackID::Value StackID; StringValue CalleeSavedRegister; bool CalleeSavedRestored = true; - Optional<int64_t> LocalOffset; + std::optional<int64_t> LocalOffset; StringValue DebugVar; StringValue DebugExpr; StringValue DebugLoc; @@ -285,13 +285,14 @@ template <> struct MappingTraits<MachineStackObject> { YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); - YamlIO.mapOptional("alignment", Object.Alignment, None); + YamlIO.mapOptional("alignment", Object.Alignment, std::nullopt); YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default); YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. 
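// A minimal sketch (not part of the patch) of the two-argument data-layout
// callback now taken by parseIRModule(); `MIR` is an assumed
// std::unique_ptr<MIRParser>, and the parameter meanings (target triple and
// the module's existing data-layout string) are assumptions on my part.
auto Mod = MIR->parseIRModule(
    [](StringRef TargetTriple, StringRef OldDL) -> std::optional<std::string> {
      return std::nullopt; // keep the data layout recorded in the file
    });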
YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, true); - YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional<int64_t>()); + YamlIO.mapOptional("local-offset", Object.LocalOffset, + std::optional<int64_t>()); YamlIO.mapOptional("debug-info-variable", Object.DebugVar, StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("debug-info-expression", Object.DebugExpr, @@ -311,7 +312,7 @@ struct FixedMachineStackObject { ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; TargetStackID::Value StackID; bool IsImmutable = false; bool IsAliased = false; @@ -361,7 +362,7 @@ template <> struct MappingTraits<FixedMachineStackObject> { FixedMachineStackObject::DefaultType); // Don't print the default type. YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); YamlIO.mapOptional("size", Object.Size, (uint64_t)0); - YamlIO.mapOptional("alignment", Object.Alignment, None); + YamlIO.mapOptional("alignment", Object.Alignment, std::nullopt); YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default); if (Object.Type != FixedMachineStackObject::SpillSlot) { YamlIO.mapOptional("isImmutable", Object.IsImmutable, false); @@ -521,7 +522,7 @@ namespace yaml { struct MachineConstantPoolValue { UnsignedValue ID; StringValue Value; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; bool IsTargetSpecific = false; bool operator==(const MachineConstantPoolValue &Other) const { @@ -535,7 +536,7 @@ template <> struct MappingTraits<MachineConstantPoolValue> { static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) { YamlIO.mapRequired("id", Constant.ID); YamlIO.mapOptional("value", Constant.Value, StringValue()); - YamlIO.mapOptional("alignment", Constant.Alignment, None); + YamlIO.mapOptional("alignment", Constant.Alignment, std::nullopt); YamlIO.mapOptional("isTargetSpecific", Constant.IsTargetSpecific, false); } }; @@ -687,7 +688,7 @@ template <> struct MappingTraits<std::unique_ptr<MachineFunctionInfo>> { struct MachineFunction { StringRef Name; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; bool ExposesReturnsTwice = false; // GISel MachineFunctionProperties. bool Legalized = false; @@ -706,9 +707,10 @@ struct MachineFunction { bool FailsVerification = false; bool TracksDebugUserValues = false; + bool UseDebugInstrRef = false; std::vector<VirtualRegisterDefinition> VirtualRegisters; std::vector<MachineFunctionLiveIn> LiveIns; - Optional<std::vector<FlowStringValue>> CalleeSavedRegisters; + std::optional<std::vector<FlowStringValue>> CalleeSavedRegisters; // TODO: Serialize the various register masks. 
// Frame information MachineFrameInfo FrameInfo; @@ -726,7 +728,7 @@ struct MachineFunction { template <> struct MappingTraits<MachineFunction> { static void mapping(IO &YamlIO, MachineFunction &MF) { YamlIO.mapRequired("name", MF.Name); - YamlIO.mapOptional("alignment", MF.Alignment, None); + YamlIO.mapOptional("alignment", MF.Alignment, std::nullopt); YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice, false); YamlIO.mapOptional("legalized", MF.Legalized, false); YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false); @@ -740,6 +742,7 @@ template <> struct MappingTraits<MachineFunction> { YamlIO.mapOptional("hasEHCatchret", MF.HasEHCatchret, false); YamlIO.mapOptional("hasEHScopes", MF.HasEHScopes, false); YamlIO.mapOptional("hasEHFunclets", MF.HasEHFunclets, false); + YamlIO.mapOptional("debugInstrRef", MF.UseDebugInstrRef, false); YamlIO.mapOptional("failsVerification", MF.FailsVerification, false); YamlIO.mapOptional("tracksDebugUserValues", MF.TracksDebugUserValues, @@ -749,7 +752,7 @@ template <> struct MappingTraits<MachineFunction> { YamlIO.mapOptional("liveins", MF.LiveIns, std::vector<MachineFunctionLiveIn>()); YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters, - Optional<std::vector<FlowStringValue>>()); + std::optional<std::vector<FlowStringValue>>()); YamlIO.mapOptional("frameInfo", MF.FrameInfo, MachineFrameInfo()); YamlIO.mapOptional("fixedStack", MF.FixedStackObjects, std::vector<FixedMachineStackObject>()); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h index ddfbd4018590..1ab24b554f5b 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -126,7 +126,7 @@ private: using const_probability_iterator = std::vector<BranchProbability>::const_iterator; - Optional<uint64_t> IrrLoopHeaderWeight; + std::optional<uint64_t> IrrLoopHeaderWeight; /// Keep track of the physical registers that are livein of the basicblock. using LiveInVector = std::vector<RegisterMaskPair>; @@ -143,9 +143,13 @@ private: /// Indicate that this basic block is entered via an exception handler. bool IsEHPad = false; - /// Indicate that this basic block is potentially the target of an indirect - /// branch. - bool AddressTaken = false; + /// Indicate that this MachineBasicBlock is referenced somewhere other than + /// as predecessor/successor, a terminator MachineInstr, or a jump table. + bool MachineBlockAddressTaken = false; + + /// If this MachineBasicBlock corresponds to an IR-level "blockaddress" + /// constant, this contains a pointer to that block. + BasicBlock *AddressTakenIRBlock = nullptr; /// Indicate that this basic block needs its symbol be emitted regardless of /// whether the flow just falls-through to it. @@ -165,6 +169,10 @@ private: /// Indicate that this basic block is the entry block of a cleanup funclet. bool IsCleanupFuncletEntry = false; + /// Fixed unique ID assigned to this basic block upon creation. Used with + /// basic block sections and basic block labels. + std::optional<unsigned> BBID; + /// With basic block sections, this stores the Section ID of the basic block. MBBSectionID SectionID{0}; @@ -216,12 +224,35 @@ public: /// Return a formatted string to identify this block and its parent function. std::string getFullName() const; - /// Test whether this block is potentially the target of an indirect branch. 
- bool hasAddressTaken() const { return AddressTaken; } + /// Test whether this block is used as as something other than the target + /// of a terminator, exception-handling target, or jump table. This is + /// either the result of an IR-level "blockaddress", or some form + /// of target-specific branch lowering. + bool hasAddressTaken() const { + return MachineBlockAddressTaken || AddressTakenIRBlock; + } + + /// Test whether this block is used as something other than the target of a + /// terminator, exception-handling target, jump table, or IR blockaddress. + /// For example, its address might be loaded into a register, or + /// stored in some branch table that isn't part of MachineJumpTableInfo. + bool isMachineBlockAddressTaken() const { return MachineBlockAddressTaken; } + + /// Test whether this block is the target of an IR BlockAddress. (There can + /// more than one MBB associated with an IR BB where the address is taken.) + bool isIRBlockAddressTaken() const { return AddressTakenIRBlock; } + + /// Retrieves the BasicBlock which corresponds to this MachineBasicBlock. + BasicBlock *getAddressTakenIRBlock() const { return AddressTakenIRBlock; } + + /// Set this block to indicate that its address is used as something other + /// than the target of a terminator, exception-handling target, jump table, + /// or IR-level "blockaddress". + void setMachineBlockAddressTaken() { MachineBlockAddressTaken = true; } - /// Set this block to reflect that it potentially is the target of an indirect - /// branch. - void setHasAddressTaken() { AddressTaken = true; } + /// Set this block to reflect that it corresponds to an IR-level basic block + /// with a BlockAddress. + void setAddressTakenIRBlock(BasicBlock *BB) { AddressTakenIRBlock = BB; } /// Test whether this block must have its label emitted. bool hasLabelMustBeEmitted() const { return LabelMustBeEmitted; } @@ -593,6 +624,14 @@ public: void setIsEndSection(bool V = true) { IsEndSection = V; } + std::optional<unsigned> getBBID() const { return BBID; } + + /// Returns the BBID of the block when BBAddrMapVersion >= 2, otherwise + /// returns `MachineBasicBlock::Number`. + /// TODO: Remove this function when version 1 is deprecated and replace its + /// uses with `getBBID()`. + unsigned getBBIDOrNumber() const; + /// Returns the section ID of this basic block. MBBSectionID getSectionID() const { return SectionID; } @@ -602,6 +641,12 @@ public: ((unsigned)SectionID.Type) + SectionID.Number; } + /// Sets the fixed BBID of this basic block. + void setBBID(unsigned V) { + assert(!BBID.has_value() && "Cannot change BBID."); + BBID = V; + } + /// Sets the section ID for this basic block. void setSectionID(MBBSectionID V) { SectionID = V; } @@ -748,10 +793,15 @@ public: /// Return the fallthrough block if the block can implicitly /// transfer control to the block after it by falling off the end of - /// it. This should return null if it can reach the block after - /// it, but it uses an explicit branch to do so (e.g., a table - /// jump). Non-null return is a conservative answer. - MachineBasicBlock *getFallThrough(); + /// it. If an explicit branch to the fallthrough block is not allowed, + /// set JumpToFallThrough to be false. Non-null return is a conservative + /// answer. + MachineBasicBlock *getFallThrough(bool JumpToFallThrough = false); + + /// Return the fallthrough block if the block can implicitly + /// transfer control to it's successor, whether by a branch or + /// a fallthrough. Non-null return is a conservative answer. 
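// A minimal sketch (not part of the patch) of the finer-grained queries that
// replace the old single AddressTaken flag; `MBB` is an assumed
// MachineBasicBlock reference.
if (MBB.isIRBlockAddressTaken()) {
  // Came from an IR-level blockaddress; the originating IR block is kept.
  BasicBlock *IRBB = MBB.getAddressTakenIRBlock();
  (void)IRBB;
} else if (MBB.isMachineBlockAddressTaken()) {
  // Address only used by target-specific lowering, e.g. a branch table that
  // is not modelled by MachineJumpTableInfo.
}
assert(MBB.hasAddressTaken() ==
       (MBB.isIRBlockAddressTaken() || MBB.isMachineBlockAddressTaken()));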
+ MachineBasicBlock *getLogicalFallThrough() { return getFallThrough(true); } /// Return true if the block can implicitly transfer control to the /// block after it by falling off the end of it. This should return @@ -788,6 +838,11 @@ public: /// instr_iterator instead. instr_iterator getFirstInstrTerminator(); + /// Finds the first terminator in a block by scanning forward. This can handle + /// cases in GlobalISel where there may be non-terminator instructions between + /// terminators, for which getFirstTerminator() will not work correctly. + iterator getFirstTerminatorForward(); + /// Returns an iterator to the first non-debug instruction in the basic block, /// or end(). Skip any pseudo probe operation if \c SkipPseudoOp is true. /// Pseudo probes are like debug instructions which do not turn into real @@ -1094,7 +1149,7 @@ public: /// Return the EHCatchret Symbol for this basic block. MCSymbol *getEHCatchretSymbol() const; - Optional<uint64_t> getIrrLoopHeaderWeight() const { + std::optional<uint64_t> getIrrLoopHeaderWeight() const { return IrrLoopHeaderWeight; } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index 6c442d3d07bd..2290d26c0827 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -13,11 +13,11 @@ #ifndef LLVM_CODEGEN_MACHINEBLOCKFREQUENCYINFO_H #define LLVM_CODEGEN_MACHINEBLOCKFREQUENCYINFO_H -#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/BlockFrequency.h" #include <cstdint> #include <memory> +#include <optional> namespace llvm { @@ -69,8 +69,9 @@ public: return getBlockFreq(MBB).getFrequency() * (1.0f / getEntryFreq()); } - Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const; - Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const; + std::optional<uint64_t> + getBlockProfileCount(const MachineBasicBlock *MBB) const; + std::optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const; bool isIrrLoopHeader(const MachineBasicBlock *MBB) const; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCFGPrinter.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCFGPrinter.h new file mode 100644 index 000000000000..ea3ff5a5c828 --- /dev/null +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCFGPrinter.h @@ -0,0 +1,92 @@ +//===-- MachineCFGPrinter.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CFGPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/DOTGraphTraits.h" + +namespace llvm { + +template <class GraphType> struct GraphTraits; +class DOTMachineFuncInfo { +private: + const MachineFunction *F; + +public: + DOTMachineFuncInfo(const MachineFunction *F) : F(F) {} + + const MachineFunction *getFunction() const { return this->F; } +}; + +template <> +struct GraphTraits<DOTMachineFuncInfo *> + : public GraphTraits<const MachineBasicBlock *> { + static NodeRef getEntryNode(DOTMachineFuncInfo *CFGInfo) { + return &(CFGInfo->getFunction()->front()); + } + + // nodes_iterator/begin/end - Allow iteration over all nodes in the graph + using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>; + + static nodes_iterator nodes_begin(DOTMachineFuncInfo *CFGInfo) { + return nodes_iterator(CFGInfo->getFunction()->begin()); + } + + static nodes_iterator nodes_end(DOTMachineFuncInfo *CFGInfo) { + return nodes_iterator(CFGInfo->getFunction()->end()); + } + + static size_t size(DOTMachineFuncInfo *CFGInfo) { + return CFGInfo->getFunction()->size(); + } +}; + +template <> +struct DOTGraphTraits<DOTMachineFuncInfo *> : public DefaultDOTGraphTraits { + + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + static void eraseComment(std::string &OutStr, unsigned &I, unsigned Idx) { + OutStr.erase(OutStr.begin() + I, OutStr.begin() + Idx); + --I; + } + + static std::string getSimpleNodeLabel(const MachineBasicBlock *Node, + DOTMachineFuncInfo *) { + return SimpleNodeLabelString(Node); + } + + static std::string getCompleteNodeLabel( + const MachineBasicBlock *Node, DOTMachineFuncInfo *, + function_ref<void(raw_string_ostream &, const MachineBasicBlock &)> + HandleBasicBlock = + [](raw_string_ostream &OS, + const MachineBasicBlock &Node) -> void { OS << Node; }, + function_ref<void(std::string &, unsigned &, unsigned)> + HandleComment = eraseComment) { + return CompleteNodeLabelString(Node, HandleBasicBlock, HandleComment); + } + + std::string getNodeLabel(const MachineBasicBlock *Node, + DOTMachineFuncInfo *CFGInfo) { + if (isSimple()) + return getSimpleNodeLabel(Node, CFGInfo); + + return getCompleteNodeLabel(Node, CFGInfo); + } + + static std::string getGraphName(DOTMachineFuncInfo *CFGInfo) { + return "Machine CFG for '" + CFGInfo->getFunction()->getName().str() + + "' function"; + } +}; +} // namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index 68c95679d466..39e70d583710 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -169,6 +169,12 @@ enum class MachineCombinerPattern { FMULv4i32_indexed_OP2, FMULv8i16_indexed_OP1, FMULv8i16_indexed_OP2, + + // RISCV FMADD, FMSUB, FNMSUB patterns + FMADD_AX, + FMADD_XA, + FMSUB, + FNMSUB, }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h index 3f89f2076d50..1c210632f9e1 100644 --- 
a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h @@ -15,9 +15,8 @@ #define LLVM_CODEGEN_MACHINECYCLEANALYSIS_H #include "llvm/ADT/GenericCycleInfo.h" -#include "llvm/CodeGen/MachineSSAContext.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/InitializePasses.h" +#include "llvm/CodeGen/MachineSSAContext.h" namespace llvm { diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 7ea731b46655..7d11d63d4066 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Support/Alignment.h" #include <cassert> #include <vector> @@ -486,14 +487,21 @@ public: return Objects[ObjectIdx + NumFixedObjects].Alignment; } + /// Should this stack ID be considered in MaxAlignment. + bool contributesToMaxAlignment(uint8_t StackID) { + return StackID == TargetStackID::Default || + StackID == TargetStackID::ScalableVector; + } + /// setObjectAlignment - Change the alignment of the specified stack object. void setObjectAlignment(int ObjectIdx, Align Alignment) { assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); Objects[ObjectIdx + NumFixedObjects].Alignment = Alignment; - // Only ensure max alignment for the default stack. - if (getStackID(ObjectIdx) == 0) + // Only ensure max alignment for the default and scalable vector stack. + uint8_t StackID = getStackID(ObjectIdx); + if (contributesToMaxAlignment(StackID)) ensureMaxAlignment(Alignment); } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h index fc1188186ac4..220d18d15821 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h @@ -99,9 +99,10 @@ struct MachineFunctionInfo { /// supplied allocator. /// /// This function can be overridden in a derive class. - template<typename Ty> - static Ty *create(BumpPtrAllocator &Allocator, MachineFunction &MF) { - return new (Allocator.Allocate<Ty>()) Ty(MF); + template <typename FuncInfoTy, typename SubtargetTy = TargetSubtargetInfo> + static FuncInfoTy *create(BumpPtrAllocator &Allocator, const Function &F, + const SubtargetTy *STI) { + return new (Allocator.Allocate<FuncInfoTy>()) FuncInfoTy(F, STI); } template <typename Ty> @@ -280,6 +281,7 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { // Keep track of the function section. MCSection *Section = nullptr; + // Catchpad unwind destination info for wasm EH. // Keeps track of Wasm exception handling related data. This will be null for // functions that aren't using a wasm EH personality. WasmEHFuncInfo *WasmEHInfo = nullptr; @@ -373,6 +375,9 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { bool HasEHScopes = false; bool HasEHFunclets = false; + /// BBID to assign to the next basic block of this function. + unsigned NextBBID = 0; + /// Section Type for basic blocks, only relevant with basic block sections. BasicBlockSection BBSectionsType = BasicBlockSection::None; @@ -522,6 +527,10 @@ public: /// during register allocation. See DebugPHIRegallocPos. 
DenseMap<unsigned, DebugPHIRegallocPos> DebugPHIPositions; + /// Flag for whether this function contains DBG_VALUEs (false) or + /// DBG_INSTR_REF (true). + bool UseDebugInstrRef = false; + /// Create a substitution between one <instr,operand> value to a different, /// new value. void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, @@ -562,10 +571,17 @@ public: /// (or DBG_PHI). void finalizeDebugInstrRefs(); - /// Returns true if the function's variable locations should be tracked with + /// Determine whether, in the current machine configuration, we should use + /// instruction referencing or not. + bool shouldUseDebugInstrRef() const; + + /// Returns true if the function's variable locations are tracked with /// instruction referencing. bool useDebugInstrRef() const; + /// Set whether this function will use instruction referencing or not. + void setUseDebugInstrRef(bool UseInstrRef); + /// A reserved operand number representing the instructions memory operand, /// for instructions that have a stack spill fused into them. const static unsigned int DebugOperandMemNumber; @@ -752,14 +768,12 @@ public: /// template<typename Ty> Ty *getInfo() { - if (!MFInfo) - MFInfo = Ty::template create<Ty>(Allocator, *this); return static_cast<Ty*>(MFInfo); } template<typename Ty> const Ty *getInfo() const { - return const_cast<MachineFunction*>(this)->getInfo<Ty>(); + return static_cast<const Ty *>(MFInfo); } template <typename Ty> Ty *cloneInfo(const Ty &Old) { @@ -768,6 +782,9 @@ public: return static_cast<Ty *>(MFInfo); } + /// Initialize the target specific MachineFunctionInfo + void initTargetMachineFunctionInfo(const TargetSubtargetInfo &STI); + MachineFunctionInfo *cloneInfoFrom( const MachineFunction &OrigMF, const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) { @@ -1030,7 +1047,8 @@ public: /// the function. MachineInstr::ExtraInfo *createMIExtraInfo( ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol = nullptr, - MCSymbol *PostInstrSymbol = nullptr, MDNode *HeapAllocMarker = nullptr); + MCSymbol *PostInstrSymbol = nullptr, MDNode *HeapAllocMarker = nullptr, + MDNode *PCSections = nullptr, uint32_t CFIType = 0); /// Allocate a string and populate it with the given external symbol name. const char *createExternalSymbolName(StringRef Name); @@ -1055,7 +1073,7 @@ public: return FrameInstructions; } - LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst); + [[nodiscard]] unsigned addFrameInst(const MCCFIInstruction &Inst); /// Returns a reference to a list of symbols immediately following calls to /// _setjmp in the function. Used to construct the longjmp target table used @@ -1101,10 +1119,6 @@ public: /// Find or create an LandingPadInfo for the specified MachineBasicBlock. LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad); - /// Remap landing pad labels and remove any deleted landing pads. - void tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap = nullptr, - bool TidyIfNoBeginLabels = true); - /// Return a reference to the landing pad info for the current function. const std::vector<LandingPadInfo> &getLandingPads() const { return LandingPads; @@ -1120,22 +1134,11 @@ public: /// entry. MCSymbol *addLandingPad(MachineBasicBlock *LandingPad); - /// Provide the catch typeinfo for a landing pad. - void addCatchTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalValue *> TyInfo); - - /// Provide the filter typeinfo for a landing pad. 
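The new shouldUseDebugInstrRef()/setUseDebugInstrRef() pair separates the decision from the recorded state. A hedged sketch of how a pass preparing the function might wire them together (MF is assumed to be the current MachineFunction):

  if (MF.shouldUseDebugInstrRef())
    MF.setUseDebugInstrRef(true);  // from here on, useDebugInstrRef() reports true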
- void addFilterTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalValue *> TyInfo); - - /// Add a cleanup action for a landing pad. - void addCleanup(MachineBasicBlock *LandingPad); - /// Return the type id for the specified typeinfo. This is function wide. unsigned getTypeIDFor(const GlobalValue *TI); /// Return the id of the filter encoded by TyIds. This is function wide. - int getFilterIDFor(std::vector<unsigned> &TyIds); + int getFilterIDFor(ArrayRef<unsigned> TyIds); /// Map the landing pad's EH symbol to the call site indexes. void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h index 5f483a8d0312..272360e12372 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h @@ -144,24 +144,28 @@ private: /// /// This has to be defined eagerly due to the implementation constraints of /// `PointerSumType` where it is used. - class ExtraInfo final - : TrailingObjects<ExtraInfo, MachineMemOperand *, MCSymbol *, MDNode *> { + class ExtraInfo final : TrailingObjects<ExtraInfo, MachineMemOperand *, + MCSymbol *, MDNode *, uint32_t> { public: static ExtraInfo *create(BumpPtrAllocator &Allocator, ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol = nullptr, MCSymbol *PostInstrSymbol = nullptr, - MDNode *HeapAllocMarker = nullptr) { + MDNode *HeapAllocMarker = nullptr, + MDNode *PCSections = nullptr, + uint32_t CFIType = 0) { bool HasPreInstrSymbol = PreInstrSymbol != nullptr; bool HasPostInstrSymbol = PostInstrSymbol != nullptr; bool HasHeapAllocMarker = HeapAllocMarker != nullptr; + bool HasCFIType = CFIType != 0; + bool HasPCSections = PCSections != nullptr; auto *Result = new (Allocator.Allocate( - totalSizeToAlloc<MachineMemOperand *, MCSymbol *, MDNode *>( + totalSizeToAlloc<MachineMemOperand *, MCSymbol *, MDNode *, uint32_t>( MMOs.size(), HasPreInstrSymbol + HasPostInstrSymbol, - HasHeapAllocMarker), + HasHeapAllocMarker + HasPCSections, HasCFIType), alignof(ExtraInfo))) ExtraInfo(MMOs.size(), HasPreInstrSymbol, HasPostInstrSymbol, - HasHeapAllocMarker); + HasHeapAllocMarker, HasPCSections, HasCFIType); // Copy the actual data into the trailing objects. std::copy(MMOs.begin(), MMOs.end(), @@ -174,12 +178,17 @@ private: PostInstrSymbol; if (HasHeapAllocMarker) Result->getTrailingObjects<MDNode *>()[0] = HeapAllocMarker; + if (HasPCSections) + Result->getTrailingObjects<MDNode *>()[HasHeapAllocMarker] = + PCSections; + if (HasCFIType) + Result->getTrailingObjects<uint32_t>()[0] = CFIType; return Result; } ArrayRef<MachineMemOperand *> getMMOs() const { - return makeArrayRef(getTrailingObjects<MachineMemOperand *>(), NumMMOs); + return ArrayRef(getTrailingObjects<MachineMemOperand *>(), NumMMOs); } MCSymbol *getPreInstrSymbol() const { @@ -196,6 +205,16 @@ private: return HasHeapAllocMarker ? getTrailingObjects<MDNode *>()[0] : nullptr; } + MDNode *getPCSections() const { + return HasPCSections + ? getTrailingObjects<MDNode *>()[HasHeapAllocMarker] + : nullptr; + } + + uint32_t getCFIType() const { + return HasCFIType ? getTrailingObjects<uint32_t>()[0] : 0; + } + private: friend TrailingObjects; @@ -208,6 +227,8 @@ private: const bool HasPreInstrSymbol; const bool HasPostInstrSymbol; const bool HasHeapAllocMarker; + const bool HasPCSections; + const bool HasCFIType; // Implement the `TrailingObjects` internal API. 
size_t numTrailingObjects(OverloadToken<MachineMemOperand *>) const { @@ -217,16 +238,20 @@ private: return HasPreInstrSymbol + HasPostInstrSymbol; } size_t numTrailingObjects(OverloadToken<MDNode *>) const { - return HasHeapAllocMarker; + return HasHeapAllocMarker + HasPCSections; + } + size_t numTrailingObjects(OverloadToken<uint32_t>) const { + return HasCFIType; } // Just a boring constructor to allow us to initialize the sizes. Always use // the `create` routine above. ExtraInfo(int NumMMOs, bool HasPreInstrSymbol, bool HasPostInstrSymbol, - bool HasHeapAllocMarker) + bool HasHeapAllocMarker, bool HasPCSections, bool HasCFIType) : NumMMOs(NumMMOs), HasPreInstrSymbol(HasPreInstrSymbol), HasPostInstrSymbol(HasPostInstrSymbol), - HasHeapAllocMarker(HasHeapAllocMarker) {} + HasHeapAllocMarker(HasHeapAllocMarker), HasPCSections(HasPCSections), + HasCFIType(HasCFIType) {} }; /// Enumeration of the kinds of inline extra info available. It is important @@ -567,7 +592,7 @@ public: /// Returns the total number of definitions. unsigned getNumDefs() const { - return getNumExplicitDefs() + MCID->getNumImplicitDefs(); + return getNumExplicitDefs() + MCID->implicit_defs().size(); } /// Returns true if the instruction has implicit definition. @@ -636,17 +661,17 @@ public: /// Returns a range over all operands that are used to determine the variable /// location for this DBG_VALUE instruction. iterator_range<mop_iterator> debug_operands() { - assert(isDebugValue() && "Must be a debug value instruction."); - return isDebugValueList() - ? make_range(operands_begin() + 2, operands_end()) - : make_range(operands_begin(), operands_begin() + 1); + assert((isDebugValueLike()) && "Must be a debug value instruction."); + return isNonListDebugValue() + ? make_range(operands_begin(), operands_begin() + 1) + : make_range(operands_begin() + 2, operands_end()); } /// \copydoc debug_operands() iterator_range<const_mop_iterator> debug_operands() const { - assert(isDebugValue() && "Must be a debug value instruction."); - return isDebugValueList() - ? make_range(operands_begin() + 2, operands_end()) - : make_range(operands_begin(), operands_begin() + 1); + assert((isDebugValueLike()) && "Must be a debug value instruction."); + return isNonListDebugValue() + ? make_range(operands_begin(), operands_begin() + 1) + : make_range(operands_begin() + 2, operands_end()); } /// Returns a range over all explicit operands that are register definitions. /// Implicit definition are not included! @@ -690,7 +715,7 @@ public: return {}; if (Info.is<EIIK_MMO>()) - return makeArrayRef(Info.getAddrOfZeroTagPointer(), 1); + return ArrayRef(Info.getAddrOfZeroTagPointer(), 1); if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>()) return EI->getMMOs(); @@ -757,6 +782,26 @@ public: return nullptr; } + /// Helper to extract PCSections metadata target sections. + MDNode *getPCSections() const { + if (!Info) + return nullptr; + if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>()) + return EI->getPCSections(); + + return nullptr; + } + + /// Helper to extract a CFI type hash if one has been added. + uint32_t getCFIType() const { + if (!Info) + return 0; + if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>()) + return EI->getCFIType(); + + return 0; + } + /// API for querying MachineInstr properties. They are the same as MCInstrDesc /// queries but they are bundle aware. @@ -923,6 +968,8 @@ public: /// For example, if the instruction has a unique labels attached /// to it, duplicating it would cause multiple definition errors. 
bool isNotDuplicable(QueryType Type = AnyInBundle) const { + if (getPreInstrSymbol() || getPostInstrSymbol()) + return true; return hasProperty(MCID::NotDuplicable, Type); } @@ -1156,6 +1203,13 @@ public: bool isIdenticalTo(const MachineInstr &Other, MICheckType Check = CheckDefs) const; + /// Returns true if this instruction is a debug instruction that represents an + /// identical debug value to \p Other. + /// This function considers these debug instructions equivalent if they have + /// identical variables, debug locations, and debug operands, and if the + /// DIExpressions combined with the directness flags are equivalent. + bool isEquivalentDbgInstr(const MachineInstr &Other) const; + /// Unlink 'this' from the containing basic block, and return it without /// deleting it. /// @@ -1216,6 +1270,7 @@ public: } bool isDebugLabel() const { return getOpcode() == TargetOpcode::DBG_LABEL; } bool isDebugRef() const { return getOpcode() == TargetOpcode::DBG_INSTR_REF; } + bool isDebugValueLike() const { return isDebugValue() || isDebugRef(); } bool isDebugPHI() const { return getOpcode() == TargetOpcode::DBG_PHI; } bool isDebugInstr() const { return isDebugValue() || isDebugLabel() || isDebugRef() || isDebugPHI(); @@ -1639,16 +1694,16 @@ public: bool allDefsAreDead() const; /// Return a valid size if the instruction is a spill instruction. - Optional<unsigned> getSpillSize(const TargetInstrInfo *TII) const; + std::optional<unsigned> getSpillSize(const TargetInstrInfo *TII) const; /// Return a valid size if the instruction is a folded spill instruction. - Optional<unsigned> getFoldedSpillSize(const TargetInstrInfo *TII) const; + std::optional<unsigned> getFoldedSpillSize(const TargetInstrInfo *TII) const; /// Return a valid size if the instruction is a restore instruction. - Optional<unsigned> getRestoreSize(const TargetInstrInfo *TII) const; + std::optional<unsigned> getRestoreSize(const TargetInstrInfo *TII) const; /// Return a valid size if the instruction is a folded restore instruction. - Optional<unsigned> + std::optional<unsigned> getFoldedRestoreSize(const TargetInstrInfo *TII) const; /// Copy implicit register operands from specified @@ -1786,6 +1841,13 @@ public: /// instruction is removed or duplicated. void setHeapAllocMarker(MachineFunction &MF, MDNode *MD); + // Set metadata on instructions that say which sections to emit instruction + // addresses into. + void setPCSections(MachineFunction &MF, MDNode *MD); + + /// Set the CFI type for the instruction. + void setCFIType(MachineFunction &MF, uint32_t Type); + /// Return the MIFlags which represent both MachineInstrs. This /// should be used when merging two MachineInstrs into one. This routine does /// not modify the MIFlags of this MachineInstr. @@ -1864,7 +1926,8 @@ private: /// based on the number of pointers. 
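A short sketch of the per-instruction metadata setters declared above (illustrative only; MI, MF and PCSectionsMD are assumed to exist in the caller, and the CFI type value is arbitrary):

  MI.setPCSections(MF, PCSectionsMD);   // MDNode naming the sections to emit into
  MI.setCFIType(MF, /*Type=*/0x1234u);  // non-zero hash stored in the out-of-line info
  assert(MI.getPCSections() == PCSectionsMD && MI.getCFIType() == 0x1234u);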
void setExtraInfo(MachineFunction &MF, ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol, - MDNode *HeapAllocMarker); + MDNode *HeapAllocMarker, MDNode *PCSections, + uint32_t CFIType); }; /// Special DenseMapInfo traits to compare MachineInstr* by *value* of the diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index 80f30231aef2..c35bdc0c2b44 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -235,8 +235,8 @@ public: const MachineInstrBuilder &addMetadata(const MDNode *MD) const { MI->addOperand(*MF, MachineOperand::CreateMetadata(MD)); - assert((MI->isDebugValue() ? static_cast<bool>(MI->getDebugVariable()) - : true) && + assert((MI->isDebugValueLike() ? static_cast<bool>(MI->getDebugVariable()) + : true) && "first MDNode argument of a DBG_VALUE not a variable"); assert((MI->isDebugLabel() ? static_cast<bool>(MI->getDebugLabel()) : true) && @@ -310,6 +310,12 @@ public: } } + const MachineInstrBuilder &setPCSections(MDNode *MD) const { + if (MD) + MI->setPCSections(*MF, MD); + return *this; + } + /// Copy all the implicit operands from OtherMI onto this one. const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const { @@ -324,17 +330,42 @@ public: } }; +/// Set of metadata that should be preserved when using BuildMI(). This provides +/// a more convenient way of preserving DebugLoc and PCSections. +class MIMetadata { +public: + MIMetadata() = default; + MIMetadata(DebugLoc DL, MDNode *PCSections = nullptr) + : DL(std::move(DL)), PCSections(PCSections) {} + MIMetadata(const DILocation *DI, MDNode *PCSections = nullptr) + : DL(DI), PCSections(PCSections) {} + explicit MIMetadata(const Instruction &From) + : DL(From.getDebugLoc()), + PCSections(From.getMetadata(LLVMContext::MD_pcsections)) {} + explicit MIMetadata(const MachineInstr &From) + : DL(From.getDebugLoc()), PCSections(From.getPCSections()) {} + + const DebugLoc &getDL() const { return DL; } + MDNode *getPCSections() const { return PCSections; } + +private: + DebugLoc DL; + MDNode *PCSections = nullptr; +}; + /// Builder interface. Specify how to create the initial instruction itself. -inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, +inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID) { - return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)); + return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL())) + .setPCSections(MIMD.getPCSections()); } /// This version of the builder sets up the first operand as a /// destination virtual register. -inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, +inline MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID, Register DestReg) { - return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)) + return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, MIMD.getDL())) + .setPCSections(MIMD.getPCSections()) .addReg(DestReg, RegState::Define); } @@ -343,12 +374,14 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, /// operand as a destination virtual register. 
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, - const DebugLoc &DL, const MCInstrDesc &MCID, - Register DestReg) { + const MIMetadata &MIMD, + const MCInstrDesc &MCID, Register DestReg) { MachineFunction &MF = *BB.getParent(); - MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); + MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI).addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI) + .setPCSections(MIMD.getPCSections()) + .addReg(DestReg, RegState::Define); } /// This version of the builder inserts the newly-built instruction before @@ -359,28 +392,31 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, /// added to the same bundle. inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, - const DebugLoc &DL, const MCInstrDesc &MCID, - Register DestReg) { + const MIMetadata &MIMD, + const MCInstrDesc &MCID, Register DestReg) { MachineFunction &MF = *BB.getParent(); - MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); + MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI).addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI) + .setPCSections(MIMD.getPCSections()) + .addReg(DestReg, RegState::Define); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, - const DebugLoc &DL, const MCInstrDesc &MCID, - Register DestReg) { + const MIMetadata &MIMD, + const MCInstrDesc &MCID, Register DestReg) { // Calling the overload for instr_iterator is always correct. However, the // definition is not available in headers, so inline the check. if (I.isInsideBundle()) - return BuildMI(BB, MachineBasicBlock::instr_iterator(I), DL, MCID, DestReg); - return BuildMI(BB, MachineBasicBlock::iterator(I), DL, MCID, DestReg); + return BuildMI(BB, MachineBasicBlock::instr_iterator(I), MIMD, MCID, + DestReg); + return BuildMI(BB, MachineBasicBlock::iterator(I), MIMD, MCID, DestReg); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I, - const DebugLoc &DL, const MCInstrDesc &MCID, - Register DestReg) { - return BuildMI(BB, *I, DL, MCID, DestReg); + const MIMetadata &MIMD, + const MCInstrDesc &MCID, Register DestReg) { + return BuildMI(BB, *I, MIMD, MCID, DestReg); } /// This version of the builder inserts the newly-built instruction before the @@ -388,53 +424,55 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I, /// destination register. 
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, - const DebugLoc &DL, + const MIMetadata &MIMD, const MCInstrDesc &MCID) { MachineFunction &MF = *BB.getParent(); - MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); + MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI); + return MachineInstrBuilder(MF, MI).setPCSections(MIMD.getPCSections()); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, - const DebugLoc &DL, + const MIMetadata &MIMD, const MCInstrDesc &MCID) { MachineFunction &MF = *BB.getParent(); - MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); + MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insert(I, MI); - return MachineInstrBuilder(MF, MI); + return MachineInstrBuilder(MF, MI).setPCSections(MIMD.getPCSections()); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, - const DebugLoc &DL, + const MIMetadata &MIMD, const MCInstrDesc &MCID) { // Calling the overload for instr_iterator is always correct. However, the // definition is not available in headers, so inline the check. if (I.isInsideBundle()) - return BuildMI(BB, MachineBasicBlock::instr_iterator(I), DL, MCID); - return BuildMI(BB, MachineBasicBlock::iterator(I), DL, MCID); + return BuildMI(BB, MachineBasicBlock::instr_iterator(I), MIMD, MCID); + return BuildMI(BB, MachineBasicBlock::iterator(I), MIMD, MCID); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I, - const DebugLoc &DL, + const MIMetadata &MIMD, const MCInstrDesc &MCID) { - return BuildMI(BB, *I, DL, MCID); + return BuildMI(BB, *I, MIMD, MCID); } /// This version of the builder inserts the newly-built instruction at the end /// of the given MachineBasicBlock, and does NOT take a destination register. -inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, +inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, + const MIMetadata &MIMD, const MCInstrDesc &MCID) { - return BuildMI(*BB, BB->end(), DL, MCID); + return BuildMI(*BB, BB->end(), MIMD, MCID); } /// This version of the builder inserts the newly-built instruction at the /// end of the given MachineBasicBlock, and sets up the first operand as a /// destination virtual register. -inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, +inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, + const MIMetadata &MIMD, const MCInstrDesc &MCID, Register DestReg) { - return BuildMI(*BB, BB->end(), DL, MCID, DestReg); + return BuildMI(*BB, BB->end(), MIMD, MCID, DestReg); } /// This version of the builder builds a DBG_VALUE intrinsic @@ -446,13 +484,6 @@ MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, Register Reg, const MDNode *Variable, const MDNode *Expr); -/// This version of the builder builds a DBG_VALUE intrinsic -/// for a MachineOperand. -MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, - const MCInstrDesc &MCID, bool IsIndirect, - const MachineOperand &MO, const MDNode *Variable, - const MDNode *Expr); - /// This version of the builder builds a DBG_VALUE or DBG_VALUE_LIST intrinsic /// for a MachineOperand. 
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, @@ -469,16 +500,8 @@ MachineInstrBuilder BuildMI(MachineBasicBlock &BB, Register Reg, const MDNode *Variable, const MDNode *Expr); -/// This version of the builder builds a DBG_VALUE intrinsic -/// for a machine operand and inserts it at position I. -MachineInstrBuilder BuildMI(MachineBasicBlock &BB, - MachineBasicBlock::iterator I, const DebugLoc &DL, - const MCInstrDesc &MCID, bool IsIndirect, - MachineOperand &MO, const MDNode *Variable, - const MDNode *Expr); - -/// This version of the builder builds a DBG_VALUE or DBG_VALUE_LIST intrinsic -/// for a machine operand and inserts it at position I. +/// This version of the builder builds a DBG_VALUE, DBG_INSTR_REF, or +/// DBG_VALUE_LIST intrinsic for a machine operand and inserts it at position I. MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, @@ -532,7 +555,7 @@ inline unsigned getRegState(const MachineOperand &RegOp) { getUndefRegState(RegOp.isUndef()) | getInternalReadRegState(RegOp.isInternalRead()) | getDebugRegState(RegOp.isDebug()) | - getRenamableRegState(Register::isPhysicalRegister(RegOp.getReg()) && + getRenamableRegState(RegOp.getReg().isPhysical() && RegOp.isRenamable()); } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h index 61240924e5e1..ea07e365d465 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -34,6 +34,7 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/IR/PassManager.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" #include <memory> #include <utility> @@ -45,7 +46,6 @@ class Function; class LLVMTargetMachine; class MachineFunction; class Module; -class MCSymbol; //===----------------------------------------------------------------------===// /// This class can be derived from and used by targets to hold private @@ -94,10 +94,6 @@ class MachineModuleInfo { /// \name Exception Handling /// \{ - /// Vector of all personality functions ever seen. Used to emit common EH - /// frames. - std::vector<const Function *> Personalities; - /// The current call site index being processed, if any. 0 if none. unsigned CurCallSite; @@ -195,13 +191,6 @@ public: /// none. unsigned getCurrentCallSite() { return CurCallSite; } - /// Provide the personality function for the exception information. - void addPersonality(const Function *Personality); - - /// Return array of personality functions ever seen. - const std::vector<const Function *>& getPersonalities() const { - return Personalities; - } /// \} // MMI owes MCContext. It should never be invalidated. 
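A hedged sketch of the MIMetadata-based BuildMI() overloads introduced above, which forward both the DebugLoc and the pcsections metadata of an existing instruction to a replacement (OldMI, MBB, InsertPt, TII, DstReg and SrcReg are assumptions of the example, not part of the diff):

  MIMetadata MIMD(OldMI);  // copies OldMI's DebugLoc and PCSections
  BuildMI(MBB, InsertPt, MIMD, TII->get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg);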
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h index c88e72cdc1d9..75710a4542e4 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h @@ -68,7 +68,8 @@ public: MO_IntrinsicID, ///< Intrinsic ID for ISel MO_Predicate, ///< Generic predicate for ISel MO_ShuffleMask, ///< Other IR Constant for ISel (shuffle masks) - MO_Last = MO_ShuffleMask + MO_DbgInstrRef, ///< Integer indices referring to an instruction+operand + MO_Last = MO_DbgInstrRef }; private: @@ -184,6 +185,11 @@ private: MachineOperand *Next; } Reg; + struct { // For MO_DbgInstrRef. + unsigned InstrIdx; + unsigned OpIdx; + } InstrRef; + /// OffsetedInfo - This struct contains the offset and an object identifier. /// this represent the object as with an optional offset from it. struct { @@ -281,8 +287,8 @@ public: /// \param TypeToPrint specifies the generic type to be printed on uses and /// defs. It can be determined using MachineInstr::getTypeToPrint. /// \param OpIdx - specifies the index of the operand in machine instruction. - /// This will be used by target dependent MIR formatter. Could be None if the - /// index is unknown, e.g. called by dump(). + /// This will be used by target dependent MIR formatter. Could be std::nullopt + /// if the index is unknown, e.g. called by dump(). /// \param PrintDef - whether we want to print `def` on an operand which /// isDef. Sometimes, if the operand is printed before '=', we don't print /// `def`. @@ -299,7 +305,7 @@ public: /// information from it's parent. /// \param IntrinsicInfo - same as \p TRI. void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint, - Optional<unsigned> OpIdx, bool PrintDef, bool IsStandalone, + std::optional<unsigned> OpIdx, bool PrintDef, bool IsStandalone, bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, const TargetIntrinsicInfo *IntrinsicInfo) const; @@ -347,6 +353,7 @@ public: /// isMetadata - Tests if this is a MO_Metadata operand. 
bool isMetadata() const { return OpKind == MO_Metadata; } bool isMCSymbol() const { return OpKind == MO_MCSymbol; } + bool isDbgInstrRef() const { return OpKind == MO_DbgInstrRef; } bool isCFIIndex() const { return OpKind == MO_CFIIndex; } bool isIntrinsicID() const { return OpKind == MO_IntrinsicID; } bool isPredicate() const { return OpKind == MO_Predicate; } @@ -584,6 +591,16 @@ public: return Contents.Sym; } + unsigned getInstrRefInstrIndex() const { + assert(isDbgInstrRef() && "Wrong MachineOperand accessor"); + return Contents.InstrRef.InstrIdx; + } + + unsigned getInstrRefOpIndex() const { + assert(isDbgInstrRef() && "Wrong MachineOperand accessor"); + return Contents.InstrRef.OpIdx; + } + unsigned getCFIIndex() const { assert(isCFIIndex() && "Wrong MachineOperand accessor"); return Contents.CFIIndex; @@ -695,6 +712,15 @@ public: Contents.MD = MD; } + void setInstrRefInstrIndex(unsigned InstrIdx) { + assert(isDbgInstrRef() && "Wrong MachineOperand mutator"); + Contents.InstrRef.InstrIdx = InstrIdx; + } + void setInstrRefOpIndex(unsigned OpIdx) { + assert(isDbgInstrRef() && "Wrong MachineOperand mutator"); + Contents.InstrRef.OpIdx = OpIdx; + } + void setMBB(MachineBasicBlock *MBB) { assert(isMBB() && "Wrong MachineOperand mutator"); Contents.MBB = MBB; @@ -763,6 +789,10 @@ public: void ChangeToTargetIndex(unsigned Idx, int64_t Offset, unsigned TargetFlags = 0); + /// Replace this operand with an Instruction Reference. + void ChangeToDbgInstrRef(unsigned InstrIdx, unsigned OpIdx, + unsigned TargetFlags = 0); + /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. @@ -919,6 +949,13 @@ public: return Op; } + static MachineOperand CreateDbgInstrRef(unsigned InstrIdx, unsigned OpIdx) { + MachineOperand Op(MachineOperand::MO_DbgInstrRef); + Op.Contents.InstrRef.InstrIdx = InstrIdx; + Op.Contents.InstrRef.OpIdx = OpIdx; + return Op; + } + static MachineOperand CreateCFIIndex(unsigned CFIIndex) { MachineOperand Op(MachineOperand::MO_CFIIndex); Op.Contents.CFIIndex = CFIIndex; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index cb0998984dfb..2b177e6763d3 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" +#include <optional> namespace llvm { class MachineBasicBlock; @@ -201,7 +202,7 @@ private: /// Compute hotness from IR value (currently assumed to be a block) if PGO is /// available. - Optional<uint64_t> computeHotness(const MachineBasicBlock &MBB); + std::optional<uint64_t> computeHotness(const MachineBasicBlock &MBB); /// Similar but use value from \p OptDiag and update hotness there. 
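The new MO_DbgInstrRef operand kind stores an <instruction, operand> index pair directly in the operand. A minimal sketch of creating and querying one (the index values are arbitrary):

  MachineOperand MO = MachineOperand::CreateDbgInstrRef(/*InstrIdx=*/1, /*OpIdx=*/0);
  assert(MO.isDbgInstrRef());
  unsigned Instr = MO.getInstrRefInstrIndex(); // 1
  unsigned Op = MO.getInstrRefOpIndex();       // 0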
void computeHotness(DiagnosticInfoMIROptimization &Remark); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassRegistry.def index 7748055f5d35..8efd1d2e95e9 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -43,6 +43,8 @@ FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) +FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, ()) +FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, ()) FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ()) FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) @@ -149,6 +151,7 @@ DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ()) DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ()) DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ()) DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass, ()) DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ()) DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ()) DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ()) @@ -158,6 +161,7 @@ DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass, () DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass, ()) DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass, ()) DUMMY_MACHINE_FUNCTION_PASS("removeredundantdebugvalues", RemoveRedundantDebugValuesPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter, ()) DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass, ()) DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass, ()) DUMMY_MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass, ()) @@ -200,4 +204,7 @@ DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ()) DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ()) DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ()) DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadata, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-uniformity", MachineUniformityInfoWrapperPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", MachineUniformityInfoPrinterPass, ()) #undef DUMMY_MACHINE_FUNCTION_PASS diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h index fc1cc0a879ca..c0d05ae73c5a 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h @@ -41,6 +41,7 @@ #define LLVM_CODEGEN_MACHINEPIPELINER_H #include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -58,6 +59,7 @@ class NodeSet; 
class SMSchedule; extern cl::opt<bool> SwpEnableCopyToPhi; +extern cl::opt<int> SwpForceIssueWidth; /// The main class in the implementation of the target independent /// software pipeliner pass. @@ -444,46 +446,80 @@ class ResourceManager { private: const MCSubtargetInfo *STI; const MCSchedModel &SM; + const TargetSubtargetInfo *ST; + const TargetInstrInfo *TII; + SwingSchedulerDAG *DAG; const bool UseDFA; - std::unique_ptr<DFAPacketizer> DFAResources; + /// DFA resources for each slot + llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources; + /// Modulo Reservation Table. When a resource with ID R is consumed in cycle + /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F) + llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT; + /// The number of scheduled micro operations for each slot. Micro operations + /// are assumed to be scheduled one per cycle, starting with the cycle in + /// which the instruction is scheduled. + llvm::SmallVector<int> NumScheduledMops; /// Each processor resource is associated with a so-called processor resource /// mask. This vector allows to correlate processor resource IDs with /// processor resource masks. There is exactly one element per each processor /// resource declared by the scheduling model. llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks; + int InitiationInterval; + /// The number of micro operations that can be scheduled at a cycle. + int IssueWidth; + + int calculateResMIIDFA() const; + /// Check if MRT is overbooked + bool isOverbooked() const; + /// Reserve resources on MRT + void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle); + /// Unreserve resources on MRT + void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle); + + /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor. + /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C, + /// II). + int positiveModulo(int Dividend, int Divisor) const { + assert(Divisor > 0); + int R = Dividend % Divisor; + if (R < 0) + R += Divisor; + return R; + } - llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceCount; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dumpMRT() const; +#endif public: - ResourceManager(const TargetSubtargetInfo *ST) - : STI(ST), SM(ST->getSchedModel()), UseDFA(ST->useDFAforSMS()), + ResourceManager(const TargetSubtargetInfo *ST, SwingSchedulerDAG *DAG) + : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()), + DAG(DAG), UseDFA(ST->useDFAforSMS()), ProcResourceMasks(SM.getNumProcResourceKinds(), 0), - ProcResourceCount(SM.getNumProcResourceKinds(), 0) { - if (UseDFA) - DFAResources.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST)); + IssueWidth(SM.IssueWidth) { initProcResourceVectors(SM, ProcResourceMasks); + if (IssueWidth <= 0) + // If IssueWidth is not specified, set a sufficiently large value + IssueWidth = 100; + if (SwpForceIssueWidth > 0) + IssueWidth = SwpForceIssueWidth; } void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks); - /// Check if the resources occupied by a MCInstrDesc are available in - /// the current state. - bool canReserveResources(const MCInstrDesc *MID) const; - - /// Reserve the resources occupied by a MCInstrDesc and change the current - /// state to reflect that change. - void reserveResources(const MCInstrDesc *MID); /// Check if the resources occupied by a machine instruction are available /// in the current state. 
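A worked example of positiveModulo(), which picks the modulo reservation table slot for a cycle: with an initiation interval of 4, a use in cycle -3 is booked in slot positiveModulo(-3, 4) == 1 (plain C++ gives -3 % 4 == -3), while a use in cycle 6 lands in slot positiveModulo(6, 4) == 2.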
- bool canReserveResources(const MachineInstr &MI) const; + bool canReserveResources(SUnit &SU, int Cycle); /// Reserve the resources occupied by a machine instruction and change the /// current state to reflect that change. - void reserveResources(const MachineInstr &MI); + void reserveResources(SUnit &SU, int Cycle); + + int calculateResMII() const; - /// Reset the state - void clearResources(); + /// Initialize resources with the initiation interval II. + void init(int II); }; /// This class represents the scheduled code. The main data structure is a @@ -521,8 +557,9 @@ private: ResourceManager ProcItinResources; public: - SMSchedule(MachineFunction *mf) - : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), ProcItinResources(&ST) {} + SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG) + : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), + ProcItinResources(&ST, DAG) {} void reset() { ScheduledInstrs.clear(); @@ -533,7 +570,10 @@ public: } /// Set the initiation interval for this schedule. - void setInitiationInterval(int ii) { InitiationInterval = ii; } + void setInitiationInterval(int ii) { + InitiationInterval = ii; + ProcItinResources.init(ii); + } /// Return the initiation interval for this schedule. int getInitiationInterval() const { return InitiationInterval; } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index b2c5f12106af..7f0c24e4e115 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -17,6 +17,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/iterator_range.h" @@ -56,11 +57,15 @@ public: virtual ~Delegate() = default; virtual void MRI_NoteNewVirtualRegister(Register Reg) = 0; + virtual void MRI_NotecloneVirtualRegister(Register NewReg, + Register SrcReg) { + MRI_NoteNewVirtualRegister(NewReg); + } }; private: MachineFunction *MF; - Delegate *TheDelegate = nullptr; + SmallPtrSet<Delegate *, 1> TheDelegates; /// True if subregister liveness is tracked. const bool TracksSubRegLiveness; @@ -154,19 +159,28 @@ public: void resetDelegate(Delegate *delegate) { // Ensure another delegate does not take over unless the current - // delegate first unattaches itself. If we ever need to multicast - // notifications, we will need to change to using a list. - assert(TheDelegate == delegate && - "Only the current delegate can perform reset!"); - TheDelegate = nullptr; + // delegate first unattaches itself. 
+ assert(TheDelegates.count(delegate) && + "Only an existing delegate can perform reset!"); + TheDelegates.erase(delegate); } - void setDelegate(Delegate *delegate) { - assert(delegate && !TheDelegate && - "Attempted to set delegate to null, or to change it without " + void addDelegate(Delegate *delegate) { + assert(delegate && !TheDelegates.count(delegate) && + "Attempted to add null delegate, or to change it without " "first resetting it!"); - TheDelegate = delegate; + TheDelegates.insert(delegate); + } + + void noteNewVirtualRegister(Register Reg) { + for (auto *TheDelegate : TheDelegates) + TheDelegate->MRI_NoteNewVirtualRegister(Reg); + } + + void noteCloneVirtualRegister(Register NewReg, Register SrcReg) { + for (auto *TheDelegate : TheDelegates) + TheDelegate->MRI_NotecloneVirtualRegister(NewReg, SrcReg); } //===--------------------------------------------------------------------===// @@ -584,6 +598,11 @@ public: /// multiple uses. bool hasOneNonDBGUser(Register RegNo) const; + + /// hasAtMostUses - Return true if the given register has at most \p MaxUsers + /// non-debug user instructions. + bool hasAtMostUserInstrs(Register Reg, unsigned MaxUsers) const; + /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. @@ -738,7 +757,7 @@ public: /// Get the low-level type of \p Reg or LLT{} if Reg is not a generic /// (target independent) virtual register. LLT getType(Register Reg) const { - if (Register::isVirtualRegister(Reg) && VRegToType.inBounds(Reg)) + if (Reg.isVirtual() && VRegToType.inBounds(Reg)) return VRegToType[Reg]; return LLT{}; } @@ -780,7 +799,7 @@ public: /// addRegAllocationHint - Add a register allocation hint to the hints /// vector for VReg. void addRegAllocationHint(Register VReg, Register PrefReg) { - assert(Register::isVirtualRegister(VReg)); + assert(VReg.isVirtual()); RegAllocHints[VReg].second.push_back(PrefReg); } @@ -895,6 +914,18 @@ public: /// of reserved registers before allocation begins. void freezeReservedRegs(const MachineFunction&); + /// reserveReg -- Mark a register as reserved so checks like isAllocatable + /// will not suggest using it. This should not be used during the middle + /// of a function walk, or when liveness info is available. + void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI) { + assert(reservedRegsFrozen() && + "Reserved registers haven't been frozen yet. "); + MCRegAliasIterator R(PhysReg, TRI, true); + + for (; R.isValid(); ++R) + ReservedRegs.set(*R); + } + /// reservedRegsFrozen - Returns true after freezeReservedRegs() was called /// to ensure the set of reserved registers stays constant. 
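With delegates now kept in a set, several observers can be attached to the same MachineRegisterInfo at once. A minimal sketch of the addDelegate()/resetDelegate() protocol (RegObserver is a hypothetical class invented for this example):

  struct RegObserver final : MachineRegisterInfo::Delegate {
    void MRI_NoteNewVirtualRegister(Register Reg) override {
      // e.g. remember every virtual register created while attached
    }
  };

  RegObserver Obs;
  MRI.addDelegate(&Obs);    // other delegates may already be registered
  // ... code that creates or clones virtual registers ...
  MRI.resetDelegate(&Obs);  // detach this observer only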
bool reservedRegsFrozen() const { diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSSAContext.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSSAContext.h index f59d7cf8a522..e3b2dc459881 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSSAContext.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSSAContext.h @@ -26,10 +26,17 @@ class Register; template <typename _FunctionT> class GenericSSAContext; template <typename, bool> class DominatorTreeBase; -inline auto successors(MachineBasicBlock *BB) { return BB->successors(); } -inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); } -inline unsigned succ_size(MachineBasicBlock *BB) { return BB->succ_size(); } -inline unsigned pred_size(MachineBasicBlock *BB) { return BB->pred_size(); } +inline auto successors(const MachineBasicBlock *BB) { return BB->successors(); } +inline auto predecessors(const MachineBasicBlock *BB) { + return BB->predecessors(); +} +inline unsigned succ_size(const MachineBasicBlock *BB) { + return BB->succ_size(); +} +inline unsigned pred_size(const MachineBasicBlock *BB) { + return BB->pred_size(); +} +inline auto instrs(const MachineBasicBlock &BB) { return BB.instrs(); } template <> class GenericSSAContext<MachineFunction> { const MachineRegisterInfo *RegInfo = nullptr; @@ -40,15 +47,25 @@ public: using FunctionT = MachineFunction; using InstructionT = MachineInstr; using ValueRefT = Register; + using ConstValueRefT = Register; + static const Register ValueRefNull; using DominatorTreeT = DominatorTreeBase<BlockT, false>; - static MachineBasicBlock *getEntryBlock(MachineFunction &F); - void setFunction(MachineFunction &Fn); MachineFunction *getFunction() const { return MF; } - Printable print(MachineBasicBlock *Block) const; - Printable print(MachineInstr *Inst) const; + static MachineBasicBlock *getEntryBlock(MachineFunction &F); + static void appendBlockDefs(SmallVectorImpl<Register> &defs, + const MachineBasicBlock &block); + static void appendBlockTerms(SmallVectorImpl<MachineInstr *> &terms, + MachineBasicBlock &block); + static void appendBlockTerms(SmallVectorImpl<const MachineInstr *> &terms, + const MachineBasicBlock &block); + MachineBasicBlock *getDefBlock(Register) const; + static bool isConstantValuePhi(const MachineInstr &Phi); + + Printable print(const MachineBasicBlock *Block) const; + Printable print(const MachineInstr *Inst) const; Printable print(Register Value) const; }; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineScheduler.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineScheduler.h index 8000c9db428d..997c3a4f74a1 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -674,8 +674,33 @@ private: // scheduled instruction. SmallVector<unsigned, 16> ReservedCycles; - // For each PIdx, stores first index into ReservedCycles that corresponds to - // it. + /// For each PIdx, stores first index into ReservedCycles that corresponds to + /// it. + /// + /// For example, consider the following 3 resources (ResourceCount = + /// 3): + /// + /// +------------+--------+ + /// |ResourceName|NumUnits| + /// +------------+--------+ + /// | X | 2 | + /// +------------+--------+ + /// | Y | 3 | + /// +------------+--------+ + /// | Z | 1 | + /// +------------+--------+ + /// + /// In this case, the total number of resource instances is 6. 
The + /// vector \ref ReservedCycles will have a slot for each instance. The + /// vector \ref ReservedCyclesIndex will track at what index the first + /// instance of the resource is found in the vector of \ref + /// ReservedCycles: + /// + /// Indexes of instances in ReservedCycles + /// 0 1 2 3 4 5 + /// ReservedCyclesIndex[0] = 0; [X0, X1, + /// ReservedCyclesIndex[1] = 2; Y0, Y1, Y2 + /// ReservedCyclesIndex[2] = 5; Z SmallVector<unsigned, 16> ReservedCyclesIndex; // For each PIdx, stores the resource group IDs of its subunits @@ -802,6 +827,8 @@ public: /// available instruction, or NULL if there are multiple candidates. SUnit *pickOnlyChoice(); + /// Dump the state of the information that tracks resource usage. + void dumpReservedCycles() const; void dumpScheduledState() const; }; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineTraceMetrics.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineTraceMetrics.h index 46b57365e653..89c9c94455d9 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineTraceMetrics.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineTraceMetrics.h @@ -49,7 +49,6 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -284,9 +283,9 @@ public: /// classes are included. For the caller to account for extra machine /// instructions, it must first resolve each instruction's scheduling class. unsigned getResourceLength( - ArrayRef<const MachineBasicBlock *> Extrablocks = None, - ArrayRef<const MCSchedClassDesc *> ExtraInstrs = None, - ArrayRef<const MCSchedClassDesc *> RemoveInstrs = None) const; + ArrayRef<const MachineBasicBlock *> Extrablocks = std::nullopt, + ArrayRef<const MCSchedClassDesc *> ExtraInstrs = std::nullopt, + ArrayRef<const MCSchedClassDesc *> RemoveInstrs = std::nullopt) const; /// Return the length of the (data dependency) critical path through the /// trace. diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h new file mode 100644 index 000000000000..614f09caa3c7 --- /dev/null +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h @@ -0,0 +1,35 @@ +//===- MachineUniformityAnalysis.h ---------------------------*- C++ -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Uniformity info and uniformity-aware uniform info for Machine IR +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H +#define LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H + +#include "llvm/ADT/GenericUniformityInfo.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineSSAContext.h" + +namespace llvm { + +extern template class GenericUniformityInfo<MachineSSAContext>; +using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>; + +/// \brief Compute the uniform information of a Machine IR function. 
+MachineUniformityInfo +computeMachineUniformityInfo(MachineFunction &F, + const MachineCycleInfo &cycleInfo, + const MachineDomTree &domTree); + +} // namespace llvm + +#endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ModuloSchedule.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ModuloSchedule.h index c515101e80fd..b9213ab58aa4 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ModuloSchedule.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ModuloSchedule.h @@ -192,7 +192,8 @@ private: ValueMapTy *VRMap, MBBVectorTy &PrologBBs); void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB, ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs); + ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs); void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, @@ -200,8 +201,9 @@ private: bool IsLast); void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, InstrMapTy &InstrMap, - unsigned LastStageNum, unsigned CurStageNum, bool IsLast); + ValueMapTy *VRMap, ValueMapTy *VRMapPhi, + InstrMapTy &InstrMap, unsigned LastStageNum, + unsigned CurStageNum, bool IsLast); void removeDeadInstructions(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs); void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h index 9822f8013e91..b331c9a19fd1 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h @@ -63,6 +63,10 @@ namespace llvm { createMachineFunctionPrinterPass(raw_ostream &OS, const std::string &Banner =""); + /// StackFramePrinter pass - This pass prints out the machine function's + /// stack frame to the given stream as a debugging tool. + MachineFunctionPass *createStackFrameLayoutAnalysisPass(); + /// MIRPrinting pass - this pass prints out the LLVM IR into the given stream /// using the MIR serialization format. MachineFunctionPass *createPrintMIRPass(raw_ostream &OS); @@ -79,6 +83,10 @@ namespace llvm { /// matching during instruction selection. FunctionPass *createCodeGenPreparePass(); + /// This pass implements generation of target-specific intrinsics to support + /// handling of complex number arithmetic + FunctionPass *createComplexDeinterleavingPass(const TargetMachine *TM); + /// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg /// load-linked/store-conditional loops. extern char &AtomicExpandID; @@ -260,6 +268,10 @@ namespace llvm { /// It merges disjoint allocas to reduce the stack size. extern char &StackColoringID; + /// StackFramePrinter - This pass prints the stack frame layout and variable + /// mappings. + extern char &StackFrameLayoutAnalysisPassID; + /// IfConverter - This pass performs machine code if conversion. extern char &IfConverterID; @@ -330,6 +342,10 @@ namespace llvm { MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr); + /// MachineLateInstrsCleanup - This pass removes redundant identical + /// instructions after register allocation and rematerialization. 
+ extern char &MachineLateInstrsCleanupID; + /// PeepholeOptimizer - This pass performs peephole optimizations - /// like extension and comparison eliminations. extern char &PeepholeOptimizerID; @@ -405,9 +421,16 @@ namespace llvm { /// the intrinsic for later emission to the StackMap. extern char &StackMapLivenessID; + // MachineSanitizerBinaryMetadata - appends/finalizes sanitizer binary + // metadata after llvm SanitizerBinaryMetadata pass. + extern char &MachineSanitizerBinaryMetadataID; + /// RemoveRedundantDebugValues pass. extern char &RemoveRedundantDebugValuesID; + /// MachineCFGPrinter pass. + extern char &MachineCFGPrinterID; + /// LiveDebugValues pass extern char &LiveDebugValuesID; @@ -484,6 +507,12 @@ namespace llvm { /// predicate mask. FunctionPass *createExpandVectorPredicationPass(); + // Expands large div/rem instructions. + FunctionPass *createExpandLargeDivRemPass(); + + // Expands large div/rem instructions. + FunctionPass *createExpandLargeFpConvertPass(); + // This pass expands memcmp() to load/stores. FunctionPass *createExpandMemCmpPass(); @@ -514,7 +543,7 @@ namespace llvm { FunctionPass *createPseudoProbeInserter(); /// Create IR Type Promotion pass. \see TypePromotion.cpp - FunctionPass *createTypePromotionPass(); + FunctionPass *createTypePromotionLegacyPass(); /// Add Flow Sensitive Discriminators. PassNum specifies the /// sequence number of this pass (starting from 1). diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h index a323ee9dc396..43eb051c136b 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h @@ -233,6 +233,7 @@ #include <cstdint> #include <cstring> #include <map> +#include <memory> #include <set> #include <unordered_map> #include <utility> @@ -644,6 +645,9 @@ namespace rdf { struct DataFlowGraph { DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, + const MachineDominanceFrontier &mdf); + DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, + const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi); NodeBase *ptr(NodeId N) const; @@ -861,6 +865,9 @@ namespace rdf { IA.Addr->removeMember(RA, *this); } + // Default TOI object, if not given in the constructor. + std::unique_ptr<TargetOperandInfo> DefaultTOI; + MachineFunction &MF; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; @@ -927,6 +934,8 @@ namespace rdf { const DataFlowGraph &G; }; + template <typename T> Print(const T &, const DataFlowGraph &) -> Print<T>; + template <typename T> struct PrintNode : Print<NodeAddr<T>> { PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g) diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/Register.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/Register.h index 9dc3e98fe837..2f2d58f5185b 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/Register.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/Register.h @@ -75,7 +75,7 @@ public: /// Convert a virtual register number to a 0-based index. /// The first virtual register in a function will get the index 0. 
static unsigned virtReg2Index(Register Reg) { - assert(isVirtualRegister(Reg) && "Not a virtual register"); + assert(Reg.isVirtual() && "Not a virtual register"); return Reg & ~MCRegister::VirtualRegFlag; } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterClassInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterClassInfo.h index 39c72a42c433..0e50d2feb9b2 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterClassInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterClassInfo.h @@ -38,7 +38,7 @@ class RegisterClassInfo { RCInfo() = default; operator ArrayRef<MCPhysReg>() const { - return makeArrayRef(Order.get(), NumRegs); + return ArrayRef(Order.get(), NumRegs); } }; @@ -52,10 +52,9 @@ class RegisterClassInfo { const MachineFunction *MF = nullptr; const TargetRegisterInfo *TRI = nullptr; - // Callee saved registers of last MF. Assumed to be valid until the next - // runOnFunction() call. - // Used only to determine if an update was made to CalleeSavedAliases. - const MCPhysReg *CalleeSavedRegs = nullptr; + // Callee saved registers of last MF. + // Used only to determine if an update for CalleeSavedAliases is necessary. + SmallVector<MCPhysReg, 16> LastCalleeSavedRegs; // Map register alias to the callee saved Register. SmallVector<MCPhysReg, 4> CalleeSavedAliases; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterPressure.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterPressure.h index c40c0eec80ec..1164b60a11eb 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterPressure.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterPressure.h @@ -537,6 +537,11 @@ public: void dump() const; + void increaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, + LaneBitmask NewMask); + void decreaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, + LaneBitmask NewMask); + protected: /// Add Reg to the live out set and increase max pressure. void discoverLiveOut(RegisterMaskPair Pair); @@ -547,11 +552,6 @@ protected: /// after the current position. 
SlotIndex getCurrSlot() const; - void increaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, - LaneBitmask NewMask); - void decreaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, - LaneBitmask NewMask); - void bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs); void bumpUpwardPressure(const MachineInstr *MI); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h index fb3900b4a9c1..dc8f02e28adf 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -29,6 +29,7 @@ #include <cassert> #include <cstdint> #include <list> +#include <string> #include <utility> #include <vector> diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h index 1169e0116ec8..aa1936c2757e 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -68,6 +68,7 @@ class ConstantInt; class DataLayout; struct fltSemantics; class FunctionLoweringInfo; +class FunctionVarLocs; class GlobalValue; struct KnownBits; class LegacyDivergenceAnalysis; @@ -222,6 +223,7 @@ class SelectionDAG { const SelectionDAGTargetInfo *TSI = nullptr; const TargetLowering *TLI = nullptr; const TargetLibraryInfo *LibInfo = nullptr; + const FunctionVarLocs *FnVarLocs = nullptr; MachineFunction *MF; Pass *SDAGISelPass = nullptr; LLVMContext *Context; @@ -237,6 +239,12 @@ class SelectionDAG { ProfileSummaryInfo *PSI = nullptr; BlockFrequencyInfo *BFI = nullptr; + /// List of non-single value types. + FoldingSet<SDVTListNode> VTListMap; + + /// Pool allocation for misc. objects that are created once per SelectionDAG. + BumpPtrAllocator Allocator; + /// The starting token. SDNode EntryNode; @@ -263,22 +271,20 @@ class SelectionDAG { BumpPtrAllocator OperandAllocator; ArrayRecycler<SDUse> OperandRecycler; - /// Pool allocation for misc. objects that are created once per SelectionDAG. - BumpPtrAllocator Allocator; - /// Tracks dbg_value and dbg_label information through SDISel. SDDbgInfo *DbgInfo; using CallSiteInfo = MachineFunction::CallSiteInfo; using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl; - struct CallSiteDbgInfo { + struct NodeExtraInfo { CallSiteInfo CSInfo; MDNode *HeapAllocSite = nullptr; + MDNode *PCSections = nullptr; bool NoMerge = false; }; - - DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo; + /// Out-of-line extra information for SDNodes. + DenseMap<const SDNode *, NodeExtraInfo> SDEI; /// PersistentId counter to be used when inserting the next /// SDNode to this SelectionDAG. We do not place that under @@ -287,9 +293,6 @@ class SelectionDAG { /// benefits (see discussion with @thakis in D120714). uint16_t NextPersistentId = 0; - /// Are instruction referencing variable locations desired for this function? - bool UseInstrRefDebugInfo = false; - public: /// Clients of various APIs that cause global effects on /// the DAG can optionally implement this interface. 
This allows the clients @@ -336,6 +339,19 @@ public: virtual void anchor(); }; + struct DAGNodeInsertedListener : public DAGUpdateListener { + std::function<void(SDNode *)> Callback; + + DAGNodeInsertedListener(SelectionDAG &DAG, + std::function<void(SDNode *)> Callback) + : DAGUpdateListener(DAG), Callback(std::move(Callback)) {} + + void NodeInserted(SDNode *N) override { Callback(N); } + + private: + virtual void anchor(); + }; + /// Help to insert SDNodeFlags automatically in transforming. Use /// RAII to save and resume flags in current scope. class FlagInserter { @@ -435,8 +451,8 @@ public: /// Prepare this SelectionDAG to process code in the given MachineFunction. void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - LegacyDivergenceAnalysis * Divergence, - ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin); + LegacyDivergenceAnalysis *Divergence, ProfileSummaryInfo *PSIin, + BlockFrequencyInfo *BFIin, FunctionVarLocs const *FnVarLocs); void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) { FLI = FuncInfo; @@ -459,6 +475,9 @@ public: const TargetLibraryInfo &getLibInfo() const { return *LibInfo; } const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; } const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; } + /// Returns the result of the AssignmentTrackingAnalysis pass if it's + /// available, otherwise return nullptr. + const FunctionVarLocs *getFunctionVarLocs() const { return FnVarLocs; } LLVMContext *getContext() const { return Context; } OptimizationRemarkEmitter &getORE() const { return *ORE; } ProfileSummaryInfo *getPSI() const { return PSI; } @@ -708,19 +727,19 @@ public: SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags = 0) { return getJumpTable(JTI, VT, true, TargetFlags); } - SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align = None, - int Offs = 0, bool isT = false, - unsigned TargetFlags = 0); + SDValue getConstantPool(const Constant *C, EVT VT, + MaybeAlign Align = std::nullopt, int Offs = 0, + bool isT = false, unsigned TargetFlags = 0); SDValue getTargetConstantPool(const Constant *C, EVT VT, - MaybeAlign Align = None, int Offset = 0, + MaybeAlign Align = std::nullopt, int Offset = 0, unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT, - MaybeAlign Align = None, int Offs = 0, + MaybeAlign Align = std::nullopt, int Offs = 0, bool isT = false, unsigned TargetFlags = 0); SDValue getTargetConstantPool(MachineConstantPoolValue *C, EVT VT, - MaybeAlign Align = None, int Offset = 0, + MaybeAlign Align = std::nullopt, int Offset = 0, unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } @@ -761,7 +780,7 @@ public: SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue }; return getNode(ISD::CopyToReg, dl, VTs, - makeArrayRef(Ops, Glue.getNode() ? 4 : 3)); + ArrayRef(Ops, Glue.getNode() ? 4 : 3)); } // Similar to last getCopyToReg() except parameter Reg is a SDValue @@ -770,7 +789,7 @@ public: SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Reg, N, Glue }; return getNode(ISD::CopyToReg, dl, VTs, - makeArrayRef(Ops, Glue.getNode() ? 4 : 3)); + ArrayRef(Ops, Glue.getNode() ? 
4 : 3)); } SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT) { @@ -787,7 +806,7 @@ public: SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue }; return getNode(ISD::CopyFromReg, dl, VTs, - makeArrayRef(Ops, Glue.getNode() ? 3 : 2)); + ArrayRef(Ops, Glue.getNode() ? 3 : 2)); } SDValue getCondCode(ISD::CondCode Cond); @@ -848,6 +867,16 @@ public: return getNode(ISD::SPLAT_VECTOR, DL, VT, Op); } + /// Returns a node representing a splat of one value into all lanes + /// of the provided vector type. This is a utility which returns + /// either a BUILD_VECTOR or SPLAT_VECTOR depending on the + /// scalability of the desired vector type. + SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op) { + assert(VT.isVector() && "Can't splat to non-vector type"); + return VT.isScalableVector() ? + getSplatVector(VT, DL, Op) : getSplatBuildVector(VT, DL, Op); + } + /// Returns a vector of type ResVT whose elements contain the linear sequence /// <0, Step, Step * 2, Step * 3, ...> SDValue getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal); @@ -871,6 +900,38 @@ public: std::pair<SDValue, SDValue> getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT); + /// Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode. + static unsigned getOpcode_EXTEND(unsigned Opcode) { + switch (Opcode) { + case ISD::ANY_EXTEND: + case ISD::ANY_EXTEND_VECTOR_INREG: + return ISD::ANY_EXTEND; + case ISD::ZERO_EXTEND: + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ISD::ZERO_EXTEND; + case ISD::SIGN_EXTEND: + case ISD::SIGN_EXTEND_VECTOR_INREG: + return ISD::SIGN_EXTEND; + } + llvm_unreachable("Unknown opcode"); + } + + /// Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode. + static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) { + switch (Opcode) { + case ISD::ANY_EXTEND: + case ISD::ANY_EXTEND_VECTOR_INREG: + return ISD::ANY_EXTEND_VECTOR_INREG; + case ISD::ZERO_EXTEND: + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ISD::ZERO_EXTEND_VECTOR_INREG; + case ISD::SIGN_EXTEND: + case ISD::SIGN_EXTEND_VECTOR_INREG: + return ISD::SIGN_EXTEND_VECTOR_INREG; + } + llvm_unreachable("Unknown opcode"); + } + /// Convert Op, which must be of integer type, to the /// integer type VT, by either any-extending or truncating it. SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT); @@ -902,6 +963,9 @@ public: /// BooleanContent for type OpVT or truncating it. SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT); + /// Create negative operation as (SUB 0, Val). + SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT); + /// Create a bitwise NOT operation as (XOR Val, -1). SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT); @@ -913,6 +977,21 @@ public: SDValue getVPLogicalNOT(const SDLoc &DL, SDValue Val, SDValue Mask, SDValue EVL, EVT VT); + /// Convert a vector-predicated Op, which must be an integer vector, to the + /// vector-type VT, by performing either vector-predicated zext or truncating + /// it. The Op will be returned as-is if Op and VT are vectors containing + /// integer with same width. + SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, + SDValue EVL); + + /// Convert a vector-predicated Op, which must be of integer type, to the + /// vector-type integer type VT, by either truncating it or performing either + /// vector-predicated zero or sign extension as appropriate extension for the + /// pointer's semantics. 
This function just redirects to getVPZExtOrTrunc + /// right now. + SDValue getVPPtrExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, + SDValue EVL); + /// Returns sum of the base pointer and offset. /// Unlike getObjectPtrOffset this does not set NoUnsignedWrap by default. SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, @@ -964,6 +1043,13 @@ public: return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops); } + SDValue getCALLSEQ_END(SDValue Chain, uint64_t Size1, uint64_t Size2, + SDValue Glue, const SDLoc &DL) { + return getCALLSEQ_END( + Chain, getIntPtrConstant(Size1, DL, /*isTarget=*/true), + getIntPtrConstant(Size2, DL, /*isTarget=*/true), Glue, DL); + } + /// Return true if the result of this operation is always undefined. bool isUndef(unsigned Opcode, ArrayRef<SDValue> Ops); @@ -1082,9 +1168,9 @@ public: ISD::CondCode Cond, SDValue Chain = SDValue(), bool IsSignaling = false) { assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() && - "Cannot compare scalars to vectors"); + "Vector/scalar operand type mismatch for setcc"); assert(LHS.getValueType().isVector() == VT.isVector() && - "Cannot compare scalars to vectors"); + "Vector/scalar result type mismatch for setcc"); assert(Cond != ISD::SETCC_INVALID && "Cannot create a setCC of an invalid node."); if (Chain) @@ -1176,7 +1262,8 @@ public: inline SDValue getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, - EVT MemVT, MachinePointerInfo PtrInfo, MaybeAlign Alignment = None, + EVT MemVT, MachinePointerInfo PtrInfo, + MaybeAlign Alignment = std::nullopt, MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore, uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()) { @@ -1217,6 +1304,8 @@ public: const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); /// FIXME: Remove once transition to Align is over. + LLVM_DEPRECATED("Use the getLoad function that takes a MaybeAlign instead", + "") inline SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment, @@ -1234,16 +1323,6 @@ public: MaybeAlign Alignment = MaybeAlign(), MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()); - /// FIXME: Remove once transition to Align is over. - inline SDValue - getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, - SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - unsigned Alignment, - MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, - const AAMDNodes &AAInfo = AAMDNodes()) { - return getExtLoad(ExtType, dl, VT, Chain, Ptr, PtrInfo, MemVT, - MaybeAlign(Alignment), MMOFlags, AAInfo); - } SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO); @@ -1267,6 +1346,8 @@ public: Ranges); } /// FIXME: Remove once transition to Align is over. + LLVM_DEPRECATED("Use the getLoad function that takes a MaybeAlign instead", + "") inline SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, @@ -1301,6 +1382,7 @@ public: MMOFlags, AAInfo); } /// FIXME: Remove once transition to Align is over. 
+ LLVM_DEPRECATED("Use the version that takes a MaybeAlign instead", "") inline SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment, @@ -1327,6 +1409,7 @@ public: AAInfo); } /// FIXME: Remove once transition to Align is over. + LLVM_DEPRECATED("Use the version that takes a MaybeAlign instead", "") inline SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment, @@ -1811,16 +1894,6 @@ public: /// function mirrors \c llvm::salvageDebugInfo. void salvageDebugInfo(SDNode &N); - /// Signal whether instruction referencing variable locations are desired for - /// this function's debug-info. - void useInstrRefDebugInfo(bool Flag) { - UseInstrRefDebugInfo = Flag; - } - - bool getUseInstrRefDebugInfo() const { - return UseInstrRefDebugInfo; - } - void dump() const; /// In most cases this function returns the ABI alignment for a given type, @@ -1856,14 +1929,6 @@ public: SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl); - /// See if the specified operand can be simplified with the knowledge that - /// only the bits specified by DemandedBits are used. If so, return the - /// simpler operand, otherwise return a null SDValue. - /// - /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can - /// simplify nodes with multiple uses more aggressively.) - SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits); - /// Return true if the sign bit of Op is known to be zero. /// We use this predicate to simplify operations downstream. bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const; @@ -1890,6 +1955,10 @@ public: bool MaskedValueIsAllOnes(SDValue Op, const APInt &Mask, unsigned Depth = 0) const; + /// For each demanded element of a vector, see if it is known to be zero. + APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, + unsigned Depth = 0) const; + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. @@ -1981,6 +2050,32 @@ public: /*PoisonOnly*/ true, Depth); } + /// Return true if Op can create undef or poison from non-undef & non-poison + /// operands. The DemandedElts argument limits the check to the requested + /// vector elements. + /// + /// \p ConsiderFlags controls whether poison producing flags on the + /// instruction are considered. This can be used to see if the instruction + /// could still introduce undef or poison even without poison generating flags + /// which might be on the instruction. (i.e. could the result of + /// Op->dropPoisonGeneratingFlags() still create poison or undef) + bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, + bool PoisonOnly = false, + bool ConsiderFlags = true, + unsigned Depth = 0) const; + + /// Return true if Op can create undef or poison from non-undef & non-poison + /// operands. + /// + /// \p ConsiderFlags controls whether poison producing flags on the + /// instruction are considered. This can be used to see if the instruction + /// could still introduce undef or poison even without poison generating flags + /// which might be on the instruction. (i.e. 
could the result of + /// Op->dropPoisonGeneratingFlags() still create poison or undef) + bool canCreateUndefOrPoison(SDValue Op, bool PoisonOnly = false, + bool ConsiderFlags = true, + unsigned Depth = 0) const; + /// Return true if the specified operand is an ISD::ADD with a ConstantSDNode /// on the right-hand side, or if it is an ISD::OR with a ConstantSDNode that /// is guaranteed to have the same semantics as an ADD. This handles the @@ -1988,9 +2083,9 @@ public: /// X|Cst == X+Cst iff X&Cst = 0. bool isBaseWithConstantOffset(SDValue Op) const; - /// Test whether the given SDValue is known to never be NaN. If \p SNaN is - /// true, returns if \p Op is known to never be a signaling NaN (it may still - /// be a qNaN). + /// Test whether the given SDValue (or all elements of it, if it is a + /// vector) is known to never be NaN. If \p SNaN is true, returns if \p Op is + /// known to never be a signaling NaN (it may still be a qNaN). bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const; /// \returns true if \p Op is known to never be a signaling NaN. @@ -2085,8 +2180,8 @@ public: bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const; - /// Infer alignment of a load / store address. Return None if it cannot be - /// inferred. + /// Infer alignment of a load / store address. Return std::nullopt if it + /// cannot be inferred. MaybeAlign InferPtrAlign(SDValue Ptr) const; /// Compute the VTs needed for the low/hi parts of a type @@ -2150,34 +2245,45 @@ public: /// Set CallSiteInfo to be associated with Node. void addCallSiteInfo(const SDNode *Node, CallSiteInfoImpl &&CallInfo) { - SDCallSiteDbgInfo[Node].CSInfo = std::move(CallInfo); + SDEI[Node].CSInfo = std::move(CallInfo); } /// Return CallSiteInfo associated with Node, or a default if none exists. CallSiteInfo getCallSiteInfo(const SDNode *Node) { - auto I = SDCallSiteDbgInfo.find(Node); - return I != SDCallSiteDbgInfo.end() ? std::move(I->second).CSInfo - : CallSiteInfo(); + auto I = SDEI.find(Node); + return I != SDEI.end() ? std::move(I->second).CSInfo : CallSiteInfo(); } /// Set HeapAllocSite to be associated with Node. void addHeapAllocSite(const SDNode *Node, MDNode *MD) { - SDCallSiteDbgInfo[Node].HeapAllocSite = MD; + SDEI[Node].HeapAllocSite = MD; } /// Return HeapAllocSite associated with Node, or nullptr if none exists. MDNode *getHeapAllocSite(const SDNode *Node) const { - auto I = SDCallSiteDbgInfo.find(Node); - return I != SDCallSiteDbgInfo.end() ? I->second.HeapAllocSite : nullptr; + auto I = SDEI.find(Node); + return I != SDEI.end() ? I->second.HeapAllocSite : nullptr; + } + /// Set PCSections to be associated with Node. + void addPCSections(const SDNode *Node, MDNode *MD) { + SDEI[Node].PCSections = MD; + } + /// Return PCSections associated with Node, or nullptr if none exists. + MDNode *getPCSections(const SDNode *Node) const { + auto It = SDEI.find(Node); + return It != SDEI.end() ? It->second.PCSections : nullptr; } /// Set NoMergeSiteInfo to be associated with Node if NoMerge is true. void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge) { if (NoMerge) - SDCallSiteDbgInfo[Node].NoMerge = NoMerge; + SDEI[Node].NoMerge = NoMerge; } /// Return NoMerge info associated with Node. bool getNoMergeSiteInfo(const SDNode *Node) const { - auto I = SDCallSiteDbgInfo.find(Node); - return I != SDCallSiteDbgInfo.end() ? I->second.NoMerge : false; + auto I = SDEI.find(Node); + return I != SDEI.end() ? 
I->second.NoMerge : false; } + /// Copy extra info associated with one node to another. + void copyExtraInfo(SDNode *From, SDNode *To); + /// Return the current function's default denormal handling kind for the given /// floating point type. DenormalMode getDenormalMode(EVT VT) const { @@ -2190,6 +2296,23 @@ public: SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags); + /// Some opcodes may create immediate undefined behavior when used with some + /// values (integer division-by-zero for example). Therefore, these operations + /// are not generally safe to move around or change. + bool isSafeToSpeculativelyExecute(unsigned Opcode) const { + switch (Opcode) { + case ISD::SDIV: + case ISD::SREM: + case ISD::SDIVREM: + case ISD::UDIV: + case ISD::UREM: + case ISD::UDIVREM: + return false; + default: + return true; + } + } + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); @@ -2218,9 +2341,6 @@ private: SDNode *FindNodeOrInsertPos(const FoldingSetNodeID &ID, const SDLoc &DL, void *&InsertPos); - /// List of non-single value types. - FoldingSet<SDVTListNode> VTListMap; - /// Maps to auto-CSE operations. std::vector<CondCodeSDNode*> CondCodeNodes; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index e23eebec81db..3d0f836b0c75 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -33,7 +33,7 @@ class BaseIndexOffset { private: SDValue Base; SDValue Index; - Optional<int64_t> Offset; + std::optional<int64_t> Offset; bool IsIndexSignExt = false; public: @@ -49,6 +49,9 @@ public: SDValue getBase() const { return Base; } SDValue getIndex() { return Index; } SDValue getIndex() const { return Index; } + void addToOffset(int64_t VectorOff) { + Offset = Offset.value_or(0) + VectorOff; + } bool hasValidOffset() const { return Offset.has_value(); } int64_t getOffset() const { return *Offset; } @@ -79,9 +82,9 @@ public: // Returns true `Op0` and `Op1` can be proven to alias/not alias, in // which case `IsAlias` is set to true/false. static bool computeAliasing(const SDNode *Op0, - const Optional<int64_t> NumBytes0, + const std::optional<int64_t> NumBytes0, const SDNode *Op1, - const Optional<int64_t> NumBytes1, + const std::optional<int64_t> NumBytes1, const SelectionDAG &DAG, bool &IsAlias); /// Parses tree in N for base, index, offset addresses. diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h index e152503f9e1a..b7c5bec91051 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -21,6 +21,7 @@ namespace llvm { class AAResults; +class AssumptionCache; class TargetInstrInfo; class TargetMachine; class SelectionDAGBuilder; @@ -48,21 +49,19 @@ public: SelectionDAG *CurDAG; std::unique_ptr<SelectionDAGBuilder> SDB; AAResults *AA = nullptr; + AssumptionCache *AC = nullptr; GCFunctionInfo *GFI = nullptr; CodeGenOpt::Level OptLevel; const TargetInstrInfo *TII; const TargetLowering *TLI; bool FastISelFailed; SmallPtrSet<const Instruction *, 4> ElidedArgCopyInstrs; - bool UseInstrRefDebugInfo = false; /// Current optimization remark emitter. /// Used to report things like combines and FastISel failures. 
std::unique_ptr<OptimizationRemarkEmitter> ORE; - static char ID; - - explicit SelectionDAGISel(TargetMachine &tm, + explicit SelectionDAGISel(char &ID, TargetMachine &tm, CodeGenOpt::Level OL = CodeGenOpt::Default); ~SelectionDAGISel() override; @@ -321,6 +320,7 @@ private: void Select_FREEZE(SDNode *N); void Select_ARITH_FENCE(SDNode *N); + void Select_MEMBARRIER(SDNode *N); void pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, SDValue Operand, SDLoc DL); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 389fbce72ad0..187d179e3403 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -118,10 +118,17 @@ bool isBuildVectorOfConstantSDNodes(const SDNode *N); /// ConstantFPSDNode or undef. bool isBuildVectorOfConstantFPSDNodes(const SDNode *N); +/// Returns true if the specified node is a vector where all elements can +/// be truncated to the specified element size without a loss in meaning. +bool isVectorShrinkable(const SDNode *N, unsigned NewEltSize, bool Signed); + /// Return true if the node has at least one operand and all operands of the /// specified node are ISD::UNDEF. bool allOperandsUndef(const SDNode *N); +/// Return true if the specified node is FREEZE(UNDEF). +bool isFreezeUndef(const SDNode *N); + } // end namespace ISD //===----------------------------------------------------------------------===// @@ -298,6 +305,7 @@ public: /// This returns the SDNode that contains this Use. SDNode *getUser() { return User; } + const SDNode *getUser() const { return User; } /// Get the next SDUse in the use list. SDUse *getNext() const { return Next; } @@ -455,12 +463,19 @@ public: class SDNode : public FoldingSetNode, public ilist_node<SDNode> { private: /// The operation that this node performs. - int16_t NodeType; + int32_t NodeType; + +public: + /// Unique and persistent id per SDNode in the DAG. Used for debug printing. + /// We do not place that under `#if LLVM_ENABLE_ABI_BREAKING_CHECKS` + /// intentionally because it adds unneeded complexity without noticeable + /// benefits (see discussion with @thakis in D120714). + uint16_t PersistentId; protected: // We define a set of mini-helper classes to help us interpret the bits in our // SubclassData. These are designed to fit within a uint16_t so they pack - // with NodeType. + // with PersistentId. #if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__)) // Except for GCC; by default, AIX compilers store bit-fields in 4-byte words @@ -615,13 +630,9 @@ private: SDNodeFlags Flags; -public: - /// Unique and persistent id per SDNode in the DAG. Used for debug printing. - /// We do not place that under `#if LLVM_ENABLE_ABI_BREAKING_CHECKS` - /// intentionally because it adds unneeded complexity without noticeable - /// benefits (see discussion with @thakis in D120714). - uint16_t PersistentId; + uint32_t CFIType = 0; +public: //===--------------------------------------------------------------------===// // Accessors // @@ -630,7 +641,7 @@ public: /// pre-isel nodes (those for which isMachineOpcode returns false), these /// are the opcode values in the ISD and <target>ISD namespaces. For /// post-isel opcodes, see getMachineOpcode. 
- unsigned getOpcode() const { return (unsigned short)NodeType; } + unsigned getOpcode() const { return (unsigned)NodeType; } /// Test if this node has a target-specific opcode (in the /// \<target\>ISD namespace). @@ -747,6 +758,7 @@ public: use_iterator() = default; use_iterator(const use_iterator &I) = default; + use_iterator &operator=(const use_iterator &) = default; bool operator==(const use_iterator &x) const { return Op == x.Op; } bool operator!=(const use_iterator &x) const { @@ -915,7 +927,7 @@ public: op_iterator op_begin() const { return OperandList; } op_iterator op_end() const { return OperandList+NumOperands; } - ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); } + ArrayRef<SDUse> ops() const { return ArrayRef(op_begin(), op_end()); } /// Iterator for directly iterating over the operand SDValue's. struct value_op_iterator @@ -964,6 +976,9 @@ public: /// If Flags is not in a defined state then this has no effect. void intersectFlagsWith(const SDNodeFlags Flags); + void setCFIType(uint32_t Type) { CFIType = Type; } + uint32_t getCFIType() const { return CFIType; } + /// Return the number of values defined/returned by this operator. unsigned getNumValues() const { return NumValues; } @@ -1276,8 +1291,6 @@ public: /// Returns alignment and volatility of the memory access Align getOriginalAlign() const { return MMO->getBaseAlign(); } Align getAlign() const { return MMO->getAlign(); } - // FIXME: Remove once transition to getAlign is over. - unsigned getAlignment() const { return MMO->getAlign().value(); } /// Return the SubclassData value, without HasDebugValue. This contains an /// encoding of the volatile flag, as well as bits used by subclasses. This @@ -1406,6 +1419,8 @@ public: case ISD::ATOMIC_LOAD_FSUB: case ISD::ATOMIC_LOAD_FMAX: case ISD::ATOMIC_LOAD_FMIN: + case ISD::ATOMIC_LOAD_UINC_WRAP: + case ISD::ATOMIC_LOAD_UDEC_WRAP: case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: case ISD::MLOAD: @@ -1473,6 +1488,8 @@ public: N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || N->getOpcode() == ISD::ATOMIC_LOAD_FMAX || N->getOpcode() == ISD::ATOMIC_LOAD_FMIN || + N->getOpcode() == ISD::ATOMIC_LOAD_UINC_WRAP || + N->getOpcode() == ISD::ATOMIC_LOAD_UDEC_WRAP || N->getOpcode() == ISD::ATOMIC_LOAD || N->getOpcode() == ISD::ATOMIC_STORE; } @@ -1522,7 +1539,7 @@ protected: public: ArrayRef<int> getMask() const { EVT VT = getValueType(0); - return makeArrayRef(Mask, VT.getVectorNumElements()); + return ArrayRef(Mask, VT.getVectorNumElements()); } int getMaskElt(unsigned Idx) const { @@ -1677,6 +1694,12 @@ bool isOneConstant(SDValue V); /// Returns true if \p V is a constant min signed integer value. bool isMinSignedConstant(SDValue V); +/// Returns true if \p V is a neutral element of Opc with Flags. +/// When OperandNo is 0, it checks that V is a left identity. Otherwise, it +/// checks that V is a right identity. +bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, + unsigned OperandNo); + /// Return the non-bitcasted source operand of \p V if it exists. /// If \p V is not a bitcasted value, it is returned as-is. SDValue peekThroughBitcasts(SDValue V); @@ -1723,7 +1746,8 @@ bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false); /// Return true if the value is a constant 1 integer or a splatted vector of a /// constant 1 integer (with no undefs). -/// Does not permit build vector implicit truncation. +/// Build vector implicit truncation is allowed, but the truncated bits need to +/// be zero. 
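As a worked illustration of the relaxed rule above (not taken from the patch itself), assuming DAG is a SelectionDAG and DL an SDLoc already in scope:

    // i16 operands in a v4i8 BUILD_VECTOR are implicitly truncated to i8.
    SDValue One = DAG.getConstant(1, DL, MVT::i16);
    SDValue BV = DAG.getSplatBuildVector(MVT::v4i8, DL, One);
    bool IsOne = isOneOrOneSplat(BV); // true: the bits dropped by truncation are zero
    // A splat of i16 0x0101 would also truncate to i8 1, but the dropped bits
    // are nonzero, so under the rule above it is not treated as a one-splat.
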
bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false); /// Return true if the value is a constant -1 integer or a splatted vector of a @@ -2081,9 +2105,9 @@ public: bool isConstant() const; /// If this BuildVector is constant and represents the numerical series - /// <a, a+n, a+2n, a+3n, ...> where a is integer and n is a non-zero integer, - /// the value <a,n> is returned. - Optional<std::pair<APInt, APInt>> isConstantSequence() const; + /// "<a, a+n, a+2n, a+3n, ...>" where a is integer and n is a non-zero integer, + /// the value "<a,n>" is returned. + std::optional<std::pair<APInt, APInt>> isConstantSequence() const; /// Recast bit data \p SrcBitElements to \p DstEltSizeInBits wide elements. /// Undef elements are treated as zero, and entirely undefined elements are @@ -2911,10 +2935,10 @@ public: if (NumMemRefs == 0) return {}; if (NumMemRefs == 1) - return makeArrayRef(MemRefs.getAddrOfPtr1(), 1); + return ArrayRef(MemRefs.getAddrOfPtr1(), 1); // Otherwise we have an actual array. - return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs); + return ArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs); } mmo_iterator memoperands_begin() const { return memoperands().begin(); } mmo_iterator memoperands_end() const { return memoperands().end(); } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h index 942a47c6cc7d..403a94c53dc4 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h @@ -108,9 +108,6 @@ class raw_ostream; PointerIntPair<IndexListEntry*, 2, unsigned> lie; - SlotIndex(IndexListEntry *entry, unsigned slot) - : lie(entry, slot) {} - IndexListEntry* listEntry() const { assert(isValid() && "Attempt to compare reserved index."); #ifdef EXPENSIVE_CHECKS @@ -139,6 +136,11 @@ class raw_ostream; /// Construct an invalid index. SlotIndex() = default; + // Creates a SlotIndex from an IndexListEntry and a slot. Generally should + // not be used. This method is only public to facilitate writing certain + // unit tests. + SlotIndex(IndexListEntry *entry, unsigned slot) : lie(entry, slot) {} + // Construct a new slot index from the given one, and set the slot. SlotIndex(const SlotIndex &li, Slot s) : lie(li.listEntry(), unsigned(s)) { assert(lie.getPointer() != nullptr && @@ -215,8 +217,12 @@ class raw_ostream; } /// Return the scaled distance from this index to the given one, where all - /// slots on the same instruction have zero distance. - int getInstrDistance(SlotIndex other) const { + /// slots on the same instruction have zero distance, assuming that the slot + /// indices are packed as densely as possible. There are normally gaps + /// between instructions, so this assumption often doesn't hold. This + /// results in this function often returning a value greater than the actual + /// instruction distance. 
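A quick worked example of the approximation documented above (not part of the patch): the value returned below is simply

    (other.listEntry()->getIndex() - listEntry()->getIndex()) / Slot_Count

so with list-entry indices 20 and 36 and no renumbering gaps it reports (36 - 20) / Slot_Count instructions, i.e. 4 if Slot_Count is 4; any gaps between instructions only inflate that number beyond the true distance.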
+ int getApproxInstrDistance(SlotIndex other) const { return (other.listEntry()->getIndex() - listEntry()->getIndex()) / Slot_Count; } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h index 01cc9bc37931..467e31f17bc8 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h @@ -243,6 +243,14 @@ public: unsigned getGCPointerMap(SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap); + /// Return true if Reg is used only in operands which can be folded to + /// stack usage. + bool isFoldableReg(Register Reg) const; + + /// Return true if Reg is used only in operands of MI which can be folded to + /// stack usage and MI is a statepoint instruction. + static bool isFoldableReg(const MachineInstr *MI, Register Reg); + private: const MachineInstr *MI; unsigned NumDefs; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackProtector.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackProtector.h index b96c0c74fabc..6150684236c8 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackProtector.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackProtector.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" @@ -35,6 +36,8 @@ class Type; class StackProtector : public FunctionPass { private: + static constexpr unsigned DefaultSSPBufferSize = 8; + /// A mapping of AllocaInsts to their required SSP layout. using SSPLayoutMap = DenseMap<const AllocaInst *, MachineFrameInfo::SSPLayoutKind>; @@ -49,7 +52,7 @@ private: Function *F; Module *M; - DominatorTree *DT; + std::optional<DomTreeUpdater> DTU; /// Layout - Mapping of allocations to the required SSPLayoutKind. /// StackProtector analysis will update this map when determining if an @@ -58,7 +61,7 @@ private: /// The minimum size of buffers that will receive stack smashing /// protection when -fstack-protection is used. - unsigned SSPBufferSize = 0; + unsigned SSPBufferSize = DefaultSSPBufferSize; /// VisitedPHIs - The set of PHI nodes visited when determining /// if a variable's reference has been taken. 
This set diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TailDuplicator.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TailDuplicator.h index 94e8092319d7..8fdce301c0cc 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -115,8 +115,7 @@ private: bool canCompletelyDuplicateBB(MachineBasicBlock &BB); bool duplicateSimpleBB(MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, - const DenseSet<Register> &RegsUsedByPhi, - SmallVectorImpl<MachineInstr *> &Copies); + const DenseSet<Register> &RegsUsedByPhi); bool tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, MachineBasicBlock *ForcedLayoutPred, diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 72f69f4c6b77..ee5d87e0ce2e 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -16,10 +16,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/None.h" +#include "llvm/ADT/Uniformity.h" #include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -54,11 +53,14 @@ class ScheduleDAGMI; class ScheduleHazardRecognizer; class SDNode; class SelectionDAG; +class SMSchedule; +class SwingSchedulerDAG; class RegScavenger; class TargetRegisterClass; class TargetRegisterInfo; class TargetSchedModel; class TargetSubtargetInfo; +enum class MachineCombinerPattern; template <class T> class SmallVectorImpl; @@ -108,6 +110,11 @@ public: return Opc <= TargetOpcode::GENERIC_OP_END; } + static bool isGenericAtomicRMWOpcode(unsigned Opc) { + return Opc >= TargetOpcode::GENERIC_ATOMICRMW_OP_START && + Opc <= TargetOpcode::GENERIC_ATOMICRMW_OP_END; + } + /// Given a machine instruction descriptor, returns the register /// class constraint for OpNum, or NULL. virtual @@ -729,6 +736,13 @@ public: /// update with no users being pipelined. virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0; + /// Return true if the proposed schedule should used. Otherwise return + /// false to not pipeline the loop. This function should be used to ensure + /// that pipelined loops meet target-specific quality heuristics. + virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) { + return true; + } + /// Create a condition to determine if the trip count of the loop is greater /// than TC, where TC is always one more than for the previous prologue or /// 0 if this is being called for the outermost prologue. @@ -740,7 +754,7 @@ public: /// /// Note: This hook is guaranteed to be called from the innermost to the /// outermost prologue of the loop being software pipelined. - virtual Optional<bool> + virtual std::optional<bool> createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond) = 0; @@ -995,9 +1009,9 @@ protected: /// If the specific machine instruction is a instruction that moves/copies /// value from one register to another register return destination and source /// registers as machine operands. 
- virtual Optional<DestSourcePair> + virtual std::optional<DestSourcePair> isCopyInstrImpl(const MachineInstr &MI) const { - return None; + return std::nullopt; } /// Return true if the given terminator MI is not expected to spill. This @@ -1018,7 +1032,7 @@ public: /// For COPY-instruction the method naturally returns destination and source /// registers as machine operands, for all other instructions the method calls /// target-dependent implementation. - Optional<DestSourcePair> isCopyInstr(const MachineInstr &MI) const { + std::optional<DestSourcePair> isCopyInstr(const MachineInstr &MI) const { if (MI.isCopy()) { return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; } @@ -1029,9 +1043,9 @@ public: /// immediate value and a physical register, and stores the result in /// the given physical register \c Reg, return a pair of the source /// register and the offset which has been added. - virtual Optional<RegImmPair> isAddImmediate(const MachineInstr &MI, - Register Reg) const { - return None; + virtual std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI, + Register Reg) const { + return std::nullopt; } /// Returns true if MI is an instruction that defines Reg to have a constant @@ -1046,24 +1060,36 @@ public: /// Store the specified register of the given register class to the specified /// stack frame index. The store instruction is to be added to the given /// machine basic block before the specified machine instruction. If isKill - /// is true, the register operand is the last use and must be marked kill. + /// is true, the register operand is the last use and must be marked kill. If + /// \p SrcReg is being directly spilled as part of assigning a virtual + /// register, \p VReg is the register being assigned. This additional register + /// argument is needed for certain targets when invoked from RegAllocFast to + /// map the spilled physical register to its virtual register. A null register + /// can be passed elsewhere. virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { llvm_unreachable("Target didn't implement " "TargetInstrInfo::storeRegToStackSlot!"); } /// Load the specified register of the given register class from the specified /// stack frame index. The load instruction is to be added to the given - /// machine basic block before the specified machine instruction. + /// machine basic block before the specified machine instruction. If \p + /// DestReg is being directly reloaded as part of assigning a virtual + /// register, \p VReg is the register being assigned. This additional register + /// argument is needed for certain targets when invoked from RegAllocFast to + /// map the loaded physical register to its virtual register. A null register + /// can be passed elsewhere. virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { llvm_unreachable("Target didn't implement " "TargetInstrInfo::loadRegFromStackSlot!"); } @@ -1134,8 +1160,8 @@ public: /// Return true if target supports reassociation of instructions in machine /// combiner pass to reduce register pressure for a given BB. 
virtual bool - shouldReduceRegisterPressure(MachineBasicBlock *MBB, - RegisterClassInfo *RegClassInfo) const { + shouldReduceRegisterPressure(const MachineBasicBlock *MBB, + const RegisterClassInfo *RegClassInfo) const { return false; } @@ -1156,17 +1182,29 @@ public: /// will be set to true. bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) const; - /// Return true when \P Inst is both associative and commutative. - virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const { + /// Return true when \P Inst is both associative and commutative. If \P Invert + /// is true, then the inverse of \P Inst operation must be tested. + virtual bool isAssociativeAndCommutative(const MachineInstr &Inst, + bool Invert = false) const { return false; } + /// Return the inverse operation opcode if it exists for \P Opcode (e.g. add + /// for sub and vice versa). + virtual std::optional<unsigned> getInverseOpcode(unsigned Opcode) const { + return std::nullopt; + } + + /// Return true when \P Opcode1 or its inversion is equal to \P Opcode2. + bool areOpcodesEqualOrInverse(unsigned Opcode1, unsigned Opcode2) const; + /// Return true when \P Inst has reassociable operands in the same \P MBB. virtual bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const; /// Return true when \P Inst has reassociable sibling. - bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const; + virtual bool hasReassociableSibling(const MachineInstr &Inst, + bool &Commuted) const; /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence. The client @@ -1192,6 +1230,15 @@ public: SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; + /// Reassociation of some instructions requires inverse operations (e.g. + /// (X + A) - Y => (X - Y) + A). This method returns a pair of new opcodes + /// (new root opcode, new prev opcode) that must be used to reassociate \P + /// Root and \P Prev accoring to \P Pattern. + std::pair<unsigned, unsigned> + getReassociationOpcodes(MachineCombinerPattern Pattern, + const MachineInstr &Root, + const MachineInstr &Prev) const; + /// The limit on resource length extension we accept in MachineCombiner Pass. virtual int getExtendResourceLenLimit() const { return 0; } @@ -1365,10 +1412,10 @@ public: /// MachineInstr that is accessing memory. These values are returned as a /// struct ExtAddrMode which contains all relevant information to make up the /// address. - virtual Optional<ExtAddrMode> + virtual std::optional<ExtAddrMode> getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const { - return None; + return std::nullopt; } /// Returns true if MI's Def is NullValueReg, and the MI @@ -1430,6 +1477,13 @@ public: /// Returns true if the instruction is already predicated. virtual bool isPredicated(const MachineInstr &MI) const { return false; } + /// Assumes the instruction is already predicated and returns true if the + /// instruction can be predicated again. + virtual bool canPredicatePredicatedInstr(const MachineInstr &MI) const { + assert(isPredicated(MI) && "Instruction is not predicated"); + return false; + } + // Returns a MIRPrinter comment for this machine operand. virtual std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, @@ -1831,7 +1885,7 @@ public: /// defined by this method. 
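As context for the hunk below (an illustrative sketch, not part of the diff): this is one instance of the commit-wide move from llvm::Optional/llvm::None to std::optional/std::nullopt. A hypothetical target override of the same hook, with made-up index names, would now look like:

    ArrayRef<std::pair<int, const char *>>
    MyTargetInstrInfo::getSerializableTargetIndices() const {
      // Hypothetical target indices; the base implementation below now
      // returns std::nullopt instead of llvm::None.
      static const std::pair<int, const char *> Indices[] = {
          {0, "my-target-index"}};
      return Indices;
    }
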
virtual ArrayRef<std::pair<int, const char *>> getSerializableTargetIndices() const { - return None; + return std::nullopt; } /// Decompose the machine operand's target flags into two values - the direct @@ -1848,7 +1902,7 @@ public: /// defined by this method. virtual ArrayRef<std::pair<unsigned, const char *>> getSerializableDirectMachineOperandTargetFlags() const { - return None; + return std::nullopt; } /// Return an array that contains the bitmask target flag values and their @@ -1858,7 +1912,7 @@ public: /// defined by this method. virtual ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const { - return None; + return std::nullopt; } /// Return an array that contains the MMO target flag values and their @@ -1868,7 +1922,7 @@ public: /// defined by this method. virtual ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> getSerializableMachineMemOperandTargetFlags() const { - return None; + return std::nullopt; } /// Determines whether \p Inst is a tail call instruction. Override this @@ -1969,8 +2023,8 @@ public: /// Produce the expression describing the \p MI loading a value into /// the physical register \p Reg. This hook should only be used with /// \p MIs belonging to VReg-less functions. - virtual Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI, - Register Reg) const; + virtual std::optional<ParamLoadedValue> + describeLoadedValue(const MachineInstr &MI, Register Reg) const; /// Given the generic extension instruction \p ExtMI, returns true if this /// extension is a likely candidate for being folded into an another @@ -2000,6 +2054,21 @@ public: return MI.getOperand(0); } + /// Return the uniformity behavior of the given instruction. + virtual InstructionUniformity + getInstructionUniformity(const MachineInstr &MI) const { + return InstructionUniformity::Default; + } + + /// Returns true if the given \p MI defines a TargetIndex operand that can be + /// tracked by their offset, can have values, and can have debug info + /// associated with it. If so, sets \p Index and \p Offset of the target index + /// operand. 
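A sketch of how a backend might implement the new hook declared below (assumptions: MyTargetInstrInfo and MyTarget::MY_TI_DEF are hypothetical names, not from this diff):

    bool MyTargetInstrInfo::isExplicitTargetIndexDef(const MachineInstr &MI,
                                                     int &Index,
                                                     int64_t &Offset) const {
      // Assume MY_TI_DEF is a pseudo whose operand 1 is a target-index operand.
      if (MI.getOpcode() != MyTarget::MY_TI_DEF ||
          !MI.getOperand(1).isTargetIndex())
        return false;
      Index = MI.getOperand(1).getIndex();
      Offset = MI.getOperand(1).getOffset();
      return true;
    }
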
+ virtual bool isExplicitTargetIndexDef(const MachineInstr &MI, int &Index, + int64_t &Offset) const { + return false; + } + private: mutable std::unique_ptr<MIRFormatter> Formatter; unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h index 1bb2a8e50c07..639d48e342ef 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h @@ -25,9 +25,9 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLArrayExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/ComplexDeinterleavingPass.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/LowLevelType.h" @@ -49,7 +49,6 @@ #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstructionCost.h" #include "llvm/Support/MachineValueType.h" #include <algorithm> #include <cassert> @@ -63,12 +62,14 @@ namespace llvm { +class AssumptionCache; class CCState; class CCValAssign; class Constant; class FastISel; class FunctionLoweringInfo; class GlobalValue; +class Loop; class GISelKnownBits; class IntrinsicInst; class IRBuilderBase; @@ -258,6 +259,8 @@ public: MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. BitTestIntrinsic, // Use a target-specific intrinsic for special bit // operations; used by X86. + CmpArithIntrinsic,// Use a target-specific intrinsic for special compare + // operations; used by X86. Expand, // Generic expansion in terms of other atomic operations. // Rewrite to a non-atomic form for use in a known non-preemptible @@ -298,7 +301,7 @@ public: bool IsSwiftAsync : 1; bool IsSwiftError : 1; bool IsCFGuardTarget : 1; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; Type *IndirectType = nullptr; ArgListEntry() @@ -422,8 +425,10 @@ public: return MachineMemOperand::MONone; } - MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI, - const DataLayout &DL) const; + MachineMemOperand::Flags + getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL, + AssumptionCache *AC = nullptr, + const TargetLibraryInfo *LibInfo = nullptr) const; MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI, const DataLayout &DL) const; MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI, @@ -571,24 +576,7 @@ public: /// dag combiner. virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, - const MachineMemOperand &MMO) const { - // Don't do if we could do an indexed load on the original type, but not on - // the new one. - if (!LoadVT.isSimple() || !BitcastVT.isSimple()) - return true; - - MVT LoadMVT = LoadVT.getSimpleVT(); - - // Don't bother doing this if it's just going to be promoted again later, as - // doing so might interfere with other combines. 
- if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && - getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) - return false; - - bool Fast = false; - return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT, - MMO, &Fast) && Fast; - } + const MachineMemOperand &MMO) const; /// Return true if the following transform is beneficial: /// (store (y (conv x)), y*)) -> (store x, (x*)) @@ -622,12 +610,12 @@ public: } /// Return true if it is cheap to speculate a call to intrinsic cttz. - virtual bool isCheapToSpeculateCttz() const { + virtual bool isCheapToSpeculateCttz(Type *Ty) const { return false; } /// Return true if it is cheap to speculate a call to intrinsic ctlz. - virtual bool isCheapToSpeculateCtlz() const { + virtual bool isCheapToSpeculateCtlz(Type *Ty) const { return false; } @@ -801,6 +789,9 @@ public: return true; } + // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X)) + virtual bool preferScalarizeSplat(unsigned Opc) const { return true; } + /// Return true if the target wants to use the optimization that /// turns ext(promotableInst1(...(promotableInstN(load)))) into /// promotedInst1(...(promotedInstN(ext(load)))). @@ -834,8 +825,8 @@ public: virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const; - /// Return the ValueType for comparison libcalls. Comparions libcalls include - /// floating point comparion calls, and Ordered/Unordered check calls on + /// Return the ValueType for comparison libcalls. Comparison libcalls include + /// floating point comparison calls, and Ordered/Unordered check calls on /// floating point numbers. virtual MVT::SimpleValueType getCmpLibcallReturnType() const; @@ -924,11 +915,19 @@ public: return RepRegClassCostForVT[VT.SimpleTy]; } - /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS - /// instructions, and false if a library call is preferred (e.g for code-size - /// reasons). - virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { - return true; + /// Return the preferred strategy to legalize tihs SHIFT instruction, with + /// \p ExpansionFactor being the recursion depth - how many expansion needed. + enum class ShiftLegalizationStrategy { + ExpandToParts, + ExpandThroughStack, + LowerToLibcall + }; + virtual ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const { + if (ExpansionFactor == 1) + return ShiftLegalizationStrategy::ExpandToParts; + return ShiftLegalizationStrategy::ExpandThroughStack; } /// Return true if the target has native support for the specified value type. @@ -936,7 +935,7 @@ public: /// promotions or expansions. bool isTypeLegal(EVT VT) const { assert(!VT.isSimple() || - (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); + (unsigned)VT.getSimpleVT().SimpleTy < std::size(RegClassForVT)); return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; } @@ -964,6 +963,22 @@ public: return ValueTypeActions; } + /// Return pair that represents the legalization kind (first) that needs to + /// happen to EVT (second) in order to type-legalize it. + /// + /// First: how we should legalize values of this type, either it is already + /// legal (return 'Legal') or we need to promote it to a larger type (return + /// 'Promote'), or we need to expand it into multiple registers of smaller + /// integer type (return 'Expand'). 'Custom' is not an option. 
+ /// + /// Second: for types supported by the target, this is an identity function. + /// For types that must be promoted to larger types, this returns the larger + /// type to promote to. For integer types that are larger than the largest + /// integer register, this contains one step in the expansion to get to the + /// smaller register. For illegal floating point types, this returns the + /// integer type to transform to. + LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; + /// Return how we should legalize values of this type, either it is already /// legal (return 'Legal') or we need to promote it to a larger type (return /// 'Promote'), or we need to expand it into multiple registers of smaller @@ -981,7 +996,7 @@ public: /// register, this contains one step in the expansion to get to the smaller /// register. For illegal floating point types, this returns the integer type /// to transform to. - EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { + virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { return getTypeConversion(Context, VT).second; } @@ -1034,6 +1049,10 @@ public: // value representing memory location PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; + // Fallback address space for use if ptrVal is nullptr. std::nullopt means + // unknown address space. + std::optional<unsigned> fallbackAddressSpace; + int offset = 0; // offset off of ptrVal uint64_t size = 0; // the size of the memory location // (taken from memVT if zero) @@ -1095,7 +1114,8 @@ public: if (VT.isExtended()) return Expand; // If a target-specific SDNode requires legalization, require the target // to provide custom legalization for it. - if (Op >= array_lengthof(OpActions[0])) return Custom; + if (Op >= std::size(OpActions[0])) + return Custom; return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; } @@ -1397,13 +1417,25 @@ public: return false; } + // Return true if the target supports a scatter/gather instruction with + // indices which are scaled by the particular value. Note that all targets + // must by definition support scale of 1. + virtual bool isLegalScaleForGatherScatter(uint64_t Scale, + uint64_t ElemSize) const { + // MGATHER/MSCATTER are only required to support scaling by one or by the + // element size. + if (Scale != ElemSize && Scale != 1) + return false; + return true; + } + /// Return how the condition code should be treated: either it is legal, needs /// to be expanded to some other code sequence, or the target has a custom /// expander for it. LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const { - assert((unsigned)CC < array_lengthof(CondCodeActions) && - ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && + assert((unsigned)CC < std::size(CondCodeActions) && + ((unsigned)VT.SimpleTy >> 3) < std::size(CondCodeActions[0]) && "Table isn't big enough!"); // See setCondCodeAction for how this is encoded. uint32_t Shift = 4 * (VT.SimpleTy & 0x7); @@ -1511,7 +1543,7 @@ public: /// Return the type of registers that this ValueType will eventually require. 
MVT getRegisterType(MVT VT) const { - assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)); + assert((unsigned)VT.SimpleTy < std::size(RegisterTypeForVT)); return RegisterTypeForVT[VT.SimpleTy]; } @@ -1519,7 +1551,7 @@ public: MVT getRegisterType(LLVMContext &Context, EVT VT) const { if (VT.isSimple()) { assert((unsigned)VT.getSimpleVT().SimpleTy < - array_lengthof(RegisterTypeForVT)); + std::size(RegisterTypeForVT)); return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; } if (VT.isVector()) { @@ -1549,10 +1581,10 @@ public: /// instance with i128 inline assembly operands on SystemZ. virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, - Optional<MVT> RegisterVT = None) const { + std::optional<MVT> RegisterVT = std::nullopt) const { if (VT.isSimple()) { assert((unsigned)VT.getSimpleVT().SimpleTy < - array_lengthof(NumRegistersForVT)); + std::size(NumRegistersForVT)); return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; } if (VT.isVector()) { @@ -1620,7 +1652,7 @@ public: /// If true, the target has custom DAG combine transformations that it can /// perform for the specified node. bool hasTargetDAGCombine(ISD::NodeType NT) const { - assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); + assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray)); return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); } @@ -1686,15 +1718,16 @@ public: /// /// This function returns true if the target allows unaligned memory accesses /// of the specified type in the given address space. If true, it also returns - /// whether the unaligned memory access is "fast" in the last argument by - /// reference. This is used, for example, in situations where an array - /// copy/move/set is converted to a sequence of store operations. Its use - /// helps to ensure that such replacements don't generate code that causes an - /// alignment error (trap) on the target machine. + /// a relative speed of the unaligned memory access in the last argument by + /// reference. The higher the speed number the faster the operation comparing + /// to a number returned by another such call. This is used, for example, in + /// situations where an array copy/move/set is converted to a sequence of + /// store operations. Its use helps to ensure that such replacements don't + /// generate code that causes an alignment error (trap) on the target machine. virtual bool allowsMisalignedMemoryAccesses( EVT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool * /*Fast*/ = nullptr) const { + unsigned * /*Fast*/ = nullptr) const { return false; } @@ -1702,51 +1735,51 @@ public: virtual bool allowsMisalignedMemoryAccesses( LLT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool * /*Fast*/ = nullptr) const { + unsigned * /*Fast*/ = nullptr) const { return false; } /// This function returns true if the memory access is aligned or if the /// target allows this specific unaligned memory access. If the access is - /// allowed, the optional final parameter returns if the access is also fast - /// (as defined by the target). + /// allowed, the optional final parameter returns a relative speed of the + /// access (as defined by the target). 
bool allowsMemoryAccessForAlignment( LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// Return true if the memory access of this type is aligned or if the target /// allows this specific unaligned access for the given MachineMemOperand. - /// If the access is allowed, the optional final parameter returns if the - /// access is also fast (as defined by the target). + /// If the access is allowed, the optional final parameter returns a relative + /// speed of the access (as defined by the target). bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// Return true if the target supports a memory access of this type for the /// given address space and alignment. If the access is allowed, the optional - /// final parameter returns if the access is also fast (as defined by the - /// target). + /// final parameter returns the relative speed of the access (as defined by + /// the target). virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// Return true if the target supports a memory access of this type for the /// given MachineMemOperand. If the access is allowed, the optional - /// final parameter returns if the access is also fast (as defined by the + /// final parameter returns the relative access speed (as defined by the /// target). bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// LLT handling variant. bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, const MachineMemOperand &MMO, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// Returns the target specific optimal type for load and store operations as /// a result of memset, memcpy, and memmove lowering. @@ -1876,11 +1909,11 @@ public: /// Returns the name of the symbol used to emit stack probes or the empty /// string if not applicable. - virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; } + virtual bool hasStackProbeSymbol(const MachineFunction &MF) const { return false; } - virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; } + virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; } - virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { + virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const { return ""; } @@ -1905,10 +1938,6 @@ public: /// Get the ISD node that corresponds to the Instruction class opcode. int InstructionOpcodeToISD(unsigned Opcode) const; - /// Estimate the cost of type-legalization and the legalized type. - std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL, - Type *Ty) const; - /// @} //===--------------------------------------------------------------------===// @@ -1923,6 +1952,18 @@ public: return MaxAtomicSizeInBitsSupported; } + /// Returns the size in bits of the maximum div/rem the backend supports. + /// Larger operations will be expanded by ExpandLargeDivRem. 
+ unsigned getMaxDivRemBitWidthSupported() const { + return MaxDivRemBitWidthSupported; + } + + /// Returns the size in bits of the maximum larget fp convert the backend + /// supports. Larger operations will be expanded by ExpandLargeFPConvert. + unsigned getMaxLargeFPConvertBitWidthSupported() const { + return MaxLargeFPConvertBitWidthSupported; + } + /// Returns the size of the smallest cmpxchg or ll/sc instruction /// the backend supports. Any smaller operations are widened in /// AtomicExpandPass. @@ -1942,6 +1983,13 @@ public: return false; } + /// Whether AtomicExpandPass should automatically insert a trailing fence + /// without reducing the ordering for this atomic. Defaults to false. + virtual bool + shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const { + return false; + } + /// Perform a load-linked operation on Addr, returning a "Value *" with the /// corresponding pointee type. This may entail some non-trivial operations to /// truncate or reconstruct types that will be illegal in the backend. See @@ -1960,7 +2008,8 @@ public: /// Perform a masked atomicrmw using a target-specific intrinsic. This /// represents the core LL/SC loop which will be lowered at a late stage by - /// the backend. + /// the backend. The target-specific intrinsic returns the loaded value and + /// is not responsible for masking and shifting the result. virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, @@ -1969,6 +2018,14 @@ public: llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); } + /// Perform a atomicrmw expansion using a target-specific way. This is + /// expected to be called when masked atomicrmw and bit test atomicrmw don't + /// work, and the target supports another way to lower atomicrmw. + virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const { + llvm_unreachable( + "Generic atomicrmw expansion unimplemented on this target"); + } + /// Perform a bit test atomicrmw using a target-specific intrinsic. This /// represents the combined bit test intrinsic which will be lowered at a late /// stage by the backend. @@ -1977,9 +2034,18 @@ public: "Bit test atomicrmw expansion unimplemented on this target"); } + /// Perform a atomicrmw which the result is only used by comparison, using a + /// target-specific intrinsic. This represents the combined atomic and compare + /// intrinsic which will be lowered at a late stage by the backend. + virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const { + llvm_unreachable( + "Compare arith atomicrmw expansion unimplemented on this target"); + } + /// Perform a masked cmpxchg using a target-specific intrinsic. This /// represents the core LL/SC loop which will be lowered at a late stage by - /// the backend. + /// the backend. The target-specific intrinsic returns the loaded value and + /// is not responsible for masking and shifting the result. virtual Value *emitMaskedAtomicCmpXchgIntrinsic( IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { @@ -2208,7 +2274,7 @@ public: if (Exponent < 0) Exponent = -Exponent; return !OptForSize || - (countPopulation((unsigned int)Exponent) + Log2_32(Exponent) < 7); + (llvm::popcount((unsigned int)Exponent) + Log2_32(Exponent) < 7); } //===--------------------------------------------------------------------===// @@ -2285,7 +2351,7 @@ protected: /// specified value type. 
This indicates the selector can handle values of /// that class natively. void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { - assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT)); + assert((unsigned)VT.SimpleTy < std::size(RegClassForVT)); RegClassForVT[VT.SimpleTy] = RC; } @@ -2302,7 +2368,7 @@ protected: /// type and indicate what to do about it. Note that VT may refer to either /// the type of a result or that of an operand of Op. void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { - assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); + assert(Op < std::size(OpActions[0]) && "Table isn't big enough!"); OpActions[(unsigned)VT.SimpleTy][Op] = Action; } void setOperationAction(ArrayRef<unsigned> Ops, MVT VT, @@ -2404,7 +2470,7 @@ protected: void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT, LegalizeAction Action) { for (auto CC : CCs) { - assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && + assert(VT.isValid() && (unsigned)CC < std::size(CondCodeActions) && "Table isn't big enough!"); assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the @@ -2441,7 +2507,7 @@ protected: /// PerformDAGCombine virtual method. void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) { for (auto NT : NTs) { - assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); + assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray)); TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7); } } @@ -2478,6 +2544,18 @@ protected: MaxAtomicSizeInBitsSupported = SizeInBits; } + /// Set the size in bits of the maximum div/rem the backend supports. + /// Larger operations will be expanded by ExpandLargeDivRem. + void setMaxDivRemBitWidthSupported(unsigned SizeInBits) { + MaxDivRemBitWidthSupported = SizeInBits; + } + + /// Set the size in bits of the maximum fp convert the backend supports. + /// Larger operations will be expanded by ExpandLargeFPConvert. + void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) { + MaxLargeFPConvertBitWidthSupported = SizeInBits; + } + /// Sets the minimum cmpxchg or ll/sc size supported by the backend. void setMinCmpXchgSizeInBits(unsigned SizeInBits) { MinCmpXchgSizeInBits = SizeInBits; @@ -2533,22 +2611,6 @@ public: Type *Ty, unsigned AddrSpace, Instruction *I = nullptr) const; - /// Return the cost of the scaling factor used in the addressing mode - /// represented by AM for this target, for a load/store of the specified type. - /// - /// If the AM is supported, the return value must be >= 0. - /// If the AM is not supported, it returns a negative value. - /// TODO: Handle pre/postinc as well. - /// TODO: Remove default argument - virtual InstructionCost getScalingFactorCost(const DataLayout &DL, - const AddrMode &AM, Type *Ty, - unsigned AS = 0) const { - // Default: assume that any scaling factor used in a legal AM is free. - if (isLegalAddressingMode(DL, AM, Ty, AS)) - return 0; - return -1; - } - /// Return true if the specified immediate is legal icmp immediate, that is /// the target has icmp instructions which can compare a register against the /// immediate without having to materialize the immediate into a register. @@ -2789,6 +2851,13 @@ public: return false; } + /// Try to optimize extending or truncating conversion instructions (like + /// zext, trunc, fptoui, uitofp) for the target. 
+ virtual bool optimizeExtendOrTruncateConversion(Instruction *I, + Loop *L) const { + return false; + } + /// Return true if the target supplies and combines to a paired load /// two loaded values of type LoadedType next to each other in memory. /// RequiredAlignment gives the minimal alignment constraints that must be met @@ -3066,6 +3135,26 @@ public: return isOperationLegalOrCustom(Op, VT); } + /// Does this target support complex deinterleaving + virtual bool isComplexDeinterleavingSupported() const { return false; } + + /// Does this target support complex deinterleaving with the given operation + /// and type + virtual bool isComplexDeinterleavingOperationSupported( + ComplexDeinterleavingOperation Operation, Type *Ty) const { + return false; + } + + /// Create the IR node for the given complex deinterleaving operation. + /// If one cannot be created using all the given inputs, nullptr should be + /// returned. + virtual Value *createComplexDeinterleavingIR( + Instruction *I, ComplexDeinterleavingOperation OperationType, + ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, + Value *Accumulator = nullptr) const { + return nullptr; + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // @@ -3182,6 +3271,14 @@ private: /// Accesses larger than this will be expanded by AtomicExpandPass. unsigned MaxAtomicSizeInBitsSupported; + /// Size in bits of the maximum div/rem size the backend supports. + /// Larger operations will be expanded by ExpandLargeDivRem. + unsigned MaxDivRemBitWidthSupported; + + /// Size in bits of the maximum larget fp convert size the backend + /// supports. Larger operations will be expanded by ExpandLargeFPConvert. + unsigned MaxLargeFPConvertBitWidthSupported; + /// Size in bits of the minimum cmpxchg or ll/sc operation the /// backend supports. unsigned MinCmpXchgSizeInBits; @@ -3255,8 +3352,6 @@ private: ValueTypeActionImpl ValueTypeActions; private: - LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; - /// Targets can specify ISD nodes that they would like PerformDAGCombine /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this /// array. @@ -3499,6 +3594,21 @@ public: /// legal. It is frequently not legal in PIC relocation models. virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + /// Return true if the operand with index OpNo corresponding to a target + /// branch, for example, in following case + /// + /// call void asm "lea r8, $0\0A\09call qword ptr ${1:P}\0A\09ret", + /// "*m,*m,~{r8},~{dirflag},~{fpsr},~{flags}" + /// ([9 x i32]* @Arr), void (...)* @sincos_asm) + /// + /// the operand $1 (sincos_asm) is target branch in inline asm, but the + /// operand $0 (Arr) is not. + virtual bool + isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, + unsigned OpNo) const { + return false; + } + bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const; @@ -3774,6 +3884,14 @@ public: SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const; + /// Return true if Op can create undef or poison from non-undef & non-poison + /// operands. The DemandedElts argument limits the check to the requested + /// vector elements. 
+ virtual bool + canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, + const SelectionDAG &DAG, bool PoisonOnly, + bool ConsiderFlags, unsigned Depth) const; + /// Tries to build a legal vector shuffle using the provided parameters /// or equivalent variations. The Mask argument maybe be modified as the /// function tries different variations. @@ -3798,6 +3916,7 @@ public: /// indicating any elements which may be undef in the output \p UndefElts. virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, + const SelectionDAG &DAG, unsigned Depth = 0) const; /// Returns true if the given Opc is considered a canonical constant for the @@ -3930,6 +4049,9 @@ public: return false; } + /// Return true if the target supports kcfi operand bundles. + virtual bool supportKCFIBundles() const { return false; } + /// Perform necessary initialization to handle a subset of CSRs explicitly /// via copies. This function is called at the beginning of instruction /// selection. @@ -3955,22 +4077,34 @@ public: NegatibleCost &Cost, unsigned Depth = 0) const; - /// This is the helper function to return the newly negated expression only - /// when the cost is cheaper. - SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOps, bool OptForSize, - unsigned Depth = 0) const { + SDValue getCheaperOrNeutralNegatedExpression( + SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, + const NegatibleCost CostThreshold = NegatibleCost::Neutral, + unsigned Depth = 0) const { NegatibleCost Cost = NegatibleCost::Expensive; SDValue Neg = getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); - if (Neg && Cost == NegatibleCost::Cheaper) + if (!Neg) + return SDValue(); + + if (Cost <= CostThreshold) return Neg; + // Remove the new created node to avoid the side effect to the DAG. - if (Neg && Neg->use_empty()) + if (Neg->use_empty()) DAG.RemoveDeadNode(Neg.getNode()); return SDValue(); } + /// This is the helper function to return the newly negated expression only + /// when the cost is cheaper. + SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOps, bool OptForSize, + unsigned Depth = 0) const { + return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize, + NegatibleCost::Cheaper, Depth); + } + /// This is the helper function to return the newly negated expression if /// the cost is not expensive. SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, @@ -3986,10 +4120,26 @@ public: /// Target-specific splitting of values into parts that fit a register /// storing a legal type - virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, - SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, - Optional<CallingConv::ID> CC) const { + virtual bool splitValueIntoRegisterParts( + SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { + return false; + } + + /// Allows the target to handle physreg-carried dependency + /// in target-specific way. Used from the ScheduleDAGSDNodes to decide whether + /// to add the edge to the dependency graph. 
+ /// Def - input: Selection DAG node defininfg physical register + /// User - input: Selection DAG node using physical register + /// Op - input: Number of User operand + /// PhysReg - inout: set to the physical register if the edge is + /// necessary, unchanged otherwise + /// Cost - inout: physical register copy cost. + /// Returns 'true' is the edge is necessary, 'false' otherwise + virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + unsigned &PhysReg, int &Cost) const { return false; } @@ -3998,7 +4148,7 @@ public: joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, - Optional<CallingConv::ID> CC) const { + std::optional<CallingConv::ID> CC) const { return SDValue(); } @@ -4049,6 +4199,7 @@ public: SmallVector<SDValue, 32> OutVals; SmallVector<ISD::InputArg, 32> Ins; SmallVector<SDValue, 4> InVals; + const ConstantInt *CFIType = nullptr; CallLoweringInfo(SelectionDAG &DAG) : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), @@ -4171,6 +4322,11 @@ public: return *this; } + CallLoweringInfo &setCFIType(const ConstantInt *Type) { + CFIType = Type; + return *this; + } + ArgListTy &getArgs() { return Args; } @@ -4558,9 +4714,16 @@ public: const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const; + // Targets may override this function to collect operands from the CallInst + // and for example, lower them into the SelectionDAG operands. + virtual void CollectTargetIntrinsicOperands(const CallInst &I, + SmallVectorImpl<SDValue> &Ops, + SelectionDAG &DAG) const; + //===--------------------------------------------------------------------===// // Div utility functions // + SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl<SDNode *> &Created) const; SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, @@ -4684,6 +4847,26 @@ public: SDValue LL = SDValue(), SDValue LH = SDValue(), SDValue RL = SDValue(), SDValue RH = SDValue()) const; + /// Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit + /// urem by constant and other arithmetic ops. The n/2-bit urem by constant + /// will be expanded by DAGCombiner. This is not possible for all constant + /// divisors. + /// \param N Node to expand + /// \param Result A vector that will be filled with the lo and high parts of + /// the results. For *DIVREM, this will be the quotient parts followed + /// by the remainder parts. + /// \param HiLoVT The value type to use for the Lo and Hi parts. Should be + /// half of VT. + /// \param LL Low bits of the LHS of the operation. You can use this + /// parameter if you want to control how low bits are extracted from + /// the LHS. + /// \param LH High bits of the LHS of the operation. See LL for meaning. + /// \returns true if the node has been expanded, false if it has not. + bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result, + EVT HiLoVT, SelectionDAG &DAG, + SDValue LL = SDValue(), + SDValue LH = SDValue()) const; + /// Expand funnel shift. /// \param N Node to expand /// \returns The expansion if successful, SDValue() otherwise @@ -4749,18 +4932,38 @@ public: /// \returns The expansion result or SDValue() if it fails. SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const; + /// Expand VP_CTPOP nodes. + /// \returns The expansion result or SDValue() if it fails. 
+ SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const; + /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand /// \returns The expansion result or SDValue() if it fails. SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const; + /// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes. + /// \param N Node to expand + /// \returns The expansion result or SDValue() if it fails. + SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const; + + /// Expand CTTZ via Table Lookup. + /// \param N Node to expand + /// \returns The expansion result or SDValue() if it fails. + SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SDValue Op, unsigned NumBitsPerElt) const; + /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand /// \returns The expansion result or SDValue() if it fails. SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const; + /// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes. + /// \param N Node to expand + /// \returns The expansion result or SDValue() if it fails. + SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const; + /// Expand ABS nodes. Expands vector/scalar ABS nodes, /// vector nodes can only succeed if all operations are legal/custom. /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) @@ -4776,12 +4979,22 @@ public: /// \returns The expansion result or SDValue() if it fails. SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const; + /// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with + /// i16/i32/i64 scalar types. Returns SDValue() if expand fails. \param N Node + /// to expand \returns The expansion result or SDValue() if it fails. + SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const; + /// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes. /// Returns SDValue() if expand fails. /// \param N Node to expand /// \returns The expansion result or SDValue() if it fails. SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const; + /// Expand VP_BITREVERSE nodes. Expands VP_BITREVERSE nodes with + /// i8/i16/i32/i64 scalar types. \param N Node to expand \returns The + /// expansion result or SDValue() if it fails. + SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const; + /// Turn load of vector type into a load of the individual elements. /// \param LD load to expand /// \returns BUILD_VECTOR and TokenFactor nodes. diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 04369a5bfe0d..5eb1a644ffba 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -54,10 +54,12 @@ public: const uint16_t *SuperRegIndices; const LaneBitmask LaneMask; /// Classes with a higher priority value are assigned first by register - /// allocators using a greedy heuristic. The value is in the range [0,63]. - /// Values >= 32 should be used with care since they may overlap with other - /// fields in the allocator's priority heuristics. + /// allocators using a greedy heuristic. The value is in the range [0,31]. const uint8_t AllocationPriority; + + // Change allocation priority heuristic used by greedy. + const bool GlobalPriority; + /// Configurable target specific flags. 
const uint8_t TSFlags; /// Whether the class supports two (or more) disjunct subregister indices. @@ -198,7 +200,7 @@ public: /// /// By default, this method returns all registers in the class. ArrayRef<MCPhysReg> getRawAllocationOrder(const MachineFunction &MF) const { - return OrderFunc ? OrderFunc(MF) : makeArrayRef(begin(), getNumRegs()); + return OrderFunc ? OrderFunc(MF) : ArrayRef(begin(), getNumRegs()); } /// Returns the combination of all lane masks of register in this class. @@ -355,7 +357,7 @@ public: unsigned NumRegs = getNumRegs(); assert(Idx < InfoDesc->NumCosts && "CostPerUse index out of bounds"); - return makeArrayRef(&InfoDesc->CostPerUse[Idx * NumRegs], NumRegs); + return ArrayRef(&InfoDesc->CostPerUse[Idx * NumRegs], NumRegs); } /// Return true if the register is in the allocation of any register class. @@ -523,6 +525,16 @@ public: /// markSuperRegs() and checkAllSuperRegsMarked() in this case. virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0; + /// Returns either a string explaining why the given register is reserved for + /// this function, or an empty optional if no explanation has been written. + /// The absence of an explanation does not mean that the register is not + /// reserved (meaning, you should check that PhysReg is in fact reserved + /// before calling this). + virtual std::optional<std::string> + explainReservedReg(const MachineFunction &MF, MCRegister PhysReg) const { + return {}; + } + /// Returns false if we can't guarantee that Physreg, specified as an IR asm /// clobber constraint, will be preserved across the statement. virtual bool isAsmClobberable(const MachineFunction &MF, @@ -626,6 +638,14 @@ public: return RC; } + /// Return a register class that can be used for a subregister copy from/into + /// \p SuperRC at \p SubRegIdx. + virtual const TargetRegisterClass * + getSubRegisterClass(const TargetRegisterClass *SuperRC, + unsigned SubRegIdx) const { + return nullptr; + } + /// Return the subregister index you get from composing /// two subregister indices. /// @@ -673,6 +693,14 @@ public: static void dumpReg(Register Reg, unsigned SubRegIndex = 0, const TargetRegisterInfo *TRI = nullptr); + /// Return target defined base register class for a physical register. + /// This is the register class with the lowest BaseClassOrder containing the + /// register. + /// Will be nullptr if the register is not in any base register class. + virtual const TargetRegisterClass *getPhysRegBaseClass(MCRegister Reg) const { + return nullptr; + } + protected: /// Overridden by TableGen in targets that have sub-registers. virtual unsigned composeSubRegIndicesImpl(unsigned, unsigned) const { @@ -1007,6 +1035,12 @@ public: return false; } + /// Process frame indices in reverse block order. This changes the behavior of + /// the RegScavenger passed to eliminateFrameIndex. If this is true targets + /// should scavengeRegisterBackwards in eliminateFrameIndex. New targets + /// should prefer reverse scavenging behavior. + virtual bool supportsBackwardScavenger() const { return false; } + /// This method must be overriden to eliminate abstract frame indices from /// instructions which may use them. The instruction referenced by the /// iterator contains an MO_FrameIndex operand which must be eliminated by @@ -1014,7 +1048,9 @@ public: /// as long as it keeps the iterator pointing at the finished product. /// SPAdj is the SP adjustment due to call frame setup instruction. /// FIOperandNum is the FI operand number. 
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + /// Returns true if the current instruction was removed and the iterator + /// is not longer valid + virtual bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const = 0; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index dbd678b75d05..1312ae602162 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/PBQPRAConstraint.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CodeGen.h" #include <memory> @@ -29,6 +30,7 @@ class APInt; class MachineFunction; class ScheduleDAGMutation; class CallLowering; +class GlobalValue; class InlineAsmLowering; class InstrItineraryData; struct InstrStage; @@ -308,6 +310,14 @@ public: unsigned PhysReg) const { return false; } + + /// Classify a global function reference. This mainly used to fetch target + /// special flags for lowering a function address. For example mark a function + /// call should be plt or pc-related addressing. + virtual unsigned char + classifyGlobalFunctionReference(const GlobalValue *GV) const { + return 0; + } }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TypePromotion.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TypePromotion.h new file mode 100644 index 000000000000..efe58232cdcd --- /dev/null +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TypePromotion.h @@ -0,0 +1,35 @@ +//===- TypePromotion.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Defines an IR pass for type promotion. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TYPEPROMOTION_H +#define LLVM_CODEGEN_TYPEPROMOTION_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; +class TargetMachine; + +class TypePromotionPass : public PassInfoMixin<TypePromotionPass> { +private: + const TargetMachine *TM; + +public: + TypePromotionPass(const TargetMachine *TM): TM(TM) { } + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_TYPEPROMOTION_H diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h index 48d265476ca8..af4c8ab40e82 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h @@ -346,11 +346,11 @@ namespace llvm { /// Return the size of the specified fixed width value type in bits. The /// function will assert if the type is scalable. 
uint64_t getFixedSizeInBits() const { - return getSizeInBits().getFixedSize(); + return getSizeInBits().getFixedValue(); } uint64_t getScalarSizeInBits() const { - return getScalarType().getSizeInBits().getFixedSize(); + return getScalarType().getSizeInBits().getFixedValue(); } /// Return the number of bytes overwritten by a store of the specified value @@ -361,13 +361,13 @@ namespace llvm { /// base size. TypeSize getStoreSize() const { TypeSize BaseSize = getSizeInBits(); - return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()}; + return {(BaseSize.getKnownMinValue() + 7) / 8, BaseSize.isScalable()}; } // Return the number of bytes overwritten by a store of this value type or // this value type's element type in the case of a vector. uint64_t getScalarStoreSize() const { - return getScalarType().getStoreSize().getFixedSize(); + return getScalarType().getStoreSize().getFixedValue(); } /// Return the number of bits overwritten by a store of the specified value @@ -388,7 +388,7 @@ namespace llvm { unsigned BitWidth = getSizeInBits(); if (BitWidth <= 8) return EVT(MVT::i8); - return getIntegerVT(Context, 1 << Log2_32_Ceil(BitWidth)); + return getIntegerVT(Context, llvm::bit_ceil(BitWidth)); } /// Finds the smallest simple value type that is greater than or equal to diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td index 2194800b7ba9..c22553855c55 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.td @@ -47,184 +47,195 @@ def v128i1 : ValueType<128, 24>; // 128 x i1 vector value def v256i1 : ValueType<256, 25>; // 256 x i1 vector value def v512i1 : ValueType<512, 26>; // 512 x i1 vector value def v1024i1 : ValueType<1024, 27>; // 1024 x i1 vector value - -def v128i2 : ValueType<256, 28>; // 128 x i2 vector value - -def v64i4 : ValueType<256, 29>; // 64 x i4 vector value - -def v1i8 : ValueType<8, 30>; // 1 x i8 vector value -def v2i8 : ValueType<16, 31>; // 2 x i8 vector value -def v4i8 : ValueType<32, 32>; // 4 x i8 vector value -def v8i8 : ValueType<64, 33>; // 8 x i8 vector value -def v16i8 : ValueType<128, 34>; // 16 x i8 vector value -def v32i8 : ValueType<256, 35>; // 32 x i8 vector value -def v64i8 : ValueType<512, 36>; // 64 x i8 vector value -def v128i8 : ValueType<1024, 37>; // 128 x i8 vector value -def v256i8 : ValueType<2048, 38>; // 256 x i8 vector value -def v512i8 : ValueType<4096, 39>; // 512 x i8 vector value -def v1024i8 : ValueType<8192, 40>; // 1024 x i8 vector value - -def v1i16 : ValueType<16, 41>; // 1 x i16 vector value -def v2i16 : ValueType<32, 42>; // 2 x i16 vector value -def v3i16 : ValueType<48, 43>; // 3 x i16 vector value -def v4i16 : ValueType<64, 44>; // 4 x i16 vector value -def v8i16 : ValueType<128, 45>; // 8 x i16 vector value -def v16i16 : ValueType<256, 46>; // 16 x i16 vector value -def v32i16 : ValueType<512, 47>; // 32 x i16 vector value -def v64i16 : ValueType<1024, 48>; // 64 x i16 vector value -def v128i16 : ValueType<2048, 49>; // 128 x i16 vector value -def v256i16 : ValueType<4096, 50>; // 256 x i16 vector value -def v512i16 : ValueType<8192, 51>; // 512 x i16 vector value - -def v1i32 : ValueType<32, 52>; // 1 x i32 vector value -def v2i32 : ValueType<64, 53>; // 2 x i32 vector value -def v3i32 : ValueType<96, 54>; // 3 x i32 vector value -def v4i32 : ValueType<128, 55>; // 4 x i32 vector value -def v5i32 : ValueType<160, 56>; // 5 x i32 vector value -def 
v6i32 : ValueType<192, 57>; // 6 x f32 vector value -def v7i32 : ValueType<224, 58>; // 7 x f32 vector value -def v8i32 : ValueType<256, 59>; // 8 x i32 vector value -def v16i32 : ValueType<512, 60>; // 16 x i32 vector value -def v32i32 : ValueType<1024, 61>; // 32 x i32 vector value -def v64i32 : ValueType<2048, 62>; // 64 x i32 vector value -def v128i32 : ValueType<4096, 63>; // 128 x i32 vector value -def v256i32 : ValueType<8192, 64>; // 256 x i32 vector value -def v512i32 : ValueType<16384, 65>; // 512 x i32 vector value -def v1024i32 : ValueType<32768, 66>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536, 67>; // 2048 x i32 vector value - -def v1i64 : ValueType<64, 68>; // 1 x i64 vector value -def v2i64 : ValueType<128, 69>; // 2 x i64 vector value -def v3i64 : ValueType<192, 70>; // 3 x i64 vector value -def v4i64 : ValueType<256, 71>; // 4 x i64 vector value -def v8i64 : ValueType<512, 72>; // 8 x i64 vector value -def v16i64 : ValueType<1024, 73>; // 16 x i64 vector value -def v32i64 : ValueType<2048, 74>; // 32 x i64 vector value -def v64i64 : ValueType<4096, 75>; // 64 x i64 vector value -def v128i64 : ValueType<8192, 76>; // 128 x i64 vector value -def v256i64 : ValueType<16384, 77>; // 256 x i64 vector value - -def v1i128 : ValueType<128, 78>; // 1 x i128 vector value - -def v1f16 : ValueType<16, 79>; // 1 x f16 vector value -def v2f16 : ValueType<32, 80>; // 2 x f16 vector value -def v3f16 : ValueType<48, 81>; // 3 x f16 vector value -def v4f16 : ValueType<64, 82>; // 4 x f16 vector value -def v8f16 : ValueType<128, 83>; // 8 x f16 vector value -def v16f16 : ValueType<256, 84>; // 16 x f16 vector value -def v32f16 : ValueType<512, 85>; // 32 x f16 vector value -def v64f16 : ValueType<1024, 86>; // 64 x f16 vector value -def v128f16 : ValueType<2048, 87>; // 128 x f16 vector value -def v256f16 : ValueType<4096, 88>; // 256 x f16 vector value -def v512f16 : ValueType<8192, 89>; // 512 x f16 vector value - -def v2bf16 : ValueType<32, 90>; // 2 x bf16 vector value -def v3bf16 : ValueType<48, 91>; // 3 x bf16 vector value -def v4bf16 : ValueType<64, 92>; // 4 x bf16 vector value -def v8bf16 : ValueType<128, 93>; // 8 x bf16 vector value -def v16bf16 : ValueType<256, 94>; // 16 x bf16 vector value -def v32bf16 : ValueType<512, 95>; // 32 x bf16 vector value -def v64bf16 : ValueType<1024, 96>; // 64 x bf16 vector value -def v128bf16 : ValueType<2048, 97>; // 128 x bf16 vector value - -def v1f32 : ValueType<32, 98>; // 1 x f32 vector value -def v2f32 : ValueType<64, 99>; // 2 x f32 vector value -def v3f32 : ValueType<96, 100>; // 3 x f32 vector value -def v4f32 : ValueType<128, 101>; // 4 x f32 vector value -def v5f32 : ValueType<160, 102>; // 5 x f32 vector value -def v6f32 : ValueType<192, 103>; // 6 x f32 vector value -def v7f32 : ValueType<224, 104>; // 7 x f32 vector value -def v8f32 : ValueType<256, 105>; // 8 x f32 vector value -def v16f32 : ValueType<512, 106>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 107>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 108>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 109>; // 128 x f32 vector value -def v256f32 : ValueType<8192, 110>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 111>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 112>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 113>; // 2048 x f32 vector value - -def v1f64 : ValueType<64, 114>; // 1 x f64 vector value -def v2f64 : ValueType<128, 115>; // 2 x f64 vector value -def v3f64 : ValueType<192, 
116>; // 3 x f64 vector value -def v4f64 : ValueType<256, 117>; // 4 x f64 vector value -def v8f64 : ValueType<512, 118>; // 8 x f64 vector value -def v16f64 : ValueType<1024, 119>; // 16 x f64 vector value -def v32f64 : ValueType<2048, 120>; // 32 x f64 vector value -def v64f64 : ValueType<4096, 121>; // 64 x f64 vector value -def v128f64 : ValueType<8192, 122>; // 128 x f64 vector value -def v256f64 : ValueType<16384, 123>; // 256 x f64 vector value - -def nxv1i1 : ValueType<1, 124>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 125>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 126>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 127>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 128>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 129>; // n x 32 x i1 vector value -def nxv64i1 : ValueType<64, 130>; // n x 64 x i1 vector value - -def nxv1i8 : ValueType<8, 131>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 132>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 133>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 134>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 135>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 136>; // n x 32 x i8 vector value -def nxv64i8 : ValueType<512, 137>; // n x 64 x i8 vector value - -def nxv1i16 : ValueType<16, 138>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 139>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 140>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 141>; // n x 8 x i16 vector value -def nxv16i16 : ValueType<256, 142>; // n x 16 x i16 vector value -def nxv32i16 : ValueType<512, 143>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 144>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 145>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 146>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 147>; // n x 8 x i32 vector value -def nxv16i32 : ValueType<512, 148>; // n x 16 x i32 vector value -def nxv32i32 : ValueType<1024, 149>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 150>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 151>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 152>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 153>; // n x 8 x i64 vector value -def nxv16i64 : ValueType<1024, 154>; // n x 16 x i64 vector value -def nxv32i64 : ValueType<2048, 155>; // n x 32 x i64 vector value - -def nxv1f16 : ValueType<16, 156>; // n x 1 x f16 vector value -def nxv2f16 : ValueType<32, 157>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64, 158>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 159>; // n x 8 x f16 vector value -def nxv16f16 : ValueType<256, 160>; // n x 16 x f16 vector value -def nxv32f16 : ValueType<512, 161>; // n x 32 x f16 vector value - -def nxv1bf16 : ValueType<16, 162>; // n x 1 x bf16 vector value -def nxv2bf16 : ValueType<32, 163>; // n x 2 x bf16 vector value -def nxv4bf16 : ValueType<64, 164>; // n x 4 x bf16 vector value -def nxv8bf16 : ValueType<128, 165>; // n x 8 x bf16 vector value -def nxv16bf16 : ValueType<256, 166>; // n x 16 x bf16 vector value -def nxv32bf16 : ValueType<512, 167>; // n x 32 x bf16 vector value - -def nxv1f32 : ValueType<32, 168>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64, 169>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 170>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 171>; // n x 8 x f32 vector value -def nxv16f32 : 
ValueType<512, 172>; // n x 16 x f32 vector value - -def nxv1f64 : ValueType<64, 173>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 174>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 175>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 176>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64, 177>; // X86 MMX value -def FlagVT : ValueType<0, 178>; // Pre-RA sched glue -def isVoid : ValueType<0, 179>; // Produces no value -def untyped : ValueType<8, 180>; // Produces an untyped value -def funcref : ValueType<0, 181>; // WebAssembly's funcref type -def externref : ValueType<0, 182>; // WebAssembly's externref type -def x86amx : ValueType<8192, 183>; // X86 AMX value -def i64x8 : ValueType<512, 184>; // 8 Consecutive GPRs (AArch64) +def v2048i1 : ValueType<2048, 28>; // 2048 x i1 vector value + +def v128i2 : ValueType<256, 29>; // 128 x i2 vector value +def v256i2 : ValueType<512, 30>; // 256 x i2 vector value + +def v64i4 : ValueType<256, 31>; // 64 x i4 vector value +def v128i4 : ValueType<512, 32>; // 128 x i4 vector value + +def v1i8 : ValueType<8, 33>; // 1 x i8 vector value +def v2i8 : ValueType<16, 34>; // 2 x i8 vector value +def v4i8 : ValueType<32, 35>; // 4 x i8 vector value +def v8i8 : ValueType<64, 36>; // 8 x i8 vector value +def v16i8 : ValueType<128, 37>; // 16 x i8 vector value +def v32i8 : ValueType<256, 38>; // 32 x i8 vector value +def v64i8 : ValueType<512, 39>; // 64 x i8 vector value +def v128i8 : ValueType<1024, 40>; // 128 x i8 vector value +def v256i8 : ValueType<2048, 41>; // 256 x i8 vector value +def v512i8 : ValueType<4096, 42>; // 512 x i8 vector value +def v1024i8 : ValueType<8192, 43>; // 1024 x i8 vector value + +def v1i16 : ValueType<16, 44>; // 1 x i16 vector value +def v2i16 : ValueType<32, 45>; // 2 x i16 vector value +def v3i16 : ValueType<48, 46>; // 3 x i16 vector value +def v4i16 : ValueType<64, 47>; // 4 x i16 vector value +def v8i16 : ValueType<128, 48>; // 8 x i16 vector value +def v16i16 : ValueType<256, 49>; // 16 x i16 vector value +def v32i16 : ValueType<512, 50>; // 32 x i16 vector value +def v64i16 : ValueType<1024, 51>; // 64 x i16 vector value +def v128i16 : ValueType<2048, 52>; // 128 x i16 vector value +def v256i16 : ValueType<4096, 53>; // 256 x i16 vector value +def v512i16 : ValueType<8192, 54>; // 512 x i16 vector value + +def v1i32 : ValueType<32, 55>; // 1 x i32 vector value +def v2i32 : ValueType<64, 56>; // 2 x i32 vector value +def v3i32 : ValueType<96, 57>; // 3 x i32 vector value +def v4i32 : ValueType<128, 58>; // 4 x i32 vector value +def v5i32 : ValueType<160, 59>; // 5 x i32 vector value +def v6i32 : ValueType<192, 60>; // 6 x f32 vector value +def v7i32 : ValueType<224, 61>; // 7 x f32 vector value +def v8i32 : ValueType<256, 62>; // 8 x i32 vector value +def v9i32 : ValueType<288, 63>; // 9 x i32 vector value +def v10i32 : ValueType<320, 64>; // 10 x i32 vector value +def v11i32 : ValueType<352, 65>; // 11 x i32 vector value +def v12i32 : ValueType<384, 66>; // 12 x i32 vector value +def v16i32 : ValueType<512, 67>; // 16 x i32 vector value +def v32i32 : ValueType<1024, 68>; // 32 x i32 vector value +def v64i32 : ValueType<2048, 69>; // 64 x i32 vector value +def v128i32 : ValueType<4096, 70>; // 128 x i32 vector value +def v256i32 : ValueType<8192, 71>; // 256 x i32 vector value +def v512i32 : ValueType<16384, 72>; // 512 x i32 vector value +def v1024i32 : ValueType<32768, 73>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536, 74>; // 2048 x i32 vector value + +def 
v1i64 : ValueType<64, 75>; // 1 x i64 vector value +def v2i64 : ValueType<128, 76>; // 2 x i64 vector value +def v3i64 : ValueType<192, 77>; // 3 x i64 vector value +def v4i64 : ValueType<256, 78>; // 4 x i64 vector value +def v8i64 : ValueType<512, 79>; // 8 x i64 vector value +def v16i64 : ValueType<1024, 80>; // 16 x i64 vector value +def v32i64 : ValueType<2048, 81>; // 32 x i64 vector value +def v64i64 : ValueType<4096, 82>; // 64 x i64 vector value +def v128i64 : ValueType<8192, 83>; // 128 x i64 vector value +def v256i64 : ValueType<16384, 84>; // 256 x i64 vector value + +def v1i128 : ValueType<128, 85>; // 1 x i128 vector value + +def v1f16 : ValueType<16, 86>; // 1 x f16 vector value +def v2f16 : ValueType<32, 87>; // 2 x f16 vector value +def v3f16 : ValueType<48, 88>; // 3 x f16 vector value +def v4f16 : ValueType<64, 89>; // 4 x f16 vector value +def v8f16 : ValueType<128, 90>; // 8 x f16 vector value +def v16f16 : ValueType<256, 91>; // 16 x f16 vector value +def v32f16 : ValueType<512, 92>; // 32 x f16 vector value +def v64f16 : ValueType<1024, 93>; // 64 x f16 vector value +def v128f16 : ValueType<2048, 94>; // 128 x f16 vector value +def v256f16 : ValueType<4096, 95>; // 256 x f16 vector value +def v512f16 : ValueType<8192, 96>; // 512 x f16 vector value + +def v2bf16 : ValueType<32, 97>; // 2 x bf16 vector value +def v3bf16 : ValueType<48, 98>; // 3 x bf16 vector value +def v4bf16 : ValueType<64, 99>; // 4 x bf16 vector value +def v8bf16 : ValueType<128, 100>; // 8 x bf16 vector value +def v16bf16 : ValueType<256, 101>; // 16 x bf16 vector value +def v32bf16 : ValueType<512, 102>; // 32 x bf16 vector value +def v64bf16 : ValueType<1024, 103>; // 64 x bf16 vector value +def v128bf16 : ValueType<2048, 104>; // 128 x bf16 vector value + +def v1f32 : ValueType<32, 105>; // 1 x f32 vector value +def v2f32 : ValueType<64, 106>; // 2 x f32 vector value +def v3f32 : ValueType<96, 107>; // 3 x f32 vector value +def v4f32 : ValueType<128, 108>; // 4 x f32 vector value +def v5f32 : ValueType<160, 109>; // 5 x f32 vector value +def v6f32 : ValueType<192, 110>; // 6 x f32 vector value +def v7f32 : ValueType<224, 111>; // 7 x f32 vector value +def v8f32 : ValueType<256, 112>; // 8 x f32 vector value +def v9f32 : ValueType<288, 113>; // 9 x f32 vector value +def v10f32 : ValueType<320, 114>; // 10 x f32 vector value +def v11f32 : ValueType<352, 115>; // 11 x f32 vector value +def v12f32 : ValueType<384, 116>; // 12 x f32 vector value +def v16f32 : ValueType<512, 117>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 118>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 119>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 120>; // 128 x f32 vector value +def v256f32 : ValueType<8192, 121>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 122>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 123>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 124>; // 2048 x f32 vector value + +def v1f64 : ValueType<64, 125>; // 1 x f64 vector value +def v2f64 : ValueType<128, 126>; // 2 x f64 vector value +def v3f64 : ValueType<192, 127>; // 3 x f64 vector value +def v4f64 : ValueType<256, 128>; // 4 x f64 vector value +def v8f64 : ValueType<512, 129>; // 8 x f64 vector value +def v16f64 : ValueType<1024, 130>; // 16 x f64 vector value +def v32f64 : ValueType<2048, 131>; // 32 x f64 vector value +def v64f64 : ValueType<4096, 132>; // 64 x f64 vector value +def v128f64 : ValueType<8192, 133>; // 128 x f64 vector value +def v256f64 : ValueType<16384, 
134>; // 256 x f64 vector value + +def nxv1i1 : ValueType<1, 135>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 136>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 137>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 138>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 139>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 140>; // n x 32 x i1 vector value +def nxv64i1 : ValueType<64, 141>; // n x 64 x i1 vector value + +def nxv1i8 : ValueType<8, 142>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 143>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 144>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 145>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 146>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 147>; // n x 32 x i8 vector value +def nxv64i8 : ValueType<512, 148>; // n x 64 x i8 vector value + +def nxv1i16 : ValueType<16, 149>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 150>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 151>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 152>; // n x 8 x i16 vector value +def nxv16i16 : ValueType<256, 153>; // n x 16 x i16 vector value +def nxv32i16 : ValueType<512, 154>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 155>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 156>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 157>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 158>; // n x 8 x i32 vector value +def nxv16i32 : ValueType<512, 159>; // n x 16 x i32 vector value +def nxv32i32 : ValueType<1024, 160>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 161>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 162>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 163>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 164>; // n x 8 x i64 vector value +def nxv16i64 : ValueType<1024, 165>; // n x 16 x i64 vector value +def nxv32i64 : ValueType<2048, 166>; // n x 32 x i64 vector value + +def nxv1f16 : ValueType<16, 167>; // n x 1 x f16 vector value +def nxv2f16 : ValueType<32, 168>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64, 169>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 170>; // n x 8 x f16 vector value +def nxv16f16 : ValueType<256, 171>; // n x 16 x f16 vector value +def nxv32f16 : ValueType<512, 172>; // n x 32 x f16 vector value + +def nxv1bf16 : ValueType<16, 173>; // n x 1 x bf16 vector value +def nxv2bf16 : ValueType<32, 174>; // n x 2 x bf16 vector value +def nxv4bf16 : ValueType<64, 175>; // n x 4 x bf16 vector value +def nxv8bf16 : ValueType<128, 176>; // n x 8 x bf16 vector value +def nxv16bf16 : ValueType<256, 177>; // n x 16 x bf16 vector value +def nxv32bf16 : ValueType<512, 178>; // n x 32 x bf16 vector value + +def nxv1f32 : ValueType<32, 179>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64, 180>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 181>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 182>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 183>; // n x 16 x f32 vector value + +def nxv1f64 : ValueType<64, 184>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 185>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 186>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 187>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64, 188>; // X86 MMX value +def FlagVT : ValueType<0, 189>; // Pre-RA sched glue +def isVoid : 
ValueType<0, 190>; // Produces no value +def untyped : ValueType<8, 191>; // Produces an untyped value +def funcref : ValueType<0, 192>; // WebAssembly's funcref type +def externref : ValueType<0, 193>; // WebAssembly's externref type +def x86amx : ValueType<8192, 194>; // X86 AMX value +def i64x8 : ValueType<512, 195>; // 8 Consecutive GPRs (AArch64) def token : ValueType<0, 248>; // TokenTy def MetadataVT : ValueType<0, 249>; // Metadata diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h index 8b55a45b61e8..60ee6493b1a1 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h @@ -50,8 +50,6 @@ struct WasmEHFuncInfo { } void setUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) { SrcToUnwindDest[BB] = Dest; - if (!UnwindDestToSrcs.count(Dest)) - UnwindDestToSrcs[Dest] = SmallPtrSet<BBOrMBB, 4>(); UnwindDestToSrcs[Dest].insert(BB); } bool hasUnwindDest(const BasicBlock *BB) const { @@ -76,8 +74,6 @@ struct WasmEHFuncInfo { } void setUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) { SrcToUnwindDest[MBB] = Dest; - if (!UnwindDestToSrcs.count(Dest)) - UnwindDestToSrcs[Dest] = SmallPtrSet<BBOrMBB, 4>(); UnwindDestToSrcs[Dest].insert(MBB); } bool hasUnwindDest(MachineBasicBlock *MBB) const { |