Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 39
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1119
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 48
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 39
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp | 31
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 161
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 113
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp | 73
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp | 52
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp | 82
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 121
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 210
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp | 211
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp | 124
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h | 80
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 655
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 99
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 30
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 61
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 82
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp | 1009
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp | 1
94 files changed, 3647 insertions, 1744 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5984063627b0..5c64622c7245 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -561,8 +561,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
<< ":\n");
std::map<unsigned, BitVector> RenameRegisterMap;
unsigned SuperReg = 0;
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- unsigned Reg = Regs[i];
+ for (unsigned Reg : Regs) {
if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
SuperReg = Reg;
@@ -584,8 +583,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
}
// All group registers should be a subreg of SuperReg.
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- unsigned Reg = Regs[i];
+ for (unsigned Reg : Regs) {
if (Reg == SuperReg) continue;
bool IsSub = TRI->isSubRegister(SuperReg, Reg);
// FIXME: remove this once PR18663 has been properly fixed. For now,
@@ -646,8 +644,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// For each referenced group register (which must be a SuperReg or
// a subregister of SuperReg), find the corresponding subregister
// of NewSuperReg and make sure it is free to be renamed.
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- unsigned Reg = Regs[i];
+ for (unsigned Reg : Regs) {
unsigned NewReg = 0;
if (Reg == SuperReg) {
NewReg = NewSuperReg;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 7d8a73e12d3a..7e68e5e22879 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -712,8 +712,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// The manipulations performed when we're looking through an insertvalue or
// an extractvalue would happen at the front of the RetPath list, so since
// we have to copy it anyway it's more efficient to create a reversed copy.
- SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
- SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());
+ SmallVector<unsigned, 4> TmpRetPath(llvm::reverse(RetPath));
+ SmallVector<unsigned, 4> TmpCallPath(llvm::reverse(CallPath));
// Finally, we can check whether the value produced by the tail call at this
// index is compatible with the value we return.
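The change above replaces the explicit rbegin()/rend() constructor call with llvm::reverse(), an idiom this import applies in several other files as well (see the PseudoProbePrinter.cpp and CodeGenPrepare.cpp hunks further down). A minimal standalone sketch of the idiom, assuming only the two ADT headers named below:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    // Build a reversed copy of Path. llvm::reverse() wraps the container in an
    // iterator_range of reverse iterators, which SmallVector's range constructor
    // consumes directly, replacing the older rbegin()/rend() two-iterator form.
    llvm::SmallVector<unsigned, 4>
    reversedCopy(const llvm::SmallVectorImpl<unsigned> &Path) {
      return llvm::SmallVector<unsigned, 4>(llvm::reverse(Path));
    }
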
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 828cb760b82e..533f20535655 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -180,7 +180,7 @@ Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL,
Alignment = InAlign;
// If the GV has a specified alignment, take it into account.
- const MaybeAlign GVAlign(GV->getAlignment());
+ const MaybeAlign GVAlign(GV->getAlign());
if (!GVAlign)
return Alignment;
@@ -288,7 +288,11 @@ bool AsmPrinter::doInitialization(Module &M) {
// use the directive, where it would need the same conditionalization
// anyway.
const Triple &Target = TM.getTargetTriple();
- OutStreamer->emitVersionForTarget(Target, M.getSDKVersion());
+ Triple TVT(M.getDarwinTargetVariantTriple());
+ OutStreamer->emitVersionForTarget(
+ Target, M.getSDKVersion(),
+ M.getDarwinTargetVariantTriple().empty() ? nullptr : &TVT,
+ M.getDarwinTargetVariantSDKVersion());
// Allow the target to emit any magic that it wants at the start of the file.
emitStartOfAsmFile(M);
@@ -1856,6 +1860,17 @@ bool AsmPrinter::doFinalization(Module &M) {
continue;
OutStreamer->emitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference);
}
+ if (shouldEmitWeakSwiftAsyncExtendedFramePointerFlags()) {
+ auto SymbolName = "swift_async_extendedFramePointerFlags";
+ auto Global = M.getGlobalVariable(SymbolName);
+ if (!Global) {
+ auto Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ Global = new GlobalVariable(M, Int8PtrTy, false,
+ GlobalValue::ExternalWeakLinkage, nullptr,
+ SymbolName);
+ OutStreamer->emitSymbolAttribute(getSymbol(Global), MCSA_WeakReference);
+ }
+ }
}
// Print aliases in topological order, that is, for each alias a = b,
@@ -2502,6 +2517,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV))
return getObjFileLowering().lowerDSOLocalEquivalent(Equiv, TM);
+ if (const NoCFIValue *NC = dyn_cast<NoCFIValue>(CV))
+ return MCSymbolRefExpr::create(getSymbol(NC->getGlobalValue()), Ctx);
+
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (!CE) {
llvm_unreachable("Unknown constant value to lower!");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 85ff84484ced..d621108408f0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -611,8 +611,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
void CodeViewDebug::beginModule(Module *M) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
- if (!M->getNamedMetadata("llvm.dbg.cu") ||
- !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
+ NamedMDNode *CUs = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CUs || !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
Asm = nullptr;
return;
}
@@ -622,7 +622,6 @@ void CodeViewDebug::beginModule(Module *M) {
TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
// Get the current source language.
- NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
const MDNode *Node = *CUs->operands().begin();
const auto *CU = cast<DICompileUnit>(Node);
@@ -650,6 +649,7 @@ void CodeViewDebug::endModule() {
switchToDebugSectionForSymbol(nullptr);
MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols);
+ emitObjName();
emitCompilerInformation();
endCVSubsection(CompilerInfo);
@@ -785,6 +785,29 @@ void CodeViewDebug::emitTypeGlobalHashes() {
}
}
+void CodeViewDebug::emitObjName() {
+ MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_OBJNAME);
+
+ StringRef PathRef(Asm->TM.Options.ObjectFilenameForDebug);
+ llvm::SmallString<256> PathStore(PathRef);
+
+ if (PathRef.empty() || PathRef == "-") {
+ // Don't emit the filename if we're writing to stdout or to /dev/null.
+ PathRef = {};
+ } else {
+ llvm::sys::path::remove_dots(PathStore, /*remove_dot_dot=*/true);
+ PathRef = PathStore;
+ }
+
+ OS.AddComment("Signature");
+ OS.emitIntValue(0, 4);
+
+ OS.AddComment("Object name");
+ emitNullTerminatedSymbolName(OS, PathRef);
+
+ endSymbolRecord(CompilerEnd);
+}
+
namespace {
struct Version {
int Part[4];
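The new emitObjName() normalizes the object-file path with llvm::sys::path::remove_dots() before writing the S_OBJNAME record. A small sketch of what that normalization does; the input path and function name here are illustrative only:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"

    // Collapse "." and ".." components the way emitObjName does before emitting
    // the S_OBJNAME record.
    llvm::SmallString<256> normalizedObjName() {
      llvm::SmallString<256> PathStore("./foo/../obj/x.obj");
      llvm::sys::path::remove_dots(PathStore, /*remove_dot_dot=*/true);
      return PathStore; // now holds "obj/x.obj"
    }
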
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 6f88e15ee8fe..d1fc3cdccb20 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -302,6 +302,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitTypeGlobalHashes();
+ void emitObjName();
+
void emitCompilerInformation();
void emitBuildInfo();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 047676d4c11e..48134f1fd774 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1224,17 +1224,15 @@ void DwarfDebug::beginModule(Module *M) {
CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV]));
}
- for (auto *Ty : CUNode->getEnumTypes()) {
- // The enum types array by design contains pointers to
- // MDNodes rather than DIRefs. Unique them here.
+ for (auto *Ty : CUNode->getEnumTypes())
CU.getOrCreateTypeDIE(cast<DIType>(Ty));
- }
+
for (auto *Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
if (DIType *RT = dyn_cast<DIType>(Ty))
- // There is no point in force-emitting a forward declaration.
- CU.getOrCreateTypeDIE(RT);
+ // There is no point in force-emitting a forward declaration.
+ CU.getOrCreateTypeDIE(RT);
}
// Emit imported_modules last so that the relevant context is already
// available.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 976e35905144..6b6d63f14f87 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -536,6 +536,18 @@ void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) {
}
}
+void DwarfUnit::addAccess(DIE &Die, DINode::DIFlags Flags) {
+ if ((Flags & DINode::FlagAccessibility) == DINode::FlagProtected)
+ addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if ((Flags & DINode::FlagAccessibility) == DINode::FlagPrivate)
+ addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else if ((Flags & DINode::FlagAccessibility) == DINode::FlagPublic)
+ addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+}
+
DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
if (!Context || isa<DIFile>(Context))
return &getUnitDie();
@@ -842,13 +854,17 @@ void DwarfUnit::addAnnotation(DIE &Buffer, DINodeArray Annotations) {
for (const Metadata *Annotation : Annotations->operands()) {
const MDNode *MD = cast<MDNode>(Annotation);
const MDString *Name = cast<MDString>(MD->getOperand(0));
-
- // Currently, only MDString is supported with btf_decl_tag attribute.
- const MDString *Value = cast<MDString>(MD->getOperand(1));
+ const auto &Value = MD->getOperand(1);
DIE &AnnotationDie = createAndAddDIE(dwarf::DW_TAG_LLVM_annotation, Buffer);
addString(AnnotationDie, dwarf::DW_AT_name, Name->getString());
- addString(AnnotationDie, dwarf::DW_AT_const_value, Value->getString());
+ if (const auto *Data = dyn_cast<MDString>(Value))
+ addString(AnnotationDie, dwarf::DW_AT_const_value, Data->getString());
+ else if (const auto *Data = dyn_cast<ConstantAsMetadata>(Value))
+ addConstantValue(AnnotationDie, Data->getValue()->getUniqueInteger(),
+ /*Unsigned=*/true);
+ else
+ assert(false && "Unsupported annotation value type");
}
}
@@ -1007,6 +1023,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (CTy->isForwardDecl())
addFlag(Buffer, dwarf::DW_AT_declaration);
+ // Add accessibility info if available.
+ addAccess(Buffer, CTy->getFlags());
+
// Add source line info if available.
if (!CTy->isForwardDecl())
addSourceLine(Buffer, CTy);
@@ -1308,15 +1327,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isNoReturn())
addFlag(SPDie, dwarf::DW_AT_noreturn);
- if (SP->isProtected())
- addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_protected);
- else if (SP->isPrivate())
- addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_private);
- else if (SP->isPublic())
- addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_public);
+ addAccess(SPDie, SP->getFlags());
if (SP->isExplicit())
addFlag(SPDie, dwarf::DW_AT_explicit);
@@ -1666,16 +1677,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
}
}
- if (DT->isProtected())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_protected);
- else if (DT->isPrivate())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_private);
- // Otherwise C++ member and base classes are considered public.
- else if (DT->isPublic())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_public);
+ addAccess(MemberDie, DT->getFlags());
+
if (DT->isVirtual())
addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
@@ -1717,15 +1720,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
// FIXME: We could omit private if the parent is a class_type, and
// public if the parent is something else.
- if (DT->isProtected())
- addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_protected);
- else if (DT->isPrivate())
- addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_private);
- else if (DT->isPublic())
- addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_public);
+ addAccess(StaticMemberDIE, DT->getFlags());
if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant()))
addConstantValue(StaticMemberDIE, CI, Ty);
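The new addAccess() helper consolidates the accessibility emission that was previously duplicated for subprograms, members, and static members. The mask-and-compare form is needed because accessibility in DINode flags is a two-bit field rather than independent bits: FlagPrivate and FlagProtected are the individual encodings and FlagPublic is both bits set, so testing one bit in isolation would misread public members. A minimal sketch of the same decoding, assuming the flag definitions from llvm/IR/DebugInfoMetadata.h:

    #include "llvm/ADT/Optional.h"
    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/IR/DebugInfoMetadata.h"

    // Map the two-bit accessibility field to a DW_ACCESS_* constant, or None
    // when the node carries no accessibility information.
    llvm::Optional<unsigned> decodeAccess(llvm::DINode::DIFlags Flags) {
      switch (Flags & llvm::DINode::FlagAccessibility) {
      case llvm::DINode::FlagPrivate:   return llvm::dwarf::DW_ACCESS_private;
      case llvm::DINode::FlagProtected: return llvm::dwarf::DW_ACCESS_protected;
      case llvm::DINode::FlagPublic:    return llvm::dwarf::DW_ACCESS_public;
      default:                          return llvm::None;
      }
    }
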
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 8140279adaef..54b0079dd7ce 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -226,6 +226,9 @@ public:
/// Add thrown types.
void addThrownTypes(DIE &Die, DINodeArray ThrownTypes);
+ /// Add the accessibility attribute.
+ void addAccess(DIE &Die, DINode::DIFlags Flags);
+
/// Add a new type attribute to the specified entity.
///
/// This takes an attribute parameter because DW_AT_friend attributes are
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index a9fb31d42679..3ade262d9af2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -112,16 +112,12 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
EmitCamlGlobal(M, AP, "frametable");
int NumDescriptors = 0;
- for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
- IE = Info.funcinfo_end();
- I != IE; ++I) {
- GCFunctionInfo &FI = **I;
- if (FI.getStrategy().getName() != getStrategy().getName())
+ for (std::unique_ptr<GCFunctionInfo> &FI :
+ llvm::make_range(Info.funcinfo_begin(), Info.funcinfo_end())) {
+ if (FI->getStrategy().getName() != getStrategy().getName())
// this function is managed by some other GC
continue;
- for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
- NumDescriptors++;
- }
+ NumDescriptors += FI->size();
}
if (NumDescriptors >= 1 << 16) {
@@ -131,35 +127,34 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.emitInt16(NumDescriptors);
AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
- for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
- IE = Info.funcinfo_end();
- I != IE; ++I) {
- GCFunctionInfo &FI = **I;
- if (FI.getStrategy().getName() != getStrategy().getName())
+ for (std::unique_ptr<GCFunctionInfo> &FI :
+ llvm::make_range(Info.funcinfo_begin(), Info.funcinfo_end())) {
+ if (FI->getStrategy().getName() != getStrategy().getName())
// this function is managed by some other GC
continue;
- uint64_t FrameSize = FI.getFrameSize();
+ uint64_t FrameSize = FI->getFrameSize();
if (FrameSize >= 1 << 16) {
// Very rude!
- report_fatal_error("Function '" + FI.getFunction().getName() +
+ report_fatal_error("Function '" + FI->getFunction().getName() +
"' is too large for the ocaml GC! "
"Frame size " +
Twine(FrameSize) +
">= 65536.\n"
"(" +
- Twine(reinterpret_cast<uintptr_t>(&FI)) + ")");
+ Twine(reinterpret_cast<uintptr_t>(FI.get())) + ")");
}
AP.OutStreamer->AddComment("live roots for " +
- Twine(FI.getFunction().getName()));
+ Twine(FI->getFunction().getName()));
AP.OutStreamer->AddBlankLine();
- for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
- size_t LiveCount = FI.live_size(J);
+ for (GCFunctionInfo::iterator J = FI->begin(), JE = FI->end(); J != JE;
+ ++J) {
+ size_t LiveCount = FI->live_size(J);
if (LiveCount >= 1 << 16) {
// Very rude!
- report_fatal_error("Function '" + FI.getFunction().getName() +
+ report_fatal_error("Function '" + FI->getFunction().getName() +
"' is too large for the ocaml GC! "
"Live root count " +
Twine(LiveCount) + " >= 65536.");
@@ -169,8 +164,8 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.emitInt16(FrameSize);
AP.emitInt16(LiveCount);
- for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
- KE = FI.live_end(J);
+ for (GCFunctionInfo::live_iterator K = FI->live_begin(J),
+ KE = FI->live_end(J);
K != KE; ++K) {
if (K->StackOffset >= 1 << 16) {
// Very rude!
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 9e6f1a537de3..bab187f46535 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -47,7 +47,6 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
InlinedAt = InlinedAt->getInlinedAt();
}
- SmallVector<InlineSite, 8> InlineStack(ReversedInlineStack.rbegin(),
- ReversedInlineStack.rend());
+ SmallVector<InlineSite, 8> InlineStack(llvm::reverse(ReversedInlineStack));
Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index 64dadc82b48b..0ff67f7ca00a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1125,8 +1125,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// If this is a large problem, avoid visiting the same basic blocks multiple
// times.
if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
+ for (MergePotentialsElt &Elt : MergePotentials)
+ TriedMerging.insert(Elt.getBlock());
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 863a0e1e0b56..5f9982cd155d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -15,13 +15,13 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/StackMaps.h"
#include <cassert>
#include <tuple>
@@ -35,7 +35,7 @@ void VirtRegAuxInfo::calculateSpillWeightsAndHints() {
MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
calculateSpillWeightAndHint(LIS.getInterval(Reg));
@@ -64,14 +64,14 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg,
if (Register::isVirtualRegister(HReg))
return Sub == HSub ? HReg : Register();
- const TargetRegisterClass *rc = MRI.getRegClass(Reg);
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
MCRegister CopiedPReg = HSub ? TRI.getSubReg(HReg, HSub) : HReg.asMCReg();
- if (rc->contains(CopiedPReg))
+ if (RC->contains(CopiedPReg))
return CopiedPReg;
// Check if reg:sub matches so that a super register could be hinted.
if (Sub)
- return TRI.getMatchingSuperReg(CopiedPReg, Sub, rc);
+ return TRI.getMatchingSuperReg(CopiedPReg, Sub, RC);
return 0;
}
@@ -80,8 +80,8 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg,
static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS,
const VirtRegMap &VRM,
const TargetInstrInfo &TII) {
- unsigned Reg = LI.reg();
- unsigned Original = VRM.getOriginal(Reg);
+ Register Reg = LI.reg();
+ Register Original = VRM.getOriginal(Reg);
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I != E; ++I) {
const VNInfo *VNI = *I;
@@ -183,8 +183,8 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
bool ShouldUpdateLI = !IsLocalSplitArtifact;
if (IsLocalSplitArtifact) {
- MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*End);
- assert(localMBB == LIS.getMBBFromIndex(*Start) &&
+ MachineBasicBlock *LocalMBB = LIS.getMBBFromIndex(*End);
+ assert(LocalMBB == LIS.getMBBFromIndex(*Start) &&
"start and end are expected to be in the same basic block");
// Local split artifact will have 2 additional copy instructions and they
@@ -192,8 +192,8 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// localLI = COPY other
// ...
// other = COPY localLI
- TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB);
- TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB);
+ TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB);
+ TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB);
NumInstr += 2;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index bbdd8aab502e..7c236a9785d8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -68,6 +68,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCSEPass(Registry);
initializeMachineCombinerPass(Registry);
initializeMachineCopyPropagationPass(Registry);
+ initializeMachineCycleInfoPrinterPassPass(Registry);
+ initializeMachineCycleInfoWrapperPassPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLICMPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index ac4180c4c3ab..747f4e4fdecc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4831,9 +4831,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
TargetLowering::AsmOperandInfoVector TargetConstraints =
TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
+ for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, SDValue());
@@ -5617,9 +5615,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
TargetLowering::AsmOperandInfoVector TargetConstraints =
TLI->ParseConstraints(*DL, TRI, *CS);
unsigned ArgNo = 0;
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
+ for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
@@ -6856,8 +6852,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// Use reverse iterator because later select may use the value of the
// earlier select, and we need to propagate value through earlier select
// to get the PHI operand.
- for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
- SelectInst *SI = *It;
+ for (SelectInst *SI : llvm::reverse(ASI)) {
// The select itself is replaced with a PHI Node.
PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
PN->takeName(SI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 4e98d49206b5..901409ea9f8f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -405,8 +405,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin,
const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &Forbid) {
ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned NewReg = Order[i];
+ for (unsigned NewReg : Order) {
// Don't replace a register with itself.
if (NewReg == AntiDepReg) continue;
// Don't replace a register with one that was recently used to repair
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 0bb186a02416..5579152f1ce0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -142,9 +142,9 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
if (isDead(&MI)) {
LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI);
// It is possible that some DBG_VALUE instructions refer to this
- // instruction. They get marked as undef and will be deleted
- // in the live debug variable analysis.
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ // instruction. They will be deleted in the live debug variable
+ // analysis.
+ MI.eraseFromParent();
AnyChanges = true;
++NumDeletes;
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 90883212a275..0b5469b02637 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -210,9 +210,9 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
// Check all instructions, except the terminators. It is assumed that
// terminators never have side effects or define any used register values.
- for (MachineBasicBlock::iterator I = MBB->begin(),
- E = MBB->getFirstTerminator(); I != E; ++I) {
- if (I->isDebugInstr())
+ for (MachineInstr &MI :
+ llvm::make_range(MBB->begin(), MBB->getFirstTerminator())) {
+ if (MI.isDebugInstr())
continue;
if (++InstrCount > BlockInstrLimit && !Stress) {
@@ -222,28 +222,28 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
}
// There shouldn't normally be any phis in a single-predecessor block.
- if (I->isPHI()) {
- LLVM_DEBUG(dbgs() << "Can't hoist: " << *I);
+ if (MI.isPHI()) {
+ LLVM_DEBUG(dbgs() << "Can't hoist: " << MI);
return false;
}
// Don't speculate loads. Note that it may be possible and desirable to
// speculate GOT or constant pool loads that are guaranteed not to trap,
// but we don't support that for now.
- if (I->mayLoad()) {
- LLVM_DEBUG(dbgs() << "Won't speculate load: " << *I);
+ if (MI.mayLoad()) {
+ LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI);
return false;
}
// We never speculate stores, so an AA pointer isn't necessary.
bool DontMoveAcrossStore = true;
- if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) {
- LLVM_DEBUG(dbgs() << "Can't speculate: " << *I);
+ if (!MI.isSafeToMove(nullptr, DontMoveAcrossStore)) {
+ LLVM_DEBUG(dbgs() << "Can't speculate: " << MI);
return false;
}
// Check for any dependencies on Head instructions.
- if (!InstrDependenciesAllowIfConv(&(*I)))
+ if (!InstrDependenciesAllowIfConv(&MI))
return false;
}
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 17094a8e44f8..d061664e8c5d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -256,7 +256,7 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
LLT PartLLT = MRI.getType(SrcRegs[0]);
// Deal with v3s16 split into v2s16
- LLT LCMTy = getLCMType(LLTy, PartLLT);
+ LLT LCMTy = getCoverTy(LLTy, PartLLT);
if (LCMTy == LLTy) {
// Common case where no padding is needed.
assert(DstRegs.size() == 1);
@@ -267,21 +267,9 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
// widening the original value.
Register UnmergeSrcReg;
if (LCMTy != PartLLT) {
- // e.g. A <3 x s16> value was split to <2 x s16>
- // %register_value0:_(<2 x s16>)
- // %register_value1:_(<2 x s16>)
- // %undef:_(<2 x s16>) = G_IMPLICIT_DEF
- // %concat:_<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef
- // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat
- const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
- Register Undef = B.buildUndef(PartLLT).getReg(0);
-
- // Build vector of undefs.
- SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
-
- // Replace the first sources with the real registers.
- std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
- UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0);
+ assert(DstRegs.size() == 1);
+ return B.buildDeleteTrailingVectorElements(DstRegs[0],
+ B.buildMerge(LCMTy, SrcRegs));
} else {
// We don't need to widen anything if we're extracting a scalar which was
// promoted to a vector e.g. s8 -> v4s8 -> s8
@@ -298,6 +286,8 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
for (int I = DstRegs.size(); I != NumDst; ++I)
PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
+ if (PadDstRegs.size() == 1)
+ return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg);
return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
}
@@ -485,7 +475,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
MachineRegisterInfo &MRI = *B.getMRI();
LLT DstTy = MRI.getType(DstRegs[0]);
- LLT LCMTy = getLCMType(SrcTy, PartTy);
+ LLT LCMTy = getCoverTy(SrcTy, PartTy);
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
@@ -493,7 +483,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
Register UnmergeSrc = SrcReg;
- if (CoveringSize != SrcSize) {
+ if (!LCMTy.isVector() && CoveringSize != SrcSize) {
// For scalars, it's common to be able to use a simple extension.
if (SrcTy.isScalar() && DstTy.isScalar()) {
CoveringSize = alignTo(SrcSize, DstSize);
@@ -510,14 +500,10 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
}
}
- // Unmerge to the original registers and pad with dead defs.
- SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
- for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize;
- Size += DstSize) {
- UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
- }
+ if (LCMTy.isVector() && CoveringSize != SrcSize)
+ UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0);
- B.buildUnmerge(UnmergeResults, UnmergeSrc);
+ B.buildUnmerge(DstRegs, UnmergeSrc);
}
bool CallLowering::determineAndHandleAssignments(
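The deleted comment above described padding a <3 x s16> value (split into two <2 x s16> parts) up to the 6-element LCM type with a whole undef <2 x s16>. With getCoverTy the parts are instead merged to the 4-element cover type and the trailing lane is dropped via buildDeleteTrailingVectorElements. A sketch of that shape, written against the builder calls used in the hunks above; the function name and register parameters are illustrative only:

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    #include "llvm/CodeGen/GlobalISel/Utils.h"

    // Recombine two <2 x s16> parts into a <3 x s16> destination: merge up to
    // the cover type <4 x s16>, then trim the trailing element back off.
    void remergeV3S16(llvm::MachineIRBuilder &B, llvm::Register Dst,
                      llvm::Register Part0, llvm::Register Part1) {
      llvm::LLT PartTy = llvm::LLT::fixed_vector(2, 16);
      llvm::LLT DstTy = llvm::LLT::fixed_vector(3, 16);
      llvm::LLT CoverTy = llvm::getCoverTy(DstTy, PartTy); // <4 x s16>, not the <6 x s16> LCM
      auto Merge = B.buildMerge(CoverTy, {Part0, Part1});
      B.buildDeleteTrailingVectorElements(Dst, Merge);
    }
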
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 381c6df5c97a..dd1ef74e8ad0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -135,7 +135,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
// Erase dead insts before even adding to the list.
if (isTriviallyDead(CurMI, *MRI)) {
LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
- CurMI.eraseFromParentAndMarkDBGValuesForRemoval();
+ CurMI.eraseFromParent();
continue;
}
WorkList.deferred_insert(&CurMI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 755b3b844570..f7a634dad61a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1551,8 +1551,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
// These were one use so it's safe to remove them.
- MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval();
- MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval();
+ MatchInfo.Shift2->eraseFromParent();
+ MatchInfo.Logic->eraseFromParent();
MI.eraseFromParent();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 87cc60d51bc2..6d415c9c7f90 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -338,9 +338,10 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
- assert(CI && "Instruction should be CmpInst");
- MIRBuilder.buildFCmp(Pred, Res, Op0, Op1,
- MachineInstr::copyFlagsFromInstruction(*CI));
+ uint16_t Flags = 0;
+ if (CI)
+ Flags = MachineInstr::copyFlagsFromInstruction(*CI);
+ MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
}
return true;
@@ -3502,7 +3503,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Get rid of the now empty basic block.
EntryBB->removeSuccessor(&NewEntryBB);
MF->remove(EntryBB);
- MF->DeleteMachineBasicBlock(EntryBB);
+ MF->deleteMachineBasicBlock(EntryBB);
assert(&MF->front() == &NewEntryBB &&
"New entry wasn't next in the list of basic block!");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 9b2692486384..b10c9272a508 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -163,7 +163,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// If so, erase it.
if (isTriviallyDead(MI, MRI)) {
LLVM_DEBUG(dbgs() << "Is dead; erasing.\n");
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ MI.eraseFromParent();
continue;
}
@@ -255,8 +255,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = nullptr;
if (!MRI.def_empty(VReg))
MI = &*MRI.def_instr_begin(VReg);
- else if (!MRI.use_empty(VReg))
+ else if (!MRI.use_empty(VReg)) {
MI = &*MRI.use_instr_begin(VReg);
+ // Debug value instruction is permitted to use undefined vregs.
+ if (MI->isDebugValue())
+ continue;
+ }
if (!MI)
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e09cd26eb0c1..e8a8efd5dad4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -176,16 +176,18 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
return true;
}
+ // Perform irregular split. Leftover is last element of RegPieces.
if (MainTy.isVector()) {
- unsigned EltSize = MainTy.getScalarSizeInBits();
- if (LeftoverSize % EltSize != 0)
- return false;
- LeftoverTy = LLT::scalarOrVector(
- ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
- } else {
- LeftoverTy = LLT::scalar(LeftoverSize);
+ SmallVector<Register, 8> RegPieces;
+ extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
+ for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
+ VRegs.push_back(RegPieces[i]);
+ LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
+ LeftoverTy = MRI.getType(LeftoverRegs[0]);
+ return true;
}
+ LeftoverTy = LLT::scalar(LeftoverSize);
// For irregular sizes, extract the individual parts.
for (unsigned I = 0; I != NumParts; ++I) {
Register NewReg = MRI.createGenericVirtualRegister(MainTy);
@@ -203,6 +205,44 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
return true;
}
+void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
+ SmallVectorImpl<Register> &VRegs) {
+ LLT RegTy = MRI.getType(Reg);
+ assert(RegTy.isVector() && "Expected a vector type");
+
+ LLT EltTy = RegTy.getElementType();
+ LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+ unsigned RegNumElts = RegTy.getNumElements();
+ unsigned LeftoverNumElts = RegNumElts % NumElts;
+ unsigned NumNarrowTyPieces = RegNumElts / NumElts;
+
+ // Perfect split without leftover
+ if (LeftoverNumElts == 0)
+ return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
+
+ // Irregular split. Provide direct access to all elements for the artifact
+ // combiner by unmerging down to individual elements, then build vectors
+ // with NumElts elements each. The remaining element(s) form the leftover.
+ SmallVector<Register, 8> Elts;
+ extractParts(Reg, EltTy, RegNumElts, Elts);
+
+ unsigned Offset = 0;
+ // Requested sub-vectors of NarrowTy.
+ for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
+ ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
+ VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+ }
+
+ // Leftover element(s).
+ if (LeftoverNumElts == 1) {
+ VRegs.push_back(Elts[Offset]);
+ } else {
+ LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
+ ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
+ VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
+ }
+}
+
void LegalizerHelper::insertParts(Register DstReg,
LLT ResultTy, LLT PartTy,
ArrayRef<Register> PartRegs,
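extractVectorParts handles the irregular case by unmerging to individual elements and rebuilding NumElts-wide pieces plus one smaller leftover piece. The arithmetic for a hypothetical <7 x s16> input split with NumElts = 4 (values chosen purely for illustration):

    // Hypothetical breakdown for a <7 x s16> register split with NumElts = 4:
    // one <4 x s16> piece from elements 0..3 and a <3 x s16> leftover from
    // elements 4..6.
    constexpr unsigned RegNumElts = 7, NumElts = 4;
    constexpr unsigned NumNarrowTyPieces = RegNumElts / NumElts; // 1
    constexpr unsigned LeftoverNumElts = RegNumElts % NumElts;   // 3
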
@@ -223,6 +263,15 @@ void LegalizerHelper::insertParts(Register DstReg,
return;
}
+ // Merge sub-vectors with different number of elements and insert into DstReg.
+ if (ResultTy.isVector()) {
+ assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
+ SmallVector<Register, 8> AllRegs;
+ for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
+ AllRegs.push_back(Reg);
+ return mergeMixedSubvectors(DstReg, AllRegs);
+ }
+
SmallVector<Register> GCDRegs;
LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
@@ -231,6 +280,30 @@ void LegalizerHelper::insertParts(Register DstReg,
buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
+void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
+ Register Reg) {
+ LLT Ty = MRI.getType(Reg);
+ SmallVector<Register, 8> RegElts;
+ extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
+ Elts.append(RegElts);
+}
+
+/// Merge \p PartRegs with different types into \p DstReg.
+void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
+ ArrayRef<Register> PartRegs) {
+ SmallVector<Register, 8> AllElts;
+ for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
+ appendVectorElts(AllElts, PartRegs[i]);
+
+ Register Leftover = PartRegs[PartRegs.size() - 1];
+ if (MRI.getType(Leftover).isScalar())
+ AllElts.push_back(Leftover);
+ else
+ appendVectorElts(AllElts, Leftover);
+
+ MIRBuilder.buildMerge(DstReg, AllElts);
+}
+
/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
const MachineInstr &MI) {
@@ -916,8 +989,26 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
- case TargetOpcode::G_FREEZE:
- return reduceOperationWidth(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_FREEZE: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ // Should widen scalar first
+ if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
+ return UnableToLegalize;
+
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
+ SmallVector<Register, 8> Parts;
+ for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
+ Parts.push_back(
+ MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_SADDO:
@@ -1372,37 +1463,17 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
+ Register Dst = MO.getReg();
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MO.setReg(DstExt);
+ MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
}
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
-
- LLT OldTy = MRI.getType(MO.getReg());
- unsigned OldElts = OldTy.getNumElements();
- unsigned NewElts = MoreTy.getNumElements();
-
- unsigned NumParts = NewElts / OldElts;
-
- // Use concat_vectors if the result is a multiple of the number of elements.
- if (NumParts * OldElts == NewElts) {
- SmallVector<Register, 8> Parts;
- Parts.push_back(MO.getReg());
-
- Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
- for (unsigned I = 1; I != NumParts; ++I)
- Parts.push_back(ImpDef);
-
- auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
- MO.setReg(Concat.getReg(0));
- return;
- }
-
- Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
- Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
- MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
- MO.setReg(MoreReg);
+ SmallVector<Register, 8> Regs;
+ MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
}
void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
@@ -3558,20 +3629,83 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
}
-LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
- MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT LCMTy = getLCMType(DstTy, NarrowTy);
+#ifndef NDEBUG
+/// Check that all vector operands have the same number of elements. Other
+/// operands should be listed in \p NonVecOpIndices.
+static bool hasSameNumEltsOnAllVectorOperands(
+ GenericMachineInstr &MI, MachineRegisterInfo &MRI,
+ std::initializer_list<unsigned> NonVecOpIndices) {
+ if (MI.getNumMemOperands() != 0)
+ return false;
- unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
+ LLT VecTy = MRI.getType(MI.getReg(0));
+ if (!VecTy.isVector())
+ return false;
+ unsigned NumElts = VecTy.getNumElements();
- auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
- SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
+ for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ if (!Op.isReg()) {
+ if (!is_contained(NonVecOpIndices, OpIdx))
+ return false;
+ continue;
+ }
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
- MI.eraseFromParent();
- return Legalized;
+ LLT Ty = MRI.getType(Op.getReg());
+ if (!Ty.isVector()) {
+ if (!is_contained(NonVecOpIndices, OpIdx))
+ return false;
+ continue;
+ }
+
+ if (Ty.getNumElements() != NumElts)
+ return false;
+ }
+
+ return true;
+}
+#endif
+
+/// Fill \p DstOps with DstOps that, combined, have the same total number of
+/// elements as \p Ty. Each DstOp is either a scalar (when \p NumElts = 1) or a
+/// vector with \p NumElts elements. When Ty.getNumElements() is not a multiple
+/// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
+static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
+ unsigned NumElts) {
+ LLT LeftoverTy;
+ assert(Ty.isVector() && "Expected vector type");
+ LLT EltTy = Ty.getElementType();
+ LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+ int NumParts, NumLeftover;
+ std::tie(NumParts, NumLeftover) =
+ getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
+
+ assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
+ for (int i = 0; i < NumParts; ++i) {
+ DstOps.push_back(NarrowTy);
+ }
+
+ if (LeftoverTy.isValid()) {
+ assert(NumLeftover == 1 && "expected exactly one leftover");
+ DstOps.push_back(LeftoverTy);
+ }
+}
+
+/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
+/// made from \p Op depending on operand type.
+static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
+ MachineOperand &Op) {
+ for (unsigned i = 0; i < N; ++i) {
+ if (Op.isReg())
+ Ops.push_back(Op.getReg());
+ else if (Op.isImm())
+ Ops.push_back(Op.getImm());
+ else if (Op.isPredicate())
+ Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
+ else
+ llvm_unreachable("Unsupported type");
+ }
}
// Handle splitting vector operations which need to have the same number of
@@ -3588,335 +3722,116 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
// s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
- MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
- const LLT NarrowTy0 = NarrowTyArg;
- const Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT LeftoverTy0;
-
- // All of the operands need to have the same number of elements, so if we can
- // determine a type breakdown for the result type, we can for all of the
- // source types.
- int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
- if (NumParts < 0)
- return UnableToLegalize;
-
- SmallVector<MachineInstrBuilder, 4> NewInsts;
-
- SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
- SmallVector<Register, 4> PartRegs, LeftoverRegs;
-
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register SrcReg = MI.getOperand(I).getReg();
- LLT SrcTyI = MRI.getType(SrcReg);
- const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount()
- : ElementCount::getFixed(1);
- LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType());
- LLT LeftoverTyI;
-
- // Split this operand into the requested typed registers, and any leftover
- // required to reproduce the original type.
- if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
- LeftoverRegs))
- return UnableToLegalize;
-
- if (I == 1) {
- // For the first operand, create an instruction for each part and setup
- // the result.
- for (Register PartReg : PartRegs) {
- Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
- .addDef(PartDstReg)
- .addUse(PartReg));
- DstRegs.push_back(PartDstReg);
- }
-
- for (Register LeftoverReg : LeftoverRegs) {
- Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
- NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
- .addDef(PartDstReg)
- .addUse(LeftoverReg));
- LeftoverDstRegs.push_back(PartDstReg);
- }
+ GenericMachineInstr &MI, unsigned NumElts,
+ std::initializer_list<unsigned> NonVecOpIndices) {
+ assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
+ "Non-compatible opcode or not specified non-vector operands");
+ unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
+
+ unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
+
+ // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
+ // Build instructions with DstOps to use instruction found by CSE directly.
+ // CSE copies found instruction into given vreg when building with vreg dest.
+ SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
+ // Output registers will be taken from created instructions.
+ SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
+ for (unsigned i = 0; i < NumDefs; ++i) {
+ makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
+ }
+
+ // Split vector input operands into sub-vectors with NumElts elts + Leftover.
+ // Operands listed in NonVecOpIndices will be used as is without splitting;
+ // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
+ // scalar condition (op 1), immediate in sext_inreg (op 2).
+ SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
+ for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+ ++UseIdx, ++UseNo) {
+ if (is_contained(NonVecOpIndices, UseIdx)) {
+ broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
+ MI.getOperand(UseIdx));
} else {
- assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
-
- // Add the newly created operand splits to the existing instructions. The
- // odd-sized pieces are ordered after the requested NarrowTyArg sized
- // pieces.
- unsigned InstCount = 0;
- for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
- NewInsts[InstCount++].addUse(PartRegs[J]);
- for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
- NewInsts[InstCount++].addUse(LeftoverRegs[J]);
+ SmallVector<Register, 8> SplitPieces;
+ extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
+ for (auto Reg : SplitPieces)
+ InputOpsPieces[UseNo].push_back(Reg);
}
-
- PartRegs.clear();
- LeftoverRegs.clear();
}
- // Insert the newly built operations and rebuild the result register.
- for (auto &MIB : NewInsts)
- MIRBuilder.insertInstr(MIB);
+ unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
- insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
+  // Take the i-th piece of each split input operand and build a
+  // sub-vector/scalar instruction. Use the i-th DstOp(s) from OutputOpsPieces
+  // as destination(s).
+ for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+ SmallVector<DstOp, 2> Defs;
+ for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+ Defs.push_back(OutputOpsPieces[DstNo][i]);
- MI.eraseFromParent();
- return Legalized;
-}
+ SmallVector<SrcOp, 3> Uses;
+ for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
+ Uses.push_back(InputOpsPieces[InputNo][i]);
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
-
- LLT NarrowTy0 = NarrowTy;
- LLT NarrowTy1;
- unsigned NumParts;
-
- if (NarrowTy.isVector()) {
- // Uneven breakdown not handled.
- NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
- if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
- return UnableToLegalize;
-
- NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType());
- } else {
- NumParts = DstTy.getNumElements();
- NarrowTy1 = SrcTy.getElementType();
+ auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
+ for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+ OutputRegs[DstNo].push_back(I.getReg(DstNo));
}
- SmallVector<Register, 4> SrcRegs, DstRegs;
- extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
-
- for (unsigned I = 0; I < NumParts; ++I) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- MachineInstr *NewInst =
- MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
-
- NewInst->setFlags(MI.getFlags());
- DstRegs.push_back(DstReg);
+ // Merge small outputs into MI's output for each def operand.
+ if (NumLeftovers) {
+ for (unsigned i = 0; i < NumDefs; ++i)
+ mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
+ } else {
+ for (unsigned i = 0; i < NumDefs; ++i)
+ MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
}
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
MI.eraseFromParent();
return Legalized;
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(2).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(Src0Reg);
-
- unsigned NumParts;
- LLT NarrowTy0, NarrowTy1;
-
- if (TypeIdx == 0) {
- unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
- unsigned OldElts = DstTy.getNumElements();
-
- NarrowTy0 = NarrowTy;
- NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
- NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(),
- SrcTy.getScalarSizeInBits())
- : SrcTy.getElementType();
-
- } else {
- unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
- unsigned OldElts = SrcTy.getNumElements();
-
- NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
- NarrowTy.getNumElements();
- NarrowTy0 =
- LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits());
- NarrowTy1 = NarrowTy;
+LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
+ unsigned NumElts) {
+ unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
+
+ unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
+
+ SmallVector<DstOp, 8> OutputOpsPieces;
+ SmallVector<Register, 8> OutputRegs;
+ makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
+
+  // The instructions that split an incoming register are inserted in the basic
+  // block where that register is defined (the basic block is given by the next
+  // operand).
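+  //
+  // For example (illustrative), a G_PHI of <4 x s32> with NumElts == 2 becomes
+  // two G_PHIs of <2 x s32>; the unmerges of each incoming value are emitted in
+  // the corresponding predecessor block, before its terminator, and the small
+  // PHIs are merged back into the original def.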
+ SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
+ for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+ UseIdx += 2, ++UseNo) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
}
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (NarrowTy1.isVector() &&
- NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
- return UnableToLegalize;
-
- CmpInst::Predicate Pred
- = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
-
- SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
- extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
- extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
+ // Build PHIs with fewer elements.
+ unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
+ MIRBuilder.setInsertPt(*MI.getParent(), MI);
+ for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+ auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
+ Phi.addDef(
+ MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
+ OutputRegs.push_back(Phi.getReg(0));
- for (unsigned I = 0; I < NumParts; ++I) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- DstRegs.push_back(DstReg);
-
- if (MI.getOpcode() == TargetOpcode::G_ICMP)
- MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
- else {
- MachineInstr *NewCmp
- = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
- NewCmp->setFlags(MI.getFlags());
+ for (unsigned j = 0; j < NumInputs / 2; ++j) {
+ Phi.addUse(InputOpsPieces[j][i]);
+ Phi.add(MI.getOperand(1 + j * 2 + 1));
}
}
- if (NarrowTy1.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register CondReg = MI.getOperand(1).getReg();
-
- unsigned NumParts = 0;
- LLT NarrowTy0, NarrowTy1;
-
- LLT DstTy = MRI.getType(DstReg);
- LLT CondTy = MRI.getType(CondReg);
- unsigned Size = DstTy.getSizeInBits();
-
- assert(TypeIdx == 0 || CondTy.isVector());
-
- if (TypeIdx == 0) {
- NarrowTy0 = NarrowTy;
- NarrowTy1 = CondTy;
-
- unsigned NarrowSize = NarrowTy0.getSizeInBits();
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (Size % NarrowSize != 0)
- return UnableToLegalize;
-
- NumParts = Size / NarrowSize;
-
- // Need to break down the condition type
- if (CondTy.isVector()) {
- if (CondTy.getNumElements() == NumParts)
- NarrowTy1 = CondTy.getElementType();
- else
- NarrowTy1 =
- LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts),
- CondTy.getScalarSizeInBits());
- }
+ // Merge small outputs into MI's def.
+ if (NumLeftovers) {
+ mergeMixedSubvectors(MI.getReg(0), OutputRegs);
} else {
- NumParts = CondTy.getNumElements();
- if (NarrowTy.isVector()) {
- // TODO: Handle uneven breakdown.
- if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
- return UnableToLegalize;
-
- return UnableToLegalize;
- } else {
- NarrowTy0 = DstTy.getElementType();
- NarrowTy1 = NarrowTy;
- }
- }
-
- SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
- if (CondTy.isVector())
- extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
-
- extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
- extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
-
- for (unsigned i = 0; i < NumParts; ++i) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
- Src1Regs[i], Src2Regs[i]);
- DstRegs.push_back(DstReg);
- }
-
- if (NarrowTy0.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- const Register DstReg = MI.getOperand(0).getReg();
- LLT PhiTy = MRI.getType(DstReg);
- LLT LeftoverTy;
-
- // All of the operands need to have the same number of elements, so if we can
- // determine a type breakdown for the result type, we can for all of the
- // source types.
- int NumParts, NumLeftover;
- std::tie(NumParts, NumLeftover)
- = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
- if (NumParts < 0)
- return UnableToLegalize;
-
- SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
- SmallVector<MachineInstrBuilder, 4> NewInsts;
-
- const int TotalNumParts = NumParts + NumLeftover;
-
- // Insert the new phis in the result block first.
- for (int I = 0; I != TotalNumParts; ++I) {
- LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
- Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
- NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
- .addDef(PartDstReg));
- if (I < NumParts)
- DstRegs.push_back(PartDstReg);
- else
- LeftoverDstRegs.push_back(PartDstReg);
- }
-
- MachineBasicBlock *MBB = MI.getParent();
- MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
- insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
-
- SmallVector<Register, 4> PartRegs, LeftoverRegs;
-
- // Insert code to extract the incoming values in each predecessor block.
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
- PartRegs.clear();
- LeftoverRegs.clear();
-
- Register SrcReg = MI.getOperand(I).getReg();
- MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
-
- LLT Unused;
- if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
- LeftoverRegs))
- return UnableToLegalize;
-
- // Add the newly created operand splits to the existing instructions. The
- // odd-sized pieces are ordered after the requested NarrowTyArg sized
- // pieces.
- for (int J = 0; J != TotalNumParts; ++J) {
- MachineInstrBuilder MIB = NewInsts[J];
- MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
- MIB.addMBB(&OpMBB);
- }
+ MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
}
MI.eraseFromParent();
@@ -3927,27 +3842,36 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
-
const int NumDst = MI.getNumOperands() - 1;
const Register SrcReg = MI.getOperand(NumDst).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(SrcReg);
- // TODO: Create sequence of extracts.
- if (DstTy == NarrowTy)
+ if (TypeIdx != 1 || NarrowTy == DstTy)
return UnableToLegalize;
- LLT GCDTy = getGCDType(SrcTy, NarrowTy);
- if (DstTy == GCDTy) {
- // This would just be a copy of the same unmerge.
- // TODO: Create extracts, pad with undef and create intermediate merges.
+  // Requires compatible types. Otherwise, SrcReg should have been defined by a
+  // merge-like instruction that would get artifact-combined. Most likely the
+  // instruction that defines SrcReg has to perform a more/fewer-elements
+  // legalization compatible with NarrowTy.
+ assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+ assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+
+ if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+ (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
return UnableToLegalize;
- }
- auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+  // This is most likely DstTy (smaller than the register size) packed in SrcTy
+  // (larger than the register size), and since the unmerge was not combined it
+  // will be lowered to bit-sequence extracts from a register. Unpack SrcTy into
+  // NarrowTy (register-size) pieces first, then unpack each NarrowTy piece into
+  // DstTy.
+
+ // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
+ //
+ // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
+ // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
+ // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
const int NumUnmerge = Unmerge->getNumOperands() - 1;
const int PartsPerUnmerge = NumDst / NumUnmerge;
@@ -3964,89 +3888,87 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register Result = MI.getOperand(0).getReg();
- Register Overflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
-
- LLT SrcTy = MRI.getType(LHS);
- if (!SrcTy.isVector())
- return UnableToLegalize;
-
- LLT ElementType = SrcTy.getElementType();
- LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
- const ElementCount NumResult = SrcTy.getElementCount();
- LLT GCDTy = getGCDType(SrcTy, NarrowTy);
-
- // Unmerge the operands to smaller parts of GCD type.
- auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
- auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
-
- const int NumOps = UnmergeLHS->getNumOperands() - 1;
- const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps);
- LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
- LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
-
- // Perform the operation over unmerged parts.
- SmallVector<Register, 8> ResultParts;
- SmallVector<Register, 8> OverflowParts;
- for (int I = 0; I != NumOps; ++I) {
- Register Operand1 = UnmergeLHS->getOperand(I).getReg();
- Register Operand2 = UnmergeRHS->getOperand(I).getReg();
- auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
- {Operand1, Operand2});
- ResultParts.push_back(PartMul->getOperand(0).getReg());
- OverflowParts.push_back(PartMul->getOperand(1).getReg());
- }
-
- LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
- LLT OverflowLCMTy =
- LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy);
-
- // Recombine the pieces to the original result and overflow registers.
- buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
- buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
- MI.eraseFromParent();
- return Legalized;
-}
-
-// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
-// a vector
-//
-// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
-// undef as necessary.
-//
-// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
-// -> <2 x s16>
-//
-// %4:_(s16) = G_IMPLICIT_DEF
-// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
-// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
-// %7:_(<2 x s16>) = G_IMPLICIT_DEF
-// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
-// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8
-LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
- LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+  // Requires compatible types. Otherwise, the user of DstReg did not perform an
+  // unmerge that should have been artifact-combined. Most likely the
+  // instruction that uses DstReg has to do a more/fewer-elements legalization
+  // compatible with NarrowTy.
+ assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+ assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+ if (NarrowTy == SrcTy)
+ return UnableToLegalize;
- // Break into a common type
- SmallVector<Register, 16> Parts;
- for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
- extractGCDType(Parts, GCDTy, MO.getReg());
+  // This attempts to lower part of an LCMTy merge/unmerge sequence. Its
+  // intended use is for old MIR tests. Since the more/fewer-elements changes,
+  // it should no longer be possible to generate MIR like this when starting
+  // from LLVM IR, because the LCMTy approach was replaced with merge/unmerge to
+  // vector elements.
+ if (TypeIdx == 1) {
+ assert(SrcTy.isVector() && "Expected vector types");
+ assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+ if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+ (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
+ return UnableToLegalize;
+ // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
+ //
+ // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
+ // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
+ // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
+ // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
+ // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
+ // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
+
+ SmallVector<Register, 8> Elts;
+ LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
+ for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
+ auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
+ for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
+ Elts.push_back(Unmerge.getReg(j));
+ }
- // Build the requested new merge, padding with undef.
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
- TargetOpcode::G_ANYEXT);
+ SmallVector<Register, 8> NarrowTyElts;
+ unsigned NumNarrowTyElts = NarrowTy.getNumElements();
+ unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
+ for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
+ ++i, Offset += NumNarrowTyElts) {
+ ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
+ NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+ }
- // Pack into the original result register.
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+ MIRBuilder.buildMerge(DstReg, NarrowTyElts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ assert(TypeIdx == 0 && "Bad type index");
+ if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
+ (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
+ return UnableToLegalize;
+  // This is most likely SrcTy (smaller than the register size) packed in DstTy
+  // (larger than the register size), and since the merge was not combined it
+  // will be lowered to bit-sequence packing into a register. Merge SrcTy into
+  // NarrowTy (register-size) pieces first, then merge each NarrowTy piece into
+  // DstTy.
+
+ // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
+ //
+ // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
+ // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
+ // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
+ SmallVector<Register, 8> NarrowTyElts;
+ unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+ unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
+ unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
+ for (unsigned i = 0; i < NumParts; ++i) {
+ SmallVector<Register, 8> Sources;
+ for (unsigned j = 0; j < NumElts; ++j)
+ Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
+ NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
+ }
+
+ MIRBuilder.buildMerge(DstReg, NarrowTyElts);
MI.eraseFromParent();
return Legalized;
}
@@ -4218,163 +4140,14 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
- LLT NarrowTy) {
- assert(TypeIdx == 0 && "only one type index expected");
-
- const unsigned Opc = MI.getOpcode();
- const int NumDefOps = MI.getNumExplicitDefs();
- const int NumSrcOps = MI.getNumOperands() - NumDefOps;
- const unsigned Flags = MI.getFlags();
- const unsigned NarrowSize = NarrowTy.getSizeInBits();
- const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
-
- assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
- "result and 1-3 sources or 2 results and "
- "1-2 sources");
-
- SmallVector<Register, 2> DstRegs;
- for (int I = 0; I < NumDefOps; ++I)
- DstRegs.push_back(MI.getOperand(I).getReg());
-
- // First of all check whether we are narrowing (changing the element type)
- // or reducing the vector elements
- const LLT DstTy = MRI.getType(DstRegs[0]);
- const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
-
- SmallVector<Register, 8> ExtractedRegs[3];
- SmallVector<Register, 8> Parts;
-
- // Break down all the sources into NarrowTy pieces we can operate on. This may
- // involve creating merges to a wider type, padded with undef.
- for (int I = 0; I != NumSrcOps; ++I) {
- Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
- // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
- // For fewerElements, this is a smaller vector with the same element type.
- LLT OpNarrowTy;
- if (IsNarrow) {
- OpNarrowTy = NarrowScalarTy;
-
- // In case of narrowing, we need to cast vectors to scalars for this to
- // work properly
- // FIXME: Can we do without the bitcast here if we're narrowing?
- if (SrcTy.isVector()) {
- SrcTy = LLT::scalar(SrcTy.getSizeInBits());
- SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
- }
- } else {
- auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount()
- : ElementCount::getFixed(1);
- OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType());
- }
-
- LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
-
- // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
- buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
- TargetOpcode::G_ANYEXT);
- }
-
- SmallVector<Register, 8> ResultRegs[2];
-
- // Input operands for each sub-instruction.
- SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
-
- int NumParts = ExtractedRegs[0].size();
- const unsigned DstSize = DstTy.getSizeInBits();
- const LLT DstScalarTy = LLT::scalar(DstSize);
-
- // Narrowing needs to use scalar types
- LLT DstLCMTy, NarrowDstTy;
- if (IsNarrow) {
- DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
- NarrowDstTy = NarrowScalarTy;
- } else {
- DstLCMTy = getLCMType(DstTy, NarrowTy);
- NarrowDstTy = NarrowTy;
- }
-
- // We widened the source registers to satisfy merge/unmerge size
- // constraints. We'll have some extra fully undef parts.
- const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
-
- for (int I = 0; I != NumRealParts; ++I) {
- // Emit this instruction on each of the split pieces.
- for (int J = 0; J != NumSrcOps; ++J)
- InputRegs[J] = ExtractedRegs[J][I];
-
- MachineInstrBuilder Inst;
- if (NumDefOps == 1)
- Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
- else
- Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
- Flags);
-
- for (int J = 0; J != NumDefOps; ++J)
- ResultRegs[J].push_back(Inst.getReg(J));
- }
-
- // Fill out the widened result with undef instead of creating instructions
- // with undef inputs.
- int NumUndefParts = NumParts - NumRealParts;
- if (NumUndefParts != 0) {
- Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
- for (int I = 0; I != NumDefOps; ++I)
- ResultRegs[I].append(NumUndefParts, Undef);
- }
-
- // Extract the possibly padded result. Use a scratch register if we need to do
- // a final bitcast, otherwise use the original result register.
- Register MergeDstReg;
- for (int I = 0; I != NumDefOps; ++I) {
- if (IsNarrow && DstTy.isVector())
- MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
- else
- MergeDstReg = DstRegs[I];
-
- buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
-
- // Recast to vector if we narrowed a vector
- if (IsNarrow && DstTy.isVector())
- MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
- }
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- int64_t Imm = MI.getOperand(2).getImm();
-
- LLT DstTy = MRI.getType(DstReg);
-
- SmallVector<Register, 8> Parts;
- LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
-
- for (Register &R : Parts)
- R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
-
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
using namespace TargetOpcode;
+ GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
+ unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
switch (MI.getOpcode()) {
case G_IMPLICIT_DEF:
- return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
case G_TRUNC:
case G_AND:
case G_OR:
@@ -4439,10 +4212,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_SSUBSAT:
case G_UADDSAT:
case G_USUBSAT:
- return reduceOperationWidth(MI, TypeIdx, NarrowTy);
case G_UMULO:
case G_SMULO:
- return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
case G_SHL:
case G_LSHR:
case G_ASHR:
@@ -4454,7 +4225,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_CTTZ_ZERO_UNDEF:
case G_CTPOP:
case G_FCOPYSIGN:
- return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
case G_ZEXT:
case G_SEXT:
case G_ANYEXT:
@@ -4467,14 +4237,16 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_INTTOPTR:
case G_PTRTOINT:
case G_ADDRSPACE_CAST:
- return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
- return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
+    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
case G_SELECT:
- return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
+ if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+ return fewerElementsVectorMultiEltType(GMI, NumElts);
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
case G_PHI:
- return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorPhi(GMI, NumElts);
case G_UNMERGE_VALUES:
return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
case G_BUILD_VECTOR:
@@ -4491,7 +4263,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_STORE:
return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
case G_SEXT_INREG:
- return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
GISEL_VECREDUCE_CASES_NONSEQ
return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
case G_SHUFFLE_VECTOR:
@@ -5053,6 +4825,15 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_USUBSAT:
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
case TargetOpcode::G_UMIN:
@@ -5070,6 +4851,17 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FSHR:
+ case TargetOpcode::G_FSHL: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorSrc(MI, MoreTy, 3);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
case TargetOpcode::G_EXTRACT:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -5079,6 +4871,11 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
case TargetOpcode::G_INSERT:
case TargetOpcode::G_FREEZE:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_BSWAP:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_SEXT_INREG:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -5098,30 +4895,34 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_UNMERGE_VALUES: {
- if (TypeIdx != 1)
- return UnableToLegalize;
-
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- int NumDst = MI.getNumOperands() - 1;
- moreElementsVectorSrc(MI, MoreTy, NumDst);
-
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
- for (int I = 0; I != NumDst; ++I)
- MIB.addDef(MI.getOperand(I).getReg());
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return UnableToLegalize;
+ case TargetOpcode::G_PHI:
+ return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_BUILD_VECTOR: {
+ SmallVector<SrcOp, 8> Elts;
+ for (auto Op : MI.uses()) {
+ Elts.push_back(Op.getReg());
+ }
- int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
- for (int I = NumDst; I != NewNumDst; ++I)
- MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
+ Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
+ }
- MIB.addUse(MI.getOperand(NumDst).getReg());
+ MIRBuilder.buildDeleteTrailingVectorElements(
+ MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_PHI:
- return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
- case TargetOpcode::G_SHUFFLE_VECTOR:
- return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_TRUNC: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
default:
return UnableToLegalize;
}
@@ -6778,6 +6579,24 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
LLT VecTy = MRI.getType(SrcVec);
LLT EltTy = VecTy.getElementType();
+ unsigned NumElts = VecTy.getNumElements();
+
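+  // With a constant in-range index the element can be handled without going
+  // through a stack temporary. For example (illustrative), inserting into
+  // element 2 of a <4 x s32> vector unmerges it into four s32 values, replaces
+  // the third one, and rebuilds the result with G_BUILD_VECTOR; an extract
+  // simply copies the selected scalar.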
+ int64_t IdxVal;
+ if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
+ SmallVector<Register, 8> SrcRegs;
+ extractParts(SrcVec, EltTy, NumElts, SrcRegs);
+
+ if (InsertVal) {
+ SrcRegs[IdxVal] = MI.getOperand(2).getReg();
+ MIRBuilder.buildMerge(DstReg, SrcRegs);
+ } else {
+ MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
if (!EltTy.isByteSized()) { // Not implemented.
LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
return UnableToLegalize;
@@ -6796,7 +6615,6 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
// if the index is out of bounds.
Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
- int64_t IdxVal;
if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
int64_t Offset = IdxVal * EltBytes;
PtrInfo = PtrInfo.getWithOffset(Offset);
@@ -6923,6 +6741,32 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);
+ // Extract sub-vector or one element
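+  // For example (illustrative), extracting a <2 x s32> at bit offset 32 from a
+  // <4 x s32> source unmerges the source into four s32 values and merges
+  // elements 1 and 2 into the destination.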
+ if (SrcTy.isVector()) {
+ unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
+ (Offset + DstSize <= SrcTy.getSizeInBits())) {
+ // Unmerge and allow access to each Src element for the artifact combiner.
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);
+
+      // Take the element(s) we need to extract and copy (merge) them.
+ SmallVector<Register, 8> SubVectorElts;
+ for (unsigned Idx = Offset / SrcEltSize;
+ Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
+ SubVectorElts.push_back(Unmerge.getReg(Idx));
+ }
+ if (SubVectorElts.size() == 1)
+ MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
+ else
+ MIRBuilder.buildMerge(Dst, SubVectorElts);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+
if (DstTy.isScalar() &&
(SrcTy.isScalar() ||
(SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
@@ -6956,6 +6800,45 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
LLT DstTy = MRI.getType(Src);
LLT InsertTy = MRI.getType(InsertSrc);
+ // Insert sub-vector or one element
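+  // For example (illustrative), inserting a <2 x s32> at bit offset 32 into a
+  // <4 x s32> destination keeps source elements 0 and 3 and takes elements 1
+  // and 2 from the unmerged insert value.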
+ if (DstTy.isVector() && !InsertTy.isPointer()) {
+ LLT EltTy = DstTy.getElementType();
+ unsigned EltSize = EltTy.getSizeInBits();
+ unsigned InsertSize = InsertTy.getSizeInBits();
+
+ if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
+ (Offset + InsertSize <= DstTy.getSizeInBits())) {
+ auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
+ SmallVector<Register, 8> DstElts;
+ unsigned Idx = 0;
+      // Elements from Src that come before the insert Offset.
+ for (; Idx < Offset / EltSize; ++Idx) {
+ DstElts.push_back(UnmergeSrc.getReg(Idx));
+ }
+
+ // Replace elements in Src with elements from InsertSrc
+ if (InsertTy.getSizeInBits() > EltSize) {
+ auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
+ for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
+ ++Idx, ++i) {
+ DstElts.push_back(UnmergeInsertSrc.getReg(i));
+ }
+ } else {
+ DstElts.push_back(InsertSrc);
+ ++Idx;
+ }
+
+ // Remaining elements from Src after insert
+ for (; Idx < DstTy.getNumElements(); ++Idx) {
+ DstElts.push_back(UnmergeSrc.getReg(Idx));
+ }
+
+ MIRBuilder.buildMerge(Dst, DstElts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+
if (InsertTy.isVector() ||
(DstTy.isVector() && DstTy.getElementType() != InsertTy))
return UnableToLegalize;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 03dda806cb1e..de8dbd456901 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -554,12 +554,11 @@ bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
bool Changed = false;
// Walk through the block bottom-up, looking for merging candidates.
StoreMergeCandidate Candidate;
- for (auto II = MBB.rbegin(), IE = MBB.rend(); II != IE; ++II) {
- MachineInstr &MI = *II;
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
if (InstsToErase.contains(&MI))
continue;
- if (auto StoreMI = dyn_cast<GStore>(&*II)) {
+ if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
// We have a G_STORE. Add it to the candidate if it writes to an adjacent
// address.
if (!addStoreToCandidate(*StoreMI, Candidate)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index fb5ed35c1f72..391251886fbb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -215,6 +215,48 @@ MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res,
return buildPtrMask(Res, Op0, MaskReg);
}
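+// Pads a vector result with undef elements. For example (illustrative),
+// padding a <3 x s16> source to a <4 x s16> result unmerges the three source
+// elements, appends one G_IMPLICIT_DEF s16, and rebuilds the result with
+// G_BUILD_VECTOR.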
+MachineInstrBuilder
+MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
+ const SrcOp &Op0) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+ assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
+ "Op0 has more elements");
+
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+ SmallVector<Register, 8> Regs;
+ for (auto Op : Unmerge.getInstr()->defs())
+ Regs.push_back(Op.getReg());
+ Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0);
+ unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
+ for (unsigned i = 0; i < NumberOfPadElts; ++i)
+ Regs.push_back(Undef);
+ return buildMerge(Res, Regs);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
+ const SrcOp &Op0) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+ assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
+ "Op0 has fewer elements");
+
+ SmallVector<Register, 8> Regs;
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+ for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
+ Regs.push_back(Unmerge.getReg(i));
+ return buildMerge(Res, Regs);
+}
+
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
@@ -613,10 +655,8 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
const SrcOp &Op) {
unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
- SmallVector<Register, 8> TmpVec;
- for (unsigned I = 0; I != NumReg; ++I)
- TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res));
- return buildUnmerge(TmpVec, Op);
+ SmallVector<DstOp, 8> TmpVec(NumReg, Res);
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index b0b84763e922..4981a537dc7c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -923,6 +923,21 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
return LLT::scalar(LCMSize);
}
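+// Example (illustrative): for two fixed-width vectors with the same element
+// type, getCoverTy rounds the element count of OrigTy up to a multiple of
+// TargetTy's, so getCoverTy(<3 x s32>, <2 x s32>) is <4 x s32>; all other
+// cases fall back to getLCMType.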
+LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) {
+ if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy ||
+ (OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits()))
+ return getLCMType(OrigTy, TargetTy);
+
+ unsigned OrigTyNumElts = OrigTy.getNumElements();
+ unsigned TargetTyNumElts = TargetTy.getNumElements();
+ if (OrigTyNumElts % TargetTyNumElts == 0)
+ return OrigTy;
+
+ unsigned NumElts = alignTo(OrigTyNumElts, TargetTyNumElts);
+ return LLT::scalarOrVector(ElementCount::getFixed(NumElts),
+ OrigTy.getElementType());
+}
+
LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
const unsigned OrigSize = OrigTy.getSizeInBits();
const unsigned TargetSize = TargetTy.getSizeInBits();
@@ -1184,25 +1199,6 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
}
-/// These artifacts generally don't have any debug users because they don't
-/// directly originate from IR instructions, but instead usually from
-/// legalization. Avoiding checking for debug users improves compile time.
-/// Note that truncates or extends aren't included because they have IR
-/// counterparts which can have debug users after translation.
-static bool shouldSkipDbgValueFor(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case TargetOpcode::G_UNMERGE_VALUES:
- case TargetOpcode::G_MERGE_VALUES:
- case TargetOpcode::G_CONCAT_VECTORS:
- case TargetOpcode::G_BUILD_VECTOR:
- case TargetOpcode::G_EXTRACT:
- case TargetOpcode::G_INSERT:
- return true;
- default:
- return false;
- }
-}
-
void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
LostDebugLocObserver *LocObserver,
SmallInstListTy &DeadInstChain) {
@@ -1212,10 +1208,7 @@ void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
}
LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
DeadInstChain.remove(&MI);
- if (shouldSkipDbgValueFor(MI))
- MI.eraseFromParent();
- else
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ MI.eraseFromParent();
if (LocObserver)
LocObserver->checkpoint(false);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 0882ce366c9c..fc97938ccd3e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -242,7 +242,7 @@ bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
(void)IsRegMask;
- assert(!llvm::any_of(MI->operands(), IsRegMask) &&
+ assert(llvm::none_of(MI->operands(), IsRegMask) &&
"Calls were filtered out above!");
auto IsUnordered = [](MachineMemOperand *MMO) { return MMO->isUnordered(); };
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index fc5ac45752ca..c975013db8c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -686,9 +686,7 @@ void InlineSpiller::reMaterializeAll() {
// Remove any values that were completely rematted.
for (Register Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
- for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
- I != E; ++I) {
- VNInfo *VNI = *I;
+ for (VNInfo *VNI : llvm::make_range(LI.vni_begin(), LI.vni_end())) {
if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
index a56485cdbc67..3cab9e5734ee 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -56,8 +56,8 @@ void InterferenceCache::init(MachineFunction *mf,
LIUArray = liuarray;
TRI = tri;
reinitPhysRegEntries();
- for (unsigned i = 0; i != CacheEntries; ++i)
- Entries[i].clear(mf, indexes, lis);
+ for (Entry &E : Entries)
+ E.clear(mf, indexes, lis);
}
InterferenceCache::Entry *InterferenceCache::get(MCRegister PhysReg) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index cf62b0e5d7e8..e97dcca201e8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -1249,8 +1249,8 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
Optional<ValueIDNum> Result = None;
Optional<LocIdx> SpillLoc = None;
- for (unsigned int I = 0; I < CandidateSizes.size(); ++I) {
- unsigned SpillID = MTracker->getLocID(SpillNo, {CandidateSizes[I], 0});
+ for (unsigned CS : CandidateSizes) {
+ unsigned SpillID = MTracker->getLocID(SpillNo, {CS, 0});
SpillLoc = MTracker->getSpillMLoc(SpillID);
ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
// If this value was defined in it's own position, then it was probably
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index a632d3d9ce76..b4dd41bbb810 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -492,10 +492,10 @@ private:
static VarLoc CreateCopyLoc(const VarLoc &OldVL, const MachineLoc &OldML,
Register NewReg) {
VarLoc VL = OldVL;
- for (size_t I = 0, E = VL.Locs.size(); I < E; ++I)
- if (VL.Locs[I] == OldML) {
- VL.Locs[I].Kind = MachineLocKind::RegisterKind;
- VL.Locs[I].Value.RegNo = NewReg;
+ for (MachineLoc &ML : VL.Locs)
+ if (ML == OldML) {
+ ML.Kind = MachineLocKind::RegisterKind;
+ ML.Value.RegNo = NewReg;
return VL;
}
llvm_unreachable("Should have found OldML in new VarLoc.");
@@ -506,10 +506,10 @@ private:
static VarLoc CreateSpillLoc(const VarLoc &OldVL, const MachineLoc &OldML,
unsigned SpillBase, StackOffset SpillOffset) {
VarLoc VL = OldVL;
- for (int I = 0, E = VL.Locs.size(); I < E; ++I)
- if (VL.Locs[I] == OldML) {
- VL.Locs[I].Kind = MachineLocKind::SpillLocKind;
- VL.Locs[I].Value.SpillLocation = {SpillBase, SpillOffset};
+ for (MachineLoc &ML : VL.Locs)
+ if (ML == OldML) {
+ ML.Kind = MachineLocKind::SpillLocKind;
+ ML.Value.SpillLocation = {SpillBase, SpillOffset};
return VL;
}
llvm_unreachable("Should have found OldML in new VarLoc.");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 5f976bf43c5b..e6661e5135c3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -822,9 +822,6 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// register that hasn't been defined yet. If we do not remove those here, then
// the re-insertion of the DBG_VALUE instruction after register allocation
// will be incorrect.
- // TODO: If earlier passes are corrected to generate sane debug information
- // (and if the machine verifier is improved to catch this), then these checks
- // could be removed or replaced by asserts.
bool Discard = false;
for (const MachineOperand &Op : MI.debug_operands()) {
if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
@@ -1341,8 +1338,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
bool DidChange = false;
LocMap::iterator LocMapI;
LocMapI.setMap(locInts);
- for (unsigned i = 0; i != NewRegs.size(); ++i) {
- LiveInterval *LI = &LIS.getInterval(NewRegs[i]);
+ for (Register NewReg : NewRegs) {
+ LiveInterval *LI = &LIS.getInterval(NewReg);
if (LI->empty())
continue;
@@ -1500,8 +1497,8 @@ void LDVImpl::splitRegister(Register OldReg, ArrayRef<Register> NewRegs) {
// Map all of the new virtual registers.
UserValue *UV = lookupVirtReg(OldReg);
- for (unsigned i = 0; i != NewRegs.size(); ++i)
- mapVirtReg(NewRegs[i], UV);
+ for (Register NewReg : NewRegs)
+ mapVirtReg(NewReg, UV);
}
void LiveDebugVariables::
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
index 07dd3a83866f..9998ce9e8dad 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -56,6 +56,11 @@ private:
bool runOnMachineFunction(MachineFunction &) override;
void releaseMemory() override;
void getAnalysisUsage(AnalysisUsage &) const override;
+
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 6380c4bfd6e6..05768140cbdf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -133,6 +133,22 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
if (OVNI != li.getVNInfoAt(UseIdx))
return false;
+
+ // Check that subrange is live at UseIdx.
+ if (MO.getSubReg()) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ LaneBitmask LM = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ for (LiveInterval::SubRange &SR : li.subranges()) {
+ if ((SR.LaneMask & LM).none())
+ continue;
+ if (!SR.liveAt(UseIdx))
+ return false;
+        // Early exit if all used lanes have been checked. No need to continue.
+ LM &= ~SR.LaneMask;
+ if (LM.none())
+ break;
+ }
+ }
}
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index e8744797707b..94bdfab5e5e0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -141,8 +141,8 @@ void LiveVariables::HandleVirtRegUse(Register Reg, MachineBasicBlock *MBB,
}
#ifndef NDEBUG
- for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
- assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+ for (MachineInstr *Kill : VRInfo.Kills)
+ assert(Kill->getParent() != MBB && "entry should be at end!");
#endif
// This situation can occur:
@@ -534,8 +534,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
MachineBasicBlock *MBB = MI.getParent();
// Process all uses.
- for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
- unsigned MOReg = UseRegs[i];
+ for (unsigned MOReg : UseRegs) {
if (Register::isVirtualRegister(MOReg))
HandleVirtRegUse(MOReg, MBB, MI);
else if (!MRI->isReserved(MOReg))
@@ -543,12 +542,11 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
}
// Process all masked registers. (Call clobbers).
- for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
- HandleRegMask(MI.getOperand(RegMasks[i]));
+ for (unsigned Mask : RegMasks)
+ HandleRegMask(MI.getOperand(Mask));
// Process all defs.
- for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
- unsigned MOReg = DefRegs[i];
+ for (unsigned MOReg : DefRegs) {
if (Register::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
else if (!MRI->isReserved(MOReg))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index ee2387d1e8e6..37fd3e4853ac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -210,7 +210,11 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, MaxAlign);
+ // Only place the stack protector in the local stack area if the target
+ // allows it.
+ if (TFI.isStackIdSafeForLocalArea(MFI.getStackID(StackProtectorFI)))
+ AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown,
+ MaxAlign);
// Assign large stack objects first.
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 6221b5929301..d0323eaf3d78 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -350,18 +350,33 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
bool HasPHI = false;
bool HasInlineAsm = false;
+ bool AllTiedOpsRewritten = true, HasTiedOps = false;
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
if (MI.isPHI())
HasPHI = true;
if (MI.isInlineAsm())
HasInlineAsm = true;
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ unsigned DefIdx;
+ if (MO.isUse() && MI.isRegTiedToDefOperand(I, &DefIdx)) {
+ HasTiedOps = true;
+ if (MO.getReg() != MI.getOperand(DefIdx).getReg())
+ AllTiedOpsRewritten = false;
+ }
+ }
}
}
if (!HasPHI)
Properties.set(MachineFunctionProperties::Property::NoPHIs);
MF.setHasInlineAsm(HasInlineAsm);
+ if (HasTiedOps && AllTiedOpsRewritten)
+ Properties.set(MachineFunctionProperties::Property::TiedOpsRewritten);
+
if (isSSA(MF))
Properties.set(MachineFunctionProperties::Property::IsSSA);
else
@@ -457,6 +472,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (YamlMF.FailsVerification)
MF.getProperties().set(
MachineFunctionProperties::Property::FailsVerification);
+ if (YamlMF.TracksDebugUserValues)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);
if (parseRegisterInfo(PFS, YamlMF))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index f1369396e37f..dc72f83ad0e4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -219,6 +219,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
MachineFunctionProperties::Property::FailedISel);
YamlMF.FailsVerification = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailsVerification);
+ YamlMF.TracksDebugUserValues = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
MachineModuleSlotTracker MST(&MF);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 23c511aaa056..8c9d00d08c6a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -193,7 +193,7 @@ void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList,
void ilist_traits<MachineInstr>::deleteNode(MachineInstr *MI) {
assert(!MI->getParent() && "MI is still in a block!");
- Parent->getParent()->DeleteMachineInstr(MI);
+ Parent->getParent()->deleteMachineInstr(MI);
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
@@ -1038,16 +1038,15 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
// Collect a list of virtual registers killed by the terminators.
SmallVector<Register, 4> KilledRegs;
if (LV)
- for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
- I != E; ++I) {
- MachineInstr *MI = &*I;
- for (MachineOperand &MO : MI->operands()) {
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end())) {
+ for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() ||
MO.isUndef())
continue;
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg) ||
- LV->getVarInfo(Reg).removeKill(*MI)) {
+ LV->getVarInfo(Reg).removeKill(MI)) {
KilledRegs.push_back(Reg);
LLVM_DEBUG(dbgs() << "Removing terminator kill: " << MI);
MO.setIsKill(false);
@@ -1057,11 +1056,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
SmallVector<Register, 4> UsedRegs;
if (LIS) {
- for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
- I != E; ++I) {
- MachineInstr *MI = &*I;
-
- for (const MachineOperand &MO : MI->operands()) {
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end())) {
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.getReg() == 0)
continue;
@@ -1078,9 +1075,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
// SlotIndexes.
SmallVector<MachineInstr*, 4> Terminators;
if (Indexes) {
- for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
- I != E; ++I)
- Terminators.push_back(&*I);
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end()))
+ Terminators.push_back(&MI);
}
// Since we replaced all uses of Succ with NMBB, that should also be treated
@@ -1091,9 +1088,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
if (Indexes) {
SmallVector<MachineInstr*, 4> NewTerminators;
- for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
- I != E; ++I)
- NewTerminators.push_back(&*I);
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end()))
+ NewTerminators.push_back(&MI);
for (MachineInstr *Terminator : Terminators) {
if (!is_contained(NewTerminators, Terminator))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 8a1b4031642d..692587cd58fa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -61,6 +61,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/CodeLayout.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -193,6 +194,11 @@ static cl::opt<unsigned> TriangleChainCount(
cl::init(2),
cl::Hidden);
+static cl::opt<bool> EnableExtTspBlockPlacement(
+ "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
+ cl::desc("Enable machine block placement based on the ext-tsp model, "
+ "optimizing I-cache utilization."));
+
namespace llvm {
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
@@ -557,6 +563,15 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// but a local analysis would not find them.
void precomputeTriangleChains();
+ /// Apply a post-processing step optimizing block placement.
+ void applyExtTsp();
+
+ /// Modify the existing block placement in the function and adjust all jumps.
+ void assignBlockOrder(const std::vector<const MachineBasicBlock *> &NewOrder);
+
+ /// Create a single CFG chain from the current block order.
+ void createCFGChainExtTsp();
+
public:
static char ID; // Pass identification, replacement for typeid
@@ -3387,6 +3402,15 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // Apply a post-processing step optimizing block placement.
+ if (MF.size() >= 3 && EnableExtTspBlockPlacement) {
+ // Find a new placement and modify the layout of the blocks in the function.
+ applyExtTsp();
+
+ // Re-create CFG chain so that we can optimizeBranches and alignBlocks.
+ createCFGChainExtTsp();
+ }
+
optimizeBranches();
alignBlocks();
@@ -3413,12 +3437,147 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
MBFI->view("MBP." + MF.getName(), false);
}
-
// We always return true as we have no way to track whether the final order
// differs from the original order.
return true;
}
+void MachineBlockPlacement::applyExtTsp() {
+ // Prepare data; blocks are indexed by their position in the current ordering.
+ DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
+ BlockIndex.reserve(F->size());
+ std::vector<const MachineBasicBlock *> CurrentBlockOrder;
+ CurrentBlockOrder.reserve(F->size());
+ size_t NumBlocks = 0;
+ for (const MachineBasicBlock &MBB : *F) {
+ BlockIndex[&MBB] = NumBlocks++;
+ CurrentBlockOrder.push_back(&MBB);
+ }
+
+ auto BlockSizes = std::vector<uint64_t>(F->size());
+ auto BlockCounts = std::vector<uint64_t>(F->size());
+ DenseMap<std::pair<uint64_t, uint64_t>, uint64_t> JumpCounts;
+ for (MachineBasicBlock &MBB : *F) {
+ // Getting the block frequency.
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ BlockCounts[BlockIndex[&MBB]] = BlockFreq.getFrequency();
+ // Getting the block size:
+ // - approximate the size of an instruction by 4 bytes, and
+ // - ignore debug instructions.
+ // Note: getting the exact size of each block is target-dependent and can be
+ // done by extending the interface of MCCodeEmitter. Experimentally we do
+ // not see a perf improvement with the exact block sizes.
+ auto NonDbgInsts =
+ instructionsWithoutDebug(MBB.instr_begin(), MBB.instr_end());
+ int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
+ BlockSizes[BlockIndex[&MBB]] = 4 * NumInsts;
+ // Getting jump frequencies.
+ for (MachineBasicBlock *Succ : MBB.successors()) {
+ auto EP = MBPI->getEdgeProbability(&MBB, Succ);
+ BlockFrequency EdgeFreq = BlockFreq * EP;
+ auto Edge = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]);
+ JumpCounts[Edge] = EdgeFreq.getFrequency();
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Applying ext-tsp layout for |V| = " << F->size()
+ << " with profile = " << F->getFunction().hasProfileData()
+ << " (" << F->getName().str() << ")"
+ << "\n");
+ LLVM_DEBUG(
+ dbgs() << format(" original layout score: %0.2f\n",
+ calcExtTspScore(BlockSizes, BlockCounts, JumpCounts)));
+
+ // Run the layout algorithm.
+ auto NewOrder = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
+ std::vector<const MachineBasicBlock *> NewBlockOrder;
+ NewBlockOrder.reserve(F->size());
+ for (uint64_t Node : NewOrder) {
+ NewBlockOrder.push_back(CurrentBlockOrder[Node]);
+ }
+ LLVM_DEBUG(dbgs() << format(" optimized layout score: %0.2f\n",
+ calcExtTspScore(NewOrder, BlockSizes, BlockCounts,
+ JumpCounts)));
+
+ // Assign new block order.
+ assignBlockOrder(NewBlockOrder);
+}
+
+void MachineBlockPlacement::assignBlockOrder(
+ const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
+ assert(F->size() == NewBlockOrder.size() && "Incorrect size of block order");
+ F->RenumberBlocks();
+
+ bool HasChanges = false;
+ for (size_t I = 0; I < NewBlockOrder.size(); I++) {
+ if (NewBlockOrder[I] != F->getBlockNumbered(I)) {
+ HasChanges = true;
+ break;
+ }
+ }
+ // Stop early if the new block order is identical to the existing one.
+ if (!HasChanges)
+ return;
+
+ SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(F->getNumBlockIDs());
+ for (auto &MBB : *F) {
+ PrevFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
+ }
+
+ // Sort basic blocks in the function according to the computed order.
+ DenseMap<const MachineBasicBlock *, size_t> NewIndex;
+ for (const MachineBasicBlock *MBB : NewBlockOrder) {
+ NewIndex[MBB] = NewIndex.size();
+ }
+ F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
+ return NewIndex[&L] < NewIndex[&R];
+ });
+
+ // Update basic block branches by inserting explicit fallthrough branches
+ // when required and re-optimize branches when possible.
+ const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo();
+ SmallVector<MachineOperand, 4> Cond;
+ for (auto &MBB : *F) {
+ MachineFunction::iterator NextMBB = std::next(MBB.getIterator());
+ MachineFunction::iterator EndIt = MBB.getParent()->end();
+ auto *FTMBB = PrevFallThroughs[MBB.getNumber()];
+ // If this block had a fallthrough before, we need an explicit unconditional
+ // branch to that block when the fallthrough target is not adjacent to the
+ // block in the new order.
+ if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
+ TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc());
+ }
+
+ // It might be possible to optimize branches by flipping the condition.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ if (TII->analyzeBranch(MBB, TBB, FBB, Cond))
+ continue;
+ MBB.updateTerminator(FTMBB);
+ }
+
+#ifndef NDEBUG
+ // Make sure we correctly constructed all branches.
+ F->verify(this, "After optimized block reordering");
+#endif
+}
+
+void MachineBlockPlacement::createCFGChainExtTsp() {
+ BlockToChain.clear();
+ ComputedEdges.clear();
+ ChainAllocator.DestroyAll();
+
+ MachineBasicBlock *HeadBB = &F->front();
+ BlockChain *FunctionChain =
+ new (ChainAllocator.Allocate()) BlockChain(BlockToChain, HeadBB);
+
+ for (MachineBasicBlock &MBB : *F) {
+ if (HeadBB == &MBB)
+ continue; // Ignore head of the chain
+ FunctionChain->merge(&MBB, nullptr);
+ }
+}
+
namespace {
/// A pass to compute block placement statistics.
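For orientation, here is a minimal standalone sketch of the inputs that applyExtTsp() assembles above: per-block sizes and execution counts are vectors indexed by the block's position in the current layout, and jump counts are keyed by (source, destination) index pairs. The three-block CFG and its numbers below are hypothetical; only the container shapes mirror the hunk.

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

int main() {
  // Hypothetical function with blocks B0, B1, B2 in the current layout order.
  std::vector<uint64_t> BlockSizes = {16, 8, 12};     // ~4 bytes per non-debug instruction
  std::vector<uint64_t> BlockCounts = {100, 60, 100}; // block execution frequencies
  // Jump frequencies keyed by (source index, destination index).
  std::map<std::pair<uint64_t, uint64_t>, uint64_t> JumpCounts = {
      {{0, 1}, 60}, {{0, 2}, 40}, {{1, 2}, 60}};
  // The pass hands equivalents of these containers to calcExtTspScore() and
  // applyExtTspLayout(); the latter returns a permutation of block indices
  // that is mapped back onto the MachineBasicBlocks.
  (void)BlockSizes;
  (void)BlockCounts;
  (void)JumpCounts;
  return 0;
}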
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
index e2b6cfe55c16..72ab9ee4f388 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -485,7 +485,7 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
for (auto *InstrPtr : DelInstrs) {
- InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+ InstrPtr->eraseFromParent();
// Erase all LiveRegs defined by the removed instruction
for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
if (I->MI == InstrPtr)
@@ -693,7 +693,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// use for them.
MachineFunction *MF = MBB->getParent();
for (auto *InstrPtr : InsInstrs)
- MF->DeleteMachineInstr(InstrPtr);
+ MF->deleteMachineInstr(InstrPtr);
}
InstrIdxForVirtReg.clear();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 7c83bacd80d9..57fbe4112e47 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -847,31 +847,27 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName()
<< "\n");
- for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
- I != E;) {
- MachineInstr *MI = &*I;
- ++I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
// Ignore non-trivial COPYs.
- if (MI->isCopy() && MI->getNumOperands() == 2 &&
- !TRI->regsOverlap(MI->getOperand(0).getReg(),
- MI->getOperand(1).getReg())) {
+ if (MI.isCopy() && MI.getNumOperands() == 2 &&
+ !TRI->regsOverlap(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg())) {
- MCRegister Def = MI->getOperand(0).getReg().asMCReg();
- MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+ MCRegister Def = MI.getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI.getOperand(1).getReg().asMCReg();
// Unlike forward cp, we don't invoke propagateDefs here,
// just let forward cp do COPY-to-COPY propagation.
- if (isBackwardPropagatableCopy(*MI, *MRI)) {
+ if (isBackwardPropagatableCopy(MI, *MRI)) {
Tracker.invalidateRegister(Src, *TRI);
Tracker.invalidateRegister(Def, *TRI);
- Tracker.trackCopy(MI, *TRI);
+ Tracker.trackCopy(&MI, *TRI);
continue;
}
}
// Invalidate any earlyclobber regs first.
- for (const MachineOperand &MO : MI->operands())
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isEarlyClobber()) {
MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
@@ -879,8 +875,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
Tracker.invalidateRegister(Reg, *TRI);
}
- propagateDefs(*MI);
- for (const MachineOperand &MO : MI->operands()) {
+ propagateDefs(MI);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
@@ -898,7 +894,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
for (MCRegUnitIterator RUI(MO.getReg().asMCReg(), TRI); RUI.isValid();
++RUI) {
if (auto *Copy = Tracker.findCopyDefViaUnit(*RUI, *TRI)) {
- CopyDbgUsers[Copy].insert(MI);
+ CopyDbgUsers[Copy].insert(&MI);
}
}
} else {
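The new loop header relies on llvm::make_early_inc_range, which advances the underlying iterator before the body runs so the current instruction may be erased. A brief sketch of the idiom follows; the erase condition (debug instructions) is only a stand-in.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Sketch: walk a block bottom-up while erasing some instructions.
static void eraseDebugInstrs(llvm::MachineBasicBlock &MBB) {
  for (llvm::MachineInstr &MI :
       llvm::make_early_inc_range(llvm::reverse(MBB))) {
    if (MI.isDebugInstr())
      MI.eraseFromParent(); // Safe: the range already stepped past MI.
  }
}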
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
new file mode 100644
index 000000000000..42a5e2b7af01
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
@@ -0,0 +1,113 @@
+//===- MachineCycleAnalysis.cpp - Compute CycleInfo for Machine IR --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
+#include "llvm/ADT/GenericCycleImpl.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+template class llvm::GenericCycleInfo<llvm::MachineSSAContext>;
+template class llvm::GenericCycle<llvm::MachineSSAContext>;
+
+namespace {
+
+/// Legacy analysis pass which computes a \ref MachineCycleInfo.
+class MachineCycleInfoWrapperPass : public MachineFunctionPass {
+ MachineFunction *F = nullptr;
+ MachineCycleInfo CI;
+
+public:
+ static char ID;
+
+ MachineCycleInfoWrapperPass();
+
+ MachineCycleInfo &getCycleInfo() { return CI; }
+ const MachineCycleInfo &getCycleInfo() const { return CI; }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+ // TODO: verify analysis
+};
+
+class MachineCycleInfoPrinterPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineCycleInfoPrinterPass();
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+} // namespace
+
+char MachineCycleInfoWrapperPass::ID = 0;
+
+MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineCycleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineCycleInfoWrapperPass, "machine-cycles",
+ "Machine Cycle Info Analysis", true, true)
+INITIALIZE_PASS_END(MachineCycleInfoWrapperPass, "machine-cycles",
+ "Machine Cycle Info Analysis", true, true)
+
+void MachineCycleInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineCycleInfoWrapperPass::runOnMachineFunction(MachineFunction &Func) {
+ CI.clear();
+
+ F = &Func;
+ CI.compute(Func);
+ return false;
+}
+
+void MachineCycleInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
+ OS << "MachineCycleInfo for function: " << F->getName() << "\n";
+ CI.print(OS);
+}
+
+void MachineCycleInfoWrapperPass::releaseMemory() {
+ CI.clear();
+ F = nullptr;
+}
+
+char MachineCycleInfoPrinterPass::ID = 0;
+
+MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineCycleInfoPrinterPassPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineCycleInfoPrinterPass, "print-machine-cycles",
+ "Print Machine Cycle Info Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
+INITIALIZE_PASS_END(MachineCycleInfoPrinterPass, "print-machine-cycles",
+ "Print Machine Cycle Info Analysis", true, true)
+
+void MachineCycleInfoPrinterPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineCycleInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) {
+ auto &CI = getAnalysis<MachineCycleInfoWrapperPass>();
+ CI.print(errs());
+ return false;
+}
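A hedged sketch of driving the new data structure directly, using only the MachineCycleInfo operations exercised in this file (compute, print, clear); the helper name is hypothetical.

#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: build and dump cycle info for a machine function.
static void dumpMachineCycles(llvm::MachineFunction &MF) {
  llvm::MachineCycleInfo CI;
  CI.compute(MF);         // Build the cycle forest for MF.
  CI.print(llvm::errs()); // Akin to what the print-machine-cycles pass emits.
  CI.clear();             // Drop the computed structures.
}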
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 310c2721c3bd..81ed3d0e93ff 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -89,6 +89,7 @@ static cl::opt<unsigned> AlignAllFunctions(
static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
using P = MachineFunctionProperties::Property;
+ // clang-format off
switch(Prop) {
case P::FailedISel: return "FailedISel";
case P::IsSSA: return "IsSSA";
@@ -100,7 +101,9 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
case P::TracksLiveness: return "TracksLiveness";
case P::TiedOpsRewritten: return "TiedOpsRewritten";
case P::FailsVerification: return "FailsVerification";
+ case P::TracksDebugUserValues: return "TracksDebugUserValues";
}
+ // clang-format on
llvm_unreachable("Invalid machine function property");
}
@@ -125,7 +128,7 @@ void MachineFunctionProperties::print(raw_ostream &OS) const {
MachineFunctionInfo::~MachineFunctionInfo() = default;
void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
- MBB->getParent()->DeleteMachineBasicBlock(MBB);
+ MBB->getParent()->deleteMachineBasicBlock(MBB);
}
static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
@@ -347,10 +350,10 @@ void MachineFunction::assignBeginEndSections() {
/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'.
MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
- const DebugLoc &DL,
+ DebugLoc DL,
bool NoImplicit) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
- MachineInstr(*this, MCID, DL, NoImplicit);
+ MachineInstr(*this, MCID, std::move(DL), NoImplicit);
}
/// Create a new MachineInstr which is a copy of the 'Orig' instruction,
@@ -361,8 +364,9 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
MachineInstr(*this, *Orig);
}
-MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) {
+MachineInstr &MachineFunction::cloneMachineInstrBundle(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ const MachineInstr &Orig) {
MachineInstr *FirstClone = nullptr;
MachineBasicBlock::const_instr_iterator I = Orig.getIterator();
while (true) {
@@ -390,8 +394,7 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
///
/// This function also serves as the MachineInstr destructor - the real
/// ~MachineInstr() destructor must be empty.
-void
-MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+void MachineFunction::deleteMachineInstr(MachineInstr *MI) {
// Verify that a call site info is at valid state. This assertion should
// be triggered during the implementation of support for the
// call site info of a new architecture. If the assertion is triggered,
@@ -418,8 +421,7 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
}
/// Delete the given MachineBasicBlock.
-void
-MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+void MachineFunction::deleteMachineBasicBlock(MachineBasicBlock *MBB) {
assert(MBB->getParent() == this && "MBB parent mismatch!");
// Clean up any references to MBB in jump tables before deleting it.
if (JumpTableInfo)
@@ -769,8 +771,8 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
void MachineFunction::addCatchTypeInfo(MachineBasicBlock *LandingPad,
ArrayRef<const GlobalValue *> TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- for (unsigned N = TyInfo.size(); N; --N)
- LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+ for (const GlobalValue *GV : llvm::reverse(TyInfo))
+ LP.TypeIds.push_back(getTypeIDFor(GV));
}
void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad,
@@ -1404,10 +1406,10 @@ MachineConstantPool::~MachineConstantPool() {
// A constant may be a member of both Constants and MachineCPVsSharingEntries,
// so keep track of which we've deleted to avoid double deletions.
DenseSet<MachineConstantPoolValue*> Deleted;
- for (unsigned i = 0, e = Constants.size(); i != e; ++i)
- if (Constants[i].isMachineConstantPoolEntry()) {
- Deleted.insert(Constants[i].Val.MachineCPVal);
- delete Constants[i].Val.MachineCPVal;
+ for (const MachineConstantPoolEntry &C : Constants)
+ if (C.isMachineConstantPoolEntry()) {
+ Deleted.insert(C.Val.MachineCPVal);
+ delete C.Val.MachineCPVal;
}
for (MachineConstantPoolValue *CPV : MachineCPVsSharingEntries) {
if (Deleted.count(CPV) == 0)
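The lower-cased spellings are used throughout the rest of this patch. A short sketch of the create/delete pairing for a scratch instruction that never enters a block; the opcode, DebugLoc, and TII are assumed from the surrounding pass.

// Sketch: allocate a temporary MachineInstr, use it, then hand it back to the
// function's recycler via the renamed deleteMachineInstr().
MachineInstr *MI = MF.CreateMachineInstr(TII->get(Opcode), DL);
// ... examine or transform MI without inserting it into any basic block ...
MF.deleteMachineInstr(MI);

The MachinePipeliner hunk further down does exactly this with CloneMachineInstr followed by deleteMachineInstr.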
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index aaa80432d2f2..85b266afceef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -115,10 +115,10 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
/// implicit operands. It reserves space for the number of operands specified by
/// the MCInstrDesc.
-MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
- DebugLoc dl, bool NoImp)
- : MCID(&tid), debugLoc(std::move(dl)), DebugInstrNum(0) {
- assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
+ DebugLoc DL, bool NoImp)
+ : MCID(&TID), DbgLoc(std::move(DL)), DebugInstrNum(0) {
+ assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
if (unsigned NumOps = MCID->getNumOperands() +
@@ -135,9 +135,9 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
/// Does not copy the number from debug instruction numbering, to preserve
/// uniqueness.
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()),
+ : MCID(&MI.getDesc()), Info(MI.Info), DbgLoc(MI.getDebugLoc()),
DebugInstrNum(0) {
- assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+ assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
Operands = MF.allocateOperandArray(CapOperands);
@@ -682,26 +682,6 @@ void MachineInstr::eraseFromParent() {
getParent()->erase(this);
}
-void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() {
- assert(getParent() && "Not embedded in a basic block!");
- MachineBasicBlock *MBB = getParent();
- MachineFunction *MF = MBB->getParent();
- assert(MF && "Not embedded in a function!");
-
- MachineInstr *MI = (MachineInstr *)this;
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
- Register Reg = MO.getReg();
- if (!Reg.isVirtual())
- continue;
- MRI.markUsesInDebugValueAsUndef(Reg);
- }
- MI->eraseFromParent();
-}
-
void MachineInstr::eraseFromBundle() {
assert(getParent() && "Not embedded in a basic block!");
getParent()->erase_instr(this);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index 8d6459a627fa..762395542b40 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -649,7 +649,7 @@ void SwingSchedulerDAG::schedule() {
/// Clean up after the software pipeliner runs.
void SwingSchedulerDAG::finishBlock() {
for (auto &KV : NewMIs)
- MF.DeleteMachineInstr(KV.second);
+ MF.deleteMachineInstr(KV.second);
NewMIs.clear();
// Call the superclass.
@@ -1101,17 +1101,15 @@ unsigned SwingSchedulerDAG::calculateResMII() {
// Sort the instructions by the number of available choices for scheduling,
// least to most. Use the number of critical resources as the tie breaker.
FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
- for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
- E = MBB->getFirstTerminator();
- I != E; ++I)
- FUS.calcCriticalResources(*I);
+ for (MachineInstr &MI :
+ llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator()))
+ FUS.calcCriticalResources(MI);
PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
FuncUnitOrder(FUS);
- for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
- E = MBB->getFirstTerminator();
- I != E; ++I)
- FuncUnitOrder.push(&*I);
+ for (MachineInstr &MI :
+ llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator()))
+ FuncUnitOrder.push(&MI);
while (!FuncUnitOrder.empty()) {
MachineInstr *MI = FuncUnitOrder.top();
@@ -1192,14 +1190,10 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
/// but we do this to find the circuits, and then change them back.
static void swapAntiDependences(std::vector<SUnit> &SUnits) {
SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- SUnit *SU = &SUnits[i];
- for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
- IP != EP; ++IP) {
- if (IP->getKind() != SDep::Anti)
- continue;
- DepsAdded.push_back(std::make_pair(SU, *IP));
- }
+ for (SUnit &SU : SUnits) {
+ for (SDep &Pred : SU.Preds)
+ if (Pred.getKind() == SDep::Anti)
+ DepsAdded.push_back(std::make_pair(&SU, Pred));
}
for (std::pair<SUnit *, SDep> &P : DepsAdded) {
// Remove this anti dependency and add one in the reverse direction.
@@ -1471,27 +1465,23 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
}
// Compute ALAP, ZeroLatencyHeight, and MOV.
- for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
- E = Topo.rend();
- I != E; ++I) {
+ for (int I : llvm::reverse(Topo)) {
int alap = maxASAP;
int zeroLatencyHeight = 0;
- SUnit *SU = &SUnits[*I];
- for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
- ES = SU->Succs.end();
- IS != ES; ++IS) {
- SUnit *succ = IS->getSUnit();
- if (IS->getLatency() == 0)
+ SUnit *SU = &SUnits[I];
+ for (const SDep &S : SU->Succs) {
+ SUnit *succ = S.getSUnit();
+ if (S.getLatency() == 0)
zeroLatencyHeight =
std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
- if (ignoreDependence(*IS, true))
+ if (ignoreDependence(S, true))
continue;
- alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
- getDistance(SU, succ, *IS) * MII));
+ alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() +
+ getDistance(SU, succ, S) * MII));
}
- ScheduleInfo[*I].ALAP = alap;
- ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
+ ScheduleInfo[I].ALAP = alap;
+ ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
}
// After computing the node functions, compute the summary for each node set.
@@ -1548,9 +1538,8 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder,
SmallSetVector<SUnit *, 8> &Succs,
const NodeSet *S = nullptr) {
Succs.clear();
- for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
- I != E; ++I) {
- for (SDep &Succ : (*I)->Succs) {
+ for (const SUnit *SU : NodeOrder) {
+ for (const SDep &Succ : SU->Succs) {
if (S && S->count(Succ.getSUnit()) == 0)
continue;
if (ignoreDependence(Succ, false))
@@ -1558,7 +1547,7 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder,
if (NodeOrder.count(Succ.getSUnit()) == 0)
Succs.insert(Succ.getSUnit());
}
- for (SDep &Pred : (*I)->Preds) {
+ for (const SDep &Pred : SU->Preds) {
if (Pred.getKind() != SDep::Anti)
continue;
if (S && S->count(Pred.getSUnit()) == 0)
@@ -2202,7 +2191,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
MachineInstr *NewMI = MF.CloneMachineInstr(MI);
NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
- MF.DeleteMachineInstr(NewMI);
+ MF.deleteMachineInstr(NewMI);
if (!Disjoint)
return false;
@@ -2885,10 +2874,8 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
++stage) {
std::deque<SUnit *> &cycleInstrs =
ScheduledInstrs[cycle + (stage * InitiationInterval)];
- for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
- E = cycleInstrs.rend();
- I != E; ++I)
- ScheduledInstrs[cycle].push_front(*I);
+ for (SUnit *SU : llvm::reverse(cycleInstrs))
+ ScheduledInstrs[cycle].push_front(SU);
}
}
@@ -2899,10 +2886,8 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
// Change the registers in instruction as specified in the InstrChanges
// map. We need to use the new registers to create the correct order.
- for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
- SUnit *SU = &SSD->SUnits[i];
- SSD->applyInstrChange(SU->getInstr(), *this);
- }
+ for (const SUnit &SU : SSD->SUnits)
+ SSD->applyInstrChange(SU.getInstr(), *this);
// Reorder the instructions in each cycle to fix and improve the
// generated code.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
new file mode 100644
index 000000000000..8db893535daf
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -0,0 +1,52 @@
+//===- MachineSSAContext.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a specialization of the GenericSSAContext<X>
+/// template class for Machine IR.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+MachineBasicBlock *MachineSSAContext::getEntryBlock(MachineFunction &F) {
+ return &F.front();
+}
+
+void MachineSSAContext::setFunction(MachineFunction &Fn) {
+ MF = &Fn;
+ RegInfo = &MF->getRegInfo();
+}
+
+Printable MachineSSAContext::print(MachineBasicBlock *Block) const {
+ return Printable([Block](raw_ostream &Out) { Block->printName(Out); });
+}
+
+Printable MachineSSAContext::print(MachineInstr *I) const {
+ return Printable([I](raw_ostream &Out) { I->print(Out); });
+}
+
+Printable MachineSSAContext::print(Register Value) const {
+ auto *MRI = RegInfo;
+ return Printable([MRI, Value](raw_ostream &Out) {
+ Out << printReg(Value, MRI->getTargetRegisterInfo(), 0, MRI);
+
+ if (Value) {
+ // Try to print the definition.
+ if (auto *Instr = MRI->getUniqueVRegDef(Value)) {
+ Out << ": ";
+ Instr->print(Out);
+ }
+ }
+ });
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 930677e4fd7d..48076663ddf5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -126,7 +126,9 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode,
}
/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
-/// is live in the middle of the specified block.
+/// is live in the middle of the specified block. If ExistingValueOnly is
+/// true then this will only return an existing value or $noreg; otherwise new
+/// instructions may be inserted to materialize a value.
///
/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
/// important case: if there is a definition of the rewritten value after the
@@ -143,14 +145,18 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode,
/// their respective blocks. However, the use of X happens in the *middle* of
/// a block. Because of this, we need to insert a new PHI node in SomeBB to
/// merge the appropriate values, and this value isn't live out of the block.
-Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB,
+ bool ExistingValueOnly) {
// If there is no definition of the renamed variable in this block, just use
// GetValueAtEndOfBlock to do our work.
if (!HasValueForBlock(BB))
- return GetValueAtEndOfBlockInternal(BB);
+ return GetValueAtEndOfBlockInternal(BB, ExistingValueOnly);
// If there are no predecessors, just return undef.
if (BB->pred_empty()) {
+ // If we cannot insert new instructions, just return $noreg.
+ if (ExistingValueOnly)
+ return Register();
// Insert an implicit_def to represent an undef value.
MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
BB, BB->getFirstTerminator(),
@@ -165,7 +171,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
bool isFirstPred = true;
for (MachineBasicBlock *PredBB : BB->predecessors()) {
- Register PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ Register PredVal = GetValueAtEndOfBlockInternal(PredBB, ExistingValueOnly);
PredValues.push_back(std::make_pair(PredBB, PredVal));
// Compute SingularValue.
@@ -185,6 +191,10 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
if (DupPHI)
return DupPHI;
+ // If we cannot create new instructions, return $noreg now.
+ if (ExistingValueOnly)
+ return Register();
+
// Otherwise, we do need a PHI: insert one now.
MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
@@ -350,10 +360,13 @@ public:
/// for the specified BB and if so, return it. If not, construct SSA form by
/// first calculating the required placement of PHIs and then inserting new
/// PHIs where needed.
-Register MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+Register
+MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB,
+ bool ExistingValueOnly) {
AvailableValsTy &AvailableVals = getAvailableVals(AV);
- if (Register V = AvailableVals[BB])
- return V;
+ Register ExistingVal = AvailableVals.lookup(BB);
+ if (ExistingVal || ExistingValueOnly)
+ return ExistingVal;
SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
return Impl.GetValue(BB);
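A hedged sketch of the new query-only mode; Updater is assumed to have been initialized and given its available values elsewhere, and MBB is a hypothetical block.

// Sketch: look up a value without materializing PHIs or IMPLICIT_DEFs.
Register V = Updater.GetValueInMiddleOfBlock(MBB, /*ExistingValueOnly=*/true);
if (!V) {
  // $noreg: no value already reaches this point, and with ExistingValueOnly
  // set the updater refuses to insert new instructions to manufacture one.
}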
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index 47d40f0823c8..b043d4c1b0c1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -90,12 +90,17 @@ cl::opt<bool> VerifyScheduling(
"verify-misched", cl::Hidden,
cl::desc("Verify machine instrs before and after machine scheduling"));
+#ifndef NDEBUG
+cl::opt<bool> ViewMISchedDAGs(
+ "view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+#else
+const bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
} // end namespace llvm
#ifndef NDEBUG
-static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
- cl::desc("Pop up a window to show MISched dags after they are processed"));
-
/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph, provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
@@ -111,7 +116,6 @@ static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
cl::desc("Print schedule DAGs"));
#else
-static const bool ViewMISchedDAGs = false;
static const bool PrintDAGs = false;
#endif // NDEBUG
@@ -561,11 +565,10 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
MBBRegionsVector MBBRegions;
getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
- for (MBBRegionsVector::iterator R = MBBRegions.begin();
- R != MBBRegions.end(); ++R) {
- MachineBasicBlock::iterator I = R->RegionBegin;
- MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
- unsigned NumRegionInstrs = R->NumRegionInstrs;
+ for (const SchedRegion &R : MBBRegions) {
+ MachineBasicBlock::iterator I = R.RegionBegin;
+ MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
+ unsigned NumRegionInstrs = R.NumRegionInstrs;
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 8df23b781ffd..0a5ff276fedc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -80,9 +80,9 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
void MachineTraceMetrics::releaseMemory() {
MF = nullptr;
BlockInfo.clear();
- for (unsigned i = 0; i != TS_NumStrategies; ++i) {
- delete Ensembles[i];
- Ensembles[i] = nullptr;
+ for (Ensemble *&E : Ensembles) {
+ delete E;
+ E = nullptr;
}
}
@@ -398,9 +398,9 @@ void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
<< '\n');
BlockInfo[MBB->getNumber()].invalidate();
- for (unsigned i = 0; i != TS_NumStrategies; ++i)
- if (Ensembles[i])
- Ensembles[i]->invalidate(MBB);
+ for (Ensemble *E : Ensembles)
+ if (E)
+ E->invalidate(MBB);
}
void MachineTraceMetrics::verifyAnalysis() const {
@@ -408,9 +408,9 @@ void MachineTraceMetrics::verifyAnalysis() const {
return;
#ifndef NDEBUG
assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
- for (unsigned i = 0; i != TS_NumStrategies; ++i)
- if (Ensembles[i])
- Ensembles[i]->verify();
+ for (Ensemble *E : Ensembles)
+ if (E)
+ E->verify();
#endif
}
@@ -984,8 +984,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
const MachineBasicBlock *DefMBB = DefMI->getParent();
// Reg is live-in to all blocks in Trace that follow DefMBB.
- for (unsigned i = Trace.size(); i; --i) {
- const MachineBasicBlock *MBB = Trace[i-1];
+ for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
if (MBB == DefMBB)
return;
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
@@ -1204,8 +1203,8 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
for (unsigned K = 0; K != PRDepths.size(); ++K)
PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
} else {
- for (unsigned K = 0; K != PRDepths.size(); ++K)
- PRMax = std::max(PRMax, PRDepths[K]);
+ for (unsigned PRD : PRDepths)
+ PRMax = std::max(PRMax, PRD);
}
// Convert to cycle count.
PRMax = TE.MTM.getCycles(PRMax);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index 32078db76cf3..005d4ad1a328 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -101,6 +101,7 @@ namespace {
// Avoid querying the MachineFunctionProperties for each operand.
bool isFunctionRegBankSelected;
bool isFunctionSelected;
+ bool isFunctionTracksDebugUserValues;
using RegVector = SmallVector<Register, 16>;
using RegMaskVector = SmallVector<const uint32_t *, 4>;
@@ -384,6 +385,8 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
MachineFunctionProperties::Property::RegBankSelected);
isFunctionSelected = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Selected);
+ isFunctionTracksDebugUserValues = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
LiveVars = nullptr;
LiveInts = nullptr;
@@ -1605,12 +1608,16 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
case TargetOpcode::G_ROTR:
case TargetOpcode::G_ROTL: {
LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
if (Src1Ty.isVector() != Src2Ty.isVector()) {
- report("Rotate requires operands to be either all scalars or all vectors",
+ report("Shifts and rotates require operands to be either all scalars or "
+ "all vectors",
MI);
break;
}
@@ -1980,41 +1987,50 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MO->isUndef())
report("Generic virtual register use cannot be undef", MO, MONum);
- // If we're post-Select, we can't have gvregs anymore.
- if (isFunctionSelected) {
- report("Generic virtual register invalid in a Selected function",
- MO, MONum);
- return;
- }
+ // Debug value instructions are permitted to use undefined vregs.
+ // This is a performance measure to skip the overhead of immediately
+ // pruning unused debug operands. The final undef substitution occurs
+ // when debug values are allocated in LDVImpl::handleDebugValue, so
+ // these verifications always apply after this pass.
+ if (isFunctionTracksDebugUserValues || !MO->isUse() ||
+ !MI->isDebugValue() || !MRI->def_empty(Reg)) {
+ // If we're post-Select, we can't have gvregs anymore.
+ if (isFunctionSelected) {
+ report("Generic virtual register invalid in a Selected function",
+ MO, MONum);
+ return;
+ }
- // The gvreg must have a type and it must not have a SubIdx.
- LLT Ty = MRI->getType(Reg);
- if (!Ty.isValid()) {
- report("Generic virtual register must have a valid type", MO,
- MONum);
- return;
- }
+ // The gvreg must have a type and it must not have a SubIdx.
+ LLT Ty = MRI->getType(Reg);
+ if (!Ty.isValid()) {
+ report("Generic virtual register must have a valid type", MO,
+ MONum);
+ return;
+ }
- const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+ const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
- // If we're post-RegBankSelect, the gvreg must have a bank.
- if (!RegBank && isFunctionRegBankSelected) {
- report("Generic virtual register must have a bank in a "
- "RegBankSelected function",
- MO, MONum);
- return;
- }
+ // If we're post-RegBankSelect, the gvreg must have a bank.
+ if (!RegBank && isFunctionRegBankSelected) {
+ report("Generic virtual register must have a bank in a "
+ "RegBankSelected function",
+ MO, MONum);
+ return;
+ }
- // Make sure the register fits into its register bank if any.
- if (RegBank && Ty.isValid() &&
- RegBank->getSize() < Ty.getSizeInBits()) {
- report("Register bank is too small for virtual register", MO,
- MONum);
- errs() << "Register bank " << RegBank->getName() << " too small("
- << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
- << "-bits\n";
- return;
+ // Make sure the register fits into its register bank if any.
+ if (RegBank && Ty.isValid() &&
+ RegBank->getSize() < Ty.getSizeInBits()) {
+ report("Register bank is too small for virtual register", MO,
+ MONum);
+ errs() << "Register bank " << RegBank->getName() << " too small("
+ << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
+ << "-bits\n";
+ return;
+ }
}
+
if (SubIdx) {
report("Generic virtual register does not allow subregister index", MO,
MONum);
@@ -2217,8 +2233,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveInts && Reg.isVirtual()) {
if (LiveInts->hasInterval(Reg)) {
LI = &LiveInts->getInterval(Reg);
- if (SubRegIdx != 0 && !LI->empty() && !LI->hasSubRanges() &&
- MRI->shouldTrackSubRegLiveness(Reg))
+ if (SubRegIdx != 0 && (MO->isDef() || !MO->isUndef()) && !LI->empty() &&
+ !LI->hasSubRanges() && MRI->shouldTrackSubRegLiveness(Reg))
report("Live interval for subreg operand has no subranges", MO, MONum);
} else {
report("Virtual register has no live interval", MO, MONum);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
index 77a6c37e1362..7693ab417de9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -213,7 +213,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
for (auto &I : LoweredPHIs) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*I.first);
- MF.DeleteMachineInstr(I.first);
+ MF.deleteMachineInstr(I.first);
}
// TODO: we should use the incremental DomTree updater here.
@@ -626,7 +626,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (reusedIncoming || !IncomingReg) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*MPhi);
- MF.DeleteMachineInstr(MPhi);
+ MF.deleteMachineInstr(MPhi);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
index b85f00a61eac..d7cd0a583cee 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -252,8 +252,8 @@ void SchedulePostRATDList::exitRegion() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dumpSchedule - dump the scheduled Sequence.
LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
+ for (const SUnit *SU : Sequence) {
+ if (SU)
dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
@@ -531,11 +531,11 @@ void SchedulePostRATDList::ListScheduleTopDown() {
ReleaseSuccessors(&EntrySU);
// Add all leaves to Available queue.
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ for (SUnit &SUnit : SUnits) {
// It is available if it has no predecessors.
- if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
- AvailableQueue.push(&SUnits[i]);
- SUnits[i].isAvailable = true;
+ if (!SUnit.NumPredsLeft && !SUnit.isAvailable) {
+ AvailableQueue.push(&SUnit);
+ SUnit.isAvailable = true;
}
}
@@ -657,10 +657,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
#ifndef NDEBUG
unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
- unsigned Noops = 0;
- for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
- if (!Sequence[i])
- ++Noops;
+ unsigned Noops = llvm::count(Sequence, nullptr);
assert(Sequence.size() - Noops == ScheduledNodes &&
"The number of nodes scheduled doesn't match the expected number!");
#endif // NDEBUG
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 29a88480fd9f..8d8a6126dad0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -953,12 +953,22 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// LocalStackSlotPass didn't already allocate a slot for it.
// If we are told to use the LocalStackAllocationBlock, the stack protector
// is expected to be already pre-allocated.
- if (!MFI.getUseLocalStackAllocationBlock())
+ if (MFI.getStackID(StackProtectorFI) != TargetStackID::Default) {
+ // If the stack protector isn't on the default stack then it's up to the
+ // target to set the stack offset.
+ assert(MFI.getObjectOffset(StackProtectorFI) != 0 &&
+ "Offset of stack protector on non-default stack expected to be "
+ "already set.");
+ assert(!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()) &&
+ "Stack protector on non-default stack expected to not be "
+ "pre-allocated by LocalStackSlotPass.");
+ } else if (!MFI.getUseLocalStackAllocationBlock()) {
AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
Skew);
- else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()))
+ } else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) {
llvm_unreachable(
"Stack protector not pre-allocated by LocalStackSlotPass.");
+ }
// Assign large stack objects first.
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
index f605068e076d..882f8e91bf1d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
@@ -1500,8 +1500,8 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
// Erase from MaxRefs all elements in the closure.
auto Begin = MaxRefs.begin();
- for (unsigned i = ClosureIdx.size(); i != 0; --i)
- MaxRefs.erase(Begin + ClosureIdx[i-1]);
+ for (unsigned Idx : llvm::reverse(ClosureIdx))
+ MaxRefs.erase(Begin + Idx);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
new file mode 100644
index 000000000000..9f1012c95964
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -0,0 +1,121 @@
+//===- RegAllocEvictionAdvisor.cpp - eviction advisor ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the default eviction advisor and of the Analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocEvictionAdvisor.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode(
+ "regalloc-enable-advisor", cl::Hidden,
+ cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default),
+ cl::desc("Enable regalloc advisor mode"),
+ cl::values(
+ clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default,
+ "default", "Default"),
+ clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release,
+ "release", "precompiled"),
+ clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development,
+ "development", "for training")));
+
+static cl::opt<bool> EnableLocalReassignment(
+ "enable-local-reassign", cl::Hidden,
+ cl::desc("Local reassignment can yield better allocation decisions, but "
+ "may be compile time intensive"),
+ cl::init(false));
+
+#define DEBUG_TYPE "regalloc"
+
+char RegAllocEvictionAdvisorAnalysis::ID = 0;
+INITIALIZE_PASS(RegAllocEvictionAdvisorAnalysis, "regalloc-evict",
+ "Regalloc eviction policy", false, true)
+
+namespace {
+class DefaultEvictionAdvisorAnalysis final
+ : public RegAllocEvictionAdvisorAnalysis {
+public:
+ DefaultEvictionAdvisorAnalysis(bool NotAsRequested)
+ : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Default),
+ NotAsRequested(NotAsRequested) {}
+
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Default;
+ }
+
+private:
+ std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
+ LiveIntervals *LIS, VirtRegMap *VRM,
+ const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo) override {
+ return std::make_unique<DefaultEvictionAdvisor>(MF, Matrix, LIS, VRM,
+ RegClassInfo, ExtraInfo);
+ }
+ bool doInitialization(Module &M) override {
+ if (NotAsRequested)
+ M.getContext().emitError("Requested regalloc eviction advisor analysis "
+ "could not be created. Using default");
+ return RegAllocEvictionAdvisorAnalysis::doInitialization(M);
+ }
+ const bool NotAsRequested;
+};
+} // namespace
+
+template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() {
+ Pass *Ret = nullptr;
+ switch (Mode) {
+ case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default:
+ Ret = new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ false);
+ break;
+ case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development:
+ // TODO(mtrofin): add implementation
+ break;
+ case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release:
+ // TODO(mtrofin): add implementation
+ break;
+ }
+ if (Ret)
+ return Ret;
+ return new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ true);
+}
+
+StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const {
+ switch (getAdvisorMode()) {
+ case AdvisorMode::Default:
+ return "Default Regalloc Eviction Advisor";
+ case AdvisorMode::Release:
+ return "Release mode Regalloc Eviction Advisor";
+ case AdvisorMode::Development:
+ return "Development mode Regalloc Eviction Advisor";
+ }
+ llvm_unreachable("Unknown advisor kind");
+}
+
+RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(
+ const MachineFunction &MF, LiveRegMatrix *Matrix, LiveIntervals *LIS,
+ VirtRegMap *VRM, const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo)
+ : MF(MF), Matrix(Matrix), LIS(LIS), VRM(VRM), MRI(&VRM->getRegInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RegClassInfo),
+ RegCosts(TRI->getRegisterCosts(MF)), ExtraInfo(ExtraInfo),
+ EnableLocalReassign(EnableLocalReassignment ||
+ MF.getSubtarget().enableRALocalReassignment(
+ MF.getTarget().getOptLevel())) {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index 85fd3207888b..debb75ed5020 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Pass.h"
namespace llvm {
@@ -85,6 +86,215 @@ struct EvictionCost {
std::tie(O.BrokenHints, O.MaxWeight);
}
};
+
+/// Track allocation stage and eviction loop prevention during allocation.
+// TODO(mtrofin): Consider exposing RAGreedy in a header instead, and folding
+// this back into it.
+class ExtraRegInfo final {
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage = RS_New;
+
+ // Cascade - Eviction loop prevention. See
+ // canEvictInterferenceBasedOnCost().
+ unsigned Cascade = 0;
+
+ RegInfo() = default;
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> Info;
+ unsigned NextCascade = 1;
+
+public:
+ ExtraRegInfo() = default;
+ ExtraRegInfo(const ExtraRegInfo &) = delete;
+
+ LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; }
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return getStage(VirtReg.reg());
+ }
+
+ void setStage(Register Reg, LiveRangeStage Stage) {
+ Info.grow(Reg.id());
+ Info[Reg].Stage = Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ setStage(VirtReg.reg(), Stage);
+ }
+
+ /// Return the current stage of the register, if present, otherwise initialize
+ /// it and return that.
+ LiveRangeStage getOrInitStage(Register Reg) {
+ Info.grow(Reg.id());
+ return getStage(Reg);
+ }
+
+ unsigned getCascade(Register Reg) const { return Info[Reg].Cascade; }
+
+ void setCascade(Register Reg, unsigned Cascade) {
+ Info.grow(Reg.id());
+ Info[Reg].Cascade = Cascade;
+ }
+
+ unsigned getOrAssignNewCascade(Register Reg) {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade) {
+ Cascade = NextCascade++;
+ setCascade(Reg, Cascade);
+ }
+ return Cascade;
+ }
+
+ unsigned getCascadeOrCurrentNext(Register Reg) const {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade)
+ Cascade = NextCascade;
+ return Cascade;
+ }
+
+ template <typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ for (; Begin != End; ++Begin) {
+ Register Reg = *Begin;
+ Info.grow(Reg.id());
+ if (Info[Reg].Stage == RS_New)
+ Info[Reg].Stage = NewStage;
+ }
+ }
+ void LRE_DidCloneVirtReg(Register New, Register Old);
+};
+
+/// Interface to the eviction advisor, which is responsible for making a
+/// decision as to which live ranges should be evicted (if any).
+class RegAllocEvictionAdvisor {
+public:
+ RegAllocEvictionAdvisor(const RegAllocEvictionAdvisor &) = delete;
+ RegAllocEvictionAdvisor(RegAllocEvictionAdvisor &&) = delete;
+ virtual ~RegAllocEvictionAdvisor() = default;
+
+ /// Find a physical register that can be freed by evicting the FixedRegisters,
+ /// or return NoRegister. The eviction decision is assumed to be correct (i.e.
+ /// no fixed live ranges are evicted) and profitable.
+ virtual MCRegister
+ tryFindEvictionCandidate(LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const = 0;
+
+ /// Find out if we can evict the live ranges occupying the given PhysReg,
+ /// which is a hint (preferred register) for VirtReg.
+ virtual bool
+ canEvictHintInterference(LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const = 0;
+
+ /// Returns true if the given \p PhysReg is a callee saved register and has
+ /// not been used for allocation yet.
+ bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
+
+protected:
+ RegAllocEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
+ LiveIntervals *LIS, VirtRegMap *VRM,
+ const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo);
+
+ Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
+
+ const MachineFunction &MF;
+ LiveRegMatrix *const Matrix;
+ LiveIntervals *const LIS;
+ VirtRegMap *const VRM;
+ MachineRegisterInfo *const MRI;
+ const TargetRegisterInfo *const TRI;
+ const RegisterClassInfo &RegClassInfo;
+ const ArrayRef<uint8_t> RegCosts;
+ ExtraRegInfo *const ExtraInfo;
+
+ /// Whether to run the local reassignment heuristic. This information is
+ /// obtained from the TargetSubtargetInfo.
+ const bool EnableLocalReassign;
+
+private:
+ unsigned NextCascade = 1;
+};
+
+/// ImmutableAnalysis abstraction for fetching the Eviction Advisor. We model it
+/// as an analysis to decouple the user from the implementation insofar as
+/// dependencies on other analyses go. The motivation for it being an
+/// immutable pass is twofold:
+/// - in the ML implementation case, the evaluator is stateless but (especially
+/// in the development mode) expensive to set up. With an immutable pass, we set
+/// it up once.
+/// - in the 'development' mode ML case, we want to capture the training log
+/// during allocation (this is a log of features encountered and decisions
+/// made), and then measure a score, potentially a few steps after allocation
+/// completes. So we need the properties of an immutable pass to keep the logger
+/// state around until we can make that measurement.
+///
+/// Because we need to offer additional services in 'development' mode, the
+/// implementations of this analysis need to implement RTTI support.
+class RegAllocEvictionAdvisorAnalysis : public ImmutablePass {
+public:
+ enum class AdvisorMode : int { Default, Release, Development };
+
+ RegAllocEvictionAdvisorAnalysis(AdvisorMode Mode)
+ : ImmutablePass(ID), Mode(Mode){};
+ static char ID;
+
+ /// Get an advisor for the given context (i.e. machine function, etc)
+ virtual std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
+ LiveIntervals *LIS, VirtRegMap *VRM,
+ const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo) = 0;
+ AdvisorMode getAdvisorMode() const { return Mode; }
+
+private:
+ // This analysis preserves everything, and subclasses may have additional
+ // requirements.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ StringRef getPassName() const override;
+ const AdvisorMode Mode;
+};
+
+/// Specialization for the API used by the analysis infrastructure to create
+/// an instance of the eviction advisor.
+template <> Pass *callDefaultCtor<RegAllocEvictionAdvisorAnalysis>();
+
+// TODO(mtrofin): implement these.
+#ifdef LLVM_HAVE_TF_AOT
+RegAllocEvictionAdvisorAnalysis *createReleaseModeAdvisor();
+#endif
+
+#ifdef LLVM_HAVE_TF_API
+RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor();
+#endif
+
+// TODO: move to RegAllocEvictionAdvisor.cpp when we move implementation
+// out of RegAllocGreedy.cpp
+class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor {
+public:
+ DefaultEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
+ LiveIntervals *LIS, VirtRegMap *VRM,
+ const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo)
+ : RegAllocEvictionAdvisor(MF, Matrix, LIS, VRM, RegClassInfo, ExtraInfo) {
+ }
+
+private:
+ MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &,
+ uint8_t,
+ const SmallVirtRegSet &) const override;
+ bool canEvictHintInterference(LiveInterval &, MCRegister,
+ const SmallVirtRegSet &) const override;
+ bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool,
+ EvictionCost &,
+ const SmallVirtRegSet &) const;
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
+};
+} // namespace llvm
+#endif // LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
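For orientation, here is a minimal sketch of how an alternative eviction policy would plug into the interface declared above. It is not part of the patch, and the NeverEvictAdvisor name is invented for illustration; a matching RegAllocEvictionAdvisorAnalysis subclass would hand it out from getAdvisor(), much as the Release/Development factories above are expected to do.

    #include "RegAllocEvictionAdvisor.h"
    using namespace llvm;

    namespace {
    // Hypothetical advisor that declines every eviction, forcing the greedy
    // allocator to fall back to splitting or spilling instead.
    class NeverEvictAdvisor : public RegAllocEvictionAdvisor {
    public:
      NeverEvictAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
                        LiveIntervals *LIS, VirtRegMap *VRM,
                        const RegisterClassInfo &RegClassInfo,
                        ExtraRegInfo *ExtraInfo)
          : RegAllocEvictionAdvisor(MF, Matrix, LIS, VRM, RegClassInfo,
                                    ExtraInfo) {}

    private:
      // No candidate is ever returned, so the caller's eviction attempt fails.
      MCRegister
      tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &, uint8_t,
                               const SmallVirtRegSet &) const override {
        return MCRegister::NoRegister;
      }
      // Never break an existing assignment to honor a hint either.
      bool canEvictHintInterference(LiveInterval &, MCRegister,
                                    const SmallVirtRegSet &) const override {
        return false;
      }
    };
    } // namespace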
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 50411c177007..ce3cf31dbd6b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -112,12 +112,6 @@ static cl::opt<bool> ExhaustiveSearch(
"and interference cutoffs of last chance recoloring"),
cl::Hidden);
-static cl::opt<bool> EnableLocalReassignment(
- "enable-local-reassign", cl::Hidden,
- cl::desc("Local reassignment can yield better allocation decisions, but "
- "may be compile time intensive"),
- cl::init(false));
-
static cl::opt<bool> EnableDeferredSpilling(
"enable-deferred-spilling", cl::Hidden,
cl::desc("Instead of spilling a variable right away, defer the actual "
@@ -172,8 +166,9 @@ class RAGreedy : public MachineFunctionPass,
// state
std::unique_ptr<Spiller> SpillerInstance;
PQueue Queue;
- unsigned NextCascade;
std::unique_ptr<VirtRegAuxInfo> VRAI;
+ Optional<ExtraRegInfo> ExtraInfo;
+ std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor;
// Enum CutOffStage to keep a track whether the register allocation failed
// because of the cutoffs encountered in last chance recoloring.
@@ -195,76 +190,6 @@ class RAGreedy : public MachineFunctionPass,
static const char *const StageName[];
#endif
- // RegInfo - Keep additional information about each live range.
- struct RegInfo {
- LiveRangeStage Stage = RS_New;
-
- // Cascade - Eviction loop prevention. See
- // canEvictInterferenceBasedOnCost().
- unsigned Cascade = 0;
-
- RegInfo() = default;
- };
-
- IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
-
- LiveRangeStage getStage(Register Reg) const {
- return ExtraRegInfo[Reg].Stage;
- }
-
- LiveRangeStage getStage(const LiveInterval &VirtReg) const {
- return getStage(VirtReg.reg());
- }
-
- void setStage(Register Reg, LiveRangeStage Stage) {
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
- ExtraRegInfo[Reg].Stage = Stage;
- }
-
- void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
- setStage(VirtReg.reg(), Stage);
- }
-
- /// Return the current stage of the register, if present, otherwise initialize
- /// it and return that.
- LiveRangeStage getOrInitStage(Register Reg) {
- ExtraRegInfo.grow(Reg);
- return getStage(Reg);
- }
-
- unsigned getCascade(Register Reg) const { return ExtraRegInfo[Reg].Cascade; }
-
- void setCascade(Register Reg, unsigned Cascade) {
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
- ExtraRegInfo[Reg].Cascade = Cascade;
- }
-
- unsigned getOrAssignNewCascade(Register Reg) {
- unsigned Cascade = getCascade(Reg);
- if (!Cascade) {
- Cascade = NextCascade++;
- setCascade(Reg, Cascade);
- }
- return Cascade;
- }
-
- unsigned getCascadeOrCurrentNext(Register Reg) const {
- unsigned Cascade = getCascade(Reg);
- if (!Cascade)
- Cascade = NextCascade;
- return Cascade;
- }
-
- template<typename Iterator>
- void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
- for (;Begin != End; ++Begin) {
- Register Reg = *Begin;
- if (ExtraRegInfo[Reg].Stage == RS_New)
- ExtraRegInfo[Reg].Stage = NewStage;
- }
- }
-
/// EvictionTrack - Keeps track of past evictions in order to optimize region
/// split decision.
class EvictionTrack {
@@ -375,10 +300,6 @@ class RAGreedy : public MachineFunctionPass,
/// Callee-save register cost, calculated once per machine function.
BlockFrequency CSRCost;
- /// Run or not the local reassignment heuristic. This information is
- /// obtained from the TargetSubtargetInfo.
- bool EnableLocalReassign;
-
/// Enable or not the consideration of the cost of local intervals created
/// by a split candidate when choosing the best split candidate.
bool EnableAdvancedRASplitCost;
@@ -447,13 +368,6 @@ private:
bool calcCompactRegion(GlobalSplitCandidate&);
void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
- Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
- bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
- bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool,
- EvictionCost &,
- const SmallVirtRegSet &) const;
- bool canEvictHintInterference(LiveInterval &, MCRegister,
- const SmallVirtRegSet &) const;
bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
MCRegister PhysReg, SlotIndex Start,
SlotIndex End, EvictionCost &MaxCost) const;
@@ -529,8 +443,6 @@ private:
BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister);
void collectHintInfo(Register, HintsInfo &);
- bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
-
/// Greedy RA statistic to remark.
struct RAGreedyStats {
unsigned Reloads = 0;
@@ -597,6 +509,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
INITIALIZE_PASS_END(RAGreedy, "greedy",
"Greedy Register Allocator", false, false)
@@ -663,6 +576,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<EdgeBundles>();
AU.addRequired<SpillPlacement>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -696,22 +610,25 @@ void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
}
void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
+ ExtraInfo->LRE_DidCloneVirtReg(New, Old);
+}
+
+void ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
// Cloning a register we haven't even heard about yet? Just ignore it.
- if (!ExtraRegInfo.inBounds(Old))
+ if (!Info.inBounds(Old))
return;
// LRE may clone a virtual register because dead code elimination causes it to
// be split into connected components. The new components are much smaller
// than the original, so they should get a new chance at being assigned.
// The clone gets the same stage as the parent.
- ExtraRegInfo[Old].Stage = RS_Assign;
- ExtraRegInfo.grow(New);
- ExtraRegInfo[New] = ExtraRegInfo[Old];
+ Info[Old].Stage = RS_Assign;
+ Info.grow(New.id());
+ Info[New] = Info[Old];
}
void RAGreedy::releaseMemory() {
SpillerInstance.reset();
- ExtraRegInfo.clear();
GlobalCand.clear();
}
@@ -725,10 +642,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
assert(Reg.isVirtual() && "Can only enqueue virtual registers");
unsigned Prio;
- auto Stage = getOrInitStage(Reg);
+ auto Stage = ExtraInfo->getOrInitStage(Reg);
if (Stage == RS_New) {
Stage = RS_Assign;
- setStage(Reg, Stage);
+ ExtraInfo->setStage(Reg, Stage);
}
if (Stage == RS_Split) {
// Unsplit ranges that couldn't be allocated immediately are deferred until
@@ -824,7 +741,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
MCRegister PhysHint = Hint.asMCReg();
LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
- if (canEvictHintInterference(VirtReg, PhysHint, FixedRegisters)) {
+ if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
+ FixedRegisters)) {
evictInterference(VirtReg, PhysHint, NewVRegs);
return PhysHint;
}
@@ -850,7 +768,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
// Interference eviction
//===----------------------------------------------------------------------===//
-Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const {
+Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
+ Register PrevReg) const {
auto Order =
AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
MCRegister PhysReg;
@@ -889,9 +808,10 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const {
/// register.
/// @param B The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
-bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
- LiveInterval &B, bool BreaksHint) const {
- bool CanSplit = getStage(B) < RS_Spill;
+bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B,
+ bool BreaksHint) const {
+ bool CanSplit = ExtraInfo->getStage(B) < RS_Spill;
// Be fairly aggressive about following hints as long as the evictee can be
// split.
@@ -907,7 +827,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// canEvictHintInterference - return true if the interference for VirtReg
/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
-bool RAGreedy::canEvictHintInterference(
+bool DefaultEvictionAdvisor::canEvictHintInterference(
LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const {
EvictionCost MaxCost;
@@ -925,7 +845,7 @@ bool RAGreedy::canEvictHintInterference(
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterferenceBasedOnCost(
+bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
// It is only possible to evict virtual register interference.
@@ -941,9 +861,7 @@ bool RAGreedy::canEvictInterferenceBasedOnCost(
//
// This works out so a register without a cascade number is allowed to evict
// anything, and it can be evicted by anything.
- unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
- if (!Cascade)
- Cascade = NextCascade;
+ unsigned Cascade = ExtraInfo->getCascadeOrCurrentNext(VirtReg.reg());
EvictionCost Cost;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
@@ -965,7 +883,7 @@ bool RAGreedy::canEvictInterferenceBasedOnCost(
return false;
// Never evict spill products. They cannot split or spill.
- if (getStage(*Intf) == RS_Done)
+ if (ExtraInfo->getStage(*Intf) == RS_Done)
return false;
// Once a live range becomes small enough, it is urgent that we find a
// register for it. This is indicated by an infinite spill weight. These
@@ -980,7 +898,7 @@ bool RAGreedy::canEvictInterferenceBasedOnCost(
RegClassInfo.getNumAllocatableRegs(
MRI->getRegClass(Intf->reg())));
// Only evict older cascades or live ranges without a cascade.
- unsigned IntfCascade = ExtraRegInfo[Intf->reg()].Cascade;
+ unsigned IntfCascade = ExtraInfo->getCascade(Intf->reg());
if (Cascade <= IntfCascade) {
if (!Urgent)
return false;
@@ -1043,7 +961,7 @@ bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
if (!Register::isVirtualRegister(Intf->reg()))
return false;
// Never evict spill products. They cannot split or spill.
- if (getStage(*Intf) == RS_Done)
+ if (ExtraInfo->getStage(*Intf) == RS_Done)
return false;
// Would this break a satisfied hint?
@@ -1106,7 +1024,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges can then only be
// evicted by a newer cascade, preventing infinite loops.
- unsigned Cascade = getOrAssignNewCascade(VirtReg.reg());
+ unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());
LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
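To make the cascade scheme concrete: when VirtReg is assigned cascade number 3 here, every interfering range it evicts is stamped with cascade 3 as well, and (barring the urgent case handled in canEvictInterferenceBasedOnCost above) such a range can later be evicted only by a live range carrying a strictly larger cascade number, so two ranges can never keep evicting each other.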
@@ -1132,10 +1050,10 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
Matrix->unassign(*Intf);
- assert((getCascade(Intf->reg()) < Cascade ||
+ assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
"Cannot decrease cascade number, illegal eviction");
- setCascade(Intf->reg(), Cascade);
+ ExtraInfo->setCascade(Intf->reg(), Cascade);
++NumEvicted;
NewVRegs.push_back(Intf->reg());
}
@@ -1143,7 +1061,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
-bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
+bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
if (!CSR)
return false;
@@ -1151,7 +1069,7 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
return !Matrix->isPhysRegUsed(PhysReg);
}
-MCRegister RAGreedy::tryFindEvictionCandidate(
+MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
LiveInterval &VirtReg, const AllocationOrder &Order,
uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
// Keep track of the cheapest interference seen so far.
@@ -1225,8 +1143,8 @@ MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
TimePassesIsEnabled);
- MCRegister BestPhys =
- tryFindEvictionCandidate(VirtReg, Order, CostPerUseLimit, FixedRegisters);
+ MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
+ VirtReg, Order, CostPerUseLimit, FixedRegisters);
if (BestPhys.isValid())
evictInterference(VirtReg, BestPhys, NewVRegs);
return BestPhys;
@@ -1769,8 +1687,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// the ActiveBlocks list with each candidate. We need to filter out
// duplicates.
BitVector Todo = SA->getThroughBlocks();
- for (unsigned c = 0; c != UsedCands.size(); ++c) {
- ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
+ for (unsigned UsedCand : UsedCands) {
+ ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
for (unsigned Number : Blocks) {
if (!Todo.test(Number))
continue;
@@ -1817,13 +1735,13 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
// Ignore old intervals from DCE.
- if (getOrInitStage(Reg.reg()) != RS_New)
+ if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
continue;
// Remainder interval. Don't try splitting again, spill if it doesn't
// allocate.
if (IntvMap[I] == 0) {
- setStage(Reg, RS_Spill);
+ ExtraInfo->setStage(Reg, RS_Spill);
continue;
}
@@ -1834,7 +1752,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
<< " blocks as original.\n");
// Don't allow repeated splitting as a safeguard against looping.
- setStage(Reg, RS_Split2);
+ ExtraInfo->setStage(Reg, RS_Split2);
}
continue;
}
@@ -1899,7 +1817,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
unsigned BestCand = NoCand;
for (MCPhysReg PhysReg : Order) {
assert(PhysReg);
- if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
+ if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
continue;
// Discard bad candidates before we run out of interference cache cursors.
@@ -2065,8 +1983,8 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// goes straight to spilling, the new local ranges get to stay RS_New.
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
- if (getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
- setStage(LI, RS_Spill);
+ if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
+ ExtraInfo->setStage(LI, RS_Spill);
}
if (VerifyEnabled)
@@ -2152,7 +2070,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
// Assign all new registers to RS_Spill. This was the last chance.
- setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+ ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
return 0;
}
@@ -2320,7 +2238,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// These rules allow a 3 -> 2+3 split once, which we need. They also prevent
// excessive splitting and infinite loops.
//
- bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
+ bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
// Best split candidate.
unsigned BestBefore = NumGaps;
@@ -2456,7 +2374,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(!ProgressRequired && "Didn't make progress when it was required.");
for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
if (IntvMap[I] == 1) {
- setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
+ ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
}
LLVM_DEBUG(dbgs() << '\n');
@@ -2477,7 +2395,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
const SmallVirtRegSet &FixedRegisters) {
// Ranges must be Split2 or less.
- if (getStage(VirtReg) >= RS_Spill)
+ if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
return 0;
// Local intervals are handled separately.
@@ -2499,7 +2417,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// First try to split around a region spanning multiple blocks. RS_Split2
// ranges already made dubious progress with region splitting, so they go
// straight to single block splitting.
- if (getStage(VirtReg) < RS_Split2) {
+ if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
if (PhysReg || !NewVRegs.empty())
return PhysReg;
@@ -2551,7 +2469,7 @@ bool RAGreedy::mayRecolorAllInterferences(
// it would not be recolorable as it is in the same state as VirtReg.
// However, if VirtReg has tied defs and Intf doesn't, then
// there is still a point in examining if it can be recolorable.
- if (((getStage(*Intf) == RS_Done &&
+ if (((ExtraInfo->getStage(*Intf) == RS_Done &&
MRI->getRegClass(Intf->reg()) == CurRC) &&
!(hasTiedDef(MRI, VirtReg.reg()) &&
!hasTiedDef(MRI, Intf->reg()))) ||
@@ -2615,7 +2533,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
// Ranges must be Done.
- assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
+ assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
"Last chance recoloring should really be last chance");
// Set the max depth to LastChanceRecoloringMaxDepth.
// We may want to reconsider that if we end up with a too large search space
@@ -2806,7 +2724,7 @@ MCRegister
RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
MCRegister PhysReg, uint8_t &CostPerUseLimit,
SmallVectorImpl<Register> &NewVRegs) {
- if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
+ if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
// We choose spill over using the CSR for the first time if the spill cost
// is lower than CSRCost.
SA->analyze(&VirtReg);
@@ -2818,7 +2736,7 @@ RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
CostPerUseLimit = 1;
return 0;
}
- if (getStage(VirtReg) < RS_Split) {
+ if (ExtraInfo->getStage(VirtReg) < RS_Split) {
// We choose pre-splitting over using the CSR for the first time if
// the cost of splitting is lower than CSRCost.
SA->analyze(&VirtReg);
@@ -3051,8 +2969,8 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
- if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
- NewVRegs.empty()) {
+ if (CSRCost.getFrequency() &&
+ EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
CostPerUseLimit, NewVRegs);
if (CSRReg || !NewVRegs.empty())
@@ -3063,9 +2981,9 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return PhysReg;
}
- LiveRangeStage Stage = getStage(VirtReg);
+ LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
- << getCascade(VirtReg.reg()) << '\n');
+ << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Split ranges already failed to do this, and they should not
@@ -3094,7 +3012,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Wait until the second time, when all smaller ranges have been allocated.
// This gives a better picture of the interference to split around.
if (Stage < RS_Split) {
- setStage(VirtReg, RS_Split);
+ ExtraInfo->setStage(VirtReg, RS_Split);
LLVM_DEBUG(dbgs() << "wait for second round\n");
NewVRegs.push_back(VirtReg.reg());
return 0;
@@ -3120,12 +3038,12 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Finally spill VirtReg itself.
if ((EnableDeferredSpilling ||
TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
- getStage(VirtReg) < RS_Memory) {
+ ExtraInfo->getStage(VirtReg) < RS_Memory) {
// TODO: This is experimental and in particular, we do not model
// the live range splitting done by spilling correctly.
// We would need a deep integration with the spiller to do the
// right thing here. Anyway, that is still good for early testing.
- setStage(VirtReg, RS_Memory);
+ ExtraInfo->setStage(VirtReg, RS_Memory);
LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
NewVRegs.push_back(VirtReg.reg());
} else {
@@ -3133,7 +3051,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
TimerGroupDescription, TimePassesIsEnabled);
LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
- setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+ ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
// Tell LiveDebugVariables about the new ranges. Ranges not being covered by
// the new regs are kept in LDV (still mapping to the old register), until
@@ -3316,10 +3234,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
RCI.runOnMachineFunction(mf);
- EnableLocalReassign = EnableLocalReassignment ||
- MF->getSubtarget().enableRALocalReassignment(
- MF->getTarget().getOptLevel());
-
EnableAdvancedRASplitCost =
ConsiderLocalIntervalCost.getNumOccurrences()
? ConsiderLocalIntervalCost
@@ -3354,8 +3268,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
- ExtraRegInfo.clear();
- NextCascade = 1;
+ ExtraInfo.emplace();
+ EvictAdvisor = getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(
+ *MF, Matrix, LIS, VRM, RegClassInfo, &*ExtraInfo);
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
SetOfBrokenHints.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
index b22eb080791e..93be8f689d57 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -623,8 +623,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
// Compute an initial allowed set for the current vreg.
std::vector<MCRegister> VRegAllowed;
ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
- for (unsigned I = 0; I != RawPRegOrder.size(); ++I) {
- MCRegister PReg(RawPRegOrder[I]);
+ for (MCPhysReg R : RawPRegOrder) {
+ MCRegister PReg(R);
if (MRI.isReserved(PReg))
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
new file mode 100644
index 000000000000..740890831617
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
@@ -0,0 +1,124 @@
+//===- RegAllocScore.cpp - evaluate regalloc policy quality ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// Calculate a measure of the register allocation policy quality. This is used
+/// to construct a reward for the training of the ML-driven allocation policy.
+/// Currently, the score is the sum of the machine basic block frequency-weighted
+/// number of loads, stores, copies, and remat instructions, each factored with
+/// a relative weight.
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocScore.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <numeric>
+#include <vector>
+
+using namespace llvm;
+cl::opt<double> CopyWeight("regalloc-copy-weight", cl::init(0.2), cl::Hidden);
+cl::opt<double> LoadWeight("regalloc-load-weight", cl::init(4.0), cl::Hidden);
+cl::opt<double> StoreWeight("regalloc-store-weight", cl::init(1.0), cl::Hidden);
+cl::opt<double> CheapRematWeight("regalloc-cheap-remat-weight", cl::init(0.2),
+ cl::Hidden);
+cl::opt<double> ExpensiveRematWeight("regalloc-expensive-remat-weight",
+ cl::init(1.0), cl::Hidden);
+#define DEBUG_TYPE "regalloc-score"
+
+RegAllocScore &RegAllocScore::operator+=(const RegAllocScore &Other) {
+ CopyCounts += Other.copyCounts();
+ LoadCounts += Other.loadCounts();
+ StoreCounts += Other.storeCounts();
+ LoadStoreCounts += Other.loadStoreCounts();
+ CheapRematCounts += Other.cheapRematCounts();
+ ExpensiveRematCounts += Other.expensiveRematCounts();
+ return *this;
+}
+
+bool RegAllocScore::operator==(const RegAllocScore &Other) const {
+ return copyCounts() == Other.copyCounts() &&
+ loadCounts() == Other.loadCounts() &&
+ storeCounts() == Other.storeCounts() &&
+ loadStoreCounts() == Other.loadStoreCounts() &&
+ cheapRematCounts() == Other.cheapRematCounts() &&
+ expensiveRematCounts() == Other.expensiveRematCounts();
+}
+
+bool RegAllocScore::operator!=(const RegAllocScore &Other) const {
+ return !(*this == Other);
+}
+
+double RegAllocScore::getScore() const {
+ double Ret = 0.0;
+ Ret += CopyWeight * copyCounts();
+ Ret += LoadWeight * loadCounts();
+ Ret += StoreWeight * storeCounts();
+ Ret += (LoadWeight + StoreWeight) * loadStoreCounts();
+ Ret += CheapRematWeight * cheapRematCounts();
+ Ret += ExpensiveRematWeight * expensiveRematCounts();
+
+ return Ret;
+}
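With the default weights above, the total reduces to the sum over basic blocks of freq(B) * (0.2*copies + 4.0*loads + 1.0*stores + 5.0*combined load-stores + 0.2*cheap remats + 1.0*expensive remats). For example, a block running at twice the entry frequency that contains one reload and one copy contributes 2.0 * (4.0 + 0.2) = 8.4; a lower total indicates a better allocation.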
+
+RegAllocScore
+llvm::calculateRegAllocScore(const MachineFunction &MF,
+ const MachineBlockFrequencyInfo &MBFI,
+ AAResults &AAResults) {
+ return calculateRegAllocScore(
+ MF,
+ [&](const MachineBasicBlock &MBB) {
+ return MBFI.getBlockFreqRelativeToEntryBlock(&MBB);
+ },
+ [&](const MachineInstr &MI) {
+ return MF.getSubtarget().getInstrInfo()->isTriviallyReMaterializable(
+ MI, &AAResults);
+ });
+}
+
+RegAllocScore llvm::calculateRegAllocScore(
+ const MachineFunction &MF,
+ llvm::function_ref<double(const MachineBasicBlock &)> GetBBFreq,
+ llvm::function_ref<bool(const MachineInstr &)>
+ IsTriviallyRematerializable) {
+ RegAllocScore Total;
+
+ for (const MachineBasicBlock &MBB : MF) {
+ double BlockFreqRelativeToEntrypoint = GetBBFreq(MBB);
+ RegAllocScore MBBScore;
+
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isDebugInstr() || MI.isKill() || MI.isInlineAsm()) {
+ continue;
+ }
+ if (MI.isCopy()) {
+ MBBScore.onCopy(BlockFreqRelativeToEntrypoint);
+ } else if (IsTriviallyRematerializable(MI)) {
+ if (MI.getDesc().isAsCheapAsAMove()) {
+ MBBScore.onCheapRemat(BlockFreqRelativeToEntrypoint);
+ } else {
+ MBBScore.onExpensiveRemat(BlockFreqRelativeToEntrypoint);
+ }
+ } else if (MI.mayLoad() && MI.mayStore()) {
+ MBBScore.onLoadStore(BlockFreqRelativeToEntrypoint);
+ } else if (MI.mayLoad()) {
+ MBBScore.onLoad(BlockFreqRelativeToEntrypoint);
+ } else if (MI.mayStore()) {
+ MBBScore.onStore(BlockFreqRelativeToEntrypoint);
+ }
+ }
+ Total += MBBScore;
+ }
+ return Total;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h
new file mode 100644
index 000000000000..3c28bb61189d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h
@@ -0,0 +1,80 @@
+//==- RegAllocScore.h - evaluate regalloc policy quality ----------*-C++-*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// Calculate a measure of the register allocation policy quality. This is used
+/// to construct a reward for the training of the ML-driven allocation policy.
+/// Currently, the score is the sum of the machine basic block frequency-weighted
+/// number of loads, stores, copies, and remat instructions, each factored with
+/// a relative weight.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCSCORE_H_
+#define LLVM_CODEGEN_REGALLOCSCORE_H_
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/Module.h"
+#include <cassert>
+#include <cstdint>
+#include <limits>
+
+namespace llvm {
+
+/// Regalloc score.
+class RegAllocScore final {
+ double CopyCounts = 0.0;
+ double LoadCounts = 0.0;
+ double StoreCounts = 0.0;
+ double CheapRematCounts = 0.0;
+ double LoadStoreCounts = 0.0;
+ double ExpensiveRematCounts = 0.0;
+
+public:
+ RegAllocScore() = default;
+ RegAllocScore(const RegAllocScore &) = default;
+
+ double copyCounts() const { return CopyCounts; }
+ double loadCounts() const { return LoadCounts; }
+ double storeCounts() const { return StoreCounts; }
+ double loadStoreCounts() const { return LoadStoreCounts; }
+ double expensiveRematCounts() const { return ExpensiveRematCounts; }
+ double cheapRematCounts() const { return CheapRematCounts; }
+
+ void onCopy(double Freq) { CopyCounts += Freq; }
+ void onLoad(double Freq) { LoadCounts += Freq; }
+ void onStore(double Freq) { StoreCounts += Freq; }
+ void onLoadStore(double Freq) { LoadStoreCounts += Freq; }
+ void onExpensiveRemat(double Freq) { ExpensiveRematCounts += Freq; }
+ void onCheapRemat(double Freq) { CheapRematCounts += Freq; }
+
+ RegAllocScore &operator+=(const RegAllocScore &Other);
+ bool operator==(const RegAllocScore &Other) const;
+ bool operator!=(const RegAllocScore &Other) const;
+ double getScore() const;
+};
+
+/// Calculate a score. When comparing 2 scores for the same function but
+/// different policies, the better policy would have a smaller score.
+/// The implementation is the overload below (which is also easily unit-testable).
+RegAllocScore calculateRegAllocScore(const MachineFunction &MF,
+ const MachineBlockFrequencyInfo &MBFI,
+ AAResults &AAResults);
+
+/// Implementation of the above, which is also more easily unit-testable.
+RegAllocScore calculateRegAllocScore(
+ const MachineFunction &MF,
+ llvm::function_ref<double(const MachineBasicBlock &)> GetBBFreq,
+ llvm::function_ref<bool(const MachineInstr &)> IsTriviallyRematerializable);
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_REGALLOCSCORE_H_
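A minimal usage sketch of the function_ref overload declared above (assumed unit-test style, not part of the patch; the helper name and the flat-profile lambdas are invented for illustration):

    #include "RegAllocScore.h"
    using namespace llvm;

    // Score a machine function with a flat profile and no remat information.
    static RegAllocScore scoreWithFlatProfile(const MachineFunction &MF) {
      return calculateRegAllocScore(
          MF,
          [](const MachineBasicBlock &) { return 1.0; }, // every block: freq 1
          [](const MachineInstr &) { return false; });   // nothing counts as remat
    }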
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 797899fb5b86..65a65b9cae95 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -109,8 +109,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
- for (unsigned i = 0; i != RawOrder.size(); ++i) {
- unsigned PhysReg = RawOrder[i];
+ for (unsigned PhysReg : RawOrder) {
// Remove reserved registers from the allocation order.
if (Reserved.test(PhysReg))
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 4c8534cf2d01..a917b0d27d4a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -4067,13 +4067,13 @@ void RegisterCoalescer::joinAllIntervals() {
// Coalesce intervals in MBB priority order.
unsigned CurrDepth = std::numeric_limits<unsigned>::max();
- for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ for (MBBPriorityInfo &MBB : MBBs) {
// Try coalescing the collected local copies for deeper loops.
- if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
+ if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
coalesceLocals();
- CurrDepth = MBBs[i].Depth;
+ CurrDepth = MBB.Depth;
}
- copyCoalesceInMBB(MBBs[i].MBB);
+ copyCoalesceInMBB(MBB.MBB);
}
lateLiveIntervalUpdate();
coalesceLocals();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
index de6129a912d3..49859aeec78b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
@@ -159,20 +159,17 @@ static bool reduceDbgValsBackwardScan(MachineBasicBlock &MBB) {
SmallVector<MachineInstr *, 8> DbgValsToBeRemoved;
SmallDenseSet<DebugVariable> VariableSet;
- for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
- I != E; ++I) {
- MachineInstr *MI = &*I;
-
- if (MI->isDebugValue()) {
- DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
- MI->getDebugLoc()->getInlinedAt());
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MI.isDebugValue()) {
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
auto R = VariableSet.insert(Var);
// If it is a DBG_VALUE describing a constant as:
// DBG_VALUE 0, ...
// we just don't consider such instructions as candidates
// for redundant removal.
- if (MI->isNonListDebugValue()) {
- MachineOperand &Loc = MI->getDebugOperand(0);
+ if (MI.isNonListDebugValue()) {
+ MachineOperand &Loc = MI.getDebugOperand(0);
if (!Loc.isReg()) {
// If we have already encountered this variable, just stop
// tracking it.
@@ -185,7 +182,7 @@ static bool reduceDbgValsBackwardScan(MachineBasicBlock &MBB) {
// We have already encountered the value for this variable,
// so this one can be deleted.
if (!R.second)
- DbgValsToBeRemoved.push_back(MI);
+ DbgValsToBeRemoved.push_back(&MI);
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index 50d9d64bfcfd..3d8a7eecce18 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -521,8 +521,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
StackLayout SSL(StackAlignment);
if (StackGuardSlot) {
Type *Ty = StackGuardSlot->getAllocatedType();
- uint64_t Align =
- std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
+ Align Align = std::max(DL.getPrefTypeAlign(Ty), StackGuardSlot->getAlign());
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
Align, SSC.getFullLiveRange());
}
@@ -534,8 +533,9 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- uint64_t Align =
- std::max(DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment());
+ Align Align = DL.getPrefTypeAlign(Ty);
+ if (auto A = Arg->getParamAlign())
+ Align = std::max(Align, *A);
SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
}
@@ -546,24 +546,24 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- uint64_t Align = std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment());
+ Align Align = std::max(DL.getPrefTypeAlign(Ty), AI->getAlign());
SSL.addObject(AI, Size, Align,
ClColoring ? SSC.getLiveRange(AI) : NoColoringRange);
}
SSL.computeLayout();
- uint64_t FrameAlignment = SSL.getFrameAlignment();
+ Align FrameAlignment = SSL.getFrameAlignment();
// FIXME: tell SSL that we start at a less-then-MaxAlignment aligned location
// (AlignmentSkew).
if (FrameAlignment > StackAlignment) {
// Re-align the base pointer according to the max requested alignment.
- assert(isPowerOf2_64(FrameAlignment));
IRB.SetInsertPoint(BasePointer->getNextNode());
BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
- IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
- ConstantInt::get(IntPtrTy, ~uint64_t(FrameAlignment - 1))),
+ IRB.CreateAnd(
+ IRB.CreatePtrToInt(BasePointer, IntPtrTy),
+ ConstantInt::get(IntPtrTy, ~(FrameAlignment.value() - 1))),
StackPtrTy));
}
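The replacement mask mirrors the old integer computation: with a frame alignment of 32, for instance, the base pointer is ANDed with ~uint64_t(31), rounding it down to the next lower 32-byte boundary.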
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
index 7cdda7743c16..602afcfa9001 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -37,7 +37,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
}
}
-void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment,
+void StackLayout::addObject(const Value *V, unsigned Size, Align Alignment,
const StackLifetime::LiveRange &Range) {
StackObjects.push_back({V, Size, Alignment, Range});
ObjectAlignments[V] = Alignment;
@@ -45,7 +45,7 @@ void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment,
}
static unsigned AdjustStackOffset(unsigned Offset, unsigned Size,
- uint64_t Alignment) {
+ Align Alignment) {
return alignTo(Offset + Size, Alignment) - Size;
}
@@ -62,7 +62,8 @@ void StackLayout::layoutObject(StackObject &Obj) {
}
LLVM_DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align "
- << Obj.Alignment << ", range " << Obj.Range << "\n");
+ << Obj.Alignment.value() << ", range " << Obj.Range
+ << "\n");
assert(Obj.Alignment <= MaxAlignment);
unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment);
unsigned End = Start + Obj.Size;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
index b72450e57080..4ac7af2059f5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
@@ -22,7 +22,7 @@ namespace safestack {
/// Compute the layout of an unsafe stack frame.
class StackLayout {
- uint64_t MaxAlignment;
+ Align MaxAlignment;
struct StackRegion {
unsigned Start;
@@ -40,14 +40,14 @@ class StackLayout {
struct StackObject {
const Value *Handle;
unsigned Size;
- uint64_t Alignment;
+ Align Alignment;
StackLifetime::LiveRange Range;
};
SmallVector<StackObject, 8> StackObjects;
DenseMap<const Value *, unsigned> ObjectOffsets;
- DenseMap<const Value *, uint64_t> ObjectAlignments;
+ DenseMap<const Value *, Align> ObjectAlignments;
void layoutObject(StackObject &Obj);
@@ -56,7 +56,7 @@ public:
/// Add an object to the stack frame. Value pointer is opaque and used as a
/// handle to retrieve the object's offset in the frame later.
- void addObject(const Value *V, unsigned Size, uint64_t Alignment,
+ void addObject(const Value *V, unsigned Size, Align Alignment,
const StackLifetime::LiveRange &Range);
/// Run the layout computation for all previously added objects.
@@ -66,13 +66,13 @@ public:
unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }
/// Returns the alignment of the object
- uint64_t getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
+ Align getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
/// Returns the size of the entire frame.
unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }
/// Returns the alignment of the frame.
- uint64_t getFrameAlignment() { return MaxAlignment; }
+ Align getFrameAlignment() { return MaxAlignment; }
void print(raw_ostream &OS);
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
index ef3afab2b730..696b29018ae6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -618,8 +618,8 @@ std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
do {
const SUnit *SU = WorkList.back();
WorkList.pop_back();
- for (int I = SU->Succs.size()-1; I >= 0; --I) {
- const SUnit *Succ = SU->Succs[I].getSUnit();
+ for (const SDep &SD : llvm::reverse(SU->Succs)) {
+ const SUnit *Succ = SD.getSUnit();
unsigned s = Succ->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
if (Succ->isBoundaryNode())
@@ -652,8 +652,8 @@ std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
do {
const SUnit *SU = WorkList.back();
WorkList.pop_back();
- for (int I = SU->Preds.size()-1; I >= 0; --I) {
- const SUnit *Pred = SU->Preds[I].getSUnit();
+ for (const SDep &SD : llvm::reverse(SU->Preds)) {
+ const SUnit *Pred = SD.getSUnit();
unsigned s = Pred->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
if (Pred->isBoundaryNode())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index df5a041b87cd..067ad819e0d2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -487,10 +487,7 @@ namespace {
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
- SDValue visitFMINNUM(SDNode *N);
- SDValue visitFMAXNUM(SDNode *N);
- SDValue visitFMINIMUM(SDNode *N);
- SDValue visitFMAXIMUM(SDNode *N);
+ SDValue visitFMinMax(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -1701,10 +1698,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
case ISD::FFLOOR: return visitFFLOOR(N);
- case ISD::FMINNUM: return visitFMINNUM(N);
- case ISD::FMAXNUM: return visitFMAXNUM(N);
- case ISD::FMINIMUM: return visitFMINIMUM(N);
- case ISD::FMAXIMUM: return visitFMAXIMUM(N);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: return visitFMinMax(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
@@ -2260,6 +2257,21 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
+ // fold (add x, undef) -> undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+
+ // fold (add c1, c2) -> c1+c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
@@ -2268,23 +2280,6 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold (add x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
- return N1;
- }
-
- // fold (add x, undef) -> undef
- if (N0.isUndef())
- return N0;
-
- if (N1.isUndef())
- return N1;
-
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
- // canonicalize constant to RHS
- if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
- // fold (add c1, c2) -> c1+c2
- return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
}
// fold (add x, 0) -> x
@@ -2554,6 +2549,19 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
+ // fold (add_sat x, undef) -> -1
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getAllOnesConstant(DL, VT);
+
+ // fold (add_sat c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+
// fold vector ops
if (VT.isVector()) {
// TODO SimplifyVBinOp
@@ -2561,20 +2569,6 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
// fold (add_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
- return N1;
- }
-
- // fold (add_sat x, undef) -> -1
- if (N0.isUndef() || N1.isUndef())
- return DAG.getAllOnesConstant(DL, VT);
-
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
- // canonicalize constant to RHS
- if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(Opcode, DL, VT, N1, N0);
- // fold (add_sat c1, c2) -> c3
- return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
}
// fold (add_sat x, 0) -> x
@@ -3260,6 +3254,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
+ // fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
+ if (N0 == N1)
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // fold (sub c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
@@ -3270,15 +3273,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return N0;
}
- // fold (sub x, x) -> 0
- // FIXME: Refactor this and xor and other similar operations together.
- if (N0 == N1)
- return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
-
- // fold (sub c1, c2) -> c3
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
- return C;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -3611,15 +3605,6 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
- // fold vector ops
- if (VT.isVector()) {
- // TODO SimplifyVBinOp
-
- // fold (sub_sat x, 0) -> x, vector edition
- if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
- return N0;
- }
-
// fold (sub_sat x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -3632,6 +3617,15 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
return C;
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (sub_sat x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
// fold (sub_sat x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -3781,6 +3775,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
+
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
APInt ConstValue1;
@@ -3802,15 +3805,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
}
- // fold (mul c1, c2) -> c1*c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
-
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1.isZero())
return N1;
@@ -4140,17 +4134,17 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
EVT CCVT = getSetCCResultType(VT);
SDLoc DL(N);
+ // fold (sdiv c1, c2) -> c1/c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (sdiv c1, c2) -> c1/c2
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
- return C;
-
// fold (sdiv X, -1) -> 0-X
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && N1C->isAllOnes())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
@@ -4284,17 +4278,17 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
EVT CCVT = getSetCCResultType(VT);
SDLoc DL(N);
+ // fold (udiv c1, c2) -> c1/c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (udiv c1, c2) -> c1/c2
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
- return C;
-
// fold (udiv X, -1) -> select(X == -1, 1, 0)
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
@@ -4463,6 +4457,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // fold (mulhs c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
+
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
@@ -4474,15 +4477,6 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
return DAG.getConstant(0, DL, VT);
}
- // fold (mulhs c1, c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
-
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4523,6 +4517,15 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // fold (mulhu c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
+
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
@@ -4534,15 +4537,6 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return DAG.getConstant(0, DL, VT);
}
- // fold (mulhu c1, c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
-
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4786,12 +4780,14 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
}
// Function to calculate whether the Min/Max pair of SDNodes (potentially
-// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1)
-// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp
-// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The
-// operands are the same as SimplifySelectCC. N0<N1 ? N2 : N3
+// swapped around) make a signed saturate pattern, clamping to between a signed
+// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
+// Returns the node being clamped and the bitwidth of the clamp in BW. Should
+// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
+// same as SimplifySelectCC. N0<N1 ? N2 : N3.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
- SDValue N3, ISD::CondCode CC, unsigned &BW) {
+ SDValue N3, ISD::CondCode CC, unsigned &BW,
+ bool &Unsigned) {
auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
// The compare and select operand should be the same or the select operands
@@ -4858,17 +4854,27 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
const APInt &MinC = MinCOp->getAPIntValue();
const APInt &MaxC = MaxCOp->getAPIntValue();
APInt MinCPlus1 = MinC + 1;
- if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2())
- return SDValue();
- BW = MinCPlus1.exactLogBase2() + 1;
- return N02;
+ if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
+ BW = MinCPlus1.exactLogBase2() + 1;
+ Unsigned = false;
+ return N02;
+ }
+
+ if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
+ BW = MinCPlus1.exactLogBase2();
+ Unsigned = true;
+ return N02;
+ }
+
+ return SDValue();
}
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC,
SelectionDAG &DAG) {
unsigned BW;
- SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW);
+ bool Unsigned;
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
return SDValue();
EVT FPVT = Fp.getOperand(0).getValueType();
@@ -4876,13 +4882,14 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
if (FPVT.isVector())
NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
FPVT.getVectorElementCount());
- if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(
- ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT))
+ unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
return SDValue();
SDLoc DL(Fp);
- SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0),
+ SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
DAG.getValueType(NewVT.getScalarType()));
- return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
+ return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
+ : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
}
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
@@ -4892,11 +4899,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDLoc DL(N);
- // fold vector ops
- if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
-
// fold operation with constant operands.
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
@@ -4904,7 +4906,12 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
@@ -5777,6 +5784,15 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N0 == N1)
return N0;
+ // fold (and c1, c2) -> c1&c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
@@ -5824,22 +5840,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- // fold (and c1, c2) -> c1&c2
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
-
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
@@ -6546,21 +6553,25 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N0 == N1)
return N0;
+ // fold (or c1, c2) -> c1|c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
- return N1;
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
// fold (or x, -1) -> -1, vector edition
- if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
- // do not return N0, because undef node may exist in N0
- return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
@@ -6629,16 +6640,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
- // fold (or c1, c2) -> c1|c2
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
-
// fold (or x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -6651,6 +6652,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return NewSel;
// fold (or x, c) -> c iff (x & ~c) == 0
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
@@ -7941,18 +7943,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
- // fold vector ops
- if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
-
- // fold (xor x, 0) -> x, vector edition
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
- return N1;
- if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
- return N0;
- }
-
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -7969,9 +7959,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -8409,6 +8409,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ // fold (shl c1, c2) -> c1<<c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
@@ -8434,12 +8438,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
- // fold (shl c1, c2) -> c1<<c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
- return C;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -8558,6 +8556,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
// TODO - support non-uniform vector shift amounts.
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
N0->getFlags().hasExact()) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
@@ -8758,6 +8757,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+  // fold (sra c1, c2) -> c1 >>s c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// Arithmetic shifting an all-sign-bit value is a no-op.
// fold (sra 0, x) -> 0
// fold (sra -1, x) -> -1
@@ -8769,17 +8772,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
- // fold (sra c1, c2) -> (sra c1, c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
- return C;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// sext_inreg.
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
@@ -8962,21 +8960,20 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
- // fold (srl c1, c2) -> c1 >>u c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
- return C;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// if (srl x, c) is known to be zero, return 0
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C &&
DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
@@ -10043,6 +10040,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
SDValue Chain = MST->getChain();
+ SDValue Value = MST->getValue();
+ SDValue Ptr = MST->getBasePtr();
SDLoc DL(N);
// Zap masked stores with a zero mask.
@@ -10054,12 +10053,50 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
!MST->isCompressingStore() && !MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
- MST->getBasePtr(), MST->getMemOperand());
+ MST->getBasePtr(), MST->getPointerInfo(),
+ MST->getOriginalAlign(), MachineMemOperand::MOStore,
+ MST->getAAInfo());
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
+ if (MST->isTruncatingStore() && MST->isUnindexed() &&
+ Value.getValueType().isInteger() &&
+ (!isa<ConstantSDNode>(Value) ||
+ !cast<ConstantSDNode>(Value)->isOpaque())) {
+ APInt TruncDemandedBits =
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ MST->getMemoryVT().getScalarSizeInBits());
+
+ // See if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
+ // Re-visit the store if anything changed and the store hasn't been merged
+      // with another node (N is deleted). SimplifyDemandedBits will add Value's
+ // node back to the worklist if necessary, but we also need to re-visit
+ // the Store node itself.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
+ // If this is a TRUNC followed by a masked store, fold this into a masked
+ // truncating store. We can do this even if this is already a masked
+ // truncstore.
+ if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() &&
+ MST->isUnindexed() &&
+ TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
+ MST->getMemoryVT(), LegalOperations)) {
+ auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
+ Value.getOperand(0).getValueType());
+ return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ MST->getOffset(), Mask, MST->getMemoryVT(),
+ MST->getMemOperand(), MST->getAddressingMode(),
+ /*IsTruncating=*/true);
+ }
+
return SDValue();
}
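For orientation only (the value types are invented for this note, not taken from the diff): with the two additions above, a masked store whose value operand is a truncate of v4i32 %val to v4i16 is folded into a single truncating masked store of %val with memory VT v4i16, provided the store is unindexed and TLI.canCombineTruncStore agrees; and for a store that is already truncating, SimplifyDemandedBits is asked to simplify %val with only the low 16 of its 32 value bits demanded.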
@@ -10109,8 +10146,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
// FIXME: Can we do this for indexed, expanding, or extending loads?
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
!MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
- SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
- MLD->getBasePtr(), MLD->getMemOperand());
+ SDValue NewLd = DAG.getLoad(
+ N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
+ MLD->getPointerInfo(), MLD->getOriginalAlign(),
+ MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
return CombineTo(N, NewLd, NewLd.getValue(1));
}
@@ -13876,19 +13915,19 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
- // fold vector ops
- if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
-
// fold (fadd c1, c2) -> c1 + c2
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
+ return C;
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
@@ -14084,15 +14123,15 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
+ // fold (fsub c1, c2) -> c1-c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (fsub c1, c2) -> c1-c2
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -14157,7 +14196,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
@@ -14168,22 +14206,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
- // fold vector ops
- if (VT.isVector()) {
- // This just handles C1 * C2 for vectors. Other vector folds are below.
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
- }
-
// fold (fmul c1, c2) -> c1*c2
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
+ return C;
// canonicalize constant to RHS
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -14495,8 +14531,6 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -14506,15 +14540,15 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
+ // fold (fdiv c1, c2) -> c1/c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (fdiv c1, c2) -> c1/c2
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -14523,7 +14557,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
- if (N1CFP) {
+ if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
// Compute the reciprocal 1.0 / c2.
const APFloat &N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
@@ -14639,8 +14673,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -14649,9 +14681,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
-
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
+ return C;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -14712,12 +14744,12 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
- if (N0CFP && N1CFP) // Constant fold
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
+ // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
+ return C;
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
const APFloat &V = N1C->getValueAPF();
@@ -14835,14 +14867,6 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
- // This optimization is guarded by a function attribute because it may produce
- // unexpected results. Ie, programs may be relying on the platform-specific
- // undefined behavior when the float-to-int conversion overflows.
- const Function &F = DAG.getMachineFunction().getFunction();
- Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
- if (StrictOverflow.getValueAsString().equals("false"))
- return SDValue();
-
// We only do this if the target has legal ftrunc. Otherwise, we'd likely be
// replacing casts with a libcall. We also must be allowed to ignore -0.0
// because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
@@ -15216,31 +15240,26 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
-static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
- APFloat (*Op)(const APFloat &, const APFloat &)) {
+SDValue DAGCombiner::visitFMinMax(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
const SDNodeFlags Flags = N->getFlags();
unsigned Opc = N->getOpcode();
bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
- if (N0CFP && N1CFP) {
- const APFloat &C0 = N0CFP->getValueAPF();
- const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
- }
+ // Constant fold.
+ if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
+ return C;
// Canonicalize to constant on RHS.
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
- if (N1CFP) {
+ if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
const APFloat &AF = N1CFP->getValueAPF();
// minnum(X, nan) -> X
@@ -15272,22 +15291,6 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
return SDValue();
}
-SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
- return visitFMinMax(DAG, N, minnum);
-}
-
-SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
- return visitFMinMax(DAG, N, maxnum);
-}
-
-SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
- return visitFMinMax(DAG, N, minimum);
-}
-
-SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
- return visitFMinMax(DAG, N, maximum);
-}
-
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -18392,8 +18395,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && ST->isSimple() &&
ST1->isUnindexed() && ST1->isSimple()) {
- if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
- ST->getMemoryVT() == ST1->getMemoryVT() &&
+ if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
+ ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
// If this is a store followed by a store with the same value to the
// same location, then the store is dead/noop.
@@ -20727,6 +20730,156 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return NewLd;
}
+/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
+/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
+/// EXTRACT_SUBVECTOR(Op?, ?),
+///                             Mask')
+/// iff it is legal and profitable to do so. Notably, the trimmed mask
+/// (containing only the elements that are extracted)
+/// must reference at most two subvectors.
+static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ "Must only be called on EXTRACT_SUBVECTOR's");
+
+ SDValue N0 = N->getOperand(0);
+
+ // Only deal with non-scalable vectors.
+ EVT NarrowVT = N->getValueType(0);
+ EVT WideVT = N0.getValueType();
+ if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
+ return SDValue();
+
+ // The operand must be a shufflevector.
+ auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
+ if (!WideShuffleVector)
+ return SDValue();
+
+  // The old shuffle needs to go away.
+ if (!WideShuffleVector->hasOneUse())
+ return SDValue();
+
+ // And the narrow shufflevector that we'll form must be legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
+ return SDValue();
+
+ uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
+ int NumEltsExtracted = NarrowVT.getVectorNumElements();
+ assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
+ "Extract index is not a multiple of the output vector length.");
+
+ int WideNumElts = WideVT.getVectorNumElements();
+
+ SmallVector<int, 16> NewMask;
+ NewMask.reserve(NumEltsExtracted);
+ SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
+ DemandedSubvectors;
+
+ // Try to decode the wide mask into narrow mask from at most two subvectors.
+ for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
+ NumEltsExtracted)) {
+ assert((M >= -1) && (M < (2 * WideNumElts)) &&
+ "Out-of-bounds shuffle mask?");
+
+ if (M < 0) {
+ // Does not depend on operands, does not require adjustment.
+ NewMask.emplace_back(M);
+ continue;
+ }
+
+ // From which operand of the shuffle does this shuffle mask element pick?
+ int WideShufOpIdx = M / WideNumElts;
+ // Which element of that operand is picked?
+ int OpEltIdx = M % WideNumElts;
+
+ assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
+ "Shuffle mask vector decomposition failure.");
+
+ // And which NumEltsExtracted-sized subvector of that operand is that?
+ int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
+ // And which element within that subvector of that operand is that?
+ int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
+
+ assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
+ "Shuffle mask subvector decomposition failure.");
+
+ assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
+ WideShufOpIdx * WideNumElts) == M &&
+ "Shuffle mask full decomposition failure.");
+
+ SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
+
+ if (Op.isUndef()) {
+ // Picking from an undef operand. Let's adjust mask instead.
+ NewMask.emplace_back(-1);
+ continue;
+ }
+
+ // Profitability check: only deal with extractions from the first subvector.
+ if (OpSubvecIdx != 0)
+ return SDValue();
+
+ const std::pair<SDValue, int> DemandedSubvector =
+ std::make_pair(Op, OpSubvecIdx);
+
+ if (DemandedSubvectors.insert(DemandedSubvector)) {
+ if (DemandedSubvectors.size() > 2)
+ return SDValue(); // We can't handle more than two subvectors.
+ // How many elements into the WideVT does this subvector start?
+ int Index = NumEltsExtracted * OpSubvecIdx;
+ // Bail out if the extraction isn't going to be cheap.
+ if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
+ return SDValue();
+ }
+
+ // Ok, but from which operand of the new shuffle will this element pick?
+ int NewOpIdx =
+ getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
+ assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
+
+ int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
+ NewMask.emplace_back(AdjM);
+ }
+ assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
+ assert(DemandedSubvectors.size() <= 2 &&
+ "Should have ended up demanding at most two subvectors.");
+
+ // Did we discover that the shuffle does not actually depend on operands?
+ if (DemandedSubvectors.empty())
+ return DAG.getUNDEF(NarrowVT);
+
+ // We still perform the exact same EXTRACT_SUBVECTOR, just on different
+  // operand[s]/index[es], so there is no point in checking for its legality.
+
+ // Do not turn a legal shuffle into an illegal one.
+ if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
+ !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ SmallVector<SDValue, 2> NewOps;
+ for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
+ &DemandedSubvector : DemandedSubvectors) {
+ // How many elements into the WideVT does this subvector start?
+ int Index = NumEltsExtracted * DemandedSubvector.second;
+ SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
+ NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
+ DemandedSubvector.first, IndexC));
+ }
+ assert((NewOps.size() == 1 || NewOps.size() == 2) &&
+ "Should end up with either one or two ops");
+
+ // If we ended up with only one operand, pad with an undef.
+ if (NewOps.size() == 1)
+ NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
+
+ return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
+}
+
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
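A worked decomposition, with shapes invented purely for illustration: take WideVT = v8i32, an extract of v4i32 at index 4, and suppose the wide shuffle mask entries 4..7 are 9, 10, 2, 3. Entries 9 and 10 select elements 1 and 2 of the first v4i32 subvector of Op1, and entries 2 and 3 select elements 2 and 3 of the first subvector of Op0, so DemandedSubvectors ends up as {(Op1, 0), (Op0, 0)} and the result is vector_shuffle<1,2,6,7> of extract_subvector(Op1, 0) and extract_subvector(Op0, 0), which satisfies both the at-most-two-subvectors limit and the first-subvector profitability check.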
@@ -20840,6 +20993,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
}
+ if (SDValue V =
+ foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
+ return V;
+
V = peekThroughBitcasts(V);
// If the input is a build vector. Try to make a smaller build vector.
@@ -22424,15 +22581,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDValue Ops[] = {LHS, RHS};
unsigned Opcode = N->getOpcode();
SDNodeFlags Flags = N->getFlags();
- // See if we can constant fold the vector operation.
- if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS),
- LHS.getValueType(), Ops))
- return Fold;
-
// Move unary shuffles with identical masks after a vector binop:
// VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
// --> shuffle (VBinOp A, B), Undef, Mask
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 08598eeded7a..5dfb65ef131a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3367,13 +3367,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FSHL:
case ISD::FSHR:
- if (TLI.expandFunnelShift(Node, Tmp1, DAG))
- Results.push_back(Tmp1);
+ if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG))
+ Results.push_back(Expanded);
break;
case ISD::ROTL:
case ISD::ROTR:
- if (TLI.expandROT(Node, true /*AllowVectorOps*/, Tmp1, DAG))
- Results.push_back(Tmp1);
+ if (SDValue Expanded = TLI.expandROT(Node, true /*AllowVectorOps*/, DAG))
+ Results.push_back(Expanded);
break;
case ISD::SADDSAT:
case ISD::UADDSAT:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 1fa4d88fcb4a..518e525e13d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1277,8 +1277,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) {
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
// Lower the rotate to shifts and ORs which can be promoted.
- SDValue Res;
- TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ SDValue Res = TLI.expandROT(N, true /*AllowVectorOps*/, DAG);
ReplaceValueWith(SDValue(N, 0), Res);
return SDValue();
}
@@ -1286,7 +1285,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Hi = GetPromotedInteger(N->getOperand(0));
SDValue Lo = GetPromotedInteger(N->getOperand(1));
- SDValue Amount = GetPromotedInteger(N->getOperand(2));
+ SDValue Amt = GetPromotedInteger(N->getOperand(2));
SDLoc DL(N);
EVT OldVT = N->getOperand(0).getValueType();
@@ -1297,21 +1296,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
unsigned NewBits = VT.getScalarSizeInBits();
// Amount has to be interpreted modulo the old bit width.
- Amount =
- DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));
+ Amt = DAG.getNode(ISD::UREM, DL, VT, Amt, DAG.getConstant(OldBits, DL, VT));
// If the promoted type is twice the size (or more), then we use the
// traditional funnel 'double' shift codegen. This isn't necessary if the
// shift amount is constant.
// fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
// fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
- if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) &&
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amt) &&
!TLI.isOperationLegalOrCustom(Opcode, VT)) {
SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
- Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount);
+ Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amt);
if (!IsFSHR)
Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
return Res;
@@ -1324,9 +1322,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
// Increase Amount to shift the result into the lower bits of the promoted
// type.
if (IsFSHR)
- Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset);
+ Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, ShiftOffset);
- return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
+ return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
}
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
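A standalone C++ sketch of the "double shift" identity the promotion relies on, assuming an i8 funnel shift promoted to i32 and a shift amount already reduced modulo 8; the direct and promoted computations agree:

  #include <cassert>
  #include <cstdint>

  // Direct i8 funnel shift left: concat(x, y) shifted left by z, keep the high 8 bits.
  static uint8_t fshl8(uint8_t x, uint8_t y, unsigned z) {
    z %= 8;
    return z ? (uint8_t)((x << z) | (y >> (8 - z))) : x;
  }

  // Promoted "double shift" form: (((x << 8) | y) << z) >> 8, then truncate.
  static uint8_t fshl8_promoted(uint8_t x, uint8_t y, unsigned z) {
    uint32_t Wide = ((uint32_t)x << 8) | y;
    return (uint8_t)((Wide << (z % 8)) >> 8);
  }

  int main() {
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned z = 0; z < 8; ++z)
        assert(fshl8((uint8_t)x, 0xCD, z) == fshl8_promoted((uint8_t)x, 0xCD, z));
    return 0;
  }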
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 98312f91d8c0..03dcd0f6d2c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -83,7 +83,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
SDValue Res(&Node, i);
bool Failed = false;
// Don't create a value in map.
- auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0;
+ auto ResId = ValueToIdMap.lookup(Res);
unsigned Mapped = 0;
if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) {
@@ -301,7 +301,7 @@ ScanOperands:
if (IgnoreNodeResults(N->getOperand(i).getNode()))
continue;
- const auto Op = N->getOperand(i);
+ const auto &Op = N->getOperand(i);
LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));
EVT OpVT = Op.getValueType();
switch (getTypeAction(OpVT)) {
@@ -1007,11 +1007,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
///
/// ValVT is the type of values that produced the boolean.
SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
- SDLoc dl(Bool);
- EVT BoolVT = getSetCCResultType(ValVT);
- ISD::NodeType ExtendCode =
- TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT));
- return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
+ return TLI.promoteTargetBoolean(DAG, Bool, ValVT);
}
/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 88a28a3be53e..1493f36fcd3e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -254,69 +254,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
- if (Op.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(Node);
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
- LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
- Node->dump(&DAG));
- switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
- LD->getMemoryVT())) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- return TranslateLegalizeResults(Op, Node);
- case TargetLowering::Custom: {
- SmallVector<SDValue, 2> ResultVals;
- if (LowerOperationWrapper(Node, ResultVals)) {
- if (ResultVals.empty())
- return TranslateLegalizeResults(Op, Node);
-
- Changed = true;
- return RecursivelyLegalizeResults(Op, ResultVals);
- }
- LLVM_FALLTHROUGH;
- }
- case TargetLowering::Expand: {
- Changed = true;
- std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
- AddLegalizedOperand(Op.getValue(0), Tmp.first);
- AddLegalizedOperand(Op.getValue(1), Tmp.second);
- return Op.getResNo() ? Tmp.first : Tmp.second;
- }
- }
- }
- } else if (Op.getOpcode() == ISD::STORE) {
- StoreSDNode *ST = cast<StoreSDNode>(Node);
- EVT StVT = ST->getMemoryVT();
- MVT ValVT = ST->getValue().getSimpleValueType();
- if (StVT.isVector() && ST->isTruncatingStore()) {
- LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
- Node->dump(&DAG));
- switch (TLI.getTruncStoreAction(ValVT, StVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- return TranslateLegalizeResults(Op, Node);
- case TargetLowering::Custom: {
- SmallVector<SDValue, 1> ResultVals;
- if (LowerOperationWrapper(Node, ResultVals)) {
- if (ResultVals.empty())
- return TranslateLegalizeResults(Op, Node);
-
- Changed = true;
- return RecursivelyLegalizeResults(Op, ResultVals);
- }
- LLVM_FALLTHROUGH;
- }
- case TargetLowering::Expand: {
- Changed = true;
- SDValue Chain = ExpandStore(Node);
- AddLegalizedOperand(Op, Chain);
- return Chain;
- }
- }
- }
- }
-
bool HasVectorValueOrOp =
llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
llvm::any_of(Node->op_values(),
@@ -329,6 +266,22 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Node);
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ EVT LoadedVT = LD->getMemoryVT();
+ if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
+ Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ EVT StVT = ST->getMemoryVT();
+ MVT ValVT = ST->getValue().getSimpleValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ Action = TLI.getTruncStoreAction(ValVT, StVT);
+ break;
+ }
case ISD::MERGE_VALUES:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
// This operation lies about being legal: when it claims to be legal,
@@ -512,6 +465,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (Action) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
+ assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
+ "This action is not supported yet!");
LLVM_DEBUG(dbgs() << "Promoting\n");
Promote(Node, ResultVals);
assert(!ResultVals.empty() && "No results for promotion?");
@@ -731,8 +686,16 @@ SDValue VectorLegalizer::ExpandStore(SDNode *N) {
}
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
- SDValue Tmp;
switch (Node->getOpcode()) {
+ case ISD::LOAD: {
+ std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ return;
+ }
+ case ISD::STORE:
+ Results.push_back(ExpandStore(Node));
+ return;
case ISD::MERGE_VALUES:
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
Results.push_back(Node->getOperand(i));
@@ -804,15 +767,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::FSHL:
case ISD::FSHR:
- if (TLI.expandFunnelShift(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::ROTL:
case ISD::ROTR:
- if (TLI.expandROT(Node, false /*AllowVectorOps*/, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 2695ed36991c..3d5c4c5b1cae 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -168,10 +168,9 @@ void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
NumNodesSolelyBlocking.resize(SUnits->size(), 0);
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
- SUnit *SU = &(*SUnits)[i];
- initNumRegDefsLeft(SU);
- SU->NodeQueueId = 0;
+ for (SUnit &SU : *SUnits) {
+ initNumRegDefsLeft(&SU);
+ SU.NodeQueueId = 0;
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 84e6d2a16422..aec2cf38b400 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -442,33 +442,32 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
bool UnitLatencies = forceUnitLatencies();
// Pass 2: add the preds, succs, etc.
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
- SUnit *SU = &SUnits[su];
- SDNode *MainNode = SU->getNode();
+ for (SUnit &SU : SUnits) {
+ SDNode *MainNode = SU.getNode();
if (MainNode->isMachineOpcode()) {
unsigned Opc = MainNode->getMachineOpcode();
const MCInstrDesc &MCID = TII->get(Opc);
for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
- SU->isTwoAddress = true;
+ SU.isTwoAddress = true;
break;
}
}
if (MCID.isCommutable())
- SU->isCommutable = true;
+ SU.isCommutable = true;
}
// Find all predecessors and successors of the group.
- for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode() &&
TII->get(N->getMachineOpcode()).getImplicitDefs()) {
- SU->hasPhysRegClobbers = true;
+ SU.hasPhysRegClobbers = true;
unsigned NumUsed = InstrEmitter::CountResults(N);
while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
--NumUsed; // Skip over unused values at the end.
if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
- SU->hasPhysRegDefs = true;
+ SU.hasPhysRegDefs = true;
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
@@ -477,7 +476,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (isPassiveNode(OpN)) continue; // Not scheduled.
SUnit *OpSU = &SUnits[OpN->getNodeId()];
assert(OpSU && "Node has no SUnit!");
- if (OpSU == SU) continue; // In the same group.
+ if (OpSU == &SU)
+ continue; // In the same group.
EVT OpVT = N->getOperand(i).getValueType();
assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
@@ -508,10 +508,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
Dep.setLatency(OpLatency);
if (!isChain && !UnitLatencies) {
computeOperandLatency(OpN, N, i, Dep);
- ST.adjustSchedDependency(OpSU, DefIdx, SU, i, Dep);
+ ST.adjustSchedDependency(OpSU, DefIdx, &SU, i, Dep);
}
- if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ if (!SU.addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
// Multiple register uses are combined in the same SUnit. For example,
// we could have a set of glued nodes with all their defs consumed by
// another set of glued nodes. Register pressure tracking sees this as
@@ -721,10 +721,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const {
///
void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
- unsigned Noops = 0;
- for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
- if (!Sequence[i])
- ++Noops;
+ unsigned Noops = llvm::count(Sequence, nullptr);
assert(Sequence.size() - Noops == ScheduledNodes &&
"The number of nodes scheduled doesn't match the expected number!");
}
@@ -911,8 +908,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
}
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- SUnit *SU = Sequence[i];
+ for (SUnit *SU : Sequence) {
if (!SU) {
// Null SUnit* is a noop.
TII->insertNoop(*Emitter.getBlock(), InsertPos);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 540a6e3efbe1..10940478010e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -169,11 +169,11 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
releaseSuccessors(&EntrySU);
// All leaves to AvailableQueue.
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ for (SUnit &SU : SUnits) {
// It is available if it has no predecessors.
- if (SUnits[i].Preds.empty()) {
- AvailableQueue->push(&SUnits[i]);
- SUnits[i].isAvailable = true;
+ if (SU.Preds.empty()) {
+ AvailableQueue->push(&SU);
+ SU.isAvailable = true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c282e03387dd..2ae0d4df7b77 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2499,7 +2499,8 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
/// sense to specify which elements are demanded or undefined, therefore
/// they are simply ignored.
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
- APInt &UndefElts, unsigned Depth) {
+ APInt &UndefElts, unsigned Depth) const {
+ unsigned Opcode = V.getOpcode();
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
@@ -2511,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
// Deal with some common cases here that work for both fixed and scalable
// vector types.
- switch (V.getOpcode()) {
+ switch (Opcode) {
case ISD::SPLAT_VECTOR:
UndefElts = V.getOperand(0).isUndef()
? APInt::getAllOnes(DemandedElts.getBitWidth())
@@ -2537,7 +2538,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1);
- }
+ default:
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
+ return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth);
+ break;
+}
// We don't support other cases than those above for scalable vectors at
// the moment.
@@ -2548,7 +2554,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
UndefElts = APInt::getZero(NumElts);
- switch (V.getOpcode()) {
+ switch (Opcode) {
case ISD::BUILD_VECTOR: {
SDValue Scl;
for (unsigned i = 0; i != NumElts; ++i) {
@@ -2600,13 +2606,30 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
break;
}
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ // Widen the demanded elts by the src element count.
+ SDValue Src = V.getOperand(0);
+ // We don't support scalable vectors at the moment.
+ if (Src.getValueType().isScalableVector())
+ return false;
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt UndefSrcElts;
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
+ UndefElts = UndefSrcElts.truncOrSelf(NumElts);
+ return true;
+ }
+ break;
+ }
}
return false;
}
/// Helper wrapper to main isSplatValue function.
-bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
+bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const {
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
@@ -5291,9 +5314,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
- // Handle the case of two scalars.
+ // Handle binops special cases.
if (NumOps == 2) {
- // TODO: Move foldConstantFPMath here?
+ if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1]))
+ return CFP;
if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
@@ -5463,10 +5487,11 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
// should. That will require dealing with a potentially non-default
// rounding mode, checking the "opStatus" return value from the APFloat
// math calculations, and possibly other variations.
- auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
- auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false);
+ ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false);
if (N1CFP && N2CFP) {
- APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
+ APFloat C1 = N1CFP->getValueAPF(); // make copy
+ const APFloat &C2 = N2CFP->getValueAPF();
switch (Opcode) {
case ISD::FADD:
C1.add(C2, APFloat::rmNearestTiesToEven);
@@ -5486,6 +5511,14 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
case ISD::FCOPYSIGN:
C1.copySign(C2);
return getConstantFP(C1, DL, VT);
+ case ISD::FMINNUM:
+ return getConstantFP(minnum(C1, C2), DL, VT);
+ case ISD::FMAXNUM:
+ return getConstantFP(maxnum(C1, C2), DL, VT);
+ case ISD::FMINIMUM:
+ return getConstantFP(minimum(C1, C2), DL, VT);
+ case ISD::FMAXIMUM:
+ return getConstantFP(maximum(C1, C2), DL, VT);
default: break;
}
}
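A hedged aside on the four new cases: FMINNUM/FMAXNUM fold with IEEE minNum/maxNum semantics (a quiet NaN operand is ignored), while FMINIMUM/FMAXIMUM propagate NaN, so the folded constants can differ for the same operands. In plain C++, std::fmin models the first flavour:

  #include <cassert>
  #include <cmath>
  #include <limits>

  int main() {
    double QNaN = std::numeric_limits<double>::quiet_NaN();
    // FMINNUM-style folding (IEEE minNum): the quiet NaN operand is ignored.
    assert(std::fmin(2.0, QNaN) == 2.0);
    // FMINIMUM-style folding would instead produce NaN for the same operands,
    // which is why the four opcodes get separate cases above.
    return 0;
  }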
@@ -5502,8 +5535,9 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
switch (Opcode) {
case ISD::FSUB:
// -0.0 - undef --> undef (consistent with "fneg undef")
- if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef())
- return getUNDEF(VT);
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true))
+ if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef())
+ return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::FADD:
@@ -5962,9 +5996,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
return SV;
- if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
- return V;
-
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
if (TLI->isCommutativeBinOp(Opcode)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7726a0007e44..63cd723cf6da 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1036,7 +1036,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
AA = aa;
GFI = gfi;
LibInfo = li;
- DL = &DAG.getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
@@ -1626,6 +1625,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
return getValue(Equiv->getGlobalValue());
+ if (const auto *NC = dyn_cast<NoCFIValue>(C))
+ return getValue(NC->getGlobalValue());
+
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
@@ -1921,8 +1923,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
- SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- DemoteReg, PtrValueVTs[0]);
+ SDValue RetPtr =
+ DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs, MemVTs;
@@ -2657,7 +2659,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
- Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+ Align Align =
+ DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
@@ -3058,14 +3061,14 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
MachineBasicBlock *Last) {
// Update JTCases.
- for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
- if (SL->JTCases[i].first.HeaderBB == First)
- SL->JTCases[i].first.HeaderBB = Last;
+ for (JumpTableBlock &JTB : SL->JTCases)
+ if (JTB.first.HeaderBB == First)
+ JTB.first.HeaderBB = Last;
// Update BitTestCases.
- for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
- if (SL->BitTestCases[i].Parent == First)
- SL->BitTestCases[i].Parent = Last;
+ for (BitTestBlock &BTB : SL->BitTestCases)
+ if (BTB.Parent == First)
+ BTB.Parent = Last;
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
@@ -3111,6 +3114,8 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
SDValue Op = getValue(I.getOperand(0));
SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
@@ -3881,7 +3886,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
- uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
+ uint64_t Offset =
+ DAG.getDataLayout().getStructLayout(StTy)->getElementOffset(Field);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
@@ -3898,7 +3904,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
- TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ TypeSize ElementSize =
+ DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
@@ -4788,7 +4795,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
// Use TargetConstant instead of a regular constant for immarg.
- EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
@@ -6571,7 +6578,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
} else {
EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
- Align Align = DL->getPrefTypeAlign(Global->getType());
+ Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
@@ -7127,12 +7134,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
SDValue VectorInduction = DAG.getNode(
- ISD::UADDO, sdl, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
- SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction.getValue(0),
+ ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep);
+ SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction,
VectorTripCount, ISD::CondCode::SETULT);
- setValue(&I, DAG.getNode(ISD::AND, sdl, CCVT,
- DAG.getNOT(sdl, VectorInduction.getValue(1), CCVT),
- SetCC));
+ setValue(&I, SetCC);
return;
}
case Intrinsic::experimental_vector_insert: {
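A scalar model of why the saturating add suffices for the active-lane-mask lowering above (function and variable names are invented; the real code operates on vectors): once the lane index addition saturates, the unsigned less-than trip-count compare already yields false, which is exactly what the removed overflow mask used to enforce:

  #include <cstdint>

  // Saturating 32-bit unsigned add: clamp to UINT32_MAX on wrap-around.
  static uint32_t uaddsat32(uint32_t a, uint32_t b) {
    uint32_t s = a + b;
    return s < a ? UINT32_MAX : s;
  }

  bool lane_active(uint32_t base, uint32_t step, uint32_t trip_count) {
    // Old form: (base + step) u< trip_count, ANDed with "no unsigned overflow".
    // New form: a saturated sum can never be u< trip_count, so one compare is enough.
    return uaddsat32(base, step) < trip_count;
  }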
@@ -7317,32 +7322,26 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
SmallVector<SDValue, 7> &OpValues,
- bool isGather) {
+ bool IsGather) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(0);
- MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
SDValue LD;
bool AddToChain = true;
- if (!isGather) {
+ if (!IsGather) {
// Do not serialize variable-length loads of constant memory with
// anything.
- MemoryLocation ML;
- if (VT.isScalableVector())
- ML = MemoryLocation::getAfter(PtrOperand);
- else
- ML = MemoryLocation(
- PtrOperand,
- LocationSize::precise(
- DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())),
- AAInfo);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO, false /*IsExpanding */);
} else {
@@ -7380,18 +7379,20 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
SmallVector<SDValue, 7> &OpValues,
- bool isScatter) {
+ bool IsScatter) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
- MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
SDValue ST;
- if (!isScatter) {
+ if (!IsScatter) {
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
ST =
DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1],
OpValues[2], OpValues[3], MMO, false /* IsTruncating */);
@@ -7690,8 +7691,9 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
- if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
- const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
+ if (const Constant *LoadCst =
+ ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+ LoadTy, Builder.DAG.getDataLayout()))
return Builder.getValue(LoadCst);
}
@@ -9646,8 +9648,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// We push in swifterror return as the last element of CLI.Ins.
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- if (Args[i].IsSwiftError) {
+ for (const ArgListEntry &Arg : Args) {
+ if (Arg.IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
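
The get.active.lane.mask hunk above replaces the UADDO-based lowering (wide add, then AND the compare with the negated carry) with a single UADDSAT followed by an unsigned compare. The two are equivalent because a lane whose index would wrap instead saturates to the maximum value, which can never be below the trip count. A minimal standalone sketch of that equivalence, one scalar lane at a time; the helper names and test values are illustrative only, not SelectionDAG code:

    // Illustrative model of the two lowerings; not LLVM API code.
    #include <cassert>
    #include <cstdint>
    #include <limits>

    // Old lowering: wide add, then AND the range check with the negated carry.
    static bool laneActiveOverflow(uint64_t Index, uint64_t Step, uint64_t Trip) {
      uint64_t Sum = Index + Step;        // UADDO value result (wraps)
      bool Carry = Sum < Index;           // UADDO overflow result
      return !Carry && Sum < Trip;        // SETULT, NOT, AND
    }

    // New lowering: saturating add, then a single unsigned compare.
    static bool laneActiveSaturating(uint64_t Index, uint64_t Step, uint64_t Trip) {
      const uint64_t Max = std::numeric_limits<uint64_t>::max();
      uint64_t Sum = Index > Max - Step ? Max : Index + Step; // UADDSAT
      return Sum < Trip;                                      // SETULT
    }

    int main() {
      for (uint64_t Index : {uint64_t(0), uint64_t(7), ~uint64_t(0) - 2})
        for (uint64_t Step : {uint64_t(0), uint64_t(1), uint64_t(5)})
          for (uint64_t Trip : {uint64_t(0), uint64_t(4), uint64_t(9)})
            assert(laneActiveOverflow(Index, Step, Trip) ==
                   laneActiveSaturating(Index, Step, Trip));
      return 0;
    }
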
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index d6122aa0a739..ea48042a5dcf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -190,7 +190,6 @@ public:
static const unsigned LowestSDNodeOrder = 1;
SelectionDAG &DAG;
- const DataLayout *DL = nullptr;
AAResults *AA = nullptr;
const TargetLibraryInfo *LibInfo;
@@ -568,9 +567,9 @@ private:
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues, bool isGather);
+ SmallVector<SDValue, 7> &OpValues, bool IsGather);
void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues, bool isScatter);
+ SmallVector<SDValue, 7> &OpValues, bool IsScatter);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c7e37cf8ca14..77e11b364588 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -297,7 +297,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
#ifndef NDEBUG
dbgs() << "If a target marks an instruction with "
"'usesCustomInserter', it must implement "
- "TargetLowering::EmitInstrWithCustomInserter!";
+ "TargetLowering::EmitInstrWithCustomInserter!\n";
#endif
llvm_unreachable(nullptr);
}
@@ -1784,27 +1784,25 @@ SelectionDAGISel::FinishBasicBlock() {
}
// Update PHI Nodes
- for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
- pi != pe; ++pi) {
- MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ for (const std::pair<MachineInstr *, unsigned> &P :
+ FuncInfo->PHINodesToUpdate) {
+ MachineInstrBuilder PHI(*MF, P.first);
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// This is "default" BB. We have two jumps to it. From "header" BB and
// from last "case" BB, unless the latter was skipped.
if (PHIBB == BTB.Default) {
- PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent);
+ PHI.addReg(P.second).addMBB(BTB.Parent);
if (!BTB.ContiguousRange) {
- PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
- .addMBB(BTB.Cases.back().ThisBB);
+ PHI.addReg(P.second).addMBB(BTB.Cases.back().ThisBB);
}
}
// One of "cases" BB.
- for (unsigned j = 0, ej = BTB.Cases.size();
- j != ej; ++j) {
- MachineBasicBlock* cBB = BTB.Cases[j].ThisBB;
+ for (const SwitchCG::BitTestCase &BT : BTB.Cases) {
+ MachineBasicBlock* cBB = BT.ThisBB;
if (cBB->isSuccessor(PHIBB))
- PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB);
+ PHI.addReg(P.second).addMBB(cBB);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 737695b5eabe..e6b06ab93d6b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3136,6 +3136,19 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return false;
}
+bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isSplatValue if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
@@ -4853,13 +4866,9 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
}
// Now select chosen alternative in each constraint.
- for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
- cIndex != eIndex; ++cIndex) {
- AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
- if (cInfo.Type == InlineAsm::isClobber)
- continue;
- cInfo.selectAlternative(bestMAIndex);
- }
+ for (AsmOperandInfo &cInfo : ConstraintOperands)
+ if (cInfo.Type != InlineAsm::isClobber)
+ cInfo.selectAlternative(bestMAIndex);
}
}
@@ -4927,9 +4936,9 @@ TargetLowering::ConstraintWeight
ConstraintWeight BestWeight = CW_Invalid;
// Loop over the options, keeping track of the most general one.
- for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ for (const std::string &rCode : *rCodes) {
ConstraintWeight weight =
- getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ getSingleConstraintMatchWeight(info, rCode.c_str());
if (weight > BestWeight)
BestWeight = weight;
}
@@ -6550,15 +6559,15 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
true);
}
-bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandFunnelShift(SDNode *Node,
+ SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
- return false;
+ return SDValue();
SDValue X = Node->getOperand(0);
SDValue Y = Node->getOperand(1);
@@ -6592,8 +6601,7 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
}
Z = DAG.getNOT(DL, Z, ShVT);
}
- Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
- return true;
+ return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
}
SDValue ShX, ShY;
@@ -6633,13 +6641,12 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
}
}
- Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
- return true;
+ return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
// TODO: Merge with expandFunnelShift.
-bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
- SDValue &Result, SelectionDAG &DAG) const {
+SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
+ SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool IsLeft = Node->getOpcode() == ISD::ROTL;
@@ -6650,12 +6657,12 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
EVT ShVT = Op1.getValueType();
SDValue Zero = DAG.getConstant(0, DL, ShVT);
- // If a rotate in the other direction is supported, use it.
+ // If a rotate in the other direction is more supported, use it.
unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
- if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
+ if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
+ isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
- Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
- return true;
+ return DAG.getNode(RevRot, DL, VT, Op0, Sub);
}
if (!AllowVectorOps && VT.isVector() &&
@@ -6664,7 +6671,7 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
- return false;
+ return SDValue();
unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
@@ -6690,8 +6697,7 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
HsVal =
DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
}
- Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
- return true;
+ return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
@@ -8048,7 +8054,8 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(Node);
- SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
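
The reverse-rotate path added to expandROT above now only fires when the node's own rotate is not legal but the opposite rotate is, and it relies on the identity rotl(x, s) == rotr(x, 0 - s) for power-of-two element widths, since shift amounts are then taken modulo the width. A small self-contained sketch of that identity in plain C++ (illustrative 32-bit width; not TargetLowering code):

    // Illustrative check of the rotate identity; not LLVM API code.
    #include <cassert>
    #include <cstdint>

    static uint32_t rotl32(uint32_t X, uint32_t S) {
      S &= 31;                                     // amount is taken modulo the width
      return S ? (X << S) | (X >> (32 - S)) : X;
    }

    static uint32_t rotr32(uint32_t X, uint32_t S) {
      S &= 31;
      return S ? (X >> S) | (X << (32 - S)) : X;
    }

    int main() {
      uint32_t X = 0x12345678u;
      for (uint32_t S = 0; S < 64; ++S)
        assert(rotl32(X, S) == rotr32(X, 0u - S)); // negate the amount, flip direction
      return 0;
    }
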
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 86b559fd6413..43a54ce33bf0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -162,8 +162,8 @@ Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
// doInitialization creates the generic version of this type.
std::vector<Type *> EltTys;
EltTys.push_back(StackEntryTy);
- for (size_t I = 0; I != Roots.size(); I++)
- EltTys.push_back(Roots[I].second->getAllocatedType());
+ for (const std::pair<CallInst *, AllocaInst *> &Root : Roots)
+ EltTys.push_back(Root.second->getAllocatedType());
return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str());
}
@@ -240,8 +240,8 @@ void ShadowStackGCLowering::CollectRoots(Function &F) {
SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots;
for (BasicBlock &BB : F)
- for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;)
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ for (Instruction &I : BB)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I))
if (Function *F = CI->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::gcroot) {
std::pair<CallInst *, AllocaInst *> Pair = std::make_pair(
@@ -377,9 +377,9 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// Delete the original allocas (which are no longer used) and the intrinsic
// calls (which are no longer valid). Doing this last avoids invalidating
// iterators.
- for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
- Roots[I].first->eraseFromParent();
- Roots[I].second->eraseFromParent();
+ for (std::pair<CallInst *, AllocaInst *> &Root : Roots) {
+ Root.first->eraseFromParent();
+ Root.second->eraseFromParent();
}
Roots.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 5ccfacfc26dc..3640296adbca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -131,15 +131,15 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
bool HasStackMap = false;
// Reverse iterate over all instructions and add the current live register
// set to an instruction if we encounter a patchpoint instruction.
- for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
- if (I->getOpcode() == TargetOpcode::PATCHPOINT) {
- addLiveOutSetToMI(MF, *I);
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ addLiveOutSetToMI(MF, MI);
HasChanged = true;
HasStackMap = true;
++NumStackMaps;
}
- LLVM_DEBUG(dbgs() << " " << LiveRegs << " " << *I);
- LiveRegs.stepBackward(*I);
+ LLVM_DEBUG(dbgs() << " " << LiveRegs << " " << MI);
+ LiveRegs.stepBackward(MI);
}
++NumBBsVisited;
if (!HasStackMap)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index 7445f77c955d..6765fd274686 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -162,7 +162,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
}
bool StackProtector::HasAddressTaken(const Instruction *AI,
- uint64_t AllocSize) {
+ TypeSize AllocSize) {
const DataLayout &DL = M->getDataLayout();
for (const User *U : AI->users()) {
const auto *I = cast<Instruction>(U);
@@ -170,7 +170,8 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// the bounds of the allocated object.
Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
if (MemLoc.hasValue() && MemLoc->Size.hasValue() &&
- MemLoc->Size.getValue() > AllocSize)
+ !TypeSize::isKnownGE(AllocSize,
+ TypeSize::getFixed(MemLoc->Size.getValue())))
return true;
switch (I->getOpcode()) {
case Instruction::Store:
@@ -203,13 +204,19 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// would use it could also be out-of-bounds meaning stack protection is
// required.
const GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
- unsigned TypeSize = DL.getIndexTypeSizeInBits(I->getType());
- APInt Offset(TypeSize, 0);
- APInt MaxOffset(TypeSize, AllocSize);
- if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.ugt(MaxOffset))
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(I->getType());
+ APInt Offset(IndexSize, 0);
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ return true;
+ TypeSize OffsetSize = TypeSize::Fixed(Offset.getLimitedValue());
+ if (!TypeSize::isKnownGT(AllocSize, OffsetSize))
return true;
// Adjust AllocSize to be the space remaining after this offset.
- if (HasAddressTaken(I, AllocSize - Offset.getLimitedValue()))
+ // We can't subtract a fixed size from a scalable one, so in that case
+ // assume the scalable value is of minimum size.
+ TypeSize NewAllocSize =
+ TypeSize::Fixed(AllocSize.getKnownMinValue()) - OffsetSize;
+ if (HasAddressTaken(I, NewAllocSize))
return true;
break;
}
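
The StackProtector hunks above change AllocSize to a TypeSize and replace plain integer comparisons with TypeSize::isKnownGT/isKnownGE, so an access into a scalable allocation only counts as in bounds when that holds for every possible vscale. A rough standalone model of that conservative comparison (an illustration only, not llvm::TypeSize; the real class handles more cases):

    // Illustrative model of a conservative "known >=" on possibly scalable sizes.
    #include <cassert>
    #include <cstdint>

    struct Size {
      uint64_t MinVal; // known minimum number of bytes
      bool Scalable;   // true => real size is MinVal * vscale, with vscale >= 1
    };

    static bool isKnownGE(Size LHS, Size RHS) {
      if (LHS.Scalable == RHS.Scalable)
        return LHS.MinVal >= RHS.MinVal;  // common scaling factor cancels out
      if (LHS.Scalable && !RHS.Scalable)
        return LHS.MinVal >= RHS.MinVal;  // vscale >= 1 can only make LHS larger
      return false;                       // fixed LHS vs scalable RHS: not provable here
    }

    int main() {
      Size FixedAlloc{16, false}, ScalableAlloc{16, true}, Access{32, false};
      // A 32-byte access is not provably within a 16-byte fixed allocation...
      assert(!isKnownGE(FixedAlloc, Access));
      // ...nor within a scalable allocation whose known minimum is 16 bytes.
      assert(!isKnownGE(ScalableAlloc, Access));
      return 0;
    }
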
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index f49ba5ccd447..17e6f51d0899 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -325,8 +325,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Color spill slot intervals:\n");
bool Changed = false;
- for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
- LiveInterval *li = SSIntervals[i];
+ for (LiveInterval *li : SSIntervals) {
int SS = Register::stackSlot2Index(li->reg());
int NewSS = ColorSlot(li);
assert(NewSS >= 0 && "Stack coloring failed?");
@@ -338,8 +337,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
}
LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n");
- for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
- LiveInterval *li = SSIntervals[i];
+ for (LiveInterval *li : SSIntervals) {
int SS = Register::stackSlot2Index(li->reg());
li->setWeight(SlotWeights[SS]);
}
@@ -347,8 +345,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
llvm::stable_sort(SSIntervals, IntervalSorter());
#ifndef NDEBUG
- for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
- LLVM_DEBUG(SSIntervals[i]->dump());
+ for (LiveInterval *li : SSIntervals)
+ LLVM_DEBUG(li->dump());
LLVM_DEBUG(dbgs() << '\n');
#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index 54fc6ee45d00..68a7b80d6146 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -207,35 +207,34 @@ bool TailDuplicator::tailDuplicateAndUpdate(
// Add the new vregs as available values.
DenseMap<Register, AvailableValsTy>::iterator LI =
SSAUpdateVals.find(VReg);
- for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = LI->second[j].first;
- Register SrcReg = LI->second[j].second;
+ for (std::pair<MachineBasicBlock *, Register> &J : LI->second) {
+ MachineBasicBlock *SrcBB = J.first;
+ Register SrcReg = J.second;
SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
}
+ SmallVector<MachineOperand *> DebugUses;
// Rewrite uses that are outside of the original def's block.
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
- // Only remove instructions after loop, as DBG_VALUE_LISTs with multiple
- // uses of VReg may invalidate the use iterator when erased.
- SmallPtrSet<MachineInstr *, 4> InstrsToRemove;
- while (UI != MRI->use_end()) {
- MachineOperand &UseMO = *UI;
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(VReg))) {
MachineInstr *UseMI = UseMO.getParent();
- ++UI;
+ // Rewrite debug uses last so that they can take advantage of any
+ // register mappings introduced by other users in its BB, since we
+ // cannot create new register definitions specifically for the debug
+ // instruction (as debug instructions should not affect CodeGen).
if (UseMI->isDebugValue()) {
- // SSAUpdate can replace the use with an undef. That creates
- // a debug instruction that is a kill.
- // FIXME: Should it SSAUpdate job to delete debug instructions
- // instead of replacing the use with undef?
- InstrsToRemove.insert(UseMI);
+ DebugUses.push_back(&UseMO);
continue;
}
if (UseMI->getParent() == DefBB && !UseMI->isPHI())
continue;
SSAUpdate.RewriteUse(UseMO);
}
- for (auto *MI : InstrsToRemove)
- MI->eraseFromParent();
+ for (auto *UseMO : DebugUses) {
+ MachineInstr *UseMI = UseMO->getParent();
+ UseMO->setReg(
+ SSAUpdate.GetValueInMiddleOfBlock(UseMI->getParent(), true));
+ }
}
SSAUpdateVRs.clear();
@@ -511,8 +510,8 @@ void TailDuplicator::updateSuccessorsPHIs(
SSAUpdateVals.find(Reg);
if (LI != SSAUpdateVals.end()) {
// This register is defined in the tail block.
- for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = LI->second[j].first;
+ for (const std::pair<MachineBasicBlock *, Register> &J : LI->second) {
+ MachineBasicBlock *SrcBB = J.first;
// If we didn't duplicate a bb into a particular predecessor, we
            // might still have added an entry to SSAUpdateVals to correctly
            // recompute SSA. In that case, avoid adding a dummy extra argument
@@ -520,7 +519,7 @@ void TailDuplicator::updateSuccessorsPHIs(
if (!SrcBB->isSuccessor(SuccBB))
continue;
- Register SrcReg = LI->second[j].second;
+ Register SrcReg = J.second;
if (Idx != 0) {
MI.getOperand(Idx).setReg(SrcReg);
MI.getOperand(Idx + 1).setMBB(SrcBB);
@@ -531,8 +530,7 @@ void TailDuplicator::updateSuccessorsPHIs(
}
} else {
// Live in tail block, must also be live in predecessors.
- for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = TDBBs[j];
+ for (MachineBasicBlock *SrcBB : TDBBs) {
if (Idx != 0) {
MI.getOperand(Idx).setReg(Reg);
MI.getOperand(Idx + 1).setMBB(SrcBB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 5119dac36713..3f22cc4289f2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -436,7 +436,7 @@ MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const {
assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated");
MachineFunction &MF = *MBB.getParent();
- return MF.CloneMachineInstrBundle(MBB, InsertBefore, Orig);
+ return MF.cloneMachineInstrBundle(MBB, InsertBefore, Orig);
}
// If the COPY instruction in MI can be folded to a stack operation, return
@@ -1418,3 +1418,16 @@ void TargetInstrInfo::mergeOutliningCandidateAttributes(
}))
F.addFnAttr(Attribute::NoUnwind);
}
+
+bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
+  // Some instrumentations create a special TargetOpcode at the start which
+  // expands to a special code sequence that must be present.
+ auto First = MBB.getFirstNonDebugInstr();
+ if (First != MBB.end() &&
+ (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
+ First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER))
+ return false;
+
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index c4043dcf0765..f0d342d26cc4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1187,7 +1187,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// all stack slots), but we need to handle the different type of stackmap
// operands and memory effects here.
- if (!llvm::any_of(MI->operands(),
+ if (llvm::none_of(MI->operands(),
[](MachineOperand &Operand) { return Operand.isFI(); }))
return MBB;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index f4bb71535f7f..f5cb518fce3e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -248,8 +248,8 @@ static void getAllocatableSetForRC(const MachineFunction &MF,
const TargetRegisterClass *RC, BitVector &R){
assert(RC->isAllocatable() && "invalid for nonallocatable sets");
ArrayRef<MCPhysReg> Order = RC->getRawAllocationOrder(MF);
- for (unsigned i = 0; i != Order.size(); ++i)
- R.set(Order[i]);
+ for (MCPhysReg PR : Order)
+ R.set(PR);
}
BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index c9a19948ff2f..3426a03b6083 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -144,23 +144,22 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
}
// Actually remove the blocks now.
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ for (MachineBasicBlock *BB : DeadBlocks) {
// Remove any call site information for calls in the block.
- for (auto &I : DeadBlocks[i]->instrs())
+ for (auto &I : BB->instrs())
if (I.shouldUpdateCallSiteInfo())
- DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I);
+ BB->getParent()->eraseCallSiteInfo(&I);
- DeadBlocks[i]->eraseFromParent();
+ BB->eraseFromParent();
}
// Cleanup PHI nodes.
- for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- MachineBasicBlock *BB = &*I;
+ for (MachineBasicBlock &BB : F) {
// Prune unneeded PHI entries.
- SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
- BB->pred_end());
- MachineBasicBlock::iterator phi = BB->begin();
- while (phi != BB->end() && phi->isPHI()) {
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB.pred_begin(),
+ BB.pred_end());
+ MachineBasicBlock::iterator phi = BB.begin();
+ while (phi != BB.end() && phi->isPHI()) {
for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
if (!preds.count(phi->getOperand(i).getMBB())) {
phi->RemoveOperand(i);
@@ -189,7 +188,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// insert a COPY instead of simply replacing the output
// with the input.
const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
- BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(),
+ BuildMI(BB, BB.getFirstNonPHI(), phi->getDebugLoc(),
TII->get(TargetOpcode::COPY), OutputReg)
.addReg(InputReg, getRegState(Input), InputSub);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
new file mode 100644
index 000000000000..cbc5d9ec169b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -0,0 +1,1009 @@
+//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/VLIWMachineScheduler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iomanip>
+#include <limits>
+#include <memory>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-scheduler"
+
+static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false));
+
+static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true));
+
+static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
+ cl::Hidden, cl::ZeroOrMore,
+ cl::init(1));
+
+// Check if the scheduler should penalize instructions that are available too
+// early due to a zero-latency dependence.
+static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true));
+
+// This value is used to determine if a register class is a high pressure set.
+// We compute the maximum number of registers needed and divide it by the total
+// available. Then, we compare the result to this value.
+static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
+ cl::init(0.75f),
+                                  cl::desc("High register pressure threshold."));
+
+VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
+ const TargetSchedModel *SM)
+ : TII(STI.getInstrInfo()), SchedModel(SM) {
+ ResourcesModel = createPacketizer(STI);
+
+ // This hard requirement could be relaxed,
+ // but for now do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ Packet.reserve(SchedModel->getIssueWidth());
+ Packet.clear();
+ ResourcesModel->clearResources();
+}
+
+void VLIWResourceModel::reset() {
+ Packet.clear();
+ ResourcesModel->clearResources();
+}
+
+VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
+
+/// Return true if there is a dependence between SUd and SUu.
+bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
+ if (SUd->Succs.size() == 0)
+ return false;
+
+ for (const auto &S : SUd->Succs) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order dependencies.
+ if (S.isCtrl())
+ continue;
+
+ if (S.getSUnit() == SUu && S.getLatency() > 0)
+ return true;
+ }
+ return false;
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+/// It is _not_ precise (stateful); it is more like
+/// another heuristic. Many corner cases are figured
+/// out empirically.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
+ if (!SU || !SU->getInstr())
+ return false;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(*SU->getInstr()))
+ return false;
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ break;
+ }
+
+ // Now see if there are no other dependencies to instructions already
+ // in the packet.
+ if (IsTop) {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(Packet[i], SU))
+ return false;
+ } else {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(SU, Packet[i]))
+ return false;
+ }
+ return true;
+}
+
+/// Keep track of available resources.
+bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
+ bool startNewCycle = false;
+ // Artificially reset state.
+ if (!SU) {
+ reset();
+ TotalPackets++;
+ return false;
+ }
+ // If this SU does not fit in the packet or the packet is now full
+ // start a new one.
+ if (!isResourceAvailable(SU, IsTop) ||
+ Packet.size() >= SchedModel->getIssueWidth()) {
+ reset();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ ResourcesModel->reserveResources(*SU->getInstr());
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ break;
+ }
+ Packet.push_back(SU);
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
+ LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+ LLVM_DEBUG(Packet[i]->getInstr()->dump());
+ }
+#endif
+
+ return startNewCycle;
+}
+
+DFAPacketizer *
+VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
+ return STI.getInstrInfo()->CreateTargetScheduleState(STI);
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+void VLIWMachineScheduler::schedule() {
+ LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
+ << printMBBReference(*BB) << " " << BB->getName()
+ << " in_func " << BB->getParent()->getName()
+ << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
+
+ buildDAGWithRegPressure();
+
+ Topo.InitDAGTopologicalSorting();
+
+ // Postprocess the DAG to add platform-specific artificial dependencies.
+ postprocessDAG();
+
+ SmallVector<SUnit *, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ SchedImpl->initialize(this);
+
+ LLVM_DEBUG({
+ unsigned maxH = 0;
+ for (const SUnit &SU : SUnits)
+ if (SU.getHeight() > maxH)
+ maxH = SU.getHeight();
+ dbgs() << "Max Height " << maxH << "\n";
+ });
+ LLVM_DEBUG({
+ unsigned maxD = 0;
+ for (const SUnit &SU : SUnits)
+ if (SU.getDepth() > maxD)
+ maxD = SU.getDepth();
+ dbgs() << "Max Depth " << maxD << "\n";
+ });
+ LLVM_DEBUG(dump());
+ if (ViewMISchedDAGs)
+ viewGraph();
+
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (true) {
+ LLVM_DEBUG(
+ dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU)
+ break;
+
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule for "
+ << printMBBReference(*begin()->getParent()) << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = static_cast<VLIWMachineScheduler *>(dag);
+ SchedModel = DAG->getSchedModel();
+
+ Top.init(DAG, SchedModel);
+ Bot.init(DAG, SchedModel);
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
+ const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ delete Top.HazardRec;
+ delete Bot.HazardRec;
+ Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ delete Top.ResourceModel;
+ delete Bot.ResourceModel;
+ Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
+ Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
+
+ const std::vector<unsigned> &MaxPressure =
+ DAG->getRegPressure().MaxSetPressure;
+ HighPressureSets.assign(MaxPressure.size(), 0);
+ for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
+ unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
+ HighPressureSets[i] =
+ ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
+ }
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
+ const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
+ return new VLIWResourceModel(STI, SchedModel);
+}
+
+void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
+ for (const SDep &PI : SU->Preds) {
+ unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
+ unsigned MinLatency = PI.getLatency();
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
+ }
+
+ if (!SU->isScheduled)
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
+ ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned MinLatency = I->getLatency();
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+ }
+
+ if (!SU->isScheduled)
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
+ delete ResourceModel;
+ delete HazardRec;
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if (IssueCount + uops > SchedModel->getIssueWidth())
+ return true;
+
+ return false;
+}
+
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
+ SUnit *SU, unsigned ReadyCycle) {
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+  if (ReadyCycle > CurrCycle || checkHazard(SU))
+    Pending.push(SU);
+ else
+ Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
+ unsigned Width = SchedModel->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
+ "MinReadyCycle uninitialized");
+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ } else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+
+ LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
+ bool startNewCycle = false;
+
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+
+ // Update DFA model.
+ startNewCycle = ResourceModel->reserveResources(SU, isTop());
+
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+ if (startNewCycle) {
+ LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ } else
+ LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
+ << CurrCycle << '\n');
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin() + i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin() + i);
+ --i;
+ --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
+SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ auto AdvanceCycle = [this]() {
+ if (Available.empty())
+ return true;
+ if (Available.size() == 1 && Pending.size() > 0)
+ return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
+ getWeakLeft(*Available.begin(), isTop()) != 0;
+ return false;
+ };
+ for (unsigned i = 0; AdvanceCycle(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard");
+ (void)i;
+ ResourceModel->reserveResources(nullptr, isTop());
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return nullptr;
+}
+
+#ifndef NDEBUG
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+ const ReadyQueue &Q, SUnit *SU,
+ int Cost, PressureChange P) {
+ dbgs() << Label << " " << Q.getName() << " ";
+ if (P.isValid())
+ dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
+ << P.getUnitInc() << " ";
+ else
+ dbgs() << " ";
+ dbgs() << "cost(" << Cost << ")\t";
+ DAG->dumpNode(*SU);
+}
+
+// Very detailed queue dump, to be used with higher verbosity levels.
+void ConvergingVLIWScheduler::readyQueueVerboseDump(
+ const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
+ ReadyQueue &Q) {
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+
+ dbgs() << ">>> " << Q.getName() << "\n";
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ std::stringstream dbgstr;
+ dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
+ dbgs() << dbgstr.str();
+ SchedulingCost(Q, *I, Candidate, RPDelta, true);
+ dbgs() << "\t";
+ (*I)->getInstr()->dump();
+ }
+ dbgs() << "\n";
+}
+#endif
+
+/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
+ if (SU->NumPredsLeft == 0)
+ return false;
+
+ for (auto &Pred : SU->Preds) {
+ // We found an available, but not scheduled, predecessor.
+ if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
+ return false;
+ }
+
+ return true;
+}
+
+/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
+ if (SU->NumSuccsLeft == 0)
+ return false;
+
+ for (auto &Succ : SU->Succs) {
+ // We found an available, but not scheduled, successor.
+ if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
+ return false;
+ }
+ return true;
+}
+
+/// Check if the instruction changes the register pressure of a register in the
+/// high pressure set. The function returns a negative value if the pressure
+/// decreases and a positive value if the pressure increases. If the instruction
+/// doesn't use a high pressure register or doesn't change the register
+/// pressure, then return 0.
+int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
+ PressureDiff &PD = DAG->getPressureDiff(SU);
+ for (auto &P : PD) {
+ if (!P.isValid())
+ continue;
+    // The pressure differences are computed bottom-up, so the comparison for
+ // an increase is positive in the bottom direction, but negative in the
+ // top-down direction.
+ if (HighPressureSets[P.getPSet()])
+ return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
+ }
+ return 0;
+}
+
+/// Single point to compute overall scheduling cost.
+/// TODO: More heuristics will be used soon.
+int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
+ SchedCandidate &Candidate,
+ RegPressureDelta &Delta,
+ bool verbose) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (!SU || SU->isScheduled)
+ return ResCount;
+
+ LLVM_DEBUG(if (verbose) dbgs()
+ << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
+ // Forced priority is high.
+ if (SU->isScheduleHigh) {
+ ResCount += PriorityOne;
+ LLVM_DEBUG(dbgs() << "H|");
+ }
+
+ unsigned IsAvailableAmt = 0;
+ // Critical path first.
+ if (Q.getID() == TopQID) {
+ if (Top.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getHeight() * ScaleTwo);
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
+ dbgs() << dbgstr.str();
+ });
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Top.ResourceModel->isResourceAvailable(SU, true)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
+ } else {
+ if (Bot.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getDepth() * ScaleTwo);
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
+ dbgs() << dbgstr.str();
+ });
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
+ }
+
+ unsigned NumNodesBlocking = 0;
+ if (Q.getID() == TopQID) {
+ // How many SUs does it block from scheduling?
+ // Look at all of the successors of this node.
+ // Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ if (Top.isLatencyBound(SU))
+ for (const SDep &SI : SU->Succs)
+ if (isSingleUnscheduledPred(SI.getSUnit(), SU))
+ ++NumNodesBlocking;
+ } else {
+ // How many unscheduled predecessors block this node?
+ if (Bot.isLatencyBound(SU))
+ for (const SDep &PI : SU->Preds)
+ if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
+ ++NumNodesBlocking;
+ }
+ ResCount += (NumNodesBlocking * ScaleTwo);
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
+ dbgs() << dbgstr.str();
+ });
+
+ // Factor in reg pressure as a heuristic.
+ if (!IgnoreBBRegPressure) {
+ // Decrease priority by the amount that register pressure exceeds the limit.
+ ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
+ // Decrease priority if register pressure exceeds the limit.
+ ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
+ // Decrease priority slightly if register pressure would increase over the
+ // current maximum.
+ ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
+ // If there are register pressure issues, then we remove the value added for
+ // the instruction being available. The rationale is that we really don't
+ // want to schedule an instruction that causes a spill.
+ if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
+ (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
+ Delta.CurrentMax.getUnitInc()))
+ ResCount -= IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) {
+ dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
+ << Delta.CriticalMax.getUnitInc() << "/"
+ << Delta.CurrentMax.getUnitInc() << ")|";
+ });
+ }
+
+ // Give preference to a zero latency instruction if the dependent
+ // instruction is in the current packet.
+ if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
+ for (const SDep &PI : SU->Preds) {
+ if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
+ PI.getLatency() == 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount += PriorityThree;
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
+ for (const SDep &SI : SU->Succs) {
+ if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
+ SI.getLatency() == 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount += PriorityThree;
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ }
+
+ // If the instruction has a non-zero latency dependence with an instruction in
+ // the current packet, then it should not be scheduled yet. The case occurs
+ // when the dependent instruction is scheduled in a new packet, so the
+ // scheduler updates the current cycle and pending instructions become
+ // available.
+ if (CheckEarlyAvail) {
+ if (Q.getID() == TopQID) {
+ for (const auto &PI : SU->Preds) {
+ if (PI.getLatency() > 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount -= PriorityOne;
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ } else {
+ for (const auto &SI : SU->Succs) {
+ if (SI.getLatency() > 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount -= PriorityOne;
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "Total " << std::setw(4) << ResCount << ")";
+ dbgs() << dbgstr.str();
+ });
+
+ return ResCount;
+}
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingVLIWScheduler::CandResult
+ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate) {
+ ReadyQueue &Q = Zone.Available;
+ LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
+ readyQueueVerboseDump(RPTracker, Candidate, Q);
+ else Q.dump(););
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+
+ // BestSU remains NULL if no top candidates beat the best existing candidate.
+ CandResult FoundCandidate = NoCand;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
+
+ // Initialize the candidate if needed.
+ if (!Candidate.SU) {
+ LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+
+ // Choose node order for negative cost candidates. There is no good
+ // candidate in this case.
+ if (CurrentCost < 0 && Candidate.SCost < 0) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ }
+ continue;
+ }
+
+ // Best cost.
+ if (CurrentCost > Candidate.SCost) {
+ LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ continue;
+ }
+
+ // Choose an instruction that does not depend on an artificial edge.
+ unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
+ unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
+ if (CurrWeak != CandWeak) {
+ if (CurrWeak < CandWeak) {
+ LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = Weak;
+ }
+ continue;
+ }
+
+ if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
+ unsigned CurrSize, CandSize;
+ if (Q.getID() == TopQID) {
+ CurrSize = (*I)->Succs.size();
+ CandSize = Candidate.SU->Succs.size();
+ } else {
+ CurrSize = (*I)->Preds.size();
+ CandSize = Candidate.SU->Preds.size();
+ }
+ if (CurrSize > CandSize) {
+ LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ }
+ // Keep the old candidate if it's a better candidate. That is, don't use
+ // the subsequent tie breaker.
+ if (CurrSize != CandSize)
+ continue;
+ }
+
+ // Tie breaker.
+ // To avoid scheduling indeterminism, we need a tie breaker
+ // for the case when cost is identical for two nodes.
+ if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+ }
+
+ // Fall through to original instruction order.
+ // Only consider node order if Candidate was chosen from this Q.
+ if (FoundCandidate == NoCand)
+ continue;
+ }
+ return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ LLVM_DEBUG(dbgs() << "Picked only Top\n");
+ IsTopNode = true;
+ return SU;
+ }
+ SchedCandidate BotCand;
+ // Prefer bottom scheduling when heuristics are silent.
+ CandResult BotResult =
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotResult == SingleExcess || BotResult == SingleCritical) {
+    LLVM_DEBUG(dbgs() << "Preferred Bottom Node\n");
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+
+ if (TopResult == SingleExcess || TopResult == SingleCritical) {
+    LLVM_DEBUG(dbgs() << "Preferred Top Node\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure pick it.
+ if (BotResult == SingleMax) {
+    LLVM_DEBUG(dbgs() << "Preferred Bottom Node SingleMax\n");
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ if (TopResult == SingleMax) {
+    LLVM_DEBUG(dbgs() << "Preferred Top Node SingleMax\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ if (TopCand.SCost > BotCand.SCost) {
+    LLVM_DEBUG(dbgs() << "Preferred Top Node Cost\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate in node order.
+  LLVM_DEBUG(dbgs() << "Preferred Bottom in Node order\n");
+ IsTopNode = false;
+ return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+ (void)TopResult;
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate BotCand;
+ CandResult BotResult =
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+ (void)BotResult;
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidrectional(IsTopNode);
+ }
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
+ << reportPackets() << ")\n";
+ DAG->dumpNode(*SU));
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update its state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ Top.bumpNode(SU);
+ SU->TopReadyCycle = Top.CurrCycle;
+ } else {
+ Bot.bumpNode(SU);
+ SU->BotReadyCycle = Bot.CurrCycle;
+ }
+}
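
The strategy above implements the MachineSchedStrategy interface: the owning ScheduleDAGMI-derived scheduler repeatedly calls pickNode() to choose the next SUnit from the top or bottom boundary and then calls schedNode() so the strategy can advance its cycle bookkeeping. The sketch below is illustrative only; driveSchedule() is a hypothetical helper, the real loop lives in the generic machine scheduler (ScheduleDAGMI::schedule()), and only the two virtual hooks shown are taken from the code above.

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

// Hypothetical driver, for illustration only. It alternates
// pickNode()/schedNode() until the region is fully scheduled.
static void driveSchedule(MachineSchedStrategy &Strategy) {
  bool IsTopNode = false;
  // pickNode() returns nullptr once nothing is left to schedule.
  while (SUnit *SU = Strategy.pickNode(IsTopNode)) {
    // The DAG would move SU across the chosen boundary here, then let the
    // strategy bump the corresponding cycle and ready-cycle state.
    Strategy.schedNode(SU, IsTopNode);
  }
}
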
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index 4876b9e23717..0c42bef82005 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -201,9 +201,11 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::x86amx: return Type::getX86_AMXTy(Context);
case MVT::i64x8: return IntegerType::get(Context, 512);
case MVT::externref:
+ // pointer to opaque struct in addrspace(10)
return PointerType::get(StructType::create(Context), 10);
case MVT::funcref:
- return PointerType::get(StructType::create(Context), 20);
+ // pointer to i8 in addrspace(20)
+ return PointerType::get(Type::getInt8Ty(Context), 20);
case MVT::v1i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1);
case MVT::v2i1:
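
For context, getTypeForEVT() maps a machine value type to the IR type used when such a value has to be materialized in IR; the hunk above changes MVT::funcref from a pointer to an opaque struct to an i8 pointer, keeping the reference-type address spaces (10 for externref, 20 for funcref). The snippet below is a minimal sketch, not part of the change, and the helper name is hypothetical.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Hypothetical helper: inspect the IR types produced for the reference MVTs.
static void dumpRefTypeAddrSpaces(LLVMContext &Ctx) {
  Type *ExternRefTy = EVT(MVT::externref).getTypeForEVT(Ctx);
  Type *FuncRefTy = EVT(MVT::funcref).getTypeForEVT(Ctx);
  // externref: pointer to an opaque struct in addrspace(10).
  // funcref (with this change): pointer to i8 in addrspace(20).
  unsigned ExternAS = ExternRefTy->getPointerAddressSpace(); // expected 10
  unsigned FuncAS = FuncRefTy->getPointerAddressSpace();     // expected 20
  (void)ExternAS;
  (void)FuncAS;
}
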
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index 4564aa1c1278..d31183e46d65 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -573,9 +573,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
const auto *CatchSwitch = cast<CatchSwitchInst>(Pad);
int CatchState = -1, FollowerState = -1;
SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers());
- for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend();
- CBI != CBE; ++CBI, FollowerState = CatchState) {
- const BasicBlock *CatchBlock = *CBI;
+ for (const BasicBlock *CatchBlock : llvm::reverse(CatchBlocks)) {
// Create the entry for this catch with the appropriate handler
// properties.
const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI());
@@ -591,6 +589,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
Worklist.emplace_back(I, CatchState);
// Remember this catch's state.
FuncInfo.EHPadStateMap[Catch] = CatchState;
+ FollowerState = CatchState;
}
// Associate the catchswitch with the state of its first catch.
assert(CatchSwitch->getNumHandlers());
@@ -601,11 +600,9 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
// Step two: record the TryParentState of each state. For cleanuppads that
// don't have cleanuprets, we may need to infer this from their child pads,
// so visit pads in descendant-most to ancestor-most order.
- for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(),
- End = FuncInfo.ClrEHUnwindMap.rend();
- Entry != End; ++Entry) {
+ for (ClrEHUnwindMapEntry &Entry : llvm::reverse(FuncInfo.ClrEHUnwindMap)) {
const Instruction *Pad =
- Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI();
+ Entry.Handler.get<const BasicBlock *>()->getFirstNonPHI();
// For most pads, the TryParentState is the state associated with the
// unwind dest of exceptional exits from it.
const BasicBlock *UnwindDest;
@@ -615,7 +612,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
// that's not the unwind dest of exceptions escaping the catch. Those
// cases were already assigned a TryParentState in the first pass, so
// skip them.
- if (Entry->TryParentState != -1)
+ if (Entry.TryParentState != -1)
continue;
// Otherwise, get the unwind dest from the catchswitch.
UnwindDest = Catch->getCatchSwitch()->getUnwindDest();
@@ -692,7 +689,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()];
}
- Entry->TryParentState = UnwindDestState;
+ Entry.TryParentState = UnwindDestState;
}
// Step three: transfer information from pads to invokes.
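
The loops rewritten above use llvm::reverse() from STLExtras, which wraps a container in a reversed range so a range-based for visits elements from back to front, replacing explicit rbegin()/rend() iterator pairs. A minimal sketch of the idiom on a placeholder container (the function and data are illustrative, not taken from WinEHPrepare):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// Illustrative only: scan from back to front without spelling out
// rbegin()/rend(), mirroring the loops rewritten above.
static int lastNonNegative(const llvm::SmallVectorImpl<int> &Values) {
  for (int V : llvm::reverse(Values))
    if (V >= 0)
      return V;
  return -1;
}
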
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 11d1b309aa64..b66429d8a5bf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -226,6 +226,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::arm:
case Triple::ArchType::thumb:
case Triple::ArchType::aarch64:
+ case Triple::ArchType::hexagon:
case Triple::ArchType::mips:
case Triple::ArchType::mipsel:
case Triple::ArchType::mips64: