path: root/llvm/lib/CodeGen
author     Dimitry Andric <dim@FreeBSD.org>    2021-07-29 20:15:26 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2021-07-29 20:15:26 +0000
commit     344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree       f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/CodeGen
parent     b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
download   src-344a3780b2e33f6ca763666c380202b18aab72a3.tar.gz
           src-344a3780b2e33f6ca763666c380202b18aab72a3.zip
the upstream release/13.x branch was created.
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 55
-rw-r--r--  llvm/lib/CodeGen/Analysis.cpp | 33
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AIXException.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 23
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 318
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 24
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 109
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 117
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 26
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 95
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 50
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 119
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 31
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 181
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 301
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 21
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 72
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 8
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 243
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 43
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 38
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h | 9
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/WinException.h | 4
-rw-r--r--  llvm/lib/CodeGen/AtomicExpandPass.cpp | 204
-rw-r--r--  llvm/lib/CodeGen/BasicBlockSections.cpp | 43
-rw-r--r--  llvm/lib/CodeGen/BranchFolding.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/BuiltinGCs.cpp | 130
-rw-r--r--  llvm/lib/CodeGen/CFIInstrInserter.cpp | 26
-rw-r--r--  llvm/lib/CodeGen/CalcSpillWeights.cpp | 21
-rw-r--r--  llvm/lib/CodeGen/CodeGen.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 591
-rw-r--r--  llvm/lib/CodeGen/CommandFlags.cpp | 72
-rw-r--r--  llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 31
-rw-r--r--  llvm/lib/CodeGen/DFAPacketizer.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/DetectDeadLanes.cpp | 20
-rw-r--r--  llvm/lib/CodeGen/DwarfEHPrepare.cpp | 49
-rw-r--r--  llvm/lib/CodeGen/EHContGuardCatchret.cpp | 84
-rw-r--r--  llvm/lib/CodeGen/EarlyIfConversion.cpp | 63
-rw-r--r--  llvm/lib/CodeGen/EdgeBundles.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/ExecutionDomainFix.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/ExpandMemCmp.cpp | 105
-rw-r--r--  llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/ExpandReductions.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/ExpandVectorPredication.cpp | 469
-rw-r--r--  llvm/lib/CodeGen/FaultMaps.cpp | 38
-rw-r--r--  llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/GCMetadata.cpp | 13
-rw-r--r--  llvm/lib/CodeGen/GCRootLowering.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/GCStrategy.cpp | 20
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 23
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 842
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 1019
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 105
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 194
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 60
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp | 383
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1823
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 326
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 48
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 21
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp | 214
-rw-r--r--  llvm/lib/CodeGen/HardwareLoops.cpp | 35
-rw-r--r--  llvm/lib/CodeGen/IfConversion.cpp | 30
-rw-r--r--  llvm/lib/CodeGen/ImplicitNullChecks.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/IndirectBrExpandPass.cpp | 62
-rw-r--r--  llvm/lib/CodeGen/InlineSpiller.cpp | 53
-rw-r--r--  llvm/lib/CodeGen/InterleavedAccessPass.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/LLVMTargetMachine.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/LatencyPriorityQueue.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/LexicalScopes.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 911
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h | 5
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 906
-rw-r--r--  llvm/lib/CodeGen/LiveDebugVariables.cpp | 996
-rw-r--r--  llvm/lib/CodeGen/LiveInterval.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/LiveIntervalUnion.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/LiveIntervals.cpp | 98
-rw-r--r--  llvm/lib/CodeGen/LivePhysRegs.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/LiveRangeCalc.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/LiveRangeEdit.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/LiveRangeShrink.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/LiveRangeUtils.h | 4
-rw-r--r--  llvm/lib/CodeGen/LiveRegMatrix.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/LiveRegUnits.cpp | 13
-rw-r--r--  llvm/lib/CodeGen/LiveVariables.cpp | 44
-rw-r--r--  llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/LowLevelType.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/MBFIWrapper.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/MIRFSDiscriminator.cpp | 137
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MILexer.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MILexer.h | 5
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIParser.cpp | 225
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 52
-rw-r--r--  llvm/lib/CodeGen/MIRPrinter.cpp | 38
-rw-r--r--  llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MIRYamlMapping.cpp | 43
-rw-r--r--  llvm/lib/CodeGen/MachineBasicBlock.cpp | 74
-rw-r--r--  llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/MachineBlockPlacement.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 22
-rw-r--r--  llvm/lib/CodeGen/MachineCSE.cpp | 26
-rw-r--r--  llvm/lib/CodeGen/MachineCopyPropagation.cpp | 34
-rw-r--r--  llvm/lib/CodeGen/MachineFrameInfo.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineFunction.cpp | 283
-rw-r--r--  llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 28
-rw-r--r--  llvm/lib/CodeGen/MachineInstr.cpp | 175
-rw-r--r--  llvm/lib/CodeGen/MachineInstrBundle.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/MachineLICM.cpp | 102
-rw-r--r--  llvm/lib/CodeGen/MachineLoopInfo.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/MachineModuleInfo.cpp | 111
-rw-r--r--  llvm/lib/CodeGen/MachineModuleInfoImpls.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/MachineModuleSlotTracker.cpp | 81
-rw-r--r--  llvm/lib/CodeGen/MachineOperand.cpp | 52
-rw-r--r--  llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/MachineOutliner.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/MachinePassManager.cpp | 13
-rw-r--r--  llvm/lib/CodeGen/MachinePipeliner.cpp | 141
-rw-r--r--  llvm/lib/CodeGen/MachineRegisterInfo.cpp | 34
-rw-r--r--  llvm/lib/CodeGen/MachineSSAUpdater.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/MachineScheduler.cpp | 142
-rw-r--r--  llvm/lib/CodeGen/MachineSink.cpp | 309
-rw-r--r--  llvm/lib/CodeGen/MachineStableHash.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineVerifier.cpp | 196
-rw-r--r--  llvm/lib/CodeGen/ModuloSchedule.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/OptimizePHIs.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/PHIElimination.cpp | 22
-rw-r--r--  llvm/lib/CodeGen/ParallelCG.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/PeepholeOptimizer.cpp | 29
-rw-r--r--  llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/PrologEpilogInserter.cpp | 143
-rw-r--r--  llvm/lib/CodeGen/PseudoProbeInserter.cpp | 53
-rw-r--r--  llvm/lib/CodeGen/RDFGraph.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/RDFLiveness.cpp | 27
-rw-r--r--  llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 13
-rw-r--r--  llvm/lib/CodeGen/RegAllocBase.cpp | 35
-rw-r--r--  llvm/lib/CodeGen/RegAllocBase.h | 11
-rw-r--r--  llvm/lib/CodeGen/RegAllocBasic.cpp | 27
-rw-r--r--  llvm/lib/CodeGen/RegAllocFast.cpp | 176
-rw-r--r--  llvm/lib/CodeGen/RegAllocGreedy.cpp | 438
-rw-r--r--  llvm/lib/CodeGen/RegAllocPBQP.cpp | 22
-rw-r--r--  llvm/lib/CodeGen/RegUsageInfoPropagate.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/RegisterClassInfo.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/RegisterCoalescer.cpp | 283
-rw-r--r--  llvm/lib/CodeGen/RegisterPressure.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/RegisterScavenging.cpp | 21
-rw-r--r--  llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp | 231
-rw-r--r--  llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 254
-rw-r--r--  llvm/lib/CodeGen/SafeStack.cpp | 61
-rw-r--r--  llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 13
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1496
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 250
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 54
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 282
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 21
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 459
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 27
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 293
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 12
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 94
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 171
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 205
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 57
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 881
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 831
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 27
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 71
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 85
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 139
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h | 8
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 814
-rw-r--r--  llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 20
-rw-r--r--  llvm/lib/CodeGen/SjLjEHPrepare.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/SlotIndexes.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/SpillPlacement.cpp | 69
-rw-r--r--  llvm/lib/CodeGen/SpillPlacement.h | 3
-rw-r--r--  llvm/lib/CodeGen/SplitKit.cpp | 188
-rw-r--r--  llvm/lib/CodeGen/SplitKit.h | 18
-rw-r--r--  llvm/lib/CodeGen/StackColoring.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/StackMaps.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/StackProtector.cpp | 32
-rw-r--r--  llvm/lib/CodeGen/StackSlotColoring.cpp | 15
-rw-r--r--  llvm/lib/CodeGen/TailDuplicator.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/TargetInstrInfo.cpp | 39
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp | 112
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 511
-rw-r--r--  llvm/lib/CodeGen/TargetPassConfig.cpp | 54
-rw-r--r--  llvm/lib/CodeGen/TargetRegisterInfo.cpp | 109
-rw-r--r--  llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 32
-rw-r--r--  llvm/lib/CodeGen/TypePromotion.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/UnreachableBlockElim.cpp | 20
-rw-r--r--  llvm/lib/CodeGen/ValueTypes.cpp | 38
-rw-r--r--  llvm/lib/CodeGen/VirtRegMap.cpp | 98
-rw-r--r--  llvm/lib/CodeGen/WasmEHPrepare.cpp | 171
-rw-r--r--  llvm/lib/CodeGen/WinEHPrepare.cpp | 31
216 files changed, 17679 insertions, 7054 deletions
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index acf8553f7205..87a3cede601b 100644
--- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -153,9 +153,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
std::vector<unsigned> &DefIndices = State->GetDefIndices();
// Examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- for (const auto &LI : (*SI)->liveins()) {
+ for (MachineBasicBlock *Succ : BB->successors())
+ for (const auto &LI : Succ->liveins()) {
for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
State->UnionGroups(Reg, 0);
@@ -259,11 +258,10 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(
/// in SU that we want to consider for breaking.
static void AntiDepEdges(const SUnit *SU, std::vector<const SDep *> &Edges) {
SmallSet<unsigned, 4> RegSet;
- for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
- P != PE; ++P) {
- if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
- if (RegSet.insert(P->getReg()).second)
- Edges.push_back(&*P);
+ for (const SDep &Pred : SU->Preds) {
+ if ((Pred.getKind() == SDep::Anti) || (Pred.getKind() == SDep::Output)) {
+ if (RegSet.insert(Pred.getReg()).second)
+ Edges.push_back(&Pred);
}
}
}
@@ -275,17 +273,16 @@ static const SUnit *CriticalPathStep(const SUnit *SU) {
unsigned NextDepth = 0;
// Find the predecessor edge with the greatest depth.
if (SU) {
- for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
- P != PE; ++P) {
- const SUnit *PredSU = P->getSUnit();
- unsigned PredLatency = P->getLatency();
+ for (const SDep &Pred : SU->Preds) {
+ const SUnit *PredSU = Pred.getSUnit();
+ unsigned PredLatency = Pred.getLatency();
unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
// In the case of a latency tie, prefer an anti-dependency edge over
// other types of edges.
if (NextDepth < PredTotalLatency ||
- (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ (NextDepth == PredTotalLatency && Pred.getKind() == SDep::Anti)) {
NextDepth = PredTotalLatency;
- Next = &*P;
+ Next = &Pred;
}
}
}
@@ -886,25 +883,24 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Also, if there are dependencies on other SUnits with the
// same register as the anti-dependency, don't attempt to
// break it.
- for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
- PE = PathSU->Preds.end(); P != PE; ++P) {
- if (P->getSUnit() == NextSU ?
- (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
- (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ for (const SDep &Pred : PathSU->Preds) {
+ if (Pred.getSUnit() == NextSU ? (Pred.getKind() != SDep::Anti ||
+ Pred.getReg() != AntiDepReg)
+ : (Pred.getKind() == SDep::Data &&
+ Pred.getReg() == AntiDepReg)) {
AntiDepReg = 0;
break;
}
}
- for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
- PE = PathSU->Preds.end(); P != PE; ++P) {
- if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
- (P->getKind() != SDep::Output)) {
+ for (const SDep &Pred : PathSU->Preds) {
+ if ((Pred.getSUnit() == NextSU) && (Pred.getKind() != SDep::Anti) &&
+ (Pred.getKind() != SDep::Output)) {
LLVM_DEBUG(dbgs() << " (real dependency)\n");
AntiDepReg = 0;
break;
- } else if ((P->getSUnit() != NextSU) &&
- (P->getKind() == SDep::Data) &&
- (P->getReg() == AntiDepReg)) {
+ } else if ((Pred.getSUnit() != NextSU) &&
+ (Pred.getKind() == SDep::Data) &&
+ (Pred.getReg() == AntiDepReg)) {
LLVM_DEBUG(dbgs() << " (other dependency)\n");
AntiDepReg = 0;
break;
@@ -956,10 +952,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
<< printReg(AntiDepReg, TRI) << ":");
// Handle each group register...
- for (std::map<unsigned, unsigned>::iterator
- S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
- unsigned CurrReg = S->first;
- unsigned NewReg = S->second;
+ for (const auto &P : RenameMap) {
+ unsigned CurrReg = P.first;
+ unsigned NewReg = P.second;
LLVM_DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->"
<< printReg(NewReg, TRI) << "("
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index ebeff1fec30b..e5d576d879b5 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -43,13 +43,11 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- for (StructType::element_iterator EB = STy->element_begin(),
- EI = EB,
- EE = STy->element_end();
- EI != EE; ++EI) {
- if (Indices && *Indices == unsigned(EI - EB))
- return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex);
+ for (auto I : llvm::enumerate(STy->elements())) {
+ Type *ET = I.value();
+ if (Indices && *Indices == I.index())
+ return ComputeLinearIndex(ET, Indices + 1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(ET, nullptr, nullptr, CurIndex);
}
assert(!Indices && "Unexpected out of bound");
return CurIndex;
@@ -513,9 +511,10 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
// not profitable. Also, if the callee is a special function (e.g.
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
- if (!Ret &&
- ((!TM.Options.GuaranteedTailCallOpt &&
- Call.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term)))
+ if (!Ret && ((!TM.Options.GuaranteedTailCallOpt &&
+ Call.getCallingConv() != CallingConv::Tail &&
+ Call.getCallingConv() != CallingConv::SwiftTail) ||
+ !isa<UnreachableInst>(Term)))
return false;
// If I will have a chain, make sure no other instruction that will have a
@@ -562,14 +561,12 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
// Following attributes are completely benign as far as calling convention
// goes, they shouldn't affect whether the call is a tail call.
- CallerAttrs.removeAttribute(Attribute::NoAlias);
- CalleeAttrs.removeAttribute(Attribute::NoAlias);
- CallerAttrs.removeAttribute(Attribute::NonNull);
- CalleeAttrs.removeAttribute(Attribute::NonNull);
- CallerAttrs.removeAttribute(Attribute::Dereferenceable);
- CalleeAttrs.removeAttribute(Attribute::Dereferenceable);
- CallerAttrs.removeAttribute(Attribute::DereferenceableOrNull);
- CalleeAttrs.removeAttribute(Attribute::DereferenceableOrNull);
+ for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
+ Attribute::DereferenceableOrNull, Attribute::NoAlias,
+ Attribute::NonNull}) {
+ CallerAttrs.removeAttribute(Attr);
+ CalleeAttrs.removeAttribute(Attr);
+ }
if (CallerAttrs.contains(Attribute::ZExt)) {
if (!CalleeAttrs.contains(Attribute::ZExt))
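The Analysis.cpp hunk above swaps manual element_iterator bookkeeping for llvm::enumerate (from llvm/ADT/STLExtras.h), which yields (index, value) pairs. A minimal sketch of the same idiom over a plain std::vector, assuming only the standard enumerate API and using made-up values, would look roughly like this:

    // Rough sketch of the llvm::enumerate idiom used in the Analysis.cpp hunk.
    // The vector contents here are placeholders, not taken from the patch.
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> Elements = {"i32", "float", "ptr"};
      // enumerate() provides .index() and .value(), so there is no need to keep
      // a separate begin iterator and compute `EI - EB` by hand.
      for (auto I : llvm::enumerate(Elements))
        llvm::outs() << I.index() << ": " << I.value() << "\n";
      return 0;
    }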
diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 95d878e65be4..964cef75d164 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -61,6 +61,9 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
}
void AIXException::endFunction(const MachineFunction *MF) {
+ // There is no easy way to access register information in `AIXException`
+ // class. When ShouldEmitEHBlock is false and VRs are saved, a dummy EH info
+ // table is emitted in PPCAIXAsmPrinter::emitFunctionBodyEnd.
if (!TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF))
return;
@@ -69,8 +72,8 @@ void AIXException::endFunction(const MachineFunction *MF) {
const Function &F = MF->getFunction();
assert(F.hasPersonalityFn() &&
"Landingpads are presented, but no personality routine is found.");
- const Function *Per =
- dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ const GlobalValue *Per =
+ dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
const MCSymbol *PerSym = Asm->TM.getSymbol(Per);
emitExceptionInfoTable(LSDALabel, PerSym);
diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index b634b24377fe..db4215e92d44 100644
--- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -39,13 +39,13 @@ void ARMException::beginFunction(const MachineFunction *MF) {
if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
getTargetStreamer().emitFnStart();
// See if we need call frame info.
- AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
- assert(MoveType != AsmPrinter::CFI_M_EH &&
+ AsmPrinter::CFISection CFISecType = Asm->getFunctionCFISectionType(*MF);
+ assert(CFISecType != AsmPrinter::CFISection::EH &&
"non-EH CFI not yet supported in prologue with EHABI lowering");
- if (MoveType == AsmPrinter::CFI_M_Debug) {
+ if (CFISecType == AsmPrinter::CFISection::Debug) {
if (!hasEmittedCFISections) {
- if (Asm->needsOnlyDebugCFIMoves())
+ if (Asm->getModuleCFISectionType() == AsmPrinter::CFISection::Debug)
Asm->OutStreamer->emitCFISections(false, true);
hasEmittedCFISections = true;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 4e45a0ffc60f..65c45f73e965 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -205,7 +205,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
: CompUnitCount(CompUnitCount), BucketCount(BucketCount),
NameCount(NameCount) {}
- void emit(const Dwarf5AccelTableWriter &Ctx) const;
+ void emit(Dwarf5AccelTableWriter &Ctx);
};
struct AttributeEncoding {
dwarf::Index Index;
@@ -216,8 +216,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
DenseMap<uint32_t, SmallVector<AttributeEncoding, 2>> Abbreviations;
ArrayRef<MCSymbol *> CompUnits;
llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry;
- MCSymbol *ContributionStart = Asm->createTempSymbol("names_start");
- MCSymbol *ContributionEnd = Asm->createTempSymbol("names_end");
+ MCSymbol *ContributionEnd = nullptr;
MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start");
MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end");
MCSymbol *EntryPool = Asm->createTempSymbol("names_entries");
@@ -240,7 +239,7 @@ public:
ArrayRef<MCSymbol *> CompUnits,
llvm::function_ref<unsigned(const DataT &)> GetCUIndexForEntry);
- void emit() const;
+ void emit();
};
} // namespace
@@ -327,9 +326,9 @@ void AppleAccelTableWriter::emitBuckets() const {
void AppleAccelTableWriter::emitData() const {
const auto &Buckets = Contents.getBuckets();
- for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (const AccelTableBase::HashList &Bucket : Buckets) {
uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
- for (auto &Hash : Buckets[i]) {
+ for (auto &Hash : Bucket) {
// Terminate the previous entry if there is no hash collision with the
// current one.
if (PrevHash != std::numeric_limits<uint64_t>::max() &&
@@ -346,7 +345,7 @@ void AppleAccelTableWriter::emitData() const {
PrevHash = Hash->HashValue;
}
// Emit the final end marker for the bucket.
- if (!Buckets[i].empty())
+ if (!Bucket.empty())
Asm->emitInt32(0);
}
}
@@ -361,14 +360,12 @@ void AppleAccelTableWriter::emit() const {
}
template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::Header::emit(
- const Dwarf5AccelTableWriter &Ctx) const {
+void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
assert(CompUnitCount > 0 && "Index must have at least one CU.");
AsmPrinter *Asm = Ctx.Asm;
- Asm->emitDwarfUnitLength(Ctx.ContributionEnd, Ctx.ContributionStart,
- "Header: unit length");
- Asm->OutStreamer->emitLabel(Ctx.ContributionStart);
+ Ctx.ContributionEnd =
+ Asm->emitDwarfUnitLength("names", "Header: unit length");
Asm->OutStreamer->AddComment("Header: version");
Asm->emitInt16(Version);
Asm->OutStreamer->AddComment("Header: padding");
@@ -526,7 +523,7 @@ Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
Abbreviations.try_emplace(Tag, UniformAttributes);
}
-template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() const {
+template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() {
Header.emit(*this);
emitCUList();
emitBuckets();
diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 3df8e35accc4..21da9d50efba 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -25,12 +25,9 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
- StringRef Prefix = "debug_addr_";
- MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start");
- MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end");
- Asm.emitDwarfUnitLength(EndLabel, BeginLabel, "Length of contribution");
- Asm.OutStreamer->emitLabel(BeginLabel);
+ MCSymbol *EndLabel =
+ Asm.emitDwarfUnitLength("debug_addr", "Length of contribution");
Asm.OutStreamer->AddComment("DWARF version number");
Asm.emitInt16(Asm.getDwarfVersion());
Asm.OutStreamer->AddComment("Address size");
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 85754bf29d0c..e528d33b5f8c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -38,7 +38,6 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -60,6 +59,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
@@ -68,6 +68,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalIndirectSymbol.h"
@@ -110,6 +111,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
@@ -296,8 +298,24 @@ bool AsmPrinter::doInitialization(Module &M) {
// don't, this at least helps the user find where a global came from.
if (MAI->hasSingleParameterDotFile()) {
// .file "foo.c"
- OutStreamer->emitFileDirective(
- llvm::sys::path::filename(M.getSourceFileName()));
+
+ SmallString<128> FileName;
+ if (MAI->hasBasenameOnlyForFileDirective())
+ FileName = llvm::sys::path::filename(M.getSourceFileName());
+ else
+ FileName = M.getSourceFileName();
+ if (MAI->hasFourStringsDotFile()) {
+#ifdef PACKAGE_VENDOR
+ const char VerStr[] =
+ PACKAGE_VENDOR " " PACKAGE_NAME " version " PACKAGE_VERSION;
+#else
+ const char VerStr[] = PACKAGE_NAME " version " PACKAGE_VERSION;
+#endif
+ // TODO: Add timestamp and description.
+ OutStreamer->emitFileDirective(FileName, VerStr, "", "");
+ } else {
+ OutStreamer->emitFileDirective(FileName);
+ }
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -341,37 +359,39 @@ bool AsmPrinter::doInitialization(Module &M) {
}
if (M.getNamedMetadata(PseudoProbeDescMetadataName)) {
- PP = new PseudoProbeHandler(this, &M);
+ PP = new PseudoProbeHandler(this);
Handlers.emplace_back(std::unique_ptr<PseudoProbeHandler>(PP), PPTimerName,
PPTimerDescription, PPGroupName, PPGroupDescription);
}
switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::None:
+ // We may want to emit CFI for debug.
+ LLVM_FALLTHROUGH;
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- isCFIMoveForDebugging = true;
- if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
- break;
- for (auto &F: M.getFunctionList()) {
- // If the module contains any function with unwind data,
- // .eh_frame has to be emitted.
- // Ignore functions that won't get emitted.
- if (!F.isDeclarationForLinker() && F.needsUnwindTableEntry()) {
- isCFIMoveForDebugging = false;
+ for (auto &F : M.getFunctionList()) {
+ if (getFunctionCFISectionType(F) != CFISection::None)
+ ModuleCFISection = getFunctionCFISectionType(F);
+ // If any function needsUnwindTableEntry(), it needs .eh_frame and hence
+ // the module needs .eh_frame. If we have found that case, we are done.
+ if (ModuleCFISection == CFISection::EH)
break;
- }
}
+ assert(MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI ||
+ ModuleCFISection != CFISection::EH);
break;
default:
- isCFIMoveForDebugging = false;
break;
}
EHStreamer *ES = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
- break;
+ if (!needsCFIForDebug())
+ break;
+ LLVM_FALLTHROUGH;
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
ES = new DwarfCFIException(this);
@@ -709,7 +729,12 @@ void AsmPrinter::emitFunctionHeader() {
emitConstantPool();
// Print the 'header' of function.
- MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM));
+ // If basic block sections are desired, explicitly request a unique section
+ // for this function's entry block.
+ if (MF->front().isBeginSection())
+ MF->setSection(getObjFileLowering().getUniqueSectionForFunction(F, TM));
+ else
+ MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM));
OutStreamer->SwitchSection(MF->getSection());
if (!MAI->hasVisibilityOnlyWithLinkage())
@@ -786,6 +811,16 @@ void AsmPrinter::emitFunctionHeader() {
// their wild and crazy things as required.
emitFunctionEntryLabel();
+ // If the function had address-taken blocks that got deleted, then we have
+ // references to the dangling symbols. Emit them at the start of the function
+ // so that we don't get references to undefined symbols.
+ std::vector<MCSymbol*> DeadBlockSyms;
+ MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
+ for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ OutStreamer->AddComment("Address taken block that was later removed");
+ OutStreamer->emitLabel(DeadBlockSyms[i]);
+ }
+
if (CurrentFnBegin) {
if (MAI->useAssignmentForEHBegin()) {
MCSymbol *CurPos = OutContext.createTempSymbol();
@@ -819,9 +854,6 @@ void AsmPrinter::emitFunctionEntryLabel() {
if (CurrentFnSym->isVariable())
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' is a protected alias");
- if (CurrentFnSym->isDefined())
- report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
- "' label emitted multiple times to assembly file");
OutStreamer->emitLabel(CurrentFnSym);
@@ -900,7 +932,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
/// means the target will need to handle MI in EmitInstruction.
static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
// This code handles only the 4-operand target-independent form.
- if (MI->getNumOperands() != 4)
+ if (MI->isNonListDebugValue() && MI->getNumOperands() != 4)
return false;
SmallString<128> Str;
@@ -916,19 +948,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << V->getName();
OS << " <- ";
- // The second operand is only an offset if it's an immediate.
- bool MemLoc = MI->isIndirectDebugValue();
- auto Offset = StackOffset::getFixed(MemLoc ? MI->getOperand(1).getImm() : 0);
const DIExpression *Expr = MI->getDebugExpression();
if (Expr->getNumElements()) {
OS << '[';
- bool NeedSep = false;
+ ListSeparator LS;
for (auto Op : Expr->expr_ops()) {
- if (NeedSep)
- OS << ", ";
- else
- NeedSep = true;
- OS << dwarf::OperationEncodingString(Op.getOp());
+ OS << LS << dwarf::OperationEncodingString(Op.getOp());
for (unsigned I = 0; I < Op.getNumArgs(); ++I)
OS << ' ' << Op.getArg(I);
}
@@ -936,56 +961,71 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
// Register or immediate value. Register 0 means undef.
- if (MI->getDebugOperand(0).isFPImm()) {
- APFloat APF = APFloat(MI->getDebugOperand(0).getFPImm()->getValueAPF());
- if (MI->getDebugOperand(0).getFPImm()->getType()->isFloatTy()) {
- OS << (double)APF.convertToFloat();
- } else if (MI->getDebugOperand(0).getFPImm()->getType()->isDoubleTy()) {
- OS << APF.convertToDouble();
- } else {
- // There is no good way to print long double. Convert a copy to
- // double. Ah well, it's only a comment.
- bool ignored;
- APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
- &ignored);
- OS << "(long double) " << APF.convertToDouble();
+ for (const MachineOperand &Op : MI->debug_operands()) {
+ if (&Op != MI->debug_operands().begin())
+ OS << ", ";
+ switch (Op.getType()) {
+ case MachineOperand::MO_FPImmediate: {
+ APFloat APF = APFloat(Op.getFPImm()->getValueAPF());
+ Type *ImmTy = Op.getFPImm()->getType();
+ if (ImmTy->isBFloatTy() || ImmTy->isHalfTy() || ImmTy->isFloatTy() ||
+ ImmTy->isDoubleTy()) {
+ OS << APF.convertToDouble();
+ } else {
+ // There is no good way to print long double. Convert a copy to
+ // double. Ah well, it's only a comment.
+ bool ignored;
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &ignored);
+ OS << "(long double) " << APF.convertToDouble();
+ }
+ break;
}
- } else if (MI->getDebugOperand(0).isImm()) {
- OS << MI->getDebugOperand(0).getImm();
- } else if (MI->getDebugOperand(0).isCImm()) {
- MI->getDebugOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
- } else if (MI->getDebugOperand(0).isTargetIndex()) {
- auto Op = MI->getDebugOperand(0);
- OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")";
- // NOTE: Want this comment at start of line, don't emit with AddComment.
- AP.OutStreamer->emitRawComment(OS.str());
- return true;
- } else {
- Register Reg;
- if (MI->getDebugOperand(0).isReg()) {
- Reg = MI->getDebugOperand(0).getReg();
- } else {
- assert(MI->getDebugOperand(0).isFI() && "Unknown operand type");
- const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering();
- Offset += TFI->getFrameIndexReference(
- *AP.MF, MI->getDebugOperand(0).getIndex(), Reg);
- MemLoc = true;
+ case MachineOperand::MO_Immediate: {
+ OS << Op.getImm();
+ break;
}
- if (Reg == 0) {
- // Suppress offset, it is not meaningful here.
- OS << "undef";
+ case MachineOperand::MO_CImmediate: {
+ Op.getCImm()->getValue().print(OS, false /*isSigned*/);
+ break;
+ }
+ case MachineOperand::MO_TargetIndex: {
+ OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")";
// NOTE: Want this comment at start of line, don't emit with AddComment.
AP.OutStreamer->emitRawComment(OS.str());
- return true;
+ break;
+ }
+ case MachineOperand::MO_Register:
+ case MachineOperand::MO_FrameIndex: {
+ Register Reg;
+ Optional<StackOffset> Offset;
+ if (Op.isReg()) {
+ Reg = Op.getReg();
+ } else {
+ const TargetFrameLowering *TFI =
+ AP.MF->getSubtarget().getFrameLowering();
+ Offset = TFI->getFrameIndexReference(*AP.MF, Op.getIndex(), Reg);
+ }
+ if (!Reg) {
+ // Suppress offset, it is not meaningful here.
+ OS << "undef";
+ break;
+ }
+ // The second operand is only an offset if it's an immediate.
+ if (MI->isIndirectDebugValue())
+ Offset = StackOffset::getFixed(MI->getDebugOffset().getImm());
+ if (Offset)
+ OS << '[';
+ OS << printReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
+ if (Offset)
+ OS << '+' << Offset->getFixed() << ']';
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown operand type");
}
- if (MemLoc)
- OS << '[';
- OS << printReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
}
- if (MemLoc)
- OS << '+' << Offset.getFixed() << ']';
-
// NOTE: Want this comment at start of line, don't emit with AddComment.
AP.OutStreamer->emitRawComment(OS.str());
return true;
@@ -1016,28 +1056,44 @@ static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) {
return true;
}
-AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const {
+AsmPrinter::CFISection
+AsmPrinter::getFunctionCFISectionType(const Function &F) const {
+ // Ignore functions that won't get emitted.
+ if (F.isDeclarationForLinker())
+ return CFISection::None;
+
if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
- MF->getFunction().needsUnwindTableEntry())
- return CFI_M_EH;
+ F.needsUnwindTableEntry())
+ return CFISection::EH;
+
+ if (MMI->hasDebugInfo() || TM.Options.ForceDwarfFrameSection)
+ return CFISection::Debug;
- if (MMI->hasDebugInfo() || MF->getTarget().Options.ForceDwarfFrameSection)
- return CFI_M_Debug;
+ return CFISection::None;
+}
- return CFI_M_None;
+AsmPrinter::CFISection
+AsmPrinter::getFunctionCFISectionType(const MachineFunction &MF) const {
+ return getFunctionCFISectionType(MF.getFunction());
}
bool AsmPrinter::needsSEHMoves() {
return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry();
}
+bool AsmPrinter::needsCFIForDebug() const {
+ return MAI->getExceptionHandlingType() == ExceptionHandling::None &&
+ MAI->doesUseCFIForDebug() && ModuleCFISection == CFISection::Debug;
+}
+
void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
- if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
+ if (!needsCFIForDebug() &&
+ ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
ExceptionHandlingType != ExceptionHandling::ARM)
return;
- if (needsCFIMoves() == CFI_M_None)
+ if (getFunctionCFISectionType(*MF) == CFISection::None)
return;
// If there is no "real" instruction following this CFI instruction, skip
@@ -1068,17 +1124,19 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
/// Returns the BB metadata to be emitted in the .llvm_bb_addr_map section for a
/// given basic block. This can be used to capture more precise profile
-/// information. We use the last 3 bits (LSBs) to ecnode the following
+/// information. We use the last 4 bits (LSBs) to encode the following
/// information:
/// * (1): set if return block (ret or tail call).
/// * (2): set if ends with a tail call.
/// * (3): set if exception handling (EH) landing pad.
+/// * (4): set if the block can fall through to its next block.
/// The remaining bits are zero.
static unsigned getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
return ((unsigned)MBB.isReturnBlock()) |
((!MBB.empty() && TII->isTailCall(MBB.back())) << 1) |
- (MBB.isEHPad() << 2);
+ (MBB.isEHPad() << 2) |
+ (const_cast<MachineBasicBlock &>(MBB).canFallThrough() << 3);
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1141,6 +1199,37 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
OutStreamer->PopSection();
}
+void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
+ const std::string &OutputFilename = MF.getTarget().Options.StackUsageOutput;
+
+ // OutputFilename empty implies -fstack-usage is not passed.
+ if (OutputFilename.empty())
+ return;
+
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ uint64_t StackSize = FrameInfo.getStackSize();
+
+ if (StackUsageStream == nullptr) {
+ std::error_code EC;
+ StackUsageStream =
+ std::make_unique<raw_fd_ostream>(OutputFilename, EC, sys::fs::OF_Text);
+ if (EC) {
+ errs() << "Could not open file: " << EC.message();
+ return;
+ }
+ }
+
+ *StackUsageStream << MF.getFunction().getParent()->getName();
+ if (const DISubprogram *DSP = MF.getFunction().getSubprogram())
+ *StackUsageStream << ':' << DSP->getLine();
+
+ *StackUsageStream << ':' << MF.getName() << '\t' << StackSize << '\t';
+ if (FrameInfo.hasVarSizedObjects())
+ *StackUsageStream << "dynamic\n";
+ else
+ *StackUsageStream << "static\n";
+}
+
static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF) {
MachineModuleInfo &MMI = MF.getMMI();
if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI.hasDebugInfo())
@@ -1227,6 +1316,7 @@ void AsmPrinter::emitFunctionBody() {
emitInlineAsm(&MI);
break;
case TargetOpcode::DBG_VALUE:
+ case TargetOpcode::DBG_VALUE_LIST:
if (isVerbose()) {
if (!emitDebugValueComment(&MI, *this))
emitInstruction(&MI);
@@ -1237,6 +1327,10 @@ void AsmPrinter::emitFunctionBody() {
// location, and a nearby DBG_VALUE created. We can safely ignore
// the instruction reference.
break;
+ case TargetOpcode::DBG_PHI:
+ // This instruction is only used to label a program point, it's purely
+ // meta information.
+ break;
case TargetOpcode::DBG_LABEL:
if (isVerbose()) {
if (!emitDebugLabelComment(&MI, *this))
@@ -1252,6 +1346,10 @@ void AsmPrinter::emitFunctionBody() {
case TargetOpcode::PSEUDO_PROBE:
emitPseudoProbe(MI);
break;
+ case TargetOpcode::ARITH_FENCE:
+ if (isVerbose())
+ OutStreamer->emitRawComment("ARITH_FENCE");
+ break;
default:
emitInstruction(&MI);
if (CanDoExtraAnalysis) {
@@ -1277,11 +1375,9 @@ void AsmPrinter::emitFunctionBody() {
// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic blocks marks the end of a
- // section (except the section containing the entry basic block as the end
- // symbol for that section is CurrentFnEnd).
+ // section.
if (MF->hasBBLabels() ||
- (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection() &&
- !MBB.sameSection(&MF->front())))
+ (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection()))
OutStreamer->emitLabel(MBB.getEndSymbol());
if (MBB.isEndSection()) {
@@ -1352,8 +1448,7 @@ void AsmPrinter::emitFunctionBody() {
const Triple &TT = TM.getTargetTriple();
if (!HasAnyRealCode && (MAI->hasSubsectionsViaSymbols() ||
(TT.isOSWindows() && TT.isOSBinFormatCOFF()))) {
- MCInst Noop;
- MF->getSubtarget().getInstrInfo()->getNoop(Noop);
+ MCInst Noop = MF->getSubtarget().getInstrInfo()->getNop();
// Targets can opt-out of emitting the noop here by leaving the opcode
// unspecified.
@@ -1418,13 +1513,16 @@ void AsmPrinter::emitFunctionBody() {
}
// Emit section containing BB address offsets and their metadata, when
- // BB labels are requested for this function.
- if (MF->hasBBLabels())
+ // BB labels are requested for this function. Skip empty functions.
+ if (MF->hasBBLabels() && HasAnyRealCode)
emitBBAddrMapSection(*MF);
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
+ // Emit .su file containing function stack size information.
+ emitStackUsage(*MF);
+
emitPatchableFunctionEntries();
if (isVerbose())
@@ -1600,7 +1698,7 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
std::string Buf;
raw_string_ostream OS(Buf);
std::unique_ptr<remarks::MetaSerializer> MetaSerializer =
- Filename ? RemarkSerializer.metaSerializer(OS, StringRef(*Filename))
+ Filename ? RemarkSerializer.metaSerializer(OS, Filename->str())
: RemarkSerializer.metaSerializer(OS);
MetaSerializer->emit();
@@ -1814,11 +1912,12 @@ bool AsmPrinter::doFinalization(Module &M) {
if (TM.Options.EmitAddrsig) {
// Emit address-significance attributes for all globals.
OutStreamer->emitAddrsig();
- for (const GlobalValue &GV : M.global_values())
- if (!GV.use_empty() && !GV.isThreadLocal() &&
- !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") &&
- !GV.hasAtLeastLocalUnnamedAddr())
+ for (const GlobalValue &GV : M.global_values()) {
+ if (!GV.use_empty() && !GV.isTransitiveUsedByMetadataOnly() &&
+ !GV.isThreadLocal() && !GV.hasDLLImportStorageClass() &&
+ !GV.getName().startswith("llvm.") && !GV.hasAtLeastLocalUnnamedAddr())
OutStreamer->emitAddrsigSym(getSymbol(&GV));
+ }
}
// Emit symbol partition specifications (ELF only).
@@ -1831,7 +1930,7 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer->SwitchSection(
OutContext.getELFSection(".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0,
- "", ++UniqueID, nullptr));
+ "", false, ++UniqueID, nullptr));
OutStreamer->emitBytes(GV.getPartition());
OutStreamer->emitZeros(1);
OutStreamer->emitValue(
@@ -2225,6 +2324,11 @@ void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
if (Structors.empty())
return;
+ // Emit the structors in reverse order if we are using the .ctor/.dtor
+ // initialization scheme.
+ if (!TM.Options.UseInitArray)
+ std::reverse(Structors.begin(), Structors.end());
+
const Align Align = DL.getPointerPrefAlignment();
for (Structor &S : Structors) {
const TargetLoweringObjectFile &Obj = getObjFileLowering();
@@ -2992,8 +3096,7 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
}
void AsmPrinter::emitNops(unsigned N) {
- MCInst Nop;
- MF->getSubtarget().getInstrInfo()->getNoop(Nop);
+ MCInst Nop = MF->getSubtarget().getInstrInfo()->getNop();
for (; N; --N)
EmitToStreamer(*OutStreamer, Nop);
}
@@ -3201,6 +3304,11 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
}
}
+ if (MBB.isEHCatchretTarget() &&
+ MAI->getExceptionHandlingType() == ExceptionHandling::WinEH) {
+ OutStreamer->emitLabel(MBB.getEHCatchretSymbol());
+ }
+
// With BB sections, each basic block must handle CFI information on its own
// if it begins a section (Entry block is handled separately by
// AsmPrinterHandler::beginFunction).
@@ -3378,13 +3486,13 @@ void AsmPrinter::emitXRayTable() {
GroupName = F.getComdat()->getName();
}
InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
- Flags, 0, GroupName,
+ Flags, 0, GroupName, F.hasComdat(),
MCSection::NonUniqueID, LinkedToSym);
if (!TM.Options.XRayOmitFunctionIndex)
FnSledIndex = OutContext.getELFSection(
"xray_fn_idx", ELF::SHT_PROGBITS, Flags | ELF::SHF_WRITE, 0,
- GroupName, MCSection::NonUniqueID, LinkedToSym);
+ GroupName, F.hasComdat(), MCSection::NonUniqueID, LinkedToSym);
} else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
@@ -3468,9 +3576,9 @@ void AsmPrinter::emitPatchableFunctionEntries() {
const MCSymbolELF *LinkedToSym = nullptr;
StringRef GroupName;
- // GNU as < 2.35 did not support section flag 'o'. Use SHF_LINK_ORDER only
- // if we are using the integrated assembler.
- if (MAI->useIntegratedAssembler()) {
+ // GNU as < 2.35 did not support section flag 'o'. GNU ld < 2.36 did not
+ // support mixed SHF_LINK_ORDER and non-SHF_LINK_ORDER sections.
+ if (MAI->useIntegratedAssembler() || MAI->binutilsIsAtLeast(2, 36)) {
Flags |= ELF::SHF_LINK_ORDER;
if (F.hasComdat()) {
Flags |= ELF::SHF_GROUP;
@@ -3480,7 +3588,7 @@ void AsmPrinter::emitPatchableFunctionEntries() {
}
OutStreamer->SwitchSection(OutContext.getELFSection(
"__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, GroupName,
- MCSection::NonUniqueID, LinkedToSym));
+ F.hasComdat(), MCSection::NonUniqueID, LinkedToSym));
emitAlignment(Align(PointerSize));
OutStreamer->emitSymbolValue(CurrentPatchableFunctionEntrySym, PointerSize);
}
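Several of the AsmPrinter.cpp hunks above (the DIExpression comment printer here, and the clobber-list message in AsmPrinterInlineAsm.cpp below) replace hand-rolled NeedSep flags with llvm::ListSeparator from llvm/ADT/StringExtras.h. A small standalone sketch of that helper, assuming only its documented behaviour of yielding an empty string on first use and the separator afterwards:

    // Sketch of the ListSeparator idiom: it prints nothing before the first item
    // and ", " before every later one, replacing manual first-element flags.
    // The register names below are placeholders.
    #include "llvm/ADT/StringExtras.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      const char *Regs[] = {"sp", "fp", "lr"};
      llvm::ListSeparator LS; // default separator is ", "
      for (const char *R : Regs)
        llvm::outs() << LS << R;
      llvm::outs() << "\n"; // prints: sp, fp, lr
      return 0;
    }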
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index c6e43445e7d0..fc127f4cf9da 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -198,26 +198,14 @@ void AsmPrinter::emitDwarfLengthOrOffset(uint64_t Value) const {
OutStreamer->emitIntValue(Value, getDwarfOffsetByteSize());
}
-void AsmPrinter::maybeEmitDwarf64Mark() const {
- if (!isDwarf64())
- return;
- OutStreamer->AddComment("DWARF64 Mark");
- OutStreamer->emitInt32(dwarf::DW_LENGTH_DWARF64);
-}
-
void AsmPrinter::emitDwarfUnitLength(uint64_t Length,
const Twine &Comment) const {
- assert(isDwarf64() || Length <= dwarf::DW_LENGTH_lo_reserved);
- maybeEmitDwarf64Mark();
- OutStreamer->AddComment(Comment);
- OutStreamer->emitIntValue(Length, getDwarfOffsetByteSize());
+ OutStreamer->emitDwarfUnitLength(Length, Comment);
}
-void AsmPrinter::emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo,
- const Twine &Comment) const {
- maybeEmitDwarf64Mark();
- OutStreamer->AddComment(Comment);
- OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, getDwarfOffsetByteSize());
+MCSymbol *AsmPrinter::emitDwarfUnitLength(const Twine &Prefix,
+ const Twine &Comment) const {
+ return OutStreamer->emitDwarfUnitLength(Prefix, Comment);
}
void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
@@ -257,6 +245,10 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpDefCfaRegister:
OutStreamer->emitCFIDefCfaRegister(Inst.getRegister());
break;
+ case MCCFIInstruction::OpLLVMDefAspaceCfa:
+ OutStreamer->emitCFILLVMDefAspaceCfa(Inst.getRegister(), Inst.getOffset(),
+ Inst.getAddressSpace());
+ break;
case MCCFIInstruction::OpOffset:
OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset());
break;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4a67b0bc2c4d..4a93181f5439 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -39,54 +40,12 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an
-/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
-/// struct above.
-static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
- AsmPrinter::SrcMgrDiagInfo *DiagInfo =
- static_cast<AsmPrinter::SrcMgrDiagInfo *>(diagInfo);
- assert(DiagInfo && "Diagnostic context not passed down?");
-
- // Look up a LocInfo for the buffer this diagnostic is coming from.
- unsigned BufNum = DiagInfo->SrcMgr.FindBufferContainingLoc(Diag.getLoc());
- const MDNode *LocInfo = nullptr;
- if (BufNum > 0 && BufNum <= DiagInfo->LocInfos.size())
- LocInfo = DiagInfo->LocInfos[BufNum-1];
-
- // If the inline asm had metadata associated with it, pull out a location
- // cookie corresponding to which line the error occurred on.
- unsigned LocCookie = 0;
- if (LocInfo) {
- unsigned ErrorLine = Diag.getLineNo()-1;
- if (ErrorLine >= LocInfo->getNumOperands())
- ErrorLine = 0;
-
- if (LocInfo->getNumOperands() != 0)
- if (const ConstantInt *CI =
- mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine)))
- LocCookie = CI->getZExtValue();
- }
-
- DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
-}
-
unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
const MDNode *LocMDNode) const {
- if (!DiagInfo) {
- DiagInfo = std::make_unique<SrcMgrDiagInfo>();
-
- MCContext &Context = MMI->getContext();
- Context.setInlineSourceManager(&DiagInfo->SrcMgr);
-
- LLVMContext &LLVMCtx = MMI->getModule()->getContext();
- if (LLVMCtx.getInlineAsmDiagnosticHandler()) {
- DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
- DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
- DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get());
- }
- }
-
- SourceMgr &SrcMgr = DiagInfo->SrcMgr;
+ MCContext &Context = MMI->getContext();
+ Context.initInlineSourceManager();
+ SourceMgr &SrcMgr = *Context.getInlineSourceManager();
+ std::vector<const MDNode *> &LocInfos = Context.getLocInfos();
std::unique_ptr<MemoryBuffer> Buffer;
// The inline asm source manager will outlive AsmStr, so make a copy of the
@@ -98,8 +57,8 @@ unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
// Store LocMDNode in DiagInfo, using BufNum as an identifier.
if (LocMDNode) {
- DiagInfo->LocInfos.resize(BufNum);
- DiagInfo->LocInfos[BufNum - 1] = LocMDNode;
+ LocInfos.resize(BufNum);
+ LocInfos[BufNum - 1] = LocMDNode;
}
return BufNum;
@@ -119,13 +78,14 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
Str = Str.substr(0, Str.size()-1);
// If the output streamer does not have mature MC support or the integrated
- // assembler has been disabled, just emit the blob textually.
+ // assembler has been disabled or not required, just emit the blob textually.
// Otherwise parse the asm and emit it via MC support.
// This is useful in case the asm parser doesn't handle something but the
// system assembler does.
const MCAsmInfo *MCAI = TM.getMCAsmInfo();
assert(MCAI && "No MCAsmInfo");
if (!MCAI->useIntegratedAssembler() &&
+ !MCAI->parseInlineAsmUsingAsmParser() &&
!OutStreamer->isIntegratedAssemblerRequired()) {
emitInlineAsmStart();
OutStreamer->emitRawText(Str);
@@ -134,10 +94,11 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
- DiagInfo->SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+ SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager();
+ SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
- std::unique_ptr<MCAsmParser> Parser(createMCAsmParser(
- DiagInfo->SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
+ std::unique_ptr<MCAsmParser> Parser(
+ createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
// Do not use assembler-level information for parsing inline assembly.
OutStreamer->setUseAssemblerInfoForParsing(false);
@@ -162,17 +123,14 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
emitInlineAsmStart();
// Don't implicitly switch to the text section before the asm.
- int Res = Parser->Run(/*NoInitialTextSection*/ true,
- /*NoFinalize*/ true);
+ (void)Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
emitInlineAsmEnd(STI, &TAP->getSTI());
-
- if (Res && !DiagInfo->DiagHandler)
- report_fatal_error("Error parsing inline asm\n");
}
static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
MachineModuleInfo *MMI, AsmPrinter *AP,
- unsigned LocCookie, raw_ostream &OS) {
+ uint64_t LocCookie, raw_ostream &OS) {
// Switch to the inline assembly variant.
OS << "\t.intel_syntax\n\t";
@@ -313,14 +271,16 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, int AsmPrinterVariant,
- AsmPrinter *AP, unsigned LocCookie,
+ MachineModuleInfo *MMI, const MCAsmInfo *MAI,
+ AsmPrinter *AP, uint64_t LocCookie,
raw_ostream &OS) {
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
- OS << '\t';
+ if (MAI->getEmitGNUAsmStartIndentationMarker())
+ OS << '\t';
while (*LastEmitted) {
switch (*LastEmitted) {
@@ -523,7 +483,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
// Get the !srcloc metadata node if we have it, and decode the loc cookie from
// it.
- unsigned LocCookie = 0;
+ uint64_t LocCookie = 0;
const MDNode *LocMD = nullptr;
for (unsigned i = MI->getNumOperands(); i != 0; --i) {
if (MI->getOperand(i-1).isMetadata() &&
@@ -542,11 +502,9 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
SmallString<256> StringData;
raw_svector_ostream OS(StringData);
- // The variant of the current asmprinter.
- int AsmPrinterVariant = MAI->getAssemblerDialect();
AsmPrinter *AP = const_cast<AsmPrinter*>(this);
if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
- EmitGCCInlineAsmStr(AsmStr, MI, MMI, AsmPrinterVariant, AP, LocCookie, OS);
+ EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
else
EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
@@ -571,23 +529,20 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
}
if (!RestrRegs.empty()) {
- unsigned BufNum = addInlineAsmDiagBuffer(OS.str(), LocMD);
- auto &SrcMgr = DiagInfo->SrcMgr;
- SMLoc Loc = SMLoc::getFromPointer(
- SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin());
-
std::string Msg = "inline asm clobber list contains reserved registers: ";
- for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; ++I) {
- if(I != RestrRegs.begin())
- Msg += ", ";
- Msg += TRI->getName(*I);
+ ListSeparator LS;
+ for (const Register &RR : RestrRegs) {
+ Msg += LS;
+ Msg += TRI->getName(RR);
}
const char *Note =
"Reserved registers on the clobber list may not be "
"preserved across the asm statement, and clobbering them may "
"lead to undefined behaviour.";
- SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg);
- SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
+ MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
+ LocCookie, Msg.c_str(), DiagnosticSeverity::DS_Warning));
+ MMI->getModule()->getContext().diagnose(
+ DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note));
}
emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
@@ -633,7 +588,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
void AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS) {
assert(MO.isGlobal() && "caller should check MO.isGlobal");
- getSymbol(MO.getGlobal())->print(OS, MAI);
+ getSymbolPreferLocal(*MO.getGlobal())->print(OS, MAI);
printOffset(MO.getOffset(), OS);
}
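The rewritten clobber-list warning above joins the reserved register names with LLVM's ListSeparator instead of special-casing the first loop iteration, and routes the message through the LLVMContext diagnostic handler rather than the inline-asm SourceMgr. A minimal standalone sketch of that separator idiom (the ListSeparator class below is a local stand-in, not llvm::ListSeparator):

    #include <iostream>
    #include <string>
    #include <vector>

    // Minimal stand-in for the separator idiom: yields "" on first use and the
    // configured separator on every later use, so a join loop needs no special
    // case for the first element.
    class ListSeparator {
      bool First = true;
      const char *Sep;

    public:
      explicit ListSeparator(const char *Sep = ", ") : Sep(Sep) {}
      operator const char *() {
        if (First) {
          First = false;
          return "";
        }
        return Sep;
      }
    };

    int main() {
      std::vector<std::string> Regs = {"SP", "FP", "LR"}; // hypothetical names
      std::string Msg = "inline asm clobber list contains reserved registers: ";
      ListSeparator LS;
      for (const std::string &R : Regs) {
        Msg += LS; // "" before the first name, ", " before the rest
        Msg += R;
      }
      std::cout << Msg << "\n"; // ...: SP, FP, LR
    }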
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index b15e750aaf85..bbb0504550c3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -273,9 +273,9 @@ static StringRef getPrettyScopeName(const DIScope *Scope) {
return "<unnamed-tag>";
case dwarf::DW_TAG_namespace:
return "`anonymous namespace'";
+ default:
+ return StringRef();
}
-
- return StringRef();
}
const DISubprogram *CodeViewDebug::collectParentScopeNames(
@@ -358,6 +358,25 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
return recordTypeIndexForDINode(Scope, TI);
}
+static StringRef removeTemplateArgs(StringRef Name) {
+ // Remove template args from the display name. Assume that the template args
+ // are the last thing in the name.
+ if (Name.empty() || Name.back() != '>')
+ return Name;
+
+ int OpenBrackets = 0;
+ for (int i = Name.size() - 1; i >= 0; --i) {
+ if (Name[i] == '>')
+ ++OpenBrackets;
+ else if (Name[i] == '<') {
+ --OpenBrackets;
+ if (OpenBrackets == 0)
+ return Name.substr(0, i);
+ }
+ }
+ return Name;
+}
+
TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
assert(SP);
@@ -367,8 +386,9 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
return I->second;
// The display name includes function template arguments. Drop them to match
- // MSVC.
- StringRef DisplayName = SP->getName().split('<').first;
+ // MSVC. We need to have the template arguments in the DISubprogram name
+ // because they are used in other symbol records, such as S_GPROC32_IDs.
+ StringRef DisplayName = removeTemplateArgs(SP->getName());
const DIScope *Scope = SP->getScope();
TypeIndex TI;
@@ -784,6 +804,9 @@ void CodeViewDebug::emitCompilerInformation() {
// The low byte of the flags indicates the source language.
Flags = MapDWLangToCVLang(CU->getSourceLanguage());
// TODO: Figure out which other flags need to be set.
+ if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) {
+ Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO);
+ }
OS.AddComment("Flags and language");
OS.emitInt32(Flags);
@@ -794,8 +817,8 @@ void CodeViewDebug::emitCompilerInformation() {
StringRef CompilerVersion = CU->getProducer();
Version FrontVer = parseVersion(CompilerVersion);
OS.AddComment("Frontend version");
- for (int N = 0; N < 4; ++N)
- OS.emitInt16(FrontVer.Part[N]);
+ for (int N : FrontVer.Part)
+ OS.emitInt16(N);
// Some Microsoft tools, like Binscope, expect a backend version number of at
// least 8.something, so we'll coerce the LLVM version into a form that
@@ -807,8 +830,8 @@ void CodeViewDebug::emitCompilerInformation() {
Major = std::min<int>(Major, std::numeric_limits<uint16_t>::max());
Version BackVer = {{ Major, 0, 0, 0 }};
OS.AddComment("Backend version");
- for (int N = 0; N < 4; ++N)
- OS.emitInt16(BackVer.Part[N]);
+ for (int N : BackVer.Part)
+ OS.emitInt16(N);
OS.AddComment("Null-terminated compiler version string");
emitNullTerminatedSymbolName(OS, CompilerVersion);
@@ -1357,7 +1380,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn->CSRSize = MFI.getCVBytesOfCalleeSavedRegisters();
CurFn->FrameSize = MFI.getStackSize();
CurFn->OffsetAdjustment = MFI.getOffsetAdjustment();
- CurFn->HasStackRealignment = TRI->needsStackRealignment(*MF);
+ CurFn->HasStackRealignment = TRI->hasStackRealignment(*MF);
// For this function S_FRAMEPROC record, figure out which codeview register
// will be the frame pointer.
@@ -1408,6 +1431,10 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
if (Asm->TM.getOptLevel() != CodeGenOpt::None &&
!GV.hasOptSize() && !GV.hasOptNone())
FPO |= FrameProcedureOptions::OptimizedForSpeed;
+ if (GV.hasProfileData()) {
+ FPO |= FrameProcedureOptions::ValidProfileCounts;
+ FPO |= FrameProcedureOptions::ProfileGuidedOptimization;
+ }
// FIXME: Set GuardCfg when it is implemented.
CurFn->FrameProcOpts = FPO;
@@ -1460,6 +1487,9 @@ static bool shouldEmitUdt(const DIType *T) {
case dwarf::DW_TAG_class_type:
case dwarf::DW_TAG_union_type:
return false;
+ default:
+ // do nothing.
+ ;
}
}
}
@@ -2005,10 +2035,13 @@ static MethodKind translateMethodKindFlags(const DISubprogram *SP,
static TypeRecordKind getRecordKind(const DICompositeType *Ty) {
switch (Ty->getTag()) {
- case dwarf::DW_TAG_class_type: return TypeRecordKind::Class;
- case dwarf::DW_TAG_structure_type: return TypeRecordKind::Struct;
+ case dwarf::DW_TAG_class_type:
+ return TypeRecordKind::Class;
+ case dwarf::DW_TAG_structure_type:
+ return TypeRecordKind::Struct;
+ default:
+ llvm_unreachable("unexpected tag");
}
- llvm_unreachable("unexpected tag");
}
/// Return ClassOptions that should be present on both the forward declaration
@@ -2083,6 +2116,7 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
// We assume that the frontend provides all members in source declaration
// order, which is what MSVC does.
if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) {
+ // FIXME: Is it correct to always emit these as unsigned here?
EnumeratorRecord ER(MemberAccess::Public,
APSInt(Enumerator->getValue(), true),
Enumerator->getName());
@@ -3124,6 +3158,27 @@ void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
}
}
+void CodeViewDebug::emitConstantSymbolRecord(const DIType *DTy, APSInt &Value,
+ const std::string &QualifiedName) {
+ MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
+ OS.AddComment("Type");
+ OS.emitInt32(getTypeIndex(DTy).getIndex());
+
+ OS.AddComment("Value");
+
+ // Encoded integers shouldn't need more than 10 bytes.
+ uint8_t Data[10];
+ BinaryStreamWriter Writer(Data, llvm::support::endianness::little);
+ CodeViewRecordIO IO(Writer);
+ cantFail(IO.mapEncodedInteger(Value));
+ StringRef SRef((char *)Data, Writer.getOffset());
+ OS.emitBinaryData(SRef);
+
+ OS.AddComment("Name");
+ emitNullTerminatedSymbolName(OS, QualifiedName);
+ endSymbolRecord(SConstantEnd);
+}
+
void CodeViewDebug::emitStaticConstMemberList() {
for (const DIDerivedType *DTy : StaticConstMembers) {
const DIScope *Scope = DTy->getScope();
@@ -3139,24 +3194,8 @@ void CodeViewDebug::emitStaticConstMemberList() {
else
llvm_unreachable("cannot emit a constant without a value");
- std::string QualifiedName = getFullyQualifiedName(Scope, DTy->getName());
-
- MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
- OS.AddComment("Type");
- OS.emitInt32(getTypeIndex(DTy->getBaseType()).getIndex());
- OS.AddComment("Value");
-
- // Encoded integers shouldn't need more than 10 bytes.
- uint8_t Data[10];
- BinaryStreamWriter Writer(Data, llvm::support::endianness::little);
- CodeViewRecordIO IO(Writer);
- cantFail(IO.mapEncodedInteger(Value));
- StringRef SRef((char *)Data, Writer.getOffset());
- OS.emitBinaryData(SRef);
-
- OS.AddComment("Name");
- emitNullTerminatedSymbolName(OS, QualifiedName);
- endSymbolRecord(SConstantEnd);
+ emitConstantSymbolRecord(DTy->getBaseType(), Value,
+ getFullyQualifiedName(Scope, DTy->getName()));
}
}
@@ -3220,22 +3259,6 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
? true
: DebugHandlerBase::isUnsignedDIType(DIGV->getType());
APSInt Value(APInt(/*BitWidth=*/64, DIE->getElement(1)), isUnsigned);
-
- MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
- OS.AddComment("Type");
- OS.emitInt32(getTypeIndex(DIGV->getType()).getIndex());
- OS.AddComment("Value");
-
- // Encoded integers shouldn't need more than 10 bytes.
- uint8_t data[10];
- BinaryStreamWriter Writer(data, llvm::support::endianness::little);
- CodeViewRecordIO IO(Writer);
- cantFail(IO.mapEncodedInteger(Value));
- StringRef SRef((char *)data, Writer.getOffset());
- OS.emitBinaryData(SRef);
-
- OS.AddComment("Name");
- emitNullTerminatedSymbolName(OS, QualifiedName);
- endSymbolRecord(SConstantEnd);
+ emitConstantSymbolRecord(DIGV->getType(), Value, QualifiedName);
}
}
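The new removeTemplateArgs helper above strips a trailing template argument list by scanning backwards and balancing '>' against '<', so nested arguments are removed as a whole and names such as operator< are left alone. A standalone sketch of the same bracket matching, using std::string_view in place of StringRef:

    #include <cassert>
    #include <string_view>

    // If the name ends in '>', walk backwards balancing '>' against '<' and
    // cut at the matching '<'; otherwise (or if the brackets never balance)
    // return the name unchanged.
    static std::string_view stripTrailingTemplateArgs(std::string_view Name) {
      if (Name.empty() || Name.back() != '>')
        return Name;
      int OpenBrackets = 0;
      for (int I = static_cast<int>(Name.size()) - 1; I >= 0; --I) {
        if (Name[I] == '>')
          ++OpenBrackets;
        else if (Name[I] == '<' && --OpenBrackets == 0)
          return Name.substr(0, I);
      }
      return Name;
    }

    int main() {
      assert(stripTrailingTemplateArgs("foo<int>") == "foo");
      assert(stripTrailingTemplateArgs("foo<bar<int>>") == "foo");
      assert(stripTrailingTemplateArgs("operator<") == "operator<");
      assert(stripTrailingTemplateArgs("plain") == "plain");
    }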
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 9eee5492bc81..d133474ee5aa 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -315,6 +315,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void collectDebugInfoForGlobals();
void emitDebugInfoForGlobals();
void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
+ void emitConstantSymbolRecord(const DIType *DTy, APSInt &Value,
+ const std::string &QualifiedName);
void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV);
void emitStaticConstMemberList();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 39b0b027c765..2834d9c3ebbf 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -785,6 +785,7 @@ void DIEBlock::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break;
case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break;
case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break;
+ case dwarf::DW_FORM_exprloc:
case dwarf::DW_FORM_block:
Asm->emitULEB128(Size);
break;
@@ -803,6 +804,7 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_exprloc:
case dwarf::DW_FORM_block: return Size + getULEB128Size(Size);
case dwarf::DW_FORM_data16: return 16;
default: llvm_unreachable("Improper form for block");
@@ -853,3 +855,27 @@ void DIELocList::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
LLVM_DUMP_METHOD
void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; }
+
+//===----------------------------------------------------------------------===//
+// DIEAddrOffset Implementation
+//===----------------------------------------------------------------------===//
+
+unsigned DIEAddrOffset::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ return Addr.SizeOf(AP, dwarf::DW_FORM_addrx) +
+ Offset.SizeOf(AP, dwarf::DW_FORM_data4);
+}
+
+/// EmitValue - Emit the address (as an address pool index) and the offset.
+///
+void DIEAddrOffset::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ Addr.emitValue(AP, dwarf::DW_FORM_addrx);
+ Offset.emitValue(AP, dwarf::DW_FORM_data4);
+}
+
+LLVM_DUMP_METHOD
+void DIEAddrOffset::print(raw_ostream &O) const {
+ O << "AddrOffset: ";
+ Addr.print(O);
+ O << " + ";
+ Offset.print(O);
+}
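The DW_FORM_exprloc cases added above reuse the DW_FORM_block rule: the block payload is prefixed by its length as a ULEB128, so the total size is the payload size plus however many 7-bit groups the length needs. A small self-contained sketch of that arithmetic (the helper names are ours, not LLVM's getULEB128Size):

    #include <cassert>
    #include <cstdint>

    // Number of bytes a value occupies as a ULEB128 (7 payload bits per byte).
    static unsigned ulebSize(uint64_t Value) {
      unsigned Bytes = 0;
      do {
        Value >>= 7;
        ++Bytes;
      } while (Value != 0);
      return Bytes;
    }

    // Size of a DW_FORM_block/DW_FORM_exprloc value: length prefix + payload.
    static uint64_t blockFormSize(uint64_t PayloadSize) {
      return PayloadSize + ulebSize(PayloadSize);
    }

    int main() {
      assert(blockFormSize(0) == 1);     // an empty block still has a 1-byte length
      assert(blockFormSize(127) == 128); // 127 fits in one ULEB128 byte
      assert(blockFormSize(128) == 130); // 128 needs two ULEB128 bytes
    }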
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index da9997efc01f..802f0e880514 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -319,6 +319,7 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
case DIEValue::isLabel:
case DIEValue::isBaseTypeRef:
case DIEValue::isDelta:
+ case DIEValue::isAddrOffset:
llvm_unreachable("Add support for additional value types.");
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 1c9131edab83..bb24f1414ef1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -37,22 +37,6 @@ namespace {
using EntryIndex = DbgValueHistoryMap::EntryIndex;
}
-// If @MI is a DBG_VALUE with debug value described by a
-// defined register, returns the number of this register.
-// In the other case, returns 0.
-static Register isDescribedByReg(const MachineInstr &MI) {
- assert(MI.isDebugValue());
- assert(MI.getNumOperands() == 4);
- // If the location of variable is an entry value (DW_OP_LLVM_entry_value)
- // do not consider it as a register location.
- if (MI.getDebugExpression()->isEntryValue())
- return 0;
- // If location of variable is described using a register (directly or
- // indirectly), this register is always a first operand.
- return MI.getDebugOperand(0).isReg() ? MI.getDebugOperand(0).getReg()
- : Register();
-}
-
void InstructionOrdering::initialize(const MachineFunction &MF) {
// We give meta instructions the same ordinal as the preceding instruction
// because this class is written for the task of comparing positions of
@@ -273,6 +257,23 @@ void DbgValueHistoryMap::trimLocationRanges(
}
}
+bool DbgValueHistoryMap::hasNonEmptyLocation(const Entries &Entries) const {
+ for (const auto &Entry : Entries) {
+ if (!Entry.isDbgValue())
+ continue;
+
+ const MachineInstr *MI = Entry.getInstr();
+ assert(MI->isDebugValue());
+ // A DBG_VALUE $noreg is an empty variable location
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0)
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
assert(MI.isDebugLabel() && "not a DBG_LABEL");
LabelInstr[Label] = &MI;
@@ -316,24 +317,44 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
}
/// Create a clobbering entry and end all open debug value entries
-/// for \p Var that are described by \p RegNo using that entry.
+/// for \p Var that are described by \p RegNo using that entry. Inserts into \p
+/// FellowRegisters the set of Registers that were also used to describe \p Var
+/// alongside \p RegNo.
static void clobberRegEntries(InlinedEntity Var, unsigned RegNo,
const MachineInstr &ClobberingInstr,
DbgValueEntriesMap &LiveEntries,
- DbgValueHistoryMap &HistMap) {
+ DbgValueHistoryMap &HistMap,
+ SmallVectorImpl<Register> &FellowRegisters) {
EntryIndex ClobberIndex = HistMap.startClobber(Var, ClobberingInstr);
-
// Close all entries whose values are described by the register.
SmallVector<EntryIndex, 4> IndicesToErase;
+ // If a given register appears in a live DBG_VALUE_LIST for Var alongside the
+ // clobbered register, and never appears in a live DBG_VALUE* for Var without
+ // the clobbered register, then it is no longer linked to the variable.
+ SmallSet<Register, 4> MaybeRemovedRegisters;
+ SmallSet<Register, 4> KeepRegisters;
for (auto Index : LiveEntries[Var]) {
auto &Entry = HistMap.getEntry(Var, Index);
assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries");
- if (isDescribedByReg(*Entry.getInstr()) == RegNo) {
+ if (Entry.getInstr()->isDebugEntryValue())
+ continue;
+ if (Entry.getInstr()->hasDebugOperandForReg(RegNo)) {
IndicesToErase.push_back(Index);
Entry.endEntry(ClobberIndex);
+ for (auto &MO : Entry.getInstr()->debug_operands())
+ if (MO.isReg() && MO.getReg() && MO.getReg() != RegNo)
+ MaybeRemovedRegisters.insert(MO.getReg());
+ } else {
+ for (auto &MO : Entry.getInstr()->debug_operands())
+ if (MO.isReg() && MO.getReg())
+ KeepRegisters.insert(MO.getReg());
}
}
+ for (Register Reg : MaybeRemovedRegisters)
+ if (!KeepRegisters.contains(Reg))
+ FellowRegisters.push_back(Reg);
+
// Drop all entries that have ended.
for (auto Index : IndicesToErase)
LiveEntries[Var].erase(Index);
@@ -361,17 +382,24 @@ static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV,
IndicesToErase.push_back(Index);
Entry.endEntry(NewIndex);
}
- if (Register Reg = isDescribedByReg(DV))
- TrackedRegs[Reg] |= !Overlaps;
+ if (!DV.isDebugEntryValue())
+ for (const MachineOperand &Op : DV.debug_operands())
+ if (Op.isReg() && Op.getReg())
+ TrackedRegs[Op.getReg()] |= !Overlaps;
}
// If the new debug value is described by a register, add tracking of
// that register if it is not already tracked.
- if (Register NewReg = isDescribedByReg(DV)) {
- if (!TrackedRegs.count(NewReg))
- addRegDescribedVar(RegVars, NewReg, Var);
- LiveEntries[Var].insert(NewIndex);
- TrackedRegs[NewReg] = true;
+ if (!DV.isDebugEntryValue()) {
+ for (const MachineOperand &Op : DV.debug_operands()) {
+ if (Op.isReg() && Op.getReg()) {
+ Register NewReg = Op.getReg();
+ if (!TrackedRegs.count(NewReg))
+ addRegDescribedVar(RegVars, NewReg, Var);
+ LiveEntries[Var].insert(NewIndex);
+ TrackedRegs[NewReg] = true;
+ }
+ }
}
// Drop tracking of registers that are no longer used.
@@ -394,9 +422,16 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
DbgValueEntriesMap &LiveEntries,
const MachineInstr &ClobberingInstr) {
// Iterate over all variables described by this register and add this
- // instruction to their history, clobbering it.
- for (const auto &Var : I->second)
- clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap);
+ // instruction to their history, clobbering it. All registers that also
+ // describe the clobbered variables (i.e. in variadic debug values) will have
+ // those Variables removed from their DescribedVars.
+ for (const auto &Var : I->second) {
+ SmallVector<Register, 4> FellowRegisters;
+ clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap,
+ FellowRegisters);
+ for (Register RegNo : FellowRegisters)
+ dropRegDescribedVar(RegVars, RegNo, Var);
+ }
RegVars.erase(I);
}
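clobberRegEntries above now also reports "fellow" registers: a register that only ever described the variable together with the clobbered register is unlinked from it, while a register that still appears in some live debug value without the clobbered one stays linked. A standalone sketch of that set computation, assuming each live debug value is reduced to a plain list of register numbers:

    #include <algorithm>
    #include <cassert>
    #include <set>
    #include <vector>

    // A register is dropped for the variable only if every live debug value
    // that mentions it also mentions the clobbered register.
    static std::vector<unsigned>
    fellowRegisters(const std::vector<std::vector<unsigned>> &LiveDebugValues,
                    unsigned ClobberedReg) {
      std::set<unsigned> MaybeRemoved, Keep;
      for (const auto &Regs : LiveDebugValues) {
        bool UsesClobbered =
            std::find(Regs.begin(), Regs.end(), ClobberedReg) != Regs.end();
        for (unsigned R : Regs) {
          if (R == ClobberedReg)
            continue;
          (UsesClobbered ? MaybeRemoved : Keep).insert(R);
        }
      }
      std::vector<unsigned> Result;
      for (unsigned R : MaybeRemoved)
        if (!Keep.count(R))
          Result.push_back(R);
      return Result;
    }

    int main() {
      // Reg 2 also appears without reg 1, so clobbering reg 1 keeps reg 2 linked.
      assert(fellowRegisters({{1, 2}, {2, 3}}, 1).empty());
      // Here reg 2 only ever appears alongside reg 1, so it is dropped too.
      assert(fellowRegisters({{1, 2}, {3}}, 1) == std::vector<unsigned>{2});
    }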
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 68a4bfba42a7..c81288c0e460 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -35,7 +35,8 @@ Optional<DbgVariableLocation>
DbgVariableLocation::extractFromMachineInstruction(
const MachineInstr &Instruction) {
DbgVariableLocation Location;
- if (!Instruction.isDebugValue())
+ // Variables calculated from multiple locations can't be represented here.
+ if (Instruction.getNumDebugOperands() != 1)
return None;
if (!Instruction.getDebugOperand(0).isReg())
return None;
@@ -46,6 +47,15 @@ DbgVariableLocation::extractFromMachineInstruction(
int64_t Offset = 0;
const DIExpression *DIExpr = Instruction.getDebugExpression();
auto Op = DIExpr->expr_op_begin();
+ // We can handle a DBG_VALUE_LIST iff it has exactly one location operand that
+ // appears exactly once at the start of the expression.
+ if (Instruction.isDebugValueList()) {
+ if (Instruction.getNumDebugOperands() == 1 &&
+ Op->getOp() == dwarf::DW_OP_LLVM_arg)
+ ++Op;
+ else
+ return None;
+ }
while (Op != DIExpr->expr_op_end()) {
switch (Op->getOp()) {
case dwarf::DW_OP_constu: {
@@ -164,6 +174,12 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
}
bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
+ // SROA may generate dbg value intrinsics to assign an unsigned value to
+ // Fortran CHARACTER(1) type variables. Treat them as unsigned.
+ if (isa<DIStringType>(Ty)) {
+ assert((Ty->getSizeInBits()) == 8 && "Not a valid unsigned type!");
+ return true;
+ }
if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
// FIXME: Enums without a fixed underlying type have unknown signedness
// here, leading to incorrectly emitted constants.
@@ -261,7 +277,8 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
continue;
auto IsDescribedByReg = [](const MachineInstr *MI) {
- return MI->getDebugOperand(0).isReg() && MI->getDebugOperand(0).getReg();
+ return any_of(MI->debug_operands(),
+ [](auto &MO) { return MO.isReg() && MO.getReg(); });
};
// The first mention of a function argument gets the CurrentFnBegin label,
@@ -273,16 +290,10 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// doing that violates the ranges that are calculated in the history map.
// However, we currently do not emit debug values for constant arguments
// directly at the start of the function, so this code is still useful.
- // FIXME: If the first mention of an argument is in a unique section basic
- // block, we cannot always assign the CurrentFnBeginLabel as it lies in a
- // different section. Temporarily, we disable generating loc list
- // information or DW_AT_const_value when the block is in a different
- // section.
const DILocalVariable *DIVar =
Entries.front().getInstr()->getDebugVariable();
if (DIVar->isParameter() &&
- getDISubprogram(DIVar->getScope())->describes(&MF->getFunction()) &&
- Entries.front().getInstr()->getParent()->sameSection(&MF->front())) {
+ getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) {
if (!IsDescribedByReg(Entries.front().getInstr()))
LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin();
if (Entries.front().getInstr()->getDebugExpression()->isFragment()) {
@@ -368,22 +379,25 @@ void DebugHandlerBase::endInstruction() {
DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
LabelsAfterInsn.find(CurMI);
- CurMI = nullptr;
-
- // No label needed.
- if (I == LabelsAfterInsn.end())
- return;
- // Label already assigned.
- if (I->second)
+ // No label needed or label already assigned.
+ if (I == LabelsAfterInsn.end() || I->second) {
+ CurMI = nullptr;
return;
+ }
- // We need a label after this instruction.
- if (!PrevLabel) {
+ // We need a label after this instruction. With basic block sections, just
+ // use the end symbol of the section if this is the last instruction of the
+ // section. This reduces the need for an additional label and also helps
+ // merging ranges.
+ if (CurMI->getParent()->isEndSection() && CurMI->getNextNode() == nullptr) {
+ PrevLabel = CurMI->getParent()->getEndSymbol();
+ } else if (!PrevLabel) {
PrevLabel = MMI->getContext().createTempSymbol();
Asm->OutStreamer->emitLabel(PrevLabel);
}
I->second = PrevLabel;
+ CurMI = nullptr;
}
void DebugHandlerBase::endFunction(const MachineFunction *MF) {
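extractFromMachineInstruction above now accepts a DBG_VALUE_LIST only when it carries a single debug operand and its expression starts with a DW_OP_LLVM_arg referring to that operand, which is skipped before the usual expression walk; everything else returns None. A rough standalone model of that acceptance test (the opcode constant below is a placeholder, not the real DW_OP_LLVM_arg encoding):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    constexpr uint64_t DW_OP_LLVM_arg_model = 0x1001; // placeholder value

    // Returns whether a simple single-register location can be extracted, and
    // the index at which expression processing should start.
    static bool canExtractSingleLocation(unsigned NumDebugOperands, bool IsList,
                                         const std::vector<uint64_t> &ExprOps,
                                         size_t &FirstOpToProcess) {
      FirstOpToProcess = 0;
      if (NumDebugOperands != 1)
        return false;
      if (IsList) {
        if (ExprOps.empty() || ExprOps.front() != DW_OP_LLVM_arg_model)
          return false;
        FirstOpToProcess = 1; // skip the DW_OP_LLVM_arg prefix
      }
      return true;
    }

    int main() {
      size_t Start = 0;
      // Plain DBG_VALUE with one operand: accepted, expression read from the top.
      assert(canExtractSingleLocation(1, false, {}, Start) && Start == 0);
      // DBG_VALUE_LIST with one operand and a leading DW_OP_LLVM_arg: accepted.
      assert(canExtractSingleLocation(1, true, {DW_OP_LLVM_arg_model}, Start) &&
             Start == 1);
      // Two operands cannot be represented as a single simple location.
      assert(!canExtractSingleLocation(2, true, {DW_OP_LLVM_arg_model}, Start));
    }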
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 36278f2e9e2d..62ebadaf3cbe 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -34,10 +34,10 @@ struct TargetIndexLocation {
}
};
-/// A single location or constant.
-class DbgValueLoc {
- /// Any complex address location expression for this DbgValueLoc.
- const DIExpression *Expression;
+/// A single location or constant within a variable location description, with
+/// either a single entry (with an optional DIExpression) used for a DBG_VALUE,
+/// or a list of entries used for a DBG_VALUE_LIST.
+class DbgValueLocEntry {
/// Type of entry that this represents.
enum EntryType {
@@ -64,24 +64,16 @@ class DbgValueLoc {
};
public:
- DbgValueLoc(const DIExpression *Expr, int64_t i)
- : Expression(Expr), EntryKind(E_Integer) {
- Constant.Int = i;
- }
- DbgValueLoc(const DIExpression *Expr, const ConstantFP *CFP)
- : Expression(Expr), EntryKind(E_ConstantFP) {
+ DbgValueLocEntry(int64_t i) : EntryKind(E_Integer) { Constant.Int = i; }
+ DbgValueLocEntry(const ConstantFP *CFP) : EntryKind(E_ConstantFP) {
Constant.CFP = CFP;
}
- DbgValueLoc(const DIExpression *Expr, const ConstantInt *CIP)
- : Expression(Expr), EntryKind(E_ConstantInt) {
+ DbgValueLocEntry(const ConstantInt *CIP) : EntryKind(E_ConstantInt) {
Constant.CIP = CIP;
}
- DbgValueLoc(const DIExpression *Expr, MachineLocation Loc)
- : Expression(Expr), EntryKind(E_Location), Loc(Loc) {
- assert(cast<DIExpression>(Expr)->isValid());
- }
- DbgValueLoc(const DIExpression *Expr, TargetIndexLocation Loc)
- : Expression(Expr), EntryKind(E_TargetIndexLocation), TIL(Loc) {}
+ DbgValueLocEntry(MachineLocation Loc) : EntryKind(E_Location), Loc(Loc) {}
+ DbgValueLocEntry(TargetIndexLocation Loc)
+ : EntryKind(E_TargetIndexLocation), TIL(Loc) {}
bool isLocation() const { return EntryKind == E_Location; }
bool isTargetIndexLocation() const {
@@ -95,11 +87,7 @@ public:
const ConstantInt *getConstantInt() const { return Constant.CIP; }
MachineLocation getLoc() const { return Loc; }
TargetIndexLocation getTargetIndexLocation() const { return TIL; }
- bool isFragment() const { return getExpression()->isFragment(); }
- bool isEntryVal() const { return getExpression()->isEntryValue(); }
- const DIExpression *getExpression() const { return Expression; }
- friend bool operator==(const DbgValueLoc &, const DbgValueLoc &);
- friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
+ friend bool operator==(const DbgValueLocEntry &, const DbgValueLocEntry &);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const {
if (isLocation()) {
@@ -111,6 +99,67 @@ public:
Constant.CIP->dump();
else if (isConstantFP())
Constant.CFP->dump();
+ }
+#endif
+};
+
+/// The location of a single variable, composed of an expression and 0 or more
+/// DbgValueLocEntries.
+class DbgValueLoc {
+ /// Any complex address location expression for this DbgValueLoc.
+ const DIExpression *Expression;
+
+ SmallVector<DbgValueLocEntry, 2> ValueLocEntries;
+
+ bool IsVariadic;
+
+public:
+ DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs)
+ : Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()),
+ IsVariadic(true) {
+#ifndef NDEBUG
+ // Currently, DBG_VALUE_LIST expressions must use stack_value.
+ assert(Expr && Expr->isValid() &&
+ is_contained(Locs, dwarf::DW_OP_stack_value));
+#endif
+ }
+
+ DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs,
+ bool IsVariadic)
+ : Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()),
+ IsVariadic(IsVariadic) {
+#ifndef NDEBUG
+ assert(cast<DIExpression>(Expr)->isValid() ||
+ !any_of(Locs, [](auto LE) { return LE.isLocation(); }));
+ if (!IsVariadic) {
+ assert(ValueLocEntries.size() == 1);
+ } else {
+ // Currently, DBG_VALUE_LIST expressions must use stack_value.
+ assert(Expr && Expr->isValid() &&
+ is_contained(Expr->getElements(), dwarf::DW_OP_stack_value));
+ }
+#endif
+ }
+
+ DbgValueLoc(const DIExpression *Expr, DbgValueLocEntry Loc)
+ : Expression(Expr), ValueLocEntries(1, Loc), IsVariadic(false) {
+ assert(((Expr && Expr->isValid()) || !Loc.isLocation()) &&
+ "DBG_VALUE with a machine location must have a valid expression.");
+ }
+
+ bool isFragment() const { return getExpression()->isFragment(); }
+ bool isEntryVal() const { return getExpression()->isEntryValue(); }
+ bool isVariadic() const { return IsVariadic; }
+ const DIExpression *getExpression() const { return Expression; }
+ const ArrayRef<DbgValueLocEntry> getLocEntries() const {
+ return ValueLocEntries;
+ }
+ friend bool operator==(const DbgValueLoc &, const DbgValueLoc &);
+ friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
+ for (DbgValueLocEntry DV : ValueLocEntries)
+ DV.dump();
if (Expression)
Expression->dump();
}
@@ -180,30 +229,32 @@ public:
DwarfCompileUnit &TheCU);
};
-/// Compare two DbgValueLocs for equality.
-inline bool operator==(const DbgValueLoc &A,
- const DbgValueLoc &B) {
+/// Compare two DbgValueLocEntries for equality.
+inline bool operator==(const DbgValueLocEntry &A, const DbgValueLocEntry &B) {
if (A.EntryKind != B.EntryKind)
return false;
- if (A.Expression != B.Expression)
- return false;
-
switch (A.EntryKind) {
- case DbgValueLoc::E_Location:
+ case DbgValueLocEntry::E_Location:
return A.Loc == B.Loc;
- case DbgValueLoc::E_TargetIndexLocation:
+ case DbgValueLocEntry::E_TargetIndexLocation:
return A.TIL == B.TIL;
- case DbgValueLoc::E_Integer:
+ case DbgValueLocEntry::E_Integer:
return A.Constant.Int == B.Constant.Int;
- case DbgValueLoc::E_ConstantFP:
+ case DbgValueLocEntry::E_ConstantFP:
return A.Constant.CFP == B.Constant.CFP;
- case DbgValueLoc::E_ConstantInt:
+ case DbgValueLocEntry::E_ConstantInt:
return A.Constant.CIP == B.Constant.CIP;
}
llvm_unreachable("unhandled EntryKind");
}
+/// Compare two DbgValueLocs for equality.
+inline bool operator==(const DbgValueLoc &A, const DbgValueLoc &B) {
+ return A.ValueLocEntries == B.ValueLocEntries &&
+ A.Expression == B.Expression && A.IsVariadic == B.IsVariadic;
+}
+
/// Compare two fragments based on their offset.
inline bool operator<(const DbgValueLoc &A,
const DbgValueLoc &B) {
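The header change above splits the old DbgValueLoc into per-operand DbgValueLocEntry values plus a DbgValueLoc that owns the expression, the ordered entry list, and an IsVariadic flag, with equality now comparing all three. A minimal standalone analogue of that shape (std::variant and a string "expression" are simplifications, not the LLVM types):

    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <variant>
    #include <vector>

    // A single machine location, reduced to a register number.
    struct MachineLoc {
      unsigned Reg = 0;
      bool operator==(const MachineLoc &O) const { return Reg == O.Reg; }
    };

    // One entry per debug operand: either a constant or a machine location.
    using LocEntry = std::variant<int64_t, MachineLoc>;

    // The whole variable location: expression + ordered entries + variadic flag.
    struct VariableLoc {
      std::string Expression; // stands in for const DIExpression *
      std::vector<LocEntry> Entries;
      bool IsVariadic = false;

      bool operator==(const VariableLoc &O) const {
        return Expression == O.Expression && Entries == O.Entries &&
               IsVariadic == O.IsVariadic;
      }
    };

    int main() {
      VariableLoc Single{"DW_OP_plus_uconst 8", {LocEntry{MachineLoc{5}}}, false};
      VariableLoc List{"DW_OP_LLVM_arg 0, DW_OP_LLVM_arg 1, DW_OP_plus, DW_OP_stack_value",
                       {LocEntry{MachineLoc{5}}, LocEntry{int64_t{4}}}, true};
      assert(Single == Single);
      assert(!(Single == List));
    }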
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index c20ac6040aef..e36b7e2ae885 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -53,8 +53,7 @@ void DwarfCFIExceptionBase::endFragment() {
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
: DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
- forceEmitPersonality(false), shouldEmitLSDA(false),
- shouldEmitMoves(false) {}
+ forceEmitPersonality(false), shouldEmitLSDA(false) {}
DwarfCFIException::~DwarfCFIException() {}
@@ -87,16 +86,15 @@ static MCSymbol *getExceptionSym(AsmPrinter *Asm,
}
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
- shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+ shouldEmitPersonality = shouldEmitLSDA = false;
const Function &F = MF->getFunction();
// If any landing pads survive, we need an EH table.
bool hasLandingPads = !MF->getLandingPads().empty();
// See if we need frame move info.
- AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
-
- shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None;
+ bool shouldEmitMoves =
+ Asm->getFunctionCFISectionType(*MF) != AsmPrinter::CFISection::None;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
@@ -122,8 +120,13 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
- shouldEmitCFI = MF->getMMI().getContext().getAsmInfo()->usesCFIForEH() &&
- (shouldEmitPersonality || shouldEmitMoves);
+ const MCAsmInfo &MAI = *MF->getMMI().getContext().getAsmInfo();
+ if (MAI.getExceptionHandlingType() != ExceptionHandling::None)
+ shouldEmitCFI =
+ MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves);
+ else
+ shouldEmitCFI = Asm->needsCFIForDebug() && shouldEmitMoves;
+
beginFragment(&*MF->begin(), getExceptionSym);
}
@@ -133,10 +136,14 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
return;
if (!hasEmittedCFISections) {
- if (Asm->needsOnlyDebugCFIMoves())
- Asm->OutStreamer->emitCFISections(false, true);
- else if (Asm->TM.Options.ForceDwarfFrameSection)
- Asm->OutStreamer->emitCFISections(true, true);
+ AsmPrinter::CFISection CFISecType = Asm->getModuleCFISectionType();
+ // If we don't say anything, it implies `.cfi_sections .eh_frame`, so we
+ // choose not to be verbose in that case. And with `ForceDwarfFrameSection`,
+ // we should always emit .debug_frame.
+ if (CFISecType == AsmPrinter::CFISection::Debug ||
+ Asm->TM.Options.ForceDwarfFrameSection)
+ Asm->OutStreamer->emitCFISections(
+ CFISecType == AsmPrinter::CFISection::EH, true);
hasEmittedCFISections = true;
}
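beginFragment above only prints a .cfi_sections directive when .debug_frame is wanted, since emitting nothing already means `.cfi_sections .eh_frame`. A standalone sketch of that decision (the enum and the directive strings below are local stand-ins for the AsmPrinter/MCStreamer pieces):

    #include <cassert>
    #include <string>

    enum class CFISection { None, EH, Debug };

    // Returns the directive to print, or "" when the implicit .eh_frame
    // default is already what we want.
    static std::string cfiSectionsDirective(CFISection ModuleType,
                                            bool ForceDwarfFrameSection) {
      if (ModuleType != CFISection::Debug && !ForceDwarfFrameSection)
        return ""; // implicit ".cfi_sections .eh_frame"
      if (ModuleType == CFISection::EH)
        return ".cfi_sections .eh_frame, .debug_frame";
      return ".cfi_sections .debug_frame";
    }

    int main() {
      assert(cfiSectionsDirective(CFISection::EH, false).empty());
      assert(cfiSectionsDirective(CFISection::Debug, false) ==
             ".cfi_sections .debug_frame");
      assert(cfiSectionsDirective(CFISection::EH, true) ==
             ".cfi_sections .eh_frame, .debug_frame");
    }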
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index befc4bba19a2..faa14dca1c3f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -73,11 +74,35 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
- unsigned idx = DD->getAddressPool().getIndex(Label);
- Die.addValue(DIEValueAllocator, Attribute,
- DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
- : dwarf::DW_FORM_GNU_addr_index,
- DIEInteger(idx));
+ bool UseAddrOffsetFormOrExpressions =
+ DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
+
+ const MCSymbol *Base = nullptr;
+ if (Label->isInSection() && UseAddrOffsetFormOrExpressions)
+ Base = DD->getSectionLabel(&Label->getSection());
+
+ if (!Base || Base == Label) {
+ unsigned idx = DD->getAddressPool().getIndex(Label);
+ addAttribute(Die, Attribute,
+ DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
+ : dwarf::DW_FORM_GNU_addr_index,
+ DIEInteger(idx));
+ return;
+ }
+
+ // Could be extended to work with DWARFv4 Split DWARF if that's important for
+ // someone. In that case DW_FORM_data would be used.
+ assert(DD->getDwarfVersion() >= 5 &&
+ "Addr+offset expressions are only valuable when using debug_addr (to "
+ "reduce relocations) available in DWARFv5 or higher");
+ if (DD->useAddrOffsetExpressions()) {
+ auto *Loc = new (DIEValueAllocator) DIEBlock();
+ addPoolOpAddress(*Loc, Label);
+ addBlock(Die, Attribute, dwarf::DW_FORM_exprloc, Loc);
+ } else
+ addAttribute(Die, Attribute, dwarf::DW_FORM_LLVM_addrx_offset,
+ new (DIEValueAllocator) DIEAddrOffset(
+ DD->getAddressPool().getIndex(Base), Label, Base));
}
void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
@@ -87,11 +112,9 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
DD->addArangeLabel(SymbolCU(this, Label));
if (Label)
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
- DIELabel(Label));
+ addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIELabel(Label));
else
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
- DIEInteger(0));
+ addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIEInteger(0));
}
unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
@@ -184,11 +207,16 @@ void DwarfCompileUnit::addLocationAttribute(
const DIExpression *Expr = GE.Expr;
// For compatibility with DWARF 3 and earlier,
- // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes
+ // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) or
+ // DW_AT_location(DW_OP_consts, X, DW_OP_stack_value) becomes
// DW_AT_const_value(X).
if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
addToAccelTable = true;
- addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1));
+ addConstantValue(
+ *VariableDIE,
+ DIExpression::SignedOrUnsignedConstant::UnsignedConstant ==
+ *Expr->isConstant(),
+ Expr->getElement(1));
break;
}
@@ -422,10 +450,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// FIXME: duplicated from Target/WebAssembly/WebAssembly.h
// don't want to depend on target specific headers in this code?
const unsigned TI_GLOBAL_RELOC = 3;
- // FIXME: when writing dwo, we need to avoid relocations. Probably
- // the "right" solution is to treat globals the way func and data symbols
- // are (with entries in .debug_addr).
- if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC && !isDwoUnit()) {
+ if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) {
// These need to be relocatable.
assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
auto SPSym = cast<MCSymbolWasm>(
@@ -443,8 +468,16 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
- addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
- DD->addArangeLabel(SymbolCU(this, SPSym));
+ if (!isDwoUnit()) {
+ addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
+ DD->addArangeLabel(SymbolCU(this, SPSym));
+ } else {
+ // FIXME: when writing dwo, we need to avoid relocations. Probably
+ // the "right" solution is to treat globals the way func and data
+ // symbols are (with entries in .debug_addr).
+ // For now, since we only ever use index 0, this should work as-is.
+ addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index);
+ }
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
} else {
@@ -698,36 +731,92 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
// Check if variable has a single location description.
if (auto *DVal = DV.getValueLoc()) {
- if (DVal->isLocation())
- addVariableAddress(DV, *VariableDie, DVal->getLoc());
- else if (DVal->isInt()) {
- auto *Expr = DV.getSingleExpression();
- if (Expr && Expr->getNumElements()) {
+ if (!DVal->isVariadic()) {
+ const DbgValueLocEntry *Entry = DVal->getLocEntries().begin();
+ if (Entry->isLocation()) {
+ addVariableAddress(DV, *VariableDie, Entry->getLoc());
+ } else if (Entry->isInt()) {
+ auto *Expr = DV.getSingleExpression();
+ if (Expr && Expr->getNumElements()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ // If there is an expression, emit raw unsigned bytes.
+ DwarfExpr.addFragmentOffset(Expr);
+ DwarfExpr.addUnsignedConstant(Entry->getInt());
+ DwarfExpr.addExpression(Expr);
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset,
+ dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
+ } else
+ addConstantValue(*VariableDie, Entry->getInt(), DV.getType());
+ } else if (Entry->isConstantFP()) {
+ addConstantFPValue(*VariableDie, Entry->getConstantFP());
+ } else if (Entry->isConstantInt()) {
+ addConstantValue(*VariableDie, Entry->getConstantInt(), DV.getType());
+ } else if (Entry->isTargetIndexLocation()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- // If there is an expression, emit raw unsigned bytes.
- DwarfExpr.addFragmentOffset(Expr);
- DwarfExpr.addUnsignedConstant(DVal->getInt());
- DwarfExpr.addExpression(Expr);
+ const DIBasicType *BT = dyn_cast<DIBasicType>(
+ static_cast<const Metadata *>(DV.getVariable()->getType()));
+ DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
- if (DwarfExpr.TagOffset)
- addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset,
- dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
-
- } else
- addConstantValue(*VariableDie, DVal->getInt(), DV.getType());
- } else if (DVal->isConstantFP()) {
- addConstantFPValue(*VariableDie, DVal->getConstantFP());
- } else if (DVal->isConstantInt()) {
- addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType());
- } else if (DVal->isTargetIndexLocation()) {
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- const DIBasicType *BT = dyn_cast<DIBasicType>(
- static_cast<const Metadata *>(DV.getVariable()->getType()));
- DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr);
- addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ }
+ return VariableDie;
}
+ // If any of the location entries are registers with the value 0, then the
+ // location is undefined.
+ if (any_of(DVal->getLocEntries(), [](const DbgValueLocEntry &Entry) {
+ return Entry.isLocation() && !Entry.getLoc().getReg();
+ }))
+ return VariableDie;
+ const DIExpression *Expr = DV.getSingleExpression();
+ assert(Expr && "Variadic Debug Value must have an Expression.");
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ DwarfExpr.addFragmentOffset(Expr);
+ DIExpressionCursor Cursor(Expr);
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+
+ auto AddEntry = [&](const DbgValueLocEntry &Entry,
+ DIExpressionCursor &Cursor) {
+ if (Entry.isLocation()) {
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor,
+ Entry.getLoc().getReg()))
+ return false;
+ } else if (Entry.isInt()) {
+ // If there is an expression, emit raw unsigned bytes.
+ DwarfExpr.addUnsignedConstant(Entry.getInt());
+ } else if (Entry.isConstantFP()) {
+ APInt RawBytes = Entry.getConstantFP()->getValueAPF().bitcastToAPInt();
+ DwarfExpr.addUnsignedConstant(RawBytes);
+ } else if (Entry.isConstantInt()) {
+ APInt RawBytes = Entry.getConstantInt()->getValue();
+ DwarfExpr.addUnsignedConstant(RawBytes);
+ } else if (Entry.isTargetIndexLocation()) {
+ TargetIndexLocation Loc = Entry.getTargetIndexLocation();
+ // TODO: TargetIndexLocation is target-independent. Currently only the
+ // WebAssembly-specific encoding is supported.
+ assert(Asm->TM.getTargetTriple().isWasm());
+ DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
+ } else {
+ llvm_unreachable("Unsupported Entry type.");
+ }
+ return true;
+ };
+
+ DwarfExpr.addExpression(
+ std::move(Cursor),
+ [&](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
+ return AddEntry(DVal->getLocEntries()[Idx], Cursor);
+ });
+
+ // Now attach the location information to the DIE.
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
+
return VariableDie;
}
@@ -1381,7 +1470,7 @@ void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
dwarf::Form Form = (DD->getDwarfVersion() >= 5)
? dwarf::DW_FORM_loclistx
: DD->getDwarfSectionOffsetForm();
- Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index));
+ addAttribute(Die, Attribute, Form, DIELocList(Index));
}
void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
@@ -1413,7 +1502,7 @@ void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label,
/// Add a Dwarf expression attribute data and value.
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
const MCExpr *Expr) {
- Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr));
+ addAttribute(Die, (dwarf::Attribute)0, Form, DIEExpr(Expr));
}
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
@@ -1447,7 +1536,7 @@ void DwarfCompileUnit::addAddrTableBase() {
}
void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) {
- Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, dwarf::DW_FORM_udata,
+ addAttribute(Die, (dwarf::Attribute)0, dwarf::DW_FORM_udata,
new (DIEValueAllocator) DIEBaseTypeRef(this, Idx));
}
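addLabelAddress above avoids a fresh address-pool entry when a label's section start is already pooled, describing the label as the pooled base plus a constant offset (either as an exprloc or with the addrx+offset form). A sketch of that minimization with symbols reduced to plain addresses (the names and the integer-address model are ours, not LLVM's):

    #include <cassert>
    #include <cstdint>
    #include <map>

    struct PooledAddress {
      unsigned PoolIndex;
      uint64_t Offset; // 0 when the label itself is pooled
    };

    // If the label's section base is already in the pool, reuse it and attach
    // an offset; otherwise give the label its own pool slot.
    static PooledAddress describeLabel(std::map<uint64_t, unsigned> &Pool,
                                       uint64_t SectionBase, uint64_t Label) {
      auto It = Pool.find(SectionBase);
      if (It != Pool.end() && Label != SectionBase)
        return {It->second, Label - SectionBase}; // addrx(base) + offset
      auto Inserted = Pool.emplace(Label, static_cast<unsigned>(Pool.size()));
      return {Inserted.first->second, 0};
    }

    int main() {
      std::map<uint64_t, unsigned> Pool;
      PooledAddress Base = describeLabel(Pool, 0x1000, 0x1000); // pools the base
      PooledAddress Mid = describeLabel(Pool, 0x1000, 0x1040);  // reuses it
      assert(Base.PoolIndex == 0 && Base.Offset == 0);
      assert(Mid.PoolIndex == 0 && Mid.Offset == 0x40);
      assert(Pool.size() == 1); // no extra pool entry for the interior label
    }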
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 462682743c6a..ee14423ca3d0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -160,6 +160,13 @@ static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option(
clEnumValN(DwarfDebug::MinimizeAddrInV5::Ranges, "Ranges",
"Use rnglists for contiguous ranges if that allows "
"using a pre-existing base address"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Expressions,
+ "Expressions",
+ "Use exprloc addrx+offset expressions for any "
+ "address with a prior base address"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Form, "Form",
+ "Use addrx+offset extension form for any address "
+ "with a prior base address"),
clEnumValN(DwarfDebug::MinimizeAddrInV5::Disabled, "Disabled",
"Stuff")),
cl::init(DwarfDebug::MinimizeAddrInV5::Default));
@@ -228,29 +235,27 @@ const DIType *DbgVariable::getType() const {
/// Get .debug_loc entry for the instruction range starting at MI.
static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
const DIExpression *Expr = MI->getDebugExpression();
- assert(MI->getNumOperands() == 4);
- if (MI->getDebugOperand(0).isReg()) {
- const auto &RegOp = MI->getDebugOperand(0);
- const auto &Op1 = MI->getDebugOffset();
- // If the second operand is an immediate, this is a
- // register-indirect address.
- assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
- MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
- return DbgValueLoc(Expr, MLoc);
- }
- if (MI->getDebugOperand(0).isTargetIndex()) {
- const auto &Op = MI->getDebugOperand(0);
- return DbgValueLoc(Expr,
- TargetIndexLocation(Op.getIndex(), Op.getOffset()));
- }
- if (MI->getDebugOperand(0).isImm())
- return DbgValueLoc(Expr, MI->getDebugOperand(0).getImm());
- if (MI->getDebugOperand(0).isFPImm())
- return DbgValueLoc(Expr, MI->getDebugOperand(0).getFPImm());
- if (MI->getDebugOperand(0).isCImm())
- return DbgValueLoc(Expr, MI->getDebugOperand(0).getCImm());
-
- llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
+ const bool IsVariadic = MI->isDebugValueList();
+ assert(MI->getNumOperands() >= 3);
+ SmallVector<DbgValueLocEntry, 4> DbgValueLocEntries;
+ for (const MachineOperand &Op : MI->debug_operands()) {
+ if (Op.isReg()) {
+ MachineLocation MLoc(Op.getReg(),
+ MI->isNonListDebugValue() && MI->isDebugOffsetImm());
+ DbgValueLocEntries.push_back(DbgValueLocEntry(MLoc));
+ } else if (Op.isTargetIndex()) {
+ DbgValueLocEntries.push_back(
+ DbgValueLocEntry(TargetIndexLocation(Op.getIndex(), Op.getOffset())));
+ } else if (Op.isImm())
+ DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getImm()));
+ else if (Op.isFPImm())
+ DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getFPImm()));
+ else if (Op.isCImm())
+ DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getCImm()));
+ else
+ llvm_unreachable("Unexpected debug operand in DBG_VALUE* instruction!");
+ }
+ return DbgValueLoc(Expr, DbgValueLocEntries, IsVariadic);
}
void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
@@ -357,11 +362,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
DebuggerTuning = DebuggerKind::LLDB;
else if (TT.isPS4CPU())
DebuggerTuning = DebuggerKind::SCE;
+ else if (TT.isOSAIX())
+ DebuggerTuning = DebuggerKind::DBX;
else
DebuggerTuning = DebuggerKind::GDB;
if (DwarfInlinedStrings == Default)
- UseInlineStrings = TT.isNVPTX();
+ UseInlineStrings = TT.isNVPTX() || tuneForDBX();
else
UseInlineStrings = DwarfInlinedStrings == Enable;
@@ -385,10 +392,21 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
DwarfVersion =
TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
- bool Dwarf64 = Asm->TM.Options.MCOptions.Dwarf64 &&
- DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3.
- TT.isArch64Bit() && // DWARF64 requires 64-bit relocations.
- TT.isOSBinFormatELF(); // Support only ELF for now.
+ bool Dwarf64 = DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3.
+ TT.isArch64Bit(); // DWARF64 requires 64-bit relocations.
+
+ // Support DWARF64
+ // 1: For ELF when requested.
+ // 2: For XCOFF64: the AIX assembler will fill in debug section lengths
+ // according to the DWARF64 format for 64-bit assembly, so we must use
+ // DWARF64 in the compiler too for 64-bit mode.
+ Dwarf64 &=
+ ((Asm->TM.Options.MCOptions.Dwarf64 || MMI->getModule()->isDwarf64()) &&
+ TT.isOSBinFormatELF()) ||
+ TT.isOSBinFormatXCOFF();
+
+ if (!Dwarf64 && TT.isArch64Bit() && TT.isOSBinFormatXCOFF())
+ report_fatal_error("XCOFF requires DWARF64 for 64-bit mode!");
UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
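The DWARF64 gating above reads as a small predicate: DWARF64 needs DWARF v3 or later and a 64-bit target, is opt-in on ELF (via the flag or module metadata), and is effectively mandatory for 64-bit XCOFF, where the code otherwise reports a fatal error. A standalone restatement of that logic under those assumptions:

    #include <cassert>

    static bool useDwarf64(unsigned DwarfVersion, bool Is64Bit, bool IsELF,
                           bool IsXCOFF, bool Requested) {
      // DWARF64 was introduced in DWARFv3 and needs 64-bit relocations.
      bool Dwarf64 = DwarfVersion >= 3 && Is64Bit;
      // Opt-in for ELF, always on for (64-bit) XCOFF.
      Dwarf64 &= (Requested && IsELF) || IsXCOFF;
      return Dwarf64;
    }

    int main() {
      assert(useDwarf64(5, true, true, false, true));   // ELF, requested
      assert(!useDwarf64(5, true, true, false, false)); // ELF, not requested
      assert(useDwarf64(5, true, false, true, false));  // 64-bit XCOFF: always
      assert(!useDwarf64(2, true, true, false, true));  // DWARFv2 predates DWARF64
    }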
@@ -627,7 +645,7 @@ static void finishCallSiteParams(ValT Val, const DIExpression *Expr,
assert((!CombinedExpr || CombinedExpr->isValid()) &&
"Combined debug expression is invalid");
- DbgValueLoc DbgLocVal(CombinedExpr, Val);
+ DbgValueLoc DbgLocVal(CombinedExpr, DbgValueLocEntry(Val));
DbgCallSiteParam CSParm(Param.ParamReg, DbgLocVal);
Params.push_back(CSParm);
++NumCSParams;
@@ -701,7 +719,7 @@ static void interpretValues(const MachineInstr *CurMI,
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() &&
Register::isPhysicalRegister(MO.getReg())) {
- for (auto FwdReg : ForwardedRegWorklist)
+ for (auto &FwdReg : ForwardedRegWorklist)
if (TRI.regsOverlap(FwdReg.first, MO.getReg()))
Defs.insert(FwdReg.first);
}
@@ -750,7 +768,7 @@ static void interpretValues(const MachineInstr *CurMI,
// Now that we are done handling this instruction, add items from the
// temporary worklist to the real one.
- for (auto New : TmpWorklistItems)
+ for (auto &New : TmpWorklistItems)
addToFwdRegWorklist(ForwardedRegWorklist, New.first, EmptyExpr, New.second);
TmpWorklistItems.clear();
}
@@ -785,7 +803,7 @@ static bool interpretNextInstr(const MachineInstr *CurMI,
static void collectCallSiteParameters(const MachineInstr *CallMI,
ParamSet &Params) {
const MachineFunction *MF = CallMI->getMF();
- auto CalleesMap = MF->getCallSitesInfo();
+ const auto &CalleesMap = MF->getCallSitesInfo();
auto CallFwdRegsInfo = CalleesMap.find(CallMI);
// There is no information for the call instruction.
@@ -803,7 +821,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
DIExpression::get(MF->getFunction().getContext(), {});
// Add all the forwarding registers into the ForwardedRegWorklist.
- for (auto ArgReg : CallFwdRegsInfo->second) {
+ for (const auto &ArgReg : CallFwdRegsInfo->second) {
bool InsertedReg =
ForwardedRegWorklist.insert({ArgReg.Reg, {{ArgReg.Reg, EmptyExpr}}})
.second;
@@ -851,7 +869,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
// Create an expression where the register's entry value is used.
DIExpression *EntryExpr = DIExpression::get(
MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1});
- for (auto RegEntry : ForwardedRegWorklist) {
+ for (auto &RegEntry : ForwardedRegWorklist) {
MachineLocation MLoc(RegEntry.first);
finishCallSiteParams(MLoc, EntryExpr, RegEntry.second, Params);
}
@@ -920,8 +938,10 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// If this is a direct call, find the callee's subprogram.
// In the case of an indirect call find the register that holds
// the callee.
- const MachineOperand &CalleeOp = MI.getOperand(0);
- if (!CalleeOp.isGlobal() && !CalleeOp.isReg())
+ const MachineOperand &CalleeOp = TII->getCalleeOperand(MI);
+ if (!CalleeOp.isGlobal() &&
+ (!CalleeOp.isReg() ||
+ !Register::isPhysicalRegister(CalleeOp.getReg())))
continue;
unsigned CallReg = 0;
@@ -1216,6 +1236,7 @@ void DwarfDebug::beginModule(Module *M) {
if (!GVMapEntry.size() || (Expr && Expr->isConstant()))
GVMapEntry.push_back({nullptr, Expr});
}
+
DenseSet<DIGlobalVariable *> Processed;
for (auto *GVE : CUNode->getGlobalVariables()) {
DIGlobalVariable *GV = GVE->getVariable();
@@ -1533,6 +1554,7 @@ void DwarfDebug::collectVariableInfoFromMFTable(
RegVar->initializeMMI(VI.Expr, VI.Slot);
LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName()
<< "\n");
+
if (DbgVariable *DbgVar = MFVars.lookup(Var))
DbgVar->addMMIEntry(*RegVar);
else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
@@ -1595,7 +1617,9 @@ static bool validThroughout(LexicalScopes &LScopes,
// throughout the function. This is a hack, presumably for DWARF v2 and not
// necessarily correct. It would be much better to use a dbg.declare instead
// if we know the constant is live throughout the scope.
- if (DbgValue->getDebugOperand(0).isImm() && MBB->pred_empty())
+ if (MBB->pred_empty() &&
+ all_of(DbgValue->debug_operands(),
+ [](const MachineOperand &Op) { return Op.isImm(); }))
return true;
// Test if the location terminates before the end of the scope.
@@ -1719,7 +1743,30 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
SmallVector<DbgValueLoc, 4> Values;
for (auto &R : OpenRanges)
Values.push_back(R.second);
- DebugLoc.emplace_back(StartLabel, EndLabel, Values);
+
+ // With basic block sections, it is possible that the StartLabel and the
+ // Instr are not in the same section. This happens when the StartLabel is
+ // the function begin label and the dbg value appears in a basic block
+ // that is not the entry. In this case, the range needs to be split to
+ // span each individual section in the range from StartLabel to EndLabel.
+ if (Asm->MF->hasBBSections() && StartLabel == Asm->getFunctionBegin() &&
+ !Instr->getParent()->sameSection(&Asm->MF->front())) {
+ const MCSymbol *BeginSectionLabel = StartLabel;
+
+ for (const MachineBasicBlock &MBB : *Asm->MF) {
+ if (MBB.isBeginSection() && &MBB != &Asm->MF->front())
+ BeginSectionLabel = MBB.getSymbol();
+
+ if (MBB.sameSection(Instr->getParent())) {
+ DebugLoc.emplace_back(BeginSectionLabel, EndLabel, Values);
+ break;
+ }
+ if (MBB.isEndSection())
+ DebugLoc.emplace_back(BeginSectionLabel, MBB.getEndSymbol(), Values);
+ }
+ } else {
+ DebugLoc.emplace_back(StartLabel, EndLabel, Values);
+ }
// Attempt to coalesce the ranges of two otherwise identical
// DebugLocEntries.
@@ -1736,8 +1783,46 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
DebugLoc.pop_back();
}
- return DebugLoc.size() == 1 && isSafeForSingleLocation &&
- validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering());
+ if (!isSafeForSingleLocation ||
+ !validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering()))
+ return false;
+
+ if (DebugLoc.size() == 1)
+ return true;
+
+ if (!Asm->MF->hasBBSections())
+ return false;
+
+ // Check here to see if loclist can be merged into a single range. If not,
+ // we must keep the split loclists per section. This does exactly what
+ // MergeRanges does without sections. We don't actually merge the ranges
+ // as the split ranges must be kept intact if this cannot be collapsed
+ // into a single range.
+ const MachineBasicBlock *RangeMBB = nullptr;
+ if (DebugLoc[0].getBeginSym() == Asm->getFunctionBegin())
+ RangeMBB = &Asm->MF->front();
+ else
+ RangeMBB = Entries.begin()->getInstr()->getParent();
+ auto *CurEntry = DebugLoc.begin();
+ auto *NextEntry = std::next(CurEntry);
+ while (NextEntry != DebugLoc.end()) {
+ // Get the last machine basic block of this section.
+ while (!RangeMBB->isEndSection())
+ RangeMBB = RangeMBB->getNextNode();
+ if (!RangeMBB->getNextNode())
+ return false;
+ // CurEntry should end the current section and NextEntry should start
+ // the next section and the Values must match for these two ranges to be
+ // merged.
+ if (CurEntry->getEndSym() != RangeMBB->getEndSymbol() ||
+ NextEntry->getBeginSym() != RangeMBB->getNextNode()->getSymbol() ||
+ CurEntry->getValues() != NextEntry->getValues())
+ return false;
+ RangeMBB = RangeMBB->getNextNode();
+ CurEntry = NextEntry;
+ NextEntry = std::next(CurEntry);
+ }
+ return true;
}
DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
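The tail of buildLocationList above decides whether a location list that was split per text section can still be treated as a single location: each entry must end exactly at its section's end symbol, the next entry must start at the following section's begin symbol, and the values must match. A much-reduced standalone sketch of that test, with entries flattened to (begin, end, value) triples over integer labels:

    #include <cassert>
    #include <string>
    #include <vector>

    struct Entry {
      int Begin;
      int End;
      std::string Value;
    };

    // True when the entries chain together with no gap and all carry the same
    // value, i.e. they could be collapsed into one range.
    static bool isSingleRange(const std::vector<Entry> &List) {
      for (size_t I = 0; I + 1 < List.size(); ++I)
        if (List[I].End != List[I + 1].Begin ||
            List[I].Value != List[I + 1].Value)
          return false;
      return !List.empty();
    }

    int main() {
      assert(isSingleRange({{0, 4, "reg5"}, {4, 9, "reg5"}}));  // contiguous, same value
      assert(!isSingleRange({{0, 4, "reg5"}, {6, 9, "reg5"}})); // gap between sections
      assert(!isSingleRange({{0, 4, "reg5"}, {4, 9, "reg6"}})); // value changes
    }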
@@ -1776,7 +1861,10 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Instruction ranges, specifying where IV is accessible.
const auto &HistoryMapEntries = I.second;
- if (HistoryMapEntries.empty())
+
+ // Try to find any non-empty variable location. Do not create a concrete
+ // entity if there are no locations.
+ if (!DbgValues.hasNonEmptyLocation(HistoryMapEntries))
continue;
LexicalScope *Scope = nullptr;
@@ -2363,12 +2451,8 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
TheU = Skeleton;
// Emit the header.
- MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
- MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
- Asm->emitDwarfUnitLength(EndLabel, BeginLabel,
- "Length of Public " + Name + " Info");
-
- Asm->OutStreamer->emitLabel(BeginLabel);
+ MCSymbol *EndLabel = Asm->emitDwarfUnitLength(
+ "pub" + Name, "Length of Public " + Name + " Info");
Asm->OutStreamer->AddComment("DWARF Version");
Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION);
@@ -2469,51 +2553,93 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
auto *DIExpr = Value.getExpression();
DIExpressionCursor ExprCursor(DIExpr);
DwarfExpr.addFragmentOffset(DIExpr);
- // Regular entry.
- if (Value.isInt()) {
- if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
- BT->getEncoding() == dwarf::DW_ATE_signed_char))
- DwarfExpr.addSignedConstant(Value.getInt());
- else
- DwarfExpr.addUnsignedConstant(Value.getInt());
- } else if (Value.isLocation()) {
- MachineLocation Location = Value.getLoc();
+
+ // If the DIExpr is an entry value, we want to follow the same code path
+ // regardless of whether the DBG_VALUE is variadic or not.
+ if (DIExpr && DIExpr->isEntryValue()) {
+ // Entry values can only be a single register with no additional DIExpr,
+ // so just add it directly.
+ assert(Value.getLocEntries().size() == 1);
+ assert(Value.getLocEntries()[0].isLocation());
+ MachineLocation Location = Value.getLocEntries()[0].getLoc();
DwarfExpr.setLocation(Location, DIExpr);
- DIExpressionCursor Cursor(DIExpr);
- if (DIExpr->isEntryValue())
- DwarfExpr.beginEntryValueExpression(Cursor);
+ DwarfExpr.beginEntryValueExpression(ExprCursor);
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
- if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
- return;
- return DwarfExpr.addExpression(std::move(Cursor));
- } else if (Value.isTargetIndexLocation()) {
- TargetIndexLocation Loc = Value.getTargetIndexLocation();
- // TODO TargetIndexLocation is a target-independent. Currently only the WebAssembly-specific
- // encoding is supported.
- assert(AP.TM.getTargetTriple().isWasm());
- DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
- DwarfExpr.addExpression(std::move(ExprCursor));
- return;
- } else if (Value.isConstantFP()) {
- if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() &&
- !ExprCursor) {
- DwarfExpr.addConstantFP(Value.getConstantFP()->getValueAPF(), AP);
+ if (!DwarfExpr.addMachineRegExpression(TRI, ExprCursor, Location.getReg()))
return;
+ return DwarfExpr.addExpression(std::move(ExprCursor));
+ }
+
+ // Regular entry.
+ auto EmitValueLocEntry = [&DwarfExpr, &BT,
+ &AP](const DbgValueLocEntry &Entry,
+ DIExpressionCursor &Cursor) -> bool {
+ if (Entry.isInt()) {
+ if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
+ BT->getEncoding() == dwarf::DW_ATE_signed_char))
+ DwarfExpr.addSignedConstant(Entry.getInt());
+ else
+ DwarfExpr.addUnsignedConstant(Entry.getInt());
+ } else if (Entry.isLocation()) {
+ MachineLocation Location = Entry.getLoc();
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
+
+ const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return false;
+ } else if (Entry.isTargetIndexLocation()) {
+ TargetIndexLocation Loc = Entry.getTargetIndexLocation();
+ // TODO: TargetIndexLocation is target-independent. Currently only the
+ // WebAssembly-specific encoding is supported.
+ assert(AP.TM.getTargetTriple().isWasm());
+ DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
+ } else if (Entry.isConstantFP()) {
+ if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() &&
+ !Cursor) {
+ DwarfExpr.addConstantFP(Entry.getConstantFP()->getValueAPF(), AP);
+ } else if (Entry.getConstantFP()
+ ->getValueAPF()
+ .bitcastToAPInt()
+ .getBitWidth() <= 64 /*bits*/) {
+ DwarfExpr.addUnsignedConstant(
+ Entry.getConstantFP()->getValueAPF().bitcastToAPInt());
+ } else {
+ LLVM_DEBUG(
+ dbgs() << "Skipped DwarfExpression creation for ConstantFP of size"
+ << Entry.getConstantFP()
+ ->getValueAPF()
+ .bitcastToAPInt()
+ .getBitWidth()
+ << " bits\n");
+ return false;
+ }
}
- if (Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth() <=
- 64 /*bits*/)
- DwarfExpr.addUnsignedConstant(
- Value.getConstantFP()->getValueAPF().bitcastToAPInt());
- else
- LLVM_DEBUG(
- dbgs()
- << "Skipped DwarfExpression creation for ConstantFP of size"
- << Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth()
- << " bits\n");
+ return true;
+ };
+
+ if (!Value.isVariadic()) {
+ if (!EmitValueLocEntry(Value.getLocEntries()[0], ExprCursor))
+ return;
+ DwarfExpr.addExpression(std::move(ExprCursor));
+ return;
}
- DwarfExpr.addExpression(std::move(ExprCursor));
+
+ // If any of the location entries are registers with the value 0, then the
+ // location is undefined.
+ if (any_of(Value.getLocEntries(), [](const DbgValueLocEntry &Entry) {
+ return Entry.isLocation() && !Entry.getLoc().getReg();
+ }))
+ return;
+
+ DwarfExpr.addExpression(
+ std::move(ExprCursor),
+ [EmitValueLocEntry, &Value](unsigned Idx,
+ DIExpressionCursor &Cursor) -> bool {
+ return EmitValueLocEntry(Value.getLocEntries()[Idx], Cursor);
+ });
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
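The rewritten emitDebugLocValue above handles variadic debug values by letting the expression walker call back into EmitValueLocEntry whenever it meets DW_OP_LLVM_arg N. A rough standalone sketch of that dispatch shape; the Kind/Op types and the operand-emitting lambda are invented for illustration and are not the DIExpression API:

#include <cstdio>
#include <functional>
#include <vector>

// Invented operation stream: OpArg(N) pulls in the N-th machine operand,
// OpConst pushes a literal, OpPlus adds the top two stack entries.
enum class Kind { OpArg, OpConst, OpPlus };
struct Op {
  Kind K;
  unsigned Val;
};

// Walk the expression; whenever an OpArg is seen, defer to the caller-supplied
// emitter, which knows how to lower operand N (register, constant, ...).
// Returning false from the emitter abandons the whole expression.
static bool emitExpression(const std::vector<Op> &Expr,
                           const std::function<bool(unsigned)> &EmitArg) {
  for (const Op &O : Expr) {
    switch (O.K) {
    case Kind::OpArg:
      if (!EmitArg(O.Val))
        return false; // unsupported operand: drop the location entirely
      break;
    case Kind::OpConst:
      std::printf("DW_OP_constu %u\n", O.Val);
      break;
    case Kind::OpPlus:
      std::printf("DW_OP_plus\n");
      break;
    }
  }
  return true;
}

int main() {
  std::vector<Op> Expr = {{Kind::OpArg, 0}, {Kind::OpConst, 4}, {Kind::OpPlus, 0}};
  emitExpression(Expr, [](unsigned Idx) {
    std::printf("DW_OP_breg<operand %u> 0\n", Idx); // stand-in for operand lowering
    return true;
  });
}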
@@ -3397,7 +3523,10 @@ dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const {
}
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
- return SectionLabels.find(S)->second;
+ auto I = SectionLabels.find(S);
+ if (I == SectionLabels.end())
+ return nullptr;
+ return I->second;
}
void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second)
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index df19ef458888..6356a65b50d3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -383,6 +383,8 @@ public:
Default,
Disabled,
Ranges,
+ Expressions,
+ Form,
};
private:
@@ -438,7 +440,11 @@ private:
AccelTable<AppleAccelTableOffsetData> AccelNamespace;
AccelTable<AppleAccelTableTypeData> AccelTypes;
- // Identify a debugger for "tuning" the debug info.
+ /// Identify a debugger for "tuning" the debug info.
+ ///
+ /// The "tuning" should be used to set defaults for individual feature flags
+ /// in DwarfDebug; if a given feature has a more specific command-line option,
+ /// that option should take precedence over the tuning.
DebuggerKind DebuggerTuning = DebuggerKind::Default;
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
@@ -706,6 +712,18 @@ public:
return MinimizeAddr == MinimizeAddrInV5::Ranges;
}
+ /// Returns whether novel exprloc addrx+offset encodings should be used to
+ /// reduce debug_addr size.
+ bool useAddrOffsetExpressions() const {
+ return MinimizeAddr == MinimizeAddrInV5::Expressions;
+ }
+
+ /// Returns whether the addrx+offset LLVM extension form should be used to
+ /// reduce debug_addr size.
+ bool useAddrOffsetForm() const {
+ return MinimizeAddr == MinimizeAddrInV5::Form;
+ }
+
/// Returns whether to use sections as labels rather than temp symbols.
bool useSectionsAsReferences() const {
return UseSectionsAsReferences;
@@ -820,6 +838,7 @@ public:
bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+ bool tuneForDBX() const { return DebuggerTuning == DebuggerKind::DBX; }
/// @}
const MCSymbol *getSectionLabel(const MCSection *S);
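The updated comment on DebuggerTuning describes a two-level scheme: the tuning only supplies defaults, and a feature-specific command-line flag wins whenever it is set explicitly. A small sketch of that resolution order, with made-up flag and feature names standing in for the real cl::opt machinery:

#include <cstdio>

// Invented three-state flag: Default defers to the debugger tuning, while
// Enable/Disable model an explicit command-line override.
enum class FlagState { Default, Enable, Disable };
enum class Debugger { GDB, LLDB, SCE, DBX };

// The explicit option always wins; the tuning only decides the default.
static bool useSomeFeature(FlagState Opt, Debugger Tuning) {
  if (Opt != FlagState::Default)
    return Opt == FlagState::Enable;
  return Tuning != Debugger::SCE; // e.g. on by default except when tuning for SCE
}

int main() {
  std::printf("SCE default: %d\n", useSomeFeature(FlagState::Default, Debugger::SCE));
  std::printf("SCE forced:  %d\n", useSomeFeature(FlagState::Enable, Debugger::SCE));
}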
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index b19b4365383f..40898c9fc855 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -44,9 +44,6 @@ class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
/// Per-function flag to indicate if .cfi_lsda should be emitted.
bool shouldEmitLSDA;
- /// Per-function flag to indicate if frame moves info should be emitted.
- bool shouldEmitMoves;
-
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 59ad7646ce1c..6409c39e7849 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -285,22 +285,29 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// a call site parameter expression and if that expression is just a register
// location, emit it with addBReg and offset 0, because we should emit a DWARF
// expression representing a value, rather than a location.
- if (!isMemoryLocation() && !HasComplexExpression &&
- (!isParameterValue() || isEntryValue())) {
+ if ((!isParameterValue() && !isMemoryLocation() && !HasComplexExpression) ||
+ isEntryValue()) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
addOpPiece(Reg.SubRegSize);
}
- if (isEntryValue())
+ if (isEntryValue()) {
finalizeEntryValue();
- if (isEntryValue() && !isIndirect() && !isParameterValue() &&
- DwarfVersion >= 4)
- emitOp(dwarf::DW_OP_stack_value);
+ if (!isIndirect() && !isParameterValue() && !HasComplexExpression &&
+ DwarfVersion >= 4)
+ emitOp(dwarf::DW_OP_stack_value);
+ }
DwarfRegs.clear();
+ // If we need to mask out a subregister, do it now, unless the next
+ // operation would emit an OpPiece anyway.
+ auto NextOp = ExprCursor.peek();
+ if (SubRegisterSizeInBits && NextOp &&
+ (NextOp->getOp() != dwarf::DW_OP_LLVM_fragment))
+ maskSubRegister();
return true;
}
@@ -353,6 +360,14 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
else
addBReg(Reg.DwarfRegNo, SignedOffset);
DwarfRegs.clear();
+
+ // If we need to mask out a subregister, do it now, unless the next
+ // operation would emit an OpPiece anyway.
+ auto NextOp = ExprCursor.peek();
+ if (SubRegisterSizeInBits && NextOp &&
+ (NextOp->getOp() != dwarf::DW_OP_LLVM_fragment))
+ maskSubRegister();
+
return true;
}
@@ -365,11 +380,7 @@ void DwarfExpression::setEntryValueFlags(const MachineLocation &Loc) {
void DwarfExpression::setLocation(const MachineLocation &Loc,
const DIExpression *DIExpr) {
if (Loc.isIndirect())
- // Do not treat entry value descriptions of indirect parameters as memory
- // locations. This allows DwarfExpression::addReg() to add DW_OP_regN to an
- // entry value description.
- if (!DIExpr->isEntryValue())
- setMemoryLocationKind();
+ setMemoryLocationKind();
if (DIExpr->isEntryValue())
setEntryValueFlags(Loc);
@@ -380,12 +391,12 @@ void DwarfExpression::beginEntryValueExpression(
auto Op = ExprCursor.take();
(void)Op;
assert(Op && Op->getOp() == dwarf::DW_OP_LLVM_entry_value);
- assert(!isMemoryLocation() &&
- "We don't support entry values of memory locations yet");
assert(!IsEmittingEntryValue && "Already emitting entry value?");
assert(Op->getArg(0) == 1 &&
"Can currently only emit entry values covering a single operation");
+ SavedLocationKind = LocationKind;
+ LocationKind = Register;
IsEmittingEntryValue = true;
enableTemporaryBuffer();
}
@@ -403,6 +414,8 @@ void DwarfExpression::finalizeEntryValue() {
// Emit the entry value's DWARF block operand.
commitTemporaryBuffer();
+ LocationFlags &= ~EntryValue;
+ LocationKind = SavedLocationKind;
IsEmittingEntryValue = false;
}
@@ -415,6 +428,7 @@ void DwarfExpression::cancelEntryValue() {
assert(getTemporaryBufferSize() == 0 &&
"Began emitting entry value block before cancelling entry value");
+ LocationKind = SavedLocationKind;
IsEmittingEntryValue = false;
}
@@ -451,16 +465,19 @@ static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
unsigned FragmentOffsetInBits) {
+ addExpression(std::move(ExprCursor),
+ [](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
+ llvm_unreachable("unhandled opcode found in expression");
+ });
+}
+
+void DwarfExpression::addExpression(
+ DIExpressionCursor &&ExprCursor,
+ llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg) {
// Entry values can currently only cover the initial register location,
// and not any other parts of the following DWARF expression.
assert(!IsEmittingEntryValue && "Can't emit entry value around expression");
- // If we need to mask out a subregister, do it now, unless the next
- // operation would emit an OpPiece anyway.
- auto N = ExprCursor.peek();
- if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment))
- maskSubRegister();
-
Optional<DIExpression::ExprOperand> PrevConvertOp = None;
while (ExprCursor) {
@@ -476,6 +493,12 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
}
switch (OpNum) {
+ case dwarf::DW_OP_LLVM_arg:
+ if (!InsertArg(Op->getArg(0), ExprCursor)) {
+ LocationKind = Unknown;
+ return;
+ }
+ break;
case dwarf::DW_OP_LLVM_fragment: {
unsigned SizeInBits = Op->getArg(1);
unsigned FragmentOffset = Op->getArg(0);
@@ -664,9 +687,14 @@ void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
}
void DwarfExpression::addWasmLocation(unsigned Index, uint64_t Offset) {
- assert(LocationKind == Implicit || LocationKind == Unknown);
- LocationKind = Implicit;
emitOp(dwarf::DW_OP_WASM_location);
- emitUnsigned(Index);
+ emitUnsigned(Index == 4 /*TI_LOCAL_INDIRECT*/ ? 0 /*TI_LOCAL*/ : Index);
emitUnsigned(Offset);
+ if (Index == 4 /*TI_LOCAL_INDIRECT*/) {
+ assert(LocationKind == Unknown);
+ LocationKind = Memory;
+ } else {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
+ }
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 8fca9f5a630b..513e9072309e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -148,6 +148,7 @@ protected:
enum { EntryValue = 1 << 0, Indirect = 1 << 1, CallSiteParamValue = 1 << 2 };
unsigned LocationKind : 3;
+ unsigned SavedLocationKind : 3;
unsigned LocationFlags : 3;
unsigned DwarfVersion : 4;
@@ -284,8 +285,8 @@ protected:
public:
DwarfExpression(unsigned DwarfVersion, DwarfCompileUnit &CU)
: CU(CU), SubRegisterSizeInBits(0), SubRegisterOffsetInBits(0),
- LocationKind(Unknown), LocationFlags(Unknown),
- DwarfVersion(DwarfVersion) {}
+ LocationKind(Unknown), SavedLocationKind(Unknown),
+ LocationFlags(Unknown), DwarfVersion(DwarfVersion) {}
/// This needs to be called last to commit any pending changes.
void finalize();
@@ -346,6 +347,9 @@ public:
/// fragment inside the entire variable.
void addExpression(DIExpressionCursor &&Expr,
unsigned FragmentOffsetInBits = 0);
+ void
+ addExpression(DIExpressionCursor &&Expr,
+ llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg);
/// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
/// the fragment described by \c Expr.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 118b5fcc3bf6..344d30fad347 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -100,10 +100,10 @@ DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
}
DwarfUnit::~DwarfUnit() {
- for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
- DIEBlocks[j]->~DIEBlock();
- for (unsigned j = 0, M = DIELocs.size(); j < M; ++j)
- DIELocs[j]->~DIELoc();
+ for (DIEBlock *B : DIEBlocks)
+ B->~DIEBlock();
+ for (DIELoc *L : DIELocs)
+ L->~DIELoc();
}
int64_t DwarfUnit::getDefaultLowerBound() const {
@@ -219,11 +219,9 @@ void DwarfUnit::insertDIE(DIE *D) {
void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
if (DD->getDwarfVersion() >= 4)
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present,
- DIEInteger(1));
+ addAttribute(Die, Attribute, dwarf::DW_FORM_flag_present, DIEInteger(1));
else
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag,
- DIEInteger(1));
+ addAttribute(Die, Attribute, dwarf::DW_FORM_flag, DIEInteger(1));
}
void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
@@ -232,7 +230,7 @@ void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
Form = DIEInteger::BestForm(false, Integer);
assert(Form != dwarf::DW_FORM_implicit_const &&
"DW_FORM_implicit_const is used only for signed integers");
- Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
+ addAttribute(Die, Attribute, *Form, DIEInteger(Integer));
}
void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
@@ -244,7 +242,7 @@ void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, int64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(true, Integer);
- Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
+ addAttribute(Die, Attribute, *Form, DIEInteger(Integer));
}
void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
@@ -258,7 +256,7 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
return;
if (DD->useInlineStrings()) {
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string,
+ addAttribute(Die, Attribute, dwarf::DW_FORM_string,
new (DIEValueAllocator)
DIEInlineString(String, DIEValueAllocator));
return;
@@ -282,15 +280,12 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
else if (Index > 0xff)
IxForm = dwarf::DW_FORM_strx2;
}
- Die.addValue(DIEValueAllocator, Attribute, IxForm,
- DIEString(StringPoolEntry));
+ addAttribute(Die, Attribute, IxForm, DIEString(StringPoolEntry));
}
-DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die,
- dwarf::Attribute Attribute,
- dwarf::Form Form,
- const MCSymbol *Label) {
- return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label));
+void DwarfUnit::addLabel(DIEValueList &Die, dwarf::Attribute Attribute,
+ dwarf::Form Form, const MCSymbol *Label) {
+ addAttribute(Die, Attribute, Form, DIELabel(Label));
}
void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
@@ -315,17 +310,39 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
Asm->OutContext.getDwarfVersion(), File->getSource());
}
-void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
+void DwarfUnit::addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label) {
+ bool UseAddrOffsetFormOrExpressions =
+ DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
+
+ const MCSymbol *Base = nullptr;
+ if (Label->isInSection() && UseAddrOffsetFormOrExpressions)
+ Base = DD->getSectionLabel(&Label->getSection());
+
+ uint32_t Index = DD->getAddressPool().getIndex(Base ? Base : Label);
+
if (DD->getDwarfVersion() >= 5) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx);
- addUInt(Die, dwarf::DW_FORM_addrx, DD->getAddressPool().getIndex(Sym));
+ addUInt(Die, dwarf::DW_FORM_addrx, Index);
+ } else {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+ addUInt(Die, dwarf::DW_FORM_GNU_addr_index, Index);
+ }
+
+ if (Base && Base != Label) {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_const4u);
+ addLabelDelta(Die, (dwarf::Attribute)0, Label, Base);
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ }
+}
+
+void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
+ if (DD->getDwarfVersion() >= 5) {
+ addPoolOpAddress(Die, Sym);
return;
}
if (DD->useSplitDwarf()) {
- addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
- addUInt(Die, dwarf::DW_FORM_GNU_addr_index,
- DD->getAddressPool().getIndex(Sym));
+ addPoolOpAddress(Die, Sym);
return;
}
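addPoolOpAddress above lets several labels in the same section share one address-pool entry: the pool holds the section base, and each use appends the label's distance from that base. The byte-level shape of that encoding, sketched with an invented pool and printf standing in for the real AddressPool and DIE emission:

#include <cstdio>
#include <map>
#include <string>

// Invented address pool: deduplicates base symbols and hands out indexes.
struct AddressPool {
  std::map<std::string, unsigned> Index;
  unsigned getIndex(const std::string &Sym) {
    return Index.emplace(Sym, static_cast<unsigned>(Index.size())).first->second;
  }
};

// Emit "addrx(pool index of section base) + (label - base)" so that every
// label in a section reuses one .debug_addr slot; offsets here are made up.
static void emitPoolOpAddress(AddressPool &Pool, const std::string &SectionBase,
                              unsigned OffsetFromBase) {
  unsigned Idx = Pool.getIndex(SectionBase);
  std::printf("DW_OP_addrx %u\n", Idx);
  if (OffsetFromBase) {
    std::printf("DW_OP_const4u %u\n", OffsetFromBase);
    std::printf("DW_OP_plus\n");
  }
}

int main() {
  AddressPool Pool;
  emitPoolOpAddress(Pool, ".text", 0x10); // both labels in .text share the
  emitPoolOpAddress(Pool, ".text", 0x40); // same pool entry (index 0)
}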
@@ -333,9 +350,9 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
addLabel(Die, dwarf::DW_FORM_addr, Sym);
}
-void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_data4,
+ addAttribute(Die, Attribute, dwarf::DW_FORM_data4,
new (DIEValueAllocator) DIEDelta(Hi, Lo));
}
@@ -350,8 +367,8 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) {
// and think this is a full definition.
addFlag(Die, dwarf::DW_AT_declaration);
- Die.addValue(DIEValueAllocator, dwarf::DW_AT_signature,
- dwarf::DW_FORM_ref_sig8, DIEInteger(Signature));
+ addAttribute(Die, dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8,
+ DIEInteger(Signature));
}
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
@@ -363,13 +380,13 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
CU = getUnitDie().getUnit();
if (!EntryCU)
EntryCU = getUnitDie().getUnit();
- Die.addValue(DIEValueAllocator, Attribute,
+ addAttribute(Die, Attribute,
EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
Entry);
}
-DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
- DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag));
+DIE &DwarfUnit::createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N) {
+ DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, Tag));
if (N)
insertDIE(N, &Die);
return Die;
@@ -378,15 +395,19 @@ DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
Loc->ComputeSize(Asm);
DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
- Die.addValue(DIEValueAllocator, Attribute,
- Loc->BestForm(DD->getDwarfVersion()), Loc);
+ addAttribute(Die, Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
}
-void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
DIEBlock *Block) {
Block->ComputeSize(Asm);
DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
- Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block);
+ addAttribute(Die, Attribute, Form, Block);
+}
+
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
+ DIEBlock *Block) {
+ addBlock(Die, Attribute, Block->BestForm(), Block);
}
void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
@@ -918,14 +939,17 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
// Add the type's non-standard calling convention.
- uint8_t CC = 0;
- if (CTy->isTypePassByValue())
- CC = dwarf::DW_CC_pass_by_value;
- else if (CTy->isTypePassByReference())
- CC = dwarf::DW_CC_pass_by_reference;
- if (CC)
- addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
- CC);
+ // DW_CC_pass_by_value/DW_CC_pass_by_reference were introduced in DWARF 5.
+ if (!Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= 5) {
+ uint8_t CC = 0;
+ if (CTy->isTypePassByValue())
+ CC = dwarf::DW_CC_pass_by_value;
+ else if (CTy->isTypePassByReference())
+ CC = dwarf::DW_CC_pass_by_reference;
+ if (CC)
+ addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
+ CC);
+ }
break;
}
default:
@@ -1106,32 +1130,34 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
}
bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
- DIE &SPDie) {
+ DIE &SPDie, bool Minimal) {
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
if (auto *SPDecl = SP->getDeclaration()) {
- DITypeRefArray DeclArgs, DefinitionArgs;
- DeclArgs = SPDecl->getType()->getTypeArray();
- DefinitionArgs = SP->getType()->getTypeArray();
-
- if (DeclArgs.size() && DefinitionArgs.size())
- if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0])
- addType(SPDie, DefinitionArgs[0]);
-
- DeclDie = getDIE(SPDecl);
- assert(DeclDie && "This DIE should've already been constructed when the "
- "definition DIE was created in "
- "getOrCreateSubprogramDIE");
- // Look at the Decl's linkage name only if we emitted it.
- if (DD->useAllLinkageNames())
- DeclLinkageName = SPDecl->getLinkageName();
- unsigned DeclID = getOrCreateSourceID(SPDecl->getFile());
- unsigned DefID = getOrCreateSourceID(SP->getFile());
- if (DeclID != DefID)
- addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
-
- if (SP->getLine() != SPDecl->getLine())
- addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
+ if (!Minimal) {
+ DITypeRefArray DeclArgs, DefinitionArgs;
+ DeclArgs = SPDecl->getType()->getTypeArray();
+ DefinitionArgs = SP->getType()->getTypeArray();
+
+ if (DeclArgs.size() && DefinitionArgs.size())
+ if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0])
+ addType(SPDie, DefinitionArgs[0]);
+
+ DeclDie = getDIE(SPDecl);
+ assert(DeclDie && "This DIE should've already been constructed when the "
+ "definition DIE was created in "
+ "getOrCreateSubprogramDIE");
+ // Look at the Decl's linkage name only if we emitted it.
+ if (DD->useAllLinkageNames())
+ DeclLinkageName = SPDecl->getLinkageName();
+ unsigned DeclID = getOrCreateSourceID(SPDecl->getFile());
+ unsigned DefID = getOrCreateSourceID(SP->getFile());
+ if (DeclID != DefID)
+ addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
+
+ if (SP->getLine() != SPDecl->getLine())
+ addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
+ }
}
// Add function template parameters.
@@ -1163,7 +1189,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool SkipSPSourceLocation = SkipSPAttributes &&
!CUNode->getDebugInfoForProfiling();
if (!SkipSPSourceLocation)
- if (applySubprogramDefinitionAttributes(SP, SPDie))
+ if (applySubprogramDefinitionAttributes(SP, SPDie, SkipSPAttributes))
return;
// Constructors and operators for anonymous aggregates do not have names.
@@ -1286,9 +1312,6 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
// Count == -1 then the array is unbounded and we do not emit
// DW_AT_lower_bound and DW_AT_count attributes.
int64_t DefaultLowerBound = getDefaultLowerBound();
- int64_t Count = -1;
- if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>())
- Count = CI->getSExtValue();
auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DISubrange::BoundType Bound) -> void {
@@ -1302,19 +1325,18 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
DwarfExpr.addExpression(BE);
addBlock(DW_Subrange, Attr, DwarfExpr.finalize());
} else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) {
- if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
- BI->getSExtValue() != DefaultLowerBound)
+ if (Attr == dwarf::DW_AT_count) {
+ if (BI->getSExtValue() != -1)
+ addUInt(DW_Subrange, Attr, None, BI->getSExtValue());
+ } else if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
+ BI->getSExtValue() != DefaultLowerBound)
addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue());
}
};
AddBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound());
- if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) {
- if (auto *CountVarDIE = getDIE(CV))
- addDIEEntry(DW_Subrange, dwarf::DW_AT_count, *CountVarDIE);
- } else if (Count != -1)
- addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);
+ AddBoundTypeEntry(dwarf::DW_AT_count, SR->getCount());
AddBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound());
@@ -1336,7 +1358,9 @@ void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer,
if (auto *VarDIE = getDIE(BV))
addDIEEntry(DwGenericSubrange, Attr, *VarDIE);
} else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
- if (BE->isSignedConstant()) {
+ if (BE->isConstant() &&
+ DIExpression::SignedOrUnsignedConstant::SignedConstant ==
+ *BE->isConstant()) {
if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
static_cast<int64_t>(BE->getElement(1)) != DefaultLowerBound)
addSInt(DwGenericSubrange, Attr, dwarf::DW_FORM_sdata,
@@ -1462,9 +1486,9 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// Add subranges to array type.
DINodeArray Elements = CTy->getElements();
- for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ for (DINode *E : Elements) {
// FIXME: Should this really be such a loose cast?
- if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) {
+ if (auto *Element = dyn_cast_or_null<DINode>(E)) {
if (Element->getTag() == dwarf::DW_TAG_subrange_type)
constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy);
else if (Element->getTag() == dwarf::DW_TAG_generic_subrange)
@@ -1490,8 +1514,8 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DINodeArray Elements = CTy->getElements();
// Add enumerators to enumeration type.
- for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
- auto *Enum = dyn_cast_or_null<DIEnumerator>(Elements[i]);
+ for (const DINode *E : Elements) {
+ auto *Enum = dyn_cast_or_null<DIEnumerator>(E);
if (Enum) {
DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
StringRef Name = Enum->getName();
@@ -1504,10 +1528,9 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
}
void DwarfUnit::constructContainingTypeDIEs() {
- for (auto CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end();
- CI != CE; ++CI) {
- DIE &SPDie = *CI->first;
- const DINode *D = CI->second;
+ for (auto &P : ContainingTypeMap) {
+ DIE &SPDie = *P.first;
+ const DINode *D = P.second;
if (!D)
continue;
DIE *NDie = getDIE(D);
@@ -1595,9 +1618,18 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
- } else if (!IsBitfield || DD->useDWARF2Bitfields())
- addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
- OffsetInBytes);
+ } else if (!IsBitfield || DD->useDWARF2Bitfields()) {
+ // In DWARF v3, DW_FORM_data4/8 in DW_AT_data_member_location are
+ // interpreted as location-list pointers. Interpreting constants as
+ // pointers is not expected, so we use DW_FORM_udata to encode the
+ // constants here.
+ if (DD->getDwarfVersion() == 3)
+ addUInt(MemberDie, dwarf::DW_AT_data_member_location,
+ dwarf::DW_FORM_udata, OffsetInBytes);
+ else
+ addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
+ OffsetInBytes);
+ }
}
if (DT->isProtected())
@@ -1617,8 +1649,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
// Objective-C properties.
if (DINode *PNode = DT->getObjCProperty())
if (DIE *PDie = getDIE(PNode))
- MemberDie.addValue(DIEValueAllocator, dwarf::DW_AT_APPLE_property,
- dwarf::DW_FORM_ref4, DIEEntry(*PDie));
+ addAttribute(MemberDie, dwarf::DW_AT_APPLE_property,
+ dwarf::DW_FORM_ref4, DIEEntry(*PDie));
if (DT->isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
@@ -1675,13 +1707,10 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
- if (!DD->useSectionsAsReferences()) {
- StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_";
- MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start");
- EndLabel = Asm->createTempSymbol(Prefix + "end");
- Asm->emitDwarfUnitLength(EndLabel, BeginLabel, "Length of Unit");
- Asm->OutStreamer->emitLabel(BeginLabel);
- } else
+ if (!DD->useSectionsAsReferences())
+ EndLabel = Asm->emitDwarfUnitLength(
+ isDwoUnit() ? "debug_info_dwo" : "debug_info", "Length of Unit");
+ else
Asm->emitDwarfUnitLength(getHeaderSize() + getUnitDie().getSize(),
"Length of Unit");
@@ -1725,20 +1754,18 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) {
Asm->emitDwarfLengthOrOffset(Ty ? Ty->getOffset() : 0);
}
-DIE::value_iterator
-DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo) {
- return Die.addValue(DIEValueAllocator, Attribute,
- DD->getDwarfSectionOffsetForm(),
- new (DIEValueAllocator) DIEDelta(Hi, Lo));
+void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ addAttribute(Die, Attribute, DD->getDwarfSectionOffsetForm(),
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
}
-DIE::value_iterator
-DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label, const MCSymbol *Sec) {
+void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec) {
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- return addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label);
- return addSectionDelta(Die, Attribute, Label, Sec);
+ addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label);
+ else
+ addSectionDelta(Die, Attribute, Label, Sec);
}
bool DwarfTypeUnit::isDwoUnit() const {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 5c643760fd56..4d31dd0daf59 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
+#include "llvm/Target/TargetMachine.h"
#include <string>
namespace llvm {
@@ -72,10 +73,26 @@ protected:
DwarfUnit(dwarf::Tag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU);
- bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
+ bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal);
bool isShareableAcrossCUs(const DINode *D) const;
+ template <typename T>
+ void addAttribute(DIEValueList &Die, dwarf::Attribute Attribute,
+ dwarf::Form Form, T &&Value) {
+ // For strict DWARF mode, only generate attributes available to current
+ // DWARF version.
+ // Attribute 0 is used when emitting form-encoded values in blocks, which
+ // don't have attributes (only forms), so we cannot check their DWARF
+ // version compatibility here and assume they are compatible.
+ if (Attribute != 0 && Asm->TM.Options.DebugStrictDwarf &&
+ DD->getDwarfVersion() < dwarf::AttributeVersion(Attribute))
+ return;
+
+ Die.addValue(DIEValueAllocator,
+ DIEValue(Attribute, Form, std::forward<T>(Value)));
+ }
+
public:
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
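The new addAttribute template centralizes the strict-DWARF check: in strict mode an attribute is silently dropped when the unit's DWARF version predates it. A standalone sketch of that gate; the attribute table below is a tiny illustrative subset, not the real dwarf::AttributeVersion data:

#include <cstdio>
#include <map>
#include <string>

// Stand-in for dwarf::AttributeVersion(): the DWARF version that introduced
// an attribute (illustrative entries only).
static unsigned attributeVersion(const std::string &Attr) {
  static const std::map<std::string, unsigned> Table = {
      {"DW_AT_name", 2}, {"DW_AT_linkage_name", 4}, {"DW_AT_alignment", 5}};
  auto It = Table.find(Attr);
  return It == Table.end() ? 2 : It->second;
}

// In strict mode, drop attributes the requested DWARF version does not define;
// otherwise emit everything.
static void addAttribute(const std::string &Attr, unsigned DwarfVersion,
                         bool StrictDwarf) {
  if (StrictDwarf && DwarfVersion < attributeVersion(Attr)) {
    std::printf("skipping %s (needs DWARF %u)\n", Attr.c_str(),
                attributeVersion(Attr));
    return;
  }
  std::printf("emitting %s\n", Attr.c_str());
}

int main() {
  addAttribute("DW_AT_alignment", /*DwarfVersion=*/4, /*StrictDwarf=*/true);
  addAttribute("DW_AT_alignment", /*DwarfVersion=*/4, /*StrictDwarf=*/false);
}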
@@ -147,10 +164,8 @@ public:
void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
/// Add a Dwarf label attribute data and value.
- DIEValueList::value_iterator addLabel(DIEValueList &Die,
- dwarf::Attribute Attribute,
- dwarf::Form Form,
- const MCSymbol *Label);
+ void addLabel(DIEValueList &Die, dwarf::Attribute Attribute, dwarf::Form Form,
+ const MCSymbol *Label);
void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
@@ -160,10 +175,11 @@ public:
/// Add a dwarf op address data and value using the form given and an
/// op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
void addOpAddress(DIELoc &Die, const MCSymbol *Sym);
+ void addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label);
/// Add a label delta attribute data and value.
- void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
- const MCSymbol *Lo);
+ void addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo);
/// Add a DIE attribute data and value.
void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry);
@@ -179,6 +195,8 @@ public:
/// Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);
+ void addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
+ DIEBlock *Block);
/// Add location information to specified debug information entry.
void addSourceLine(DIE &Die, unsigned Line, const DIFile *File);
@@ -239,7 +257,7 @@ public:
/// Create a DIE with the given Tag, add the DIE to its parent, and
/// call insertDIE if MD is not null.
- DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr);
+ DIE &createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N = nullptr);
bool useSegmentedStringOffsetsTable() const {
return DD->useSegmentedStringOffsetsTable();
@@ -269,13 +287,12 @@ public:
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
/// addSectionDelta - Add a label delta attribute data and value.
- DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo);
+ void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+ const MCSymbol *Lo);
/// Add a Dwarf section label attribute data and value.
- DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label,
- const MCSymbol *Sec);
+ void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec);
/// Get context owner's DIE.
DIE *createTypeDIE(const DICompositeType *Ty);
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 2ffe8a7b0469..e589c2e64abd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -83,10 +83,9 @@ void EHStreamer::computeActionsTable(
FilterOffsets.reserve(FilterIds.size());
int Offset = -1;
- for (std::vector<unsigned>::const_iterator
- I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+ for (unsigned FilterId : FilterIds) {
FilterOffsets.push_back(Offset);
- Offset -= getULEB128Size(*I);
+ Offset -= getULEB128Size(FilterId);
}
FirstActions.reserve(LandingPads.size());
@@ -95,9 +94,7 @@ void EHStreamer::computeActionsTable(
unsigned SizeActions = 0; // Total size of all action entries for a function
const LandingPadInfo *PrevLPI = nullptr;
- for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
- I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
- const LandingPadInfo *LPI = *I;
+ for (const LandingPadInfo *LPI : LandingPads) {
const std::vector<int> &TypeIds = LPI->TypeIds;
unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
unsigned SizeSiteActions = 0; // Total size of all entries for a landingpad
@@ -420,8 +417,8 @@ MCSymbol *EHStreamer::emitExceptionTable() {
bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
// Type infos.
- MCSection *LSDASection =
- Asm->getObjFileLowering().getSectionForLSDA(MF->getFunction(), Asm->TM);
+ MCSection *LSDASection = Asm->getObjFileLowering().getSectionForLSDA(
+ MF->getFunction(), *Asm->CurrentFnSym, Asm->TM);
unsigned TTypeEncoding;
if (!HaveTTData) {
@@ -757,10 +754,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// Emit the Action Table.
int Entry = 0;
- for (SmallVectorImpl<ActionEntry>::const_iterator
- I = Actions.begin(), E = Actions.end(); I != E; ++I) {
- const ActionEntry &Action = *I;
-
+ for (const ActionEntry &Action : Actions) {
if (VerboseAsm) {
// Emit comments that decode the action table.
Asm->OutStreamer->AddComment(">> Action Record " + Twine(++Entry) + " <<");
diff --git a/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 59a84e6f2d7b..70777f07fc6c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -14,10 +14,9 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/BuiltinGCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -79,11 +78,10 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.emitInt16(MD.size());
// And each safe point...
- for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE;
- ++PI) {
+ for (const GCPoint &P : MD) {
// Emit the address of the safe point.
OS.AddComment("safe point address");
- MCSymbol *Label = PI->Label;
+ MCSymbol *Label = P.Label;
AP.emitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 354b638b47a2..a9fb31d42679 100644
--- a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -14,9 +14,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/BuiltinGCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index e8636052c54c..35a830f416f6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -20,31 +20,6 @@
using namespace llvm;
-#define DEBUG_TYPE "pseudoprobe"
-
-PseudoProbeHandler::~PseudoProbeHandler() = default;
-
-PseudoProbeHandler::PseudoProbeHandler(AsmPrinter *A, Module *M) : Asm(A) {
- NamedMDNode *FuncInfo = M->getNamedMetadata(PseudoProbeDescMetadataName);
- assert(FuncInfo && "Pseudo probe descriptors are missing");
- for (const auto *Operand : FuncInfo->operands()) {
- const auto *MD = cast<MDNode>(Operand);
- auto GUID =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
- auto Name = cast<MDString>(MD->getOperand(2))->getString();
- // We may see pairs with same name but different GUIDs here in LTO mode, due
- // to static same-named functions inlined from other modules into this
- // module. Function profiles with the same name will be merged no matter
- // whether they are collected on the same function. Therefore we just pick
- // up the last <Name, GUID> pair here to represent the same-named function
- // collection and all probes from the collection will be merged into a
- // single profile eventually.
- Names[Name] = GUID;
- }
-
- LLVM_DEBUG(dump());
-}
-
void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
uint64_t Type, uint64_t Attr,
const DILocation *DebugLoc) {
@@ -60,8 +35,7 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
auto Name = SP->getLinkageName();
if (Name.empty())
Name = SP->getName();
- assert(Names.count(Name) && "Pseudo probe descriptor missing for function");
- uint64_t CallerGuid = Names[Name];
+ uint64_t CallerGuid = Function::getGUID(Name);
uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex(
InlinedAt->getDiscriminator());
ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId);
@@ -72,13 +46,3 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
ReversedInlineStack.rend());
Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack);
}
-
-#ifndef NDEBUG
-void PseudoProbeHandler::dump() const {
- dbgs() << "\n=============================\n";
- dbgs() << "\nFunction Name to GUID map:\n";
- dbgs() << "\n=============================\n";
- for (const auto &Item : Names)
- dbgs() << "Func: " << Item.first << " GUID: " << Item.second << "\n";
-}
-#endif
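With the per-module name map gone, emitPseudoProbe derives each caller GUID directly from the function name and still rebuilds the inline stack by walking the location's inlined-at chain outward and then reversing it. A simplified sketch of that walk; Location and its fields are stand-ins for DILocation, and the GUID values are arbitrary:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Stand-in for a debug location frame: the caller's GUID, the probe id encoded
// at the call site, and the frame this one was inlined at (nullptr at the top).
struct Location {
  uint64_t CallerGuid;
  uint64_t CallerProbeId;
  const Location *InlinedAt;
};

// Collect (guid, probe id) pairs from innermost to outermost, then reverse so
// the emitted inline stack starts with the outermost caller.
static std::vector<std::pair<uint64_t, uint64_t>>
buildInlineStack(const Location *Loc) {
  std::vector<std::pair<uint64_t, uint64_t>> Stack;
  for (const Location *L = Loc; L; L = L->InlinedAt)
    Stack.emplace_back(L->CallerGuid, L->CallerProbeId);
  std::reverse(Stack.begin(), Stack.end());
  return Stack;
}

int main() {
  Location Outer{0x1111, 3, nullptr};
  Location Inner{0x2222, 7, &Outer};
  for (const auto &Frame : buildInlineStack(&Inner))
    std::printf("guid=%llx probe=%llu\n", (unsigned long long)Frame.first,
                (unsigned long long)Frame.second);
}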
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
index bea07ceae9d4..f2026a118bf5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -26,12 +26,9 @@ class DILocation;
class PseudoProbeHandler : public AsmPrinterHandler {
// Target of pseudo probe emission.
AsmPrinter *Asm;
- // Name to GUID map
- DenseMap<StringRef, uint64_t> Names;
public:
- PseudoProbeHandler(AsmPrinter *A, Module *M);
- ~PseudoProbeHandler() override;
+ PseudoProbeHandler(AsmPrinter *A) : Asm(A) {}
void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attr, const DILocation *DebugLoc);
@@ -43,10 +40,6 @@ public:
void endFunction(const MachineFunction *MF) override {}
void beginInstruction(const MachineInstr *MI) override {}
void endInstruction() override {}
-
-#ifndef NDEBUG
- void dump() const;
-#endif
};
} // namespace llvm
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index 3a9c9df79783..b30d9cc12abc 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -55,6 +55,14 @@ void WinException::endModule() {
for (const Function &F : *M)
if (F.hasFnAttribute("safeseh"))
OS.EmitCOFFSafeSEH(Asm->getSymbol(&F));
+
+ if (M->getModuleFlag("ehcontguard") && !EHContTargets.empty()) {
+ // Emit the symbol index of each ehcont target.
+ OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection());
+ for (const MCSymbol *S : EHContTargets) {
+ OS.EmitCOFFSymbolIndex(S);
+ }
+ }
}
void WinException::beginFunction(const MachineFunction *MF) {
@@ -164,6 +172,12 @@ void WinException::endFunction(const MachineFunction *MF) {
Asm->OutStreamer->PopSection();
}
+
+ if (!MF->getCatchretTargets().empty()) {
+ // Copy the function's catchret targets to a module-level list.
+ EHContTargets.insert(EHContTargets.end(), MF->getCatchretTargets().begin(),
+ MF->getCatchretTargets().end());
+ }
}
/// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock.
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.h b/llvm/lib/CodeGen/AsmPrinter/WinException.h
index 8bd5d1bc6d2a..feea05ba63ad 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H
#include "EHStreamer.h"
+#include <vector>
namespace llvm {
class GlobalValue;
@@ -44,6 +45,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// The section of the last funclet start.
MCSection *CurrentFuncletTextSection = nullptr;
+ /// The list of symbols to add to the ehcont section
+ std::vector<const MCSymbol *> EHContTargets;
+
void emitCSpecificHandlerTable(const MachineFunction *MF);
void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 4026022caa07..125a3be585cb 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -78,12 +78,14 @@ namespace {
StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
bool expandAtomicStore(StoreInst *SI);
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
+ AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
Value *
insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
- AtomicOrdering MemOpOrder,
+ Align AddrAlign, AtomicOrdering MemOpOrder,
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
void expandAtomicOpToLLSC(
- Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
+ Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder,
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
void expandPartwordAtomicRMW(
AtomicRMWInst *I,
@@ -95,8 +97,8 @@ namespace {
AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
static Value *insertRMWCmpXchgLoop(
- IRBuilder<> &Builder, Type *ResultType, Value *Addr,
- AtomicOrdering MemOpOrder,
+ IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder, SyncScope::ID SSID,
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
CreateCmpXchgInstFun CreateCmpXchg);
bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
@@ -234,12 +236,13 @@ bool AtomicExpand::runOnFunction(Function &F) {
TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
TargetLoweringBase::AtomicExpansionKind::None &&
(isReleaseOrStronger(CASI->getSuccessOrdering()) ||
- isAcquireOrStronger(CASI->getSuccessOrdering()))) {
+ isAcquireOrStronger(CASI->getSuccessOrdering()) ||
+ isAcquireOrStronger(CASI->getFailureOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
// insertion, with a stronger one on the success path than on the
// failure path. As a result, fence insertion is directly done by
// expandAtomicCmpXchg in that case.
- FenceOrdering = CASI->getSuccessOrdering();
+ FenceOrdering = CASI->getMergedOrdering();
CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
CASI->setFailureOrdering(AtomicOrdering::Monotonic);
}
@@ -280,9 +283,18 @@ bool AtomicExpand::runOnFunction(Function &F) {
if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
MadeChange = true;
} else {
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ if (Op == AtomicRMWInst::Xchg &&
+ RMWI->getValOperand()->getType()->isFloatingPointTy()) {
+ // TODO: add a TLI hook to control this so that targets can be converted
+ // to lowering the original type one at a time.
+ RMWI = convertAtomicXchgToIntegerType(RMWI);
+ assert(RMWI->getValOperand()->getType()->isIntegerTy() &&
+ "invariant broken");
+ MadeChange = true;
+ }
unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
unsigned ValueSize = getAtomicOpSize(RMWI);
- AtomicRMWInst::BinOp Op = RMWI->getOperation();
if (ValueSize < MinCASSize &&
(Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
Op == AtomicRMWInst::And)) {
@@ -362,13 +374,40 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
return NewLI;
}
+AtomicRMWInst *
+AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
+ auto *M = RMWI->getModule();
+ Type *NewTy =
+ getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
+
+ IRBuilder<> Builder(RMWI);
+
+ Value *Addr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
+ Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+ Value *NewVal = Builder.CreateBitCast(Val, NewTy);
+
+ auto *NewRMWI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
+ RMWI->getAlign(), RMWI->getOrdering());
+ NewRMWI->setVolatile(RMWI->isVolatile());
+ LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
+
+ Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
+ RMWI->replaceAllUsesWith(NewRVal);
+ RMWI->eraseFromParent();
+ return NewRMWI;
+}
+
bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
case TargetLoweringBase::AtomicExpansionKind::LLSC:
expandAtomicOpToLLSC(
- LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
+ LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
+ LI->getOrdering(),
[](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
return true;
case TargetLoweringBase::AtomicExpansionKind::LLOnly:
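The convertAtomicXchgToIntegerType added in the hunk above rewrites a floating-point atomicrmw xchg as an integer exchange of the same width, bitcasting the operand in and the old value back out. The same idea in plain C++, with std::atomic and memcpy standing in for the IR-level bitcasts (an illustration of the transform, not the pass's code):

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Exchange a float through a same-width integer atomic: copy the new value's
// bits in, do an integer xchg, and copy the old bits back out as a float.
static float atomicXchgFloat(std::atomic<uint32_t> &Bits, float NewVal) {
  uint32_t NewBits;
  std::memcpy(&NewBits, &NewVal, sizeof(NewBits));
  uint32_t OldBits = Bits.exchange(NewBits);
  float Old;
  std::memcpy(&Old, &OldBits, sizeof(Old));
  return Old;
}

int main() {
  std::atomic<uint32_t> Slot{0};
  float Old = atomicXchgFloat(Slot, 3.5f);
  std::printf("old=%f\n", Old);
}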
@@ -386,8 +425,8 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
// On some architectures, load-linked instructions are atomic for larger
// sizes than normal loads. For example, the only 64-bit load guaranteed
// to be single-copy atomic by ARM is an ldrexd (A3.5.3).
- Value *Val =
- TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
+ Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
+ LI->getPointerOperand(), LI->getOrdering());
TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
LI->replaceAllUsesWith(Val);
@@ -403,11 +442,11 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
Order = AtomicOrdering::Monotonic;
Value *Addr = LI->getPointerOperand();
- Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ Type *Ty = LI->getType();
Constant *DummyVal = Constant::getNullValue(Ty);
Value *Pair = Builder.CreateAtomicCmpXchg(
- Addr, DummyVal, DummyVal, Order,
+ Addr, DummyVal, DummyVal, LI->getAlign(), Order,
AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
@@ -454,9 +493,9 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// It is the responsibility of the target to only signal expansion via
// shouldExpandAtomicRMW in cases where this is required and possible.
IRBuilder<> Builder(SI);
- AtomicRMWInst *AI =
- Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
- SI->getValueOperand(), SI->getOrdering());
+ AtomicRMWInst *AI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
+ SI->getAlign(), SI->getOrdering());
SI->eraseFromParent();
// Now we have an appropriate swap instruction, lower it as usual.
@@ -464,8 +503,8 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
}
static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
- Value *Loaded, Value *NewVal,
- AtomicOrdering MemOpOrder,
+ Value *Loaded, Value *NewVal, Align AddrAlign,
+ AtomicOrdering MemOpOrder, SyncScope::ID SSID,
Value *&Success, Value *&NewLoaded) {
Type *OrigTy = NewVal->getType();
@@ -479,9 +518,9 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
Loaded = Builder.CreateBitCast(Loaded, IntTy);
}
- Value* Pair = Builder.CreateAtomicCmpXchg(
- Addr, Loaded, NewVal, MemOpOrder,
- AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
Success = Builder.CreateExtractValue(Pair, 1, "success");
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
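createCmpXchgInstFun supplies the per-iteration compare-exchange that insertRMWCmpXchgLoop uses when it expands an atomicrmw into a load/compute/cmpxchg retry loop. The same lowering strategy expressed with std::atomic (a sketch of the shape, not LLVM code):

#include <atomic>
#include <cstdio>
#include <functional>

// Generic read-modify-write built from compare-exchange: load the current
// value, compute the new one, and retry until no other thread raced us.
static int atomicRMW(std::atomic<int> &Addr,
                     const std::function<int(int)> &PerformOp) {
  int Loaded = Addr.load(std::memory_order_relaxed);
  int NewVal;
  do {
    NewVal = PerformOp(Loaded);
    // On failure, compare_exchange_weak reloads Loaded with the current value.
  } while (!Addr.compare_exchange_weak(Loaded, NewVal,
                                       std::memory_order_seq_cst,
                                       std::memory_order_relaxed));
  return Loaded; // the old value, matching atomicrmw semantics
}

int main() {
  std::atomic<int> X{41};
  int Old = atomicRMW(X, [](int V) { return V + 1; });
  std::printf("old=%d new=%d\n", Old, X.load());
}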
@@ -546,7 +585,7 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
AI->getValOperand());
};
expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
- AI->getOrdering(), PerformOp);
+ AI->getAlign(), AI->getOrdering(), PerformOp);
}
return true;
}
@@ -581,6 +620,7 @@ struct PartwordMaskValues {
Type *WordType = nullptr;
Type *ValueType = nullptr;
Value *AlignedAddr = nullptr;
+ Align AlignedAddrAlignment;
// The remaining fields can be null.
Value *ShiftAmt = nullptr;
Value *Mask = nullptr;
@@ -603,6 +643,7 @@ raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
PrintObj(PMV.ValueType);
O << " AlignedAddr: ";
PrintObj(PMV.AlignedAddr);
+ O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
O << " ShiftAmt: ";
PrintObj(PMV.ShiftAmt);
O << " Mask: ";
@@ -633,6 +674,7 @@ raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
/// Inv_Mask: The inverse of Mask.
static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
Type *ValueType, Value *Addr,
+ Align AddrAlign,
unsigned MinWordSize) {
PartwordMaskValues PMV;
@@ -646,6 +688,9 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
: ValueType;
if (PMV.ValueType == PMV.WordType) {
PMV.AlignedAddr = Addr;
+ PMV.AlignedAddrAlignment = AddrAlign;
+ PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
+ PMV.Mask = ConstantInt::get(PMV.ValueType, ~0);
return PMV;
}
@@ -654,10 +699,12 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
Type *WordPtrType =
PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+ // TODO: we could skip some of this if AddrAlign >= MinWordSize.
Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
PMV.AlignedAddr = Builder.CreateIntToPtr(
Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
"AlignedAddr");
+ PMV.AlignedAddrAlignment = Align(MinWordSize);
Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
if (DL.isLittleEndian()) {
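createMaskInstrs, extended above to carry the alignment, reduces a narrow atomic to an operation on the containing naturally aligned word: it rounds the address down, then derives the shift and mask that locate the narrow value inside that word. The little-endian arithmetic, sketched with plain integers (names and layout are simplified relative to PartwordMaskValues):

#include <cstdint>
#include <cstdio>

// Given the address and width of a narrow atomic (e.g. an i8 or i16), compute
// the containing naturally aligned word, the bit shift of the value inside it,
// and the mask that selects it. WordSize is the minimum cmpxchg width in bytes.
struct PartwordMask {
  uint64_t AlignedAddr;
  unsigned ShiftAmt;
  uint64_t Mask;
};

static PartwordMask makePartwordMask(uint64_t Addr, unsigned ValueBytes,
                                     unsigned WordSize) {
  PartwordMask PMV;
  PMV.AlignedAddr = Addr & ~(uint64_t)(WordSize - 1); // round down to the word
  unsigned PtrLSB = Addr & (WordSize - 1);            // byte offset in the word
  PMV.ShiftAmt = PtrLSB * 8;                          // little-endian layout
  uint64_t ValueMask =
      (ValueBytes * 8 == 64) ? ~0ULL : ((1ULL << (ValueBytes * 8)) - 1);
  PMV.Mask = ValueMask << PMV.ShiftAmt;
  return PMV;
}

int main() {
  PartwordMask PMV = makePartwordMask(0x1003, /*ValueBytes=*/1, /*WordSize=*/4);
  std::printf("aligned=%#llx shift=%u mask=%#llx\n",
              (unsigned long long)PMV.AlignedAddr, PMV.ShiftAmt,
              (unsigned long long)PMV.Mask);
}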
@@ -760,12 +807,13 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
void AtomicExpand::expandPartwordAtomicRMW(
AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
AtomicOrdering MemOpOrder = AI->getOrdering();
+ SyncScope::ID SSID = AI->getSyncScopeID();
IRBuilder<> Builder(AI);
PartwordMaskValues PMV =
createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
- TLI->getMinCmpXchgSizeInBits() / 8);
+ AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
Value *ValOperand_Shifted =
Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
@@ -778,13 +826,15 @@ void AtomicExpand::expandPartwordAtomicRMW(
Value *OldResult;
if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
- OldResult =
- insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
- PerformPartwordOp, createCmpXchgInstFun);
+ OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
+ PMV.AlignedAddrAlignment, MemOpOrder,
+ SSID, PerformPartwordOp,
+ createCmpXchgInstFun);
} else {
assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
- MemOpOrder, PerformPartwordOp);
+ PMV.AlignedAddrAlignment, MemOpOrder,
+ PerformPartwordOp);
}
Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
@@ -803,7 +853,7 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
PartwordMaskValues PMV =
createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
- TLI->getMinCmpXchgSizeInBits() / 8);
+ AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
Value *ValOperand_Shifted =
Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
@@ -817,8 +867,9 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
else
NewOperand = ValOperand_Shifted;
- AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
- NewOperand, AI->getOrdering());
+ AtomicRMWInst *NewAI =
+ Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
+ PMV.AlignedAddrAlignment, AI->getOrdering());
Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
AI->replaceAllUsesWith(FinalOldResult);
@@ -871,8 +922,6 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
IRBuilder<> Builder(CI);
LLVMContext &Ctx = Builder.getContext();
- const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
-
BasicBlock *EndBB =
BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
auto FailureBB =
@@ -884,8 +933,9 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- PartwordMaskValues PMV = createMaskInstrs(
- Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
+ CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
// Shift the incoming values over, into the right location in the word.
Value *NewVal_Shifted =
@@ -909,8 +959,8 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
- PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
- CI->getFailureOrdering(), CI->getSyncScopeID());
+ PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
+ CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
NewCI->setVolatile(CI->isVolatile());
// When we're building a strong cmpxchg, we need a loop, so you
// might think we could use a weak cmpxchg inside. But, using strong
@@ -953,11 +1003,12 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
}
void AtomicExpand::expandAtomicOpToLLSC(
- Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
+ Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder,
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
IRBuilder<> Builder(I);
- Value *Loaded =
- insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
+ Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
+ MemOpOrder, PerformOp);
I->replaceAllUsesWith(Loaded);
I->eraseFromParent();
@@ -968,7 +1019,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
PartwordMaskValues PMV =
createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
- TLI->getMinCmpXchgSizeInBits() / 8);
+ AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
// The value operand must be sign-extended for signed min/max so that the
// target's signed comparison instructions can be used. Otherwise, just
@@ -994,7 +1045,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
PartwordMaskValues PMV = createMaskInstrs(
Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
- TLI->getMinCmpXchgSizeInBits() / 8);
+ CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
Value *CmpVal_Shifted = Builder.CreateShl(
Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
@@ -1004,7 +1055,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
"NewVal_Shifted");
Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
- CI->getSuccessOrdering());
+ CI->getMergedOrdering());
Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
Value *Res = UndefValue::get(CI->getType());
Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
@@ -1017,13 +1068,17 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
}
Value *AtomicExpand::insertRMWLLSCLoop(
- IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
AtomicOrdering MemOpOrder,
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
LLVMContext &Ctx = Builder.getContext();
BasicBlock *BB = Builder.GetInsertBlock();
Function *F = BB->getParent();
+ assert(AddrAlign >=
+ F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
+ "Expected at least natural alignment at this point.");
+
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
//
// The standard expansion we produce is:
@@ -1048,7 +1103,7 @@ Value *AtomicExpand::insertRMWLLSCLoop(
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
Value *NewVal = PerformOp(Builder, Loaded);
@@ -1082,11 +1137,9 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
-
- auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
- CI->getSuccessOrdering(),
- CI->getFailureOrdering(),
- CI->getSyncScopeID());
+ auto *NewCI = Builder.CreateAtomicCmpXchg(
+ NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
+ CI->getFailureOrdering(), CI->getSyncScopeID());
NewCI->setVolatile(CI->isVolatile());
NewCI->setWeak(CI->isWeak());
LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
@@ -1117,8 +1170,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
// should preserve the ordering.
bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
- AtomicOrdering MemOpOrder =
- ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
+ AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
+ ? AtomicOrdering::Monotonic
+ : CI->getMergedOrdering();
// In implementations which use a barrier to achieve release semantics, we can
// delay emitting this barrier until we know a store is actually going to be
@@ -1211,13 +1265,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
PartwordMaskValues PMV =
createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
- TLI->getMinCmpXchgSizeInBits() / 8);
+ CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
Builder.CreateBr(StartBB);
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(StartBB);
Value *UnreleasedLoad =
- TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+ TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
Value *UnreleasedLoadExtract =
extractMaskedValue(Builder, UnreleasedLoad, PMV);
Value *ShouldStore = Builder.CreateICmpEQ(
@@ -1250,7 +1304,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(ReleasedLoadBB);
Value *SecondLoad;
if (HasReleasedLoadBB) {
- SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+ SecondLoad =
+ TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
CI->getCompareOperand(), "should_store");
@@ -1379,8 +1434,8 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
}
Value *AtomicExpand::insertRMWCmpXchgLoop(
- IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
- AtomicOrdering MemOpOrder,
+ IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder, SyncScope::ID SSID,
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
CreateCmpXchgInstFun CreateCmpXchg) {
LLVMContext &Ctx = Builder.getContext();
@@ -1411,9 +1466,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
- // Atomics require at least natural alignment.
- InitLoaded->setAlignment(Align(ResultTy->getPrimitiveSizeInBits() / 8));
+ LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
@@ -1426,11 +1479,11 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
Value *NewLoaded = nullptr;
Value *Success = nullptr;
- CreateCmpXchg(Builder, Addr, Loaded, NewVal,
+ CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
MemOpOrder == AtomicOrdering::Unordered
? AtomicOrdering::Monotonic
: MemOpOrder,
- Success, NewLoaded);
+ SSID, Success, NewLoaded);
assert(Success && NewLoaded);
Loaded->addIncoming(NewLoaded, LoopBB);
@@ -1466,7 +1519,8 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg) {
IRBuilder<> Builder(AI);
Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
- Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
+ Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
+ AI->getOrdering(), AI->getSyncScopeID(),
[&](IRBuilder<> &Builder, Value *Loaded) {
return performAtomicOp(AI->getOperation(), Builder, Loaded,
AI->getValOperand());
@@ -1614,20 +1668,20 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
// libcalls (add/sub/etc) and we needed a generic. So, expand to a
// CAS libcall, via a CAS loop, instead.
if (!Success) {
- expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
- Value *Loaded, Value *NewVal,
- AtomicOrdering MemOpOrder,
- Value *&Success, Value *&NewLoaded) {
- // Create the CAS instruction normally...
- AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
- Addr, Loaded, NewVal, MemOpOrder,
- AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
- Success = Builder.CreateExtractValue(Pair, 1, "success");
- NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
-
- // ...and then expand the CAS into a libcall.
- expandAtomicCASToLibcall(Pair);
- });
+ expandAtomicRMWToCmpXchg(
+ I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
+ Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
+ SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
+ // Create the CAS instruction normally...
+ AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, Alignment, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
+ Success = Builder.CreateExtractValue(Pair, 1, "success");
+ NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+ // ...and then expand the CAS into a libcall.
+ expandAtomicCASToLibcall(Pair);
+ });
}
}
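A minimal sketch (not part of the patch above, names illustrative, assuming the includes already present in AtomicExpandPass.cpp) of what a CreateCmpXchgInstFun callback looks like once the extra Align and SyncScope::ID parameters are threaded through insertRMWCmpXchgLoop; it mirrors the lambda in expandAtomicRMWToLibcall and uses only IRBuilder calls that appear in the hunks above:

// Hypothetical callback with the new CreateCmpXchgInstFun shape: the cmpxchg
// it emits carries the alignment and sync scope forwarded by the CAS loop
// instead of relying on the old natural-alignment assumption.
static void createCmpXchgSketch(IRBuilder<> &Builder, Value *Addr,
                                Value *Loaded, Value *NewVal, Align Alignment,
                                AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                Value *&Success, Value *&NewLoaded) {
  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, Alignment, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}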
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 7499ea8b42d4..1a6eed272ca2 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -88,6 +88,12 @@ cl::opt<std::string> llvm::BBSectionsColdTextPrefix(
cl::desc("The text prefix to use for cold basic block clusters"),
cl::init(".text.split."), cl::Hidden);
+cl::opt<bool> BBSectionsDetectSourceDrift(
+ "bbsections-detect-source-drift",
+ cl::desc("This checks if there is a fdo instr. profile hash "
+ "mismatch for this function"),
+ cl::init(true), cl::Hidden);
+
namespace {
// This struct represents the cluster information for a machine basic block.
@@ -303,20 +309,51 @@ static bool avoidZeroOffsetLandingPad(MachineFunction &MF) {
MachineBasicBlock::iterator MI = MBB.begin();
while (!MI->isEHLabel())
++MI;
- MCInst Noop;
- MF.getSubtarget().getInstrInfo()->getNoop(Noop);
+ MCInst Nop = MF.getSubtarget().getInstrInfo()->getNop();
BuildMI(MBB, MI, DebugLoc(),
- MF.getSubtarget().getInstrInfo()->get(Noop.getOpcode()));
+ MF.getSubtarget().getInstrInfo()->get(Nop.getOpcode()));
return false;
}
}
return true;
}
+// This checks if the source of this function has drifted since this binary was
+// profiled previously. For now, we piggyback on what PGO does to detect this
+// with instrumented profiles: PGO emits a hash of the IR and checks whether
+// the hash has changed. Advanced basic block layout is usually done on top of
+// PGO-optimized binaries, so this check works well in practice.
+static bool hasInstrProfHashMismatch(MachineFunction &MF) {
+ if (!BBSectionsDetectSourceDrift)
+ return false;
+
+ const char MetadataName[] = "instr_prof_hash_mismatch";
+ auto *Existing = MF.getFunction().getMetadata(LLVMContext::MD_annotation);
+ if (Existing) {
+ MDTuple *Tuple = cast<MDTuple>(Existing);
+ for (auto &N : Tuple->operands())
+ if (cast<MDString>(N.get())->getString() == MetadataName)
+ return true;
+ }
+
+ return false;
+}
+
bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
auto BBSectionsType = MF.getTarget().getBBSectionsType();
assert(BBSectionsType != BasicBlockSection::None &&
"BB Sections not enabled!");
+
+ // Check for source drift. If the source has changed since the profiles
+ // were obtained, optimizing basic blocks might be sub-optimal.
+ // This only applies to BasicBlockSection::List as it creates
+ // clusters of basic blocks using basic block IDs. Source drift can
+ // invalidate these groupings, leading to sub-optimal code generation with
+ // regard to performance.
+ if (BBSectionsType == BasicBlockSection::List &&
+ hasInstrProfHashMismatch(MF))
+ return true;
+
// Renumber blocks before sorting them for basic block sections. This is
// useful during sorting, basic blocks in the same section will retain the
// default order. This renumbering should also be done for basic block
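A side note on hasInstrProfHashMismatch() above: the "instr_prof_hash_mismatch" marker it scans for is plain !annotation metadata on the function. The sketch below is illustrative only (the real producer lives in the PGO instrumentation code, not in this patch) and shows one way such a marker could be attached so that the check above finds it, assuming the usual llvm/IR headers:

// Hypothetical helper: attach an !annotation string in the shape that
// hasInstrProfHashMismatch() expects to find on the function.
static void markInstrProfHashMismatch(Function &F) {
  LLVMContext &Ctx = F.getContext();
  Metadata *S = MDString::get(Ctx, "instr_prof_hash_mismatch");
  F.addMetadata(LLVMContext::MD_annotation, *MDTuple::get(Ctx, {S}));
}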
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index fd3f465fb390..65e7e92fe152 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -164,10 +164,10 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
TriedMerging.erase(MBB);
// Update call site info.
- std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) {
+ for (const MachineInstr &MI : *MBB)
if (MI.shouldUpdateCallSiteInfo())
MF->eraseCallSiteInfo(&MI);
- });
+
// Remove the block.
MF->erase(MBB);
EHScopeMembership.erase(MBB);
@@ -286,7 +286,7 @@ static unsigned HashMachineInstr(const MachineInstr &MI) {
/// HashEndOfMBB - Hash the last instruction in the MBB.
static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
- MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
+ MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(false);
if (I == MBB.end())
return 0;
@@ -566,9 +566,9 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// Move the iterators to the beginning of the MBB if we only got debug
// instructions before the tail. This is to avoid splitting a block when we
// only got debug instructions before the tail (to be invariant on -g).
- if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1)
+ if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end(), false) == I1)
I1 = MBB1->begin();
- if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2)
+ if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end(), false) == I2)
I2 = MBB2->begin();
bool FullBlockTail1 = I1 == MBB1->begin();
@@ -1217,7 +1217,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
// Blocks should be considered empty if they contain only debug info;
// else the debug info would affect codegen.
static bool IsEmptyBlock(MachineBasicBlock *MBB) {
- return MBB->getFirstNonDebugInstr() == MBB->end();
+ return MBB->getFirstNonDebugInstr(true) == MBB->end();
}
// Blocks with only debug info and branches should be considered the same
@@ -1919,8 +1919,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator FIE = FBB->end();
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
- TIB = skipDebugInstructionsForward(TIB, TIE);
- FIB = skipDebugInstructionsForward(FIB, FIE);
+ TIB = skipDebugInstructionsForward(TIB, TIE, false);
+ FIB = skipDebugInstructionsForward(FIB, FIE, false);
if (TIB == TIE || FIB == FIE)
break;
diff --git a/llvm/lib/CodeGen/BuiltinGCs.cpp b/llvm/lib/CodeGen/BuiltinGCs.cpp
deleted file mode 100644
index bfc10cb3fef2..000000000000
--- a/llvm/lib/CodeGen/BuiltinGCs.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-//===- BuiltinGCs.cpp - Boilerplate for our built in GC types -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the boilerplate required to define our various built in
-// gc lowering strategies.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/BuiltinGCs.h"
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/Support/Casting.h"
-
-using namespace llvm;
-
-namespace {
-
-/// An example GC which attempts to be compatible with Erlang/OTP garbage
-/// collector.
-///
-/// The frametable emitter is in ErlangGCPrinter.cpp.
-class ErlangGC : public GCStrategy {
-public:
- ErlangGC() {
- NeededSafePoints = true;
- UsesMetadata = true;
- }
-};
-
-/// An example GC which attempts to be compatible with Objective Caml 3.10.0
-///
-/// The frametable emitter is in OcamlGCPrinter.cpp.
-class OcamlGC : public GCStrategy {
-public:
- OcamlGC() {
- NeededSafePoints = true;
- UsesMetadata = true;
- }
-};
-
-/// A GC strategy for uncooperative targets. This implements lowering for the
-/// llvm.gc* intrinsics for targets that do not natively support them (which
-/// includes the C backend). Note that the code generated is not quite as
-/// efficient as algorithms which generate stack maps to identify roots.
-///
-/// In order to support this particular transformation, all stack roots are
-/// coallocated in the stack. This allows a fully target-independent stack map
-/// while introducing only minor runtime overhead.
-class ShadowStackGC : public GCStrategy {
-public:
- ShadowStackGC() {}
-};
-
-/// A GCStrategy which serves as an example for the usage of a statepoint-based
-/// lowering strategy. This GCStrategy is intended to be suitable as a default
-/// implementation usable with any collector which can consume the standard
-/// stackmap format generated by statepoints, uses the default address space to
-/// distinguish between GC-managed and non-GC-managed pointers, and has
-/// reasonable relocation semantics.
-class StatepointGC : public GCStrategy {
-public:
- StatepointGC() {
- UseStatepoints = true;
- // These options are all gc.root specific, we specify them so that the
- // gc.root lowering code doesn't run.
- NeededSafePoints = false;
- UsesMetadata = false;
- }
-
- Optional<bool> isGCManagedPointer(const Type *Ty) const override {
- // Method is only valid on pointer typed values.
- const PointerType *PT = cast<PointerType>(Ty);
- // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
- // GC managed heap. We know that a pointer into this heap needs to be
- // updated and that no other pointer does. Note that addrspace(1) is used
- // only as an example, it has no special meaning, and is not reserved for
- // GC usage.
- return (1 == PT->getAddressSpace());
- }
-};
-
-/// A GCStrategy for the CoreCLR Runtime. The strategy is similar to
-/// Statepoint-example GC, but differs from it in certain aspects, such as:
-/// 1) Base-pointers need not be explicitly tracked and reported for
-/// interior pointers
-/// 2) Uses a different format for encoding stack-maps
-/// 3) Location of Safe-point polls: polls are only needed before loop-back
-/// edges and before tail-calls (not needed at function-entry)
-///
-/// The above differences in behavior are to be implemented in upcoming
-/// checkins.
-class CoreCLRGC : public GCStrategy {
-public:
- CoreCLRGC() {
- UseStatepoints = true;
- // These options are all gc.root specific, we specify them so that the
- // gc.root lowering code doesn't run.
- NeededSafePoints = false;
- UsesMetadata = false;
- }
-
- Optional<bool> isGCManagedPointer(const Type *Ty) const override {
- // Method is only valid on pointer typed values.
- const PointerType *PT = cast<PointerType>(Ty);
- // We pick addrspace(1) as our GC managed heap.
- return (1 == PT->getAddressSpace());
- }
-};
-
-} // end anonymous namespace
-
-// Register all the above so that they can be found at runtime. Note that
-// these static initializers are important since the registration list is
-// constructed from their storage.
-static GCRegistry::Add<ErlangGC> A("erlang",
- "erlang-compatible garbage collector");
-static GCRegistry::Add<OcamlGC> B("ocaml", "ocaml 3.10-compatible GC");
-static GCRegistry::Add<ShadowStackGC>
- C("shadow-stack", "Very portable GC for uncooperative code generators");
-static GCRegistry::Add<StatepointGC> D("statepoint-example",
- "an example strategy for statepoint");
-static GCRegistry::Add<CoreCLRGC> E("coreclr", "CoreCLR-compatible GC");
-
-// Provide hook to ensure the containing library is fully loaded.
-void llvm::linkAllBuiltinGCs() {}
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 23c7fea01f28..1c2e3f998449 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -157,7 +157,7 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
// Initialize MBBMap.
for (MachineBasicBlock &MBB : MF) {
- MBBCFAInfo MBBInfo;
+ MBBCFAInfo &MBBInfo = MBBVector[MBB.getNumber()];
MBBInfo.MBB = &MBB;
MBBInfo.IncomingCFAOffset = InitialOffset;
MBBInfo.OutgoingCFAOffset = InitialOffset;
@@ -165,7 +165,6 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
MBBInfo.OutgoingCFARegister = InitialRegister;
MBBInfo.IncomingCSRSaved.resize(NumRegs);
MBBInfo.OutgoingCSRSaved.resize(NumRegs);
- MBBVector[MBB.getNumber()] = MBBInfo;
}
CSRLocMap.clear();
@@ -220,6 +219,14 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
case MCCFIInstruction::OpRestore:
CSRRestored.set(CFI.getRegister());
break;
+ case MCCFIInstruction::OpLLVMDefAspaceCfa:
+ // TODO: Add support for handling cfi_def_aspace_cfa.
+#ifndef NDEBUG
+ report_fatal_error(
+ "Support for cfi_llvm_def_aspace_cfa not implemented! Value of CFA "
+ "may be incorrect!\n");
+#endif
+ break;
case MCCFIInstruction::OpRememberState:
// TODO: Add support for handling cfi_remember_state.
#ifndef NDEBUG
@@ -265,9 +272,9 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
MBBInfo.OutgoingCFARegister = SetRegister;
// Update outgoing CSR info.
- MBBInfo.OutgoingCSRSaved = MBBInfo.IncomingCSRSaved;
- MBBInfo.OutgoingCSRSaved |= CSRSaved;
- MBBInfo.OutgoingCSRSaved.reset(CSRRestored);
+ BitVector::apply([](auto x, auto y, auto z) { return (x | y) & ~z; },
+ MBBInfo.OutgoingCSRSaved, MBBInfo.IncomingCSRSaved, CSRSaved,
+ CSRRestored);
}
void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) {
@@ -295,6 +302,7 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool InsertedCFIInstr = false;
+ BitVector SetDifference;
for (MachineBasicBlock &MBB : MF) {
// Skip the first MBB in a function
if (MBB.getNumber() == MF.front().getNumber()) continue;
@@ -346,8 +354,8 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
continue;
}
- BitVector SetDifference = PrevMBBInfo->OutgoingCSRSaved;
- SetDifference.reset(MBBInfo.IncomingCSRSaved);
+ BitVector::apply([](auto x, auto y) { return x & ~y; }, SetDifference,
+ PrevMBBInfo->OutgoingCSRSaved, MBBInfo.IncomingCSRSaved);
for (int Reg : SetDifference.set_bits()) {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg));
@@ -356,8 +364,8 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
InsertedCFIInstr = true;
}
- SetDifference = MBBInfo.IncomingCSRSaved;
- SetDifference.reset(PrevMBBInfo->OutgoingCSRSaved);
+ BitVector::apply([](auto x, auto y) { return x & ~y; }, SetDifference,
+ MBBInfo.IncomingCSRSaved, PrevMBBInfo->OutgoingCSRSaved);
for (int Reg : SetDifference.set_bits()) {
auto it = CSRLocMap.find(Reg);
assert(it != CSRLocMap.end() && "Reg should have an entry in CSRLocMap");
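The two CFIInstrInserter hunks above replace copy-then-reset BitVector code with BitVector::apply, which evaluates the lambda word-wise into the output vector and avoids temporary copies. A small sketch of the pattern, using only the call shape visible in the diff and assuming all operands are pre-sized to the same number of bits:

// Sketch of the BitVector::apply usage seen above:
// Out = (Incoming | Saved) & ~Restored, computed without temporary BitVectors.
static void computeOutgoingCSRSaved(llvm::BitVector &Out,
                                    const llvm::BitVector &Incoming,
                                    const llvm::BitVector &Saved,
                                    const llvm::BitVector &Restored) {
  llvm::BitVector::apply([](auto x, auto y, auto z) { return (x | y) & ~z; },
                         Out, Incoming, Saved, Restored);
}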
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 16f380c1eb62..863a0e1e0b56 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/StackMaps.h"
#include <cassert>
#include <tuple>
@@ -125,6 +126,16 @@ static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS,
return true;
}
+bool VirtRegAuxInfo::isLiveAtStatepointVarArg(LiveInterval &LI) {
+ return any_of(VRM.getRegInfo().reg_operands(LI.reg()),
+ [](MachineOperand &MO) {
+ MachineInstr *MI = MO.getParent();
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT)
+ return false;
+ return StatepointOpers(MI).getVarIdx() <= MI->getOperandNo(&MO);
+ });
+}
+
void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) {
float Weight = weightCalcHelper(LI);
// Check if unspillable.
@@ -290,9 +301,15 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// Mark li as unspillable if all live ranges are tiny and the interval
// is not live at any reg mask. If the interval is live at a reg mask
- // spilling may be required.
+ // spilling may be required. If li is used by a STATEPOINT instruction it
+ // must stay spillable: marking an interval with a statepoint use as not
+ // spillable risks leaving the allocator with no register for it.
+ // At the same time a STATEPOINT is perfectly fine with keeping this operand
+ // on the stack, so spilling such an interval and folding its reload from the
+ // stack into the instruction itself makes perfect sense.
if (ShouldUpdateLI && LI.isZeroLength(LIS.getSlotIndexes()) &&
- !LI.isLiveAtIndexes(LIS.getRegMaskSlots())) {
+ !LI.isLiveAtIndexes(LIS.getRegMaskSlots()) &&
+ !isLiveAtStatepointVarArg(LI)) {
LI.markNotSpillable();
return -1.0;
}
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index d2400d0371e3..e0e2db9f4725 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -97,8 +97,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRegUsageInfoCollectorPass(Registry);
initializeRegUsageInfoPropagationPass(Registry);
initializeRegisterCoalescerPass(Registry);
+ initializeRemoveRedundantDebugValuesPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
initializeSafeStackLegacyPassPass(Registry);
+ initializeShadowStackGCLoweringPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSjLjEHPreparePass(Registry);
initializeSlotIndexesPass(Registry);
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b2bc75c19709..77ce3d2fb563 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -46,6 +46,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -377,6 +378,7 @@ class TypePromotionTransaction;
}
void removeAllAssertingVHReferences(Value *V);
+ bool eliminateAssumptions(Function &F);
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -404,6 +406,7 @@ class TypePromotionTransaction;
bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
bool fixupDbgValue(Instruction *I);
bool placeDbgValues(Function &F);
+ bool placePseudoProbes(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
bool tryToPromoteExts(TypePromotionTransaction &TPT,
@@ -506,6 +509,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
}
}
+ // Get rid of @llvm.assume builtins before attempting to eliminate empty
+ // blocks, since there might be blocks that only contain @llvm.assume calls
+ // (plus arguments that we can get rid of).
+ EverMadeChange |= eliminateAssumptions(F);
+
// Eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
@@ -566,10 +574,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= ConstantFoldTerminator(&BB, true);
if (!MadeChange) continue;
- for (SmallVectorImpl<BasicBlock*>::iterator
- II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_empty(*II))
- WorkList.insert(*II);
+ for (BasicBlock *Succ : Successors)
+ if (pred_empty(Succ))
+ WorkList.insert(Succ);
}
// Delete the dead blocks and any of their dead successors.
@@ -580,10 +587,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
DeleteDeadBlock(BB);
- for (SmallVectorImpl<BasicBlock*>::iterator
- II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_empty(*II))
- WorkList.insert(*II);
+ for (BasicBlock *Succ : Successors)
+ if (pred_empty(Succ))
+ WorkList.insert(Succ);
}
// Merge pairs of basic blocks with unconditional branches, connected by
@@ -607,6 +613,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Do this last to clean up use-before-def scenarios introduced by other
// preparatory transforms.
EverMadeChange |= placeDbgValues(F);
+ EverMadeChange |= placePseudoProbes(F);
#ifndef NDEBUG
if (VerifyBFIUpdates)
@@ -616,6 +623,26 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
+bool CodeGenPrepare::eliminateAssumptions(Function &F) {
+ bool MadeChange = false;
+ for (BasicBlock &BB : F) {
+ CurInstIterator = BB.begin();
+ while (CurInstIterator != BB.end()) {
+ Instruction *I = &*(CurInstIterator++);
+ if (auto *Assume = dyn_cast<AssumeInst>(I)) {
+ MadeChange = true;
+ Value *Operand = Assume->getOperand(0);
+ Assume->eraseFromParent();
+
+ resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
+ RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
+ });
+ }
+ }
+ }
+ return MadeChange;
+}
+
/// An instruction is about to be deleted, so remove all references to it in our
/// GEP-tracking data structures.
void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
@@ -780,8 +807,8 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
// Skip merging if the block's successor is also a successor to any callbr
// that leads to this block.
// FIXME: Is this really needed? Is this a correctness issue?
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (auto *CBI = dyn_cast<CallBrInst>((Pred)->getTerminator()))
for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
if (DestBB == CBI->getSuccessor(i))
return false;
@@ -822,9 +849,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
// Find all other incoming blocks from which incoming values of all PHIs in
// DestBB are the same as the ones from BB.
- for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
- ++PI) {
- BasicBlock *DestBBPred = *PI;
+ for (BasicBlock *DestBBPred : predecessors(DestBB)) {
if (DestBBPred == BB)
continue;
@@ -964,8 +989,8 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
} else {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- PN.addIncoming(InVal, *PI);
+ for (BasicBlock *Pred : predecessors(BB))
+ PN.addIncoming(InVal, Pred);
}
}
}
@@ -1280,11 +1305,83 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
return SinkCast(CI);
}
+// Match a simple increment-by-constant operation. Note that if a sub is
+// matched, the step is negated (as if the step had been canonicalized to
+// an add, even though we leave the instruction alone).
+bool matchIncrement(const Instruction* IVInc, Instruction *&LHS,
+ Constant *&Step) {
+ if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
+ match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
+ m_Instruction(LHS), m_Constant(Step)))))
+ return true;
+ if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
+ match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
+ m_Instruction(LHS), m_Constant(Step))))) {
+ Step = ConstantExpr::getNeg(Step);
+ return true;
+ }
+ return false;
+}
+
+/// If given \p PN is an inductive variable with value IVInc coming from the
+/// backedge, and on each iteration it gets increased by Step, return pair
+/// <IVInc, Step>. Otherwise, return None.
+static Optional<std::pair<Instruction *, Constant *> >
+getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
+ const Loop *L = LI->getLoopFor(PN->getParent());
+ if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
+ return None;
+ auto *IVInc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
+ return None;
+ Instruction *LHS = nullptr;
+ Constant *Step = nullptr;
+ if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
+ return std::make_pair(IVInc, Step);
+ return None;
+}
+
+static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+ Instruction *LHS = nullptr;
+ Constant *Step = nullptr;
+ if (!matchIncrement(I, LHS, Step))
+ return false;
+ if (auto *PN = dyn_cast<PHINode>(LHS))
+ if (auto IVInc = getIVIncrement(PN, LI))
+ return IVInc->first == I;
+ return false;
+}
+
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
Value *Arg0, Value *Arg1,
CmpInst *Cmp,
Intrinsic::ID IID) {
- if (BO->getParent() != Cmp->getParent()) {
+ auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
+ if (!isIVIncrement(BO, LI))
+ return false;
+ const Loop *L = LI->getLoopFor(BO->getParent());
+ assert(L && "L should not be null after isIVIncrement()");
+ // Do not risk moving the increment into a child loop.
+ if (LI->getLoopFor(Cmp->getParent()) != L)
+ return false;
+
+ // Finally, we need to ensure that the insert point will dominate all
+ // existing uses of the increment.
+
+ auto &DT = getDT(*BO->getParent()->getParent());
+ if (DT.dominates(Cmp->getParent(), BO->getParent()))
+ // If we're moving up the dom tree, all uses are trivially dominated.
+ // (This is the common case for code produced by LSR.)
+ return true;
+
+ // Otherwise, special case the single use in the phi recurrence.
+ return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
+ };
+ if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
// We used to use a dominator tree here to allow multi-block optimization.
// But that was problematic because:
// 1. It could cause a perf regression by hoisting the math op into the
@@ -1295,6 +1392,14 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
// This is because we recompute the DT on every change in the main CGP
// run-loop. The recomputing is probably unnecessary in many cases, so if
// that was fixed, using a DT here would be ok.
+ //
+ // There is one important case we still want to handle: when BO is the IV
+ // increment. Important properties that make it profitable:
+ // - We can speculate the IV increment anywhere in the loop (as long as the
+ // indvar Phi is its only user);
+ // - Upon computing Cmp, we effectively compute something equivalent to the
+ // IV increment (even though it looks different in the IR). So moving it up
+ // to the cmp point does not really increase register pressure.
return false;
}
@@ -1936,6 +2041,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
return false;
+ // Bail if the value is never zero.
+ if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL))
+ return false;
+
// The intrinsic will be sunk behind a compare against zero and branch.
BasicBlock *StartBlock = CountZeros->getParent();
BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
@@ -2061,18 +2170,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
if (II) {
switch (II->getIntrinsicID()) {
default: break;
- case Intrinsic::assume: {
- Value *Operand = II->getOperand(0);
- II->eraseFromParent();
- // Prune the operand, it's most likely dead.
- resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
- RecursivelyDeleteTriviallyDeadInstructions(
- Operand, TLInfo, nullptr,
- [&](Value *V) { removeAllAssertingVHReferences(V); });
- });
- return true;
- }
-
+ case Intrinsic::assume:
+ llvm_unreachable("llvm.assume should have been removed already");
case Intrinsic::experimental_widenable_condition: {
// Give up on future widening opportunities so that we can fold away dead
// paths and merge blocks before going into block-local instruction
@@ -2242,21 +2341,25 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
if (PN && PN->getParent() != BB)
return false;
- // Make sure there are no instructions between the PHI and return, or that the
- // return is the first instruction in the block.
- if (PN) {
- BasicBlock::iterator BI = BB->begin();
- // Skip over debug and the bitcast.
- do {
- ++BI;
- } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI ||
- isa<PseudoProbeInst>(BI));
- if (&*BI != RetI)
- return false;
- } else {
- if (BB->getFirstNonPHIOrDbg(true) != RetI)
- return false;
- }
+ auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
+ const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
+ if (BC && BC->hasOneUse())
+ Inst = BC->user_back();
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ return II->getIntrinsicID() == Intrinsic::lifetime_end;
+ return false;
+ };
+
+ // Make sure there are no instructions between the first instruction
+ // and return.
+ const Instruction *BI = BB->getFirstNonPHI();
+ // Skip over debug and the bitcast.
+ while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
+ isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
+ BI = BI->getNextNode();
+ if (BI != RetI)
+ return false;
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
/// call.
@@ -2276,14 +2379,14 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
}
} else {
SmallPtrSet<BasicBlock*, 4> VisitedBBs;
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (!VisitedBBs.insert(*PI).second)
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (!VisitedBBs.insert(Pred).second)
continue;
- if (Instruction *I = (*PI)->rbegin()->getPrevNonDebugInstruction(true)) {
+ if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
CallInst *CI = dyn_cast<CallInst>(I);
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCallBBs.push_back(*PI);
+ TailCallBBs.push_back(Pred);
}
}
}
@@ -2775,11 +2878,16 @@ class TypePromotionTransaction {
/// Keep track of the debug users.
SmallVector<DbgValueInst *, 1> DbgValues;
+ /// Keep track of the new value so that we can undo it by replacing
+ /// instances of the new value with the original value.
+ Value *New;
+
using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
public:
/// Replace all the use of \p Inst by \p New.
- UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
+ UsesReplacer(Instruction *Inst, Value *New)
+ : TypePromotionAction(Inst), New(New) {
LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
<< "\n");
// Record the original uses.
@@ -2798,20 +2906,14 @@ class TypePromotionTransaction {
/// Reassign the original uses of Inst to Inst.
void undo() override {
LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
- for (use_iterator UseIt = OriginalUses.begin(),
- EndIt = OriginalUses.end();
- UseIt != EndIt; ++UseIt) {
- UseIt->Inst->setOperand(UseIt->Idx, Inst);
- }
+ for (InstructionAndIdx &Use : OriginalUses)
+ Use.Inst->setOperand(Use.Idx, Inst);
// RAUW has replaced all original uses with references to the new value,
// including the debug uses. Since we are undoing the replacements,
// the original debug uses must also be reinstated to maintain the
// correctness and utility of debug value instructions.
- for (auto *DVI: DbgValues) {
- LLVMContext &Ctx = Inst->getType()->getContext();
- auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
- DVI->setOperand(0, MV);
- }
+ for (auto *DVI : DbgValues)
+ DVI->replaceVariableLocationOp(New, Inst);
}
};
@@ -2981,9 +3083,8 @@ TypePromotionTransaction::getRestorationPoint() const {
}
bool TypePromotionTransaction::commit() {
- for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
- ++It)
- (*It)->commit();
+ for (std::unique_ptr<TypePromotionAction> &Action : Actions)
+ Action->commit();
bool Modified = !Actions.empty();
Actions.clear();
return Modified;
@@ -3007,6 +3108,8 @@ class AddressingModeMatcher {
const TargetLowering &TLI;
const TargetRegisterInfo &TRI;
const DataLayout &DL;
+ const LoopInfo &LI;
+ const std::function<const DominatorTree &()> getDTFn;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
/// the memory instruction that we're computing this address for.
@@ -3042,16 +3145,18 @@ class AddressingModeMatcher {
AddressingModeMatcher(
SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
- const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
- ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
- InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
+ const TargetRegisterInfo &TRI, const LoopInfo &LI,
+ const std::function<const DominatorTree &()> getDTFn,
+ Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM,
+ const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
+ TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
- DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
- MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
- PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
- OptSize(OptSize), PSI(PSI), BFI(BFI) {
+ DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
+ AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
+ InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
+ LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
IgnoreProfitability = false;
}
@@ -3066,18 +3171,18 @@ public:
static ExtAddrMode
Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
SmallVectorImpl<Instruction *> &AddrModeInsts,
- const TargetLowering &TLI, const TargetRegisterInfo &TRI,
- const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
- TypePromotionTransaction &TPT,
+ const TargetLowering &TLI, const LoopInfo &LI,
+ const std::function<const DominatorTree &()> getDTFn,
+ const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
+ InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
- MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT, LargeOffsetGEP,
- OptSize, PSI, BFI)
- .matchAddr(V, 0);
+ bool Success = AddressingModeMatcher(
+ AddrModeInsts, TLI, TRI, LI, getDTFn, AccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+ BFI).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
}
@@ -3773,11 +3878,12 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
// to see if ScaleReg is actually X+C. If so, we can turn this into adding
- // X*Scale + C*Scale to addr mode.
+ // X*Scale + C*Scale to addr mode. If we found an available IV increment, do
+ // not go any further: we can reuse it and cannot eliminate it anyway.
ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
- if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
- CI->getValue().isSignedIntN(64)) {
+ !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = AddLHS;
TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
@@ -3789,9 +3895,75 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
AddrMode = TestAddrMode;
return true;
}
+ // Restore status quo.
+ TestAddrMode = AddrMode;
+ }
+
+ // If this is an add recurrence with a constant step, return the increment
+ // instruction and the canonicalized step.
+ auto GetConstantStep = [this](const Value * V)
+ ->Optional<std::pair<Instruction *, APInt> > {
+ auto *PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return None;
+ auto IVInc = getIVIncrement(PN, &LI);
+ if (!IVInc)
+ return None;
+ // TODO: The result of the intrinsics above is two's complement. However, when
+ // the IV inc is expressed as an add or sub, iv.next is potentially a poison
+ // value. If it has nuw or nsw flags, we need to make sure that these flags
+ // are inferable at the point of the memory instruction; otherwise we would be
+ // replacing a well-defined two's-complement computation with poison. To avoid
+ // the potentially complex analysis needed to prove this, we reject such cases.
+ if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
+ if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
+ return None;
+ if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
+ return std::make_pair(IVInc->first, ConstantStep->getValue());
+ return None;
+ };
+
+ // Try to account for the following special case:
+ // 1. ScaleReg is an induction variable;
+ // 2. We use it with a non-zero offset;
+ // 3. The IV's increment is available at the point of the memory instruction.
+ //
+ // In this case, we may reuse the IV increment instead of the IV Phi to
+ // achieve the following advantages:
+ // 1. If the IV step matches the offset, we will have no need for the offset;
+ // 2. Even if they don't match, we will reduce the live-range overlap of the
+ // IV and the IV increment, which will potentially lead to better register
+ // assignment.
+ if (AddrMode.BaseOffs) {
+ if (auto IVStep = GetConstantStep(ScaleReg)) {
+ Instruction *IVInc = IVStep->first;
+ // The following assert is important to ensure a lack of infinite loops.
+ // This transform is (intentionally) the inverse of the one just above.
+ // If they don't agree on the definition of an increment, we'd alternate
+ // back and forth indefinitely.
+ assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
+ APInt Step = IVStep->second;
+ APInt Offset = Step * AddrMode.Scale;
+ if (Offset.isSignedIntN(64)) {
+ TestAddrMode.InBounds = false;
+ TestAddrMode.ScaledReg = IVInc;
+ TestAddrMode.BaseOffs -= Offset.getLimitedValue();
+ // If this addressing mode is legal, commit it.
+ // (Note that we defer the (expensive) domtree-based legality check
+ // to the very last possible point.)
+ if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
+ getDTFn().dominates(IVInc, MemoryInst)) {
+ AddrModeInsts.push_back(cast<Instruction>(IVInc));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ // Restore status quo.
+ TestAddrMode = AddrMode;
+ }
+ }
}
- // Otherwise, not (x+c)*scale, just return what we have.
+ // Otherwise, just return what we have.
return true;
}
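A source-level illustration (not taken from the patch; names purely illustrative) of the special case described in the comments above: in a loop like the one below, the address p[i + 1] can be formed from the already-available increment i + 1 instead of from the IV phi plus a separate constant offset, which is exactly the ScaledReg/BaseOffs rewrite the matcher performs.

// Illustrative C++ loop: the IV increment (i + 1) feeds the address, so
// reusing it as the scaled register lets the matcher drop the +1 offset and
// shortens the range over which both i and i + 1 are live.
int sumFromSecondElement(const int *p, int n) {
  int sum = 0;
  for (int i = 0; i != n; ++i)
    sum += p[i + 1];
  return sum;
}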
@@ -4881,9 +5053,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
0);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(
- MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
+ AddressAccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT,
+ LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -4986,9 +5159,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
AddrModeInsts.clear();
std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
0);
+ // Defer the query (and possible computation of) the dom tree to point of
+ // actual use. It's expected that most address matches don't actually need
+ // the domtree.
+ auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
+ Function *F = MemoryInst->getParent()->getParent();
+ return this->getDT(*F);
+ };
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+ V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
+ *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
BFI.get());
GetElementPtrInst *GEP = LargeOffsetGEP.first;
@@ -5373,14 +5553,19 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
IRBuilder<> Builder(MemoryInst);
+ Type *SourceTy = GEP->getSourceElementType();
Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
// If the final index isn't a vector, emit a scalar GEP containing all ops
// and a vector GEP with all zeroes final index.
if (!Ops[FinalIndex]->getType()->isVectorTy()) {
- NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front());
+ NewAddr = Builder.CreateGEP(SourceTy, Ops[0],
+ makeArrayRef(Ops).drop_front());
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
- NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
+ auto *SecondTy = GetElementPtrInst::getIndexedType(
+ SourceTy, makeArrayRef(Ops).drop_front());
+ NewAddr =
+ Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
} else {
Value *Base = Ops[0];
Value *Index = Ops[FinalIndex];
@@ -5389,11 +5574,14 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
if (Ops.size() != 2) {
// Replace the last index with 0.
Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
- Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front());
+ Base = Builder.CreateGEP(SourceTy, Base,
+ makeArrayRef(Ops).drop_front());
+ SourceTy = GetElementPtrInst::getIndexedType(
+ SourceTy, makeArrayRef(Ops).drop_front());
}
// Now create the GEP with scalar pointer and vector index.
- NewAddr = Builder.CreateGEP(Base, Index);
+ NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
}
} else if (!isa<Constant>(Ptr)) {
// Not a GEP, maybe its a splat and we can create a GEP to enable
@@ -5409,7 +5597,16 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
// Emit a vector GEP with a scalar pointer and all 0s vector index.
Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
- NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy));
+ Type *ScalarTy;
+ if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
+ Intrinsic::masked_gather) {
+ ScalarTy = MemoryInst->getType()->getScalarType();
+ } else {
+ assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
+ Intrinsic::masked_scatter);
+ ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
+ }
+ NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
} else {
// Constant, SelectionDAGBuilder knows to check if its a splat.
return false;
@@ -6272,6 +6469,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
unsigned BitWidth = LoadResultVT.getSizeInBits();
+ // If the BitWidth is 0, do not try to optimize the type
+ if (BitWidth == 0)
+ return false;
+
APInt DemandBits(BitWidth, 0);
APInt WidestAndBits(BitWidth, 0);
@@ -6409,7 +6610,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
uint64_t Sum = TrueWeight + FalseWeight;
if (Sum != 0) {
auto Probability = BranchProbability::getBranchProbability(Max, Sum);
- if (Probability > TLI->getPredictableBranchThreshold())
+ if (Probability > TTI->getPredictableBranchThreshold())
return true;
}
}
@@ -6795,7 +6996,8 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
Value *Cond = SI->getCondition();
Type *OldType = Cond->getType();
LLVMContext &Context = Cond->getContext();
- MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
+ EVT OldVT = TLI->getValueType(*DL, OldType);
+ MVT RegType = TLI->getRegisterType(Context, OldVT);
unsigned RegWidth = RegType.getSizeInBits();
if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
@@ -6809,14 +7011,21 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
// where N is the number of cases in the switch.
auto *NewType = Type::getIntNTy(Context, RegWidth);
- // Zero-extend the switch condition and case constants unless the switch
- // condition is a function argument that is already being sign-extended.
- // In that case, we can avoid an unnecessary mask/extension by sign-extending
- // everything instead.
+ // Extend the switch condition and case constants using the target preferred
+ // extend unless the switch condition is a function argument with an extend
+ // attribute. In that case, we can avoid an unnecessary mask/extension by
+ // matching the argument extension instead.
Instruction::CastOps ExtType = Instruction::ZExt;
- if (auto *Arg = dyn_cast<Argument>(Cond))
+ // Some targets prefer SExt over ZExt.
+ if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
+ ExtType = Instruction::SExt;
+
+ if (auto *Arg = dyn_cast<Argument>(Cond)) {
if (Arg->hasSExtAttr())
ExtType = Instruction::SExt;
+ if (Arg->hasZExtAttr())
+ ExtType = Instruction::ZExt;
+ }
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
ExtInst->insertBefore(SI);
@@ -6927,11 +7136,10 @@ class VectorPromoteHelper {
StoreInst *ST = cast<StoreInst>(CombineInst);
unsigned AS = ST->getPointerAddressSpace();
- unsigned Align = ST->getAlignment();
// Check if this store is supported.
if (!TLI.allowsMisalignedMemoryAccesses(
TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
- Align)) {
+ ST->getAlign())) {
// If this is not supported, there is no way we can combine
// the extract with the store.
return false;
@@ -6940,9 +7148,9 @@ class VectorPromoteHelper {
// The scalar chain of computation has to pay for the transition
// scalar to vector.
// The vector chain has to account for the combining cost.
- uint64_t ScalarCost =
+ InstructionCost ScalarCost =
TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
- uint64_t VectorCost = StoreExtractCombineCost;
+ InstructionCost VectorCost = StoreExtractCombineCost;
enum TargetTransformInfo::TargetCostKind CostKind =
TargetTransformInfo::TCK_RecipThroughput;
for (const auto &Inst : InstsToBePromoted) {
@@ -7483,9 +7691,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
for (GetElementPtrInst *UGEPI : UGEPIs) {
ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
- unsigned ImmCost =
- TTI->getIntImmCost(NewIdx, GEPIIdx->getType(),
- TargetTransformInfo::TCK_SizeAndLatency);
+ InstructionCost ImmCost = TTI->getIntImmCost(
+ NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
if (ImmCost > TargetTransformInfo::TCC_Basic)
return false;
}
@@ -7511,6 +7718,67 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
return true;
}
+static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) {
+ // Try and convert
+ // %c = icmp ult %x, 8
+ // br %c, bla, blb
+ // %tc = lshr %x, 3
+ // to
+ // %tc = lshr %x, 3
+ // %c = icmp eq %tc, 0
+ // br %c, bla, blb
+ // Creating the cmp to zero can be better for the backend, especially if the
+ // lshr produces flags that can be used automatically.
+ if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
+ return false;
+
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
+ if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
+ return false;
+
+ Value *X = Cmp->getOperand(0);
+ APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
+
+ for (auto *U : X->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ // A quick dominance check
+ if (!UI ||
+ (UI->getParent() != Branch->getParent() &&
+ UI->getParent() != Branch->getSuccessor(0) &&
+ UI->getParent() != Branch->getSuccessor(1)) ||
+ (UI->getParent() != Branch->getParent() &&
+ !UI->getParent()->getSinglePredecessor()))
+ continue;
+
+ if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
+ match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
+ IRBuilder<> Builder(Branch);
+ if (UI->getParent() != Branch->getParent())
+ UI->moveBefore(Branch);
+ Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
+ ConstantInt::get(UI->getType(), 0));
+ LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
+ LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
+ Cmp->replaceAllUsesWith(NewCmp);
+ return true;
+ }
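+    // Similarly handle an equality compare against C when X also feeds an
+    // add/sub of the same constant; illustrative sketch (names hypothetical):
+    //   %c = icmp eq i32 %x, 5      ->   %s = sub i32 %x, 5
+    //   %s = sub i32 %x, 5               %c = icmp eq i32 %s, 0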
+ if (Cmp->isEquality() &&
+ (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
+ match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
+ IRBuilder<> Builder(Branch);
+ if (UI->getParent() != Branch->getParent())
+ UI->moveBefore(Branch);
+ Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
+ ConstantInt::get(UI->getType(), 0));
+ LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
+ LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
+ Cmp->replaceAllUsesWith(NewCmp);
+ return true;
+ }
+ }
+ return false;
+}
+
bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
@@ -7672,6 +7940,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return optimizeSwitchInst(cast<SwitchInst>(I));
case Instruction::ExtractElement:
return optimizeExtractElementInst(cast<ExtractElementInst>(I));
+ case Instruction::Br:
+ return optimizeBranch(cast<BranchInst>(I), *TLI);
}
return false;
@@ -7731,19 +8001,23 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
DbgValueInst &DVI = *cast<DbgValueInst>(I);
// Does this dbg.value refer to a sunk address calculation?
- Value *Location = DVI.getVariableLocation();
- WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
- Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
- if (SunkAddr) {
- // Point dbg.value at locally computed address, which should give the best
- // opportunity to be accurately lowered. This update may change the type of
- // pointer being referred to; however this makes no difference to debugging
- // information, and we can't generate bitcasts that may affect codegen.
- DVI.setOperand(0, MetadataAsValue::get(DVI.getContext(),
- ValueAsMetadata::get(SunkAddr)));
- return true;
- }
- return false;
+ bool AnyChange = false;
+ SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
+ DVI.location_ops().end());
+ for (Value *Location : LocationOps) {
+ WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
+ Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ if (SunkAddr) {
+ // Point dbg.value at locally computed address, which should give the best
+ // opportunity to be accurately lowered. This update may change the type
+ // of pointer being referred to; however this makes no difference to
+ // debugging information, and we can't generate bitcasts that may affect
+ // codegen.
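+      // Illustrative sketch (names hypothetical): a variadic dbg.value whose
+      // !DIArgList mentions a GEP that was sunk into this block is rewritten
+      // to reference the sunk copy, leaving other location operands untouched.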
+ DVI.replaceVariableLocationOp(Location, SunkAddr);
+ AnyChange = true;
+ }
+ }
+ return AnyChange;
}
// A llvm.dbg.value may be using a value before its definition, due to
@@ -7762,30 +8036,73 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
if (!DVI)
continue;
- Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+ SmallVector<Instruction *, 4> VIs;
+ for (Value *V : DVI->getValues())
+ if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
+ VIs.push_back(VI);
+
+      // This DVI may depend on multiple instructions, complicating any
+      // potential sink. This block takes the defensive approach, opting to
+      // "undef" the DVI if it depends on more than one instruction and any of
+      // them does not dominate the DVI.
+ for (Instruction *VI : VIs) {
+ if (VI->isTerminator())
+ continue;
- if (!VI || VI->isTerminator())
- continue;
+ // If VI is a phi in a block with an EHPad terminator, we can't insert
+ // after it.
+ if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+ continue;
- // If VI is a phi in a block with an EHPad terminator, we can't insert
- // after it.
- if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
- continue;
+ // If the defining instruction dominates the dbg.value, we do not need
+ // to move the dbg.value.
+ if (DT.dominates(VI, DVI))
+ continue;
- // If the defining instruction dominates the dbg.value, we do not need
- // to move the dbg.value.
- if (DT.dominates(VI, DVI))
- continue;
+ // If we depend on multiple instructions and any of them doesn't
+ // dominate this DVI, we probably can't salvage it: moving it to
+ // after any of the instructions could cause us to lose the others.
+ if (VIs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Unable to find valid location for Debug Value, undefing:\n"
+ << *DVI);
+ DVI->setUndef();
+ break;
+ }
- LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
- << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
- else
- DVI->insertAfter(VI);
- MadeChange = true;
- ++NumDbgValueMoved;
+ LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
+ << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
+// probes can become chained dependencies of other regular DAG nodes and can
+// block DAG combine optimizations.
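+//
+// Illustrative sketch (hypothetical IR): a probe sitting between two regular
+// instructions,
+//   %x = add i32 %a, %b
+//   call void @llvm.pseudoprobe(...)
+//   %y = mul i32 %x, %c
+// is hoisted so that all probes sit together at the top of the block, right
+// after any leading debug/pseudo instructions.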
+bool CodeGenPrepare::placePseudoProbes(Function &F) {
+ bool MadeChange = false;
+ for (auto &Block : F) {
+    // Move the remaining probes to the beginning of the block.
+ auto FirstInst = Block.getFirstInsertionPt();
+ while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
+ ++FirstInst;
+ BasicBlock::iterator I(FirstInst);
+ I++;
+ while (I != Block.end()) {
+ if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
+ II->moveBefore(&*FirstInst);
+ MadeChange = true;
+ }
}
}
return MadeChange;
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 97c110afdda4..f3cba6225107 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
@@ -52,7 +53,7 @@ CGOPT(ThreadModel::Model, ThreadModel)
CGOPT_EXP(CodeModel::Model, CodeModel)
CGOPT(ExceptionHandling, ExceptionModel)
CGOPT_EXP(CodeGenFileType, FileType)
-CGOPT(FramePointer::FP, FramePointerUsage)
+CGOPT(FramePointerKind, FramePointerUsage)
CGOPT(bool, EnableUnsafeFPMath)
CGOPT(bool, EnableNoInfsFPMath)
CGOPT(bool, EnableNoNaNsFPMath)
@@ -68,7 +69,6 @@ CGOPT(bool, DontPlaceZerosInBSS)
CGOPT(bool, EnableGuaranteedTailCallOpt)
CGOPT(bool, DisableTailCalls)
CGOPT(bool, StackSymbolOrdering)
-CGOPT(unsigned, OverrideStackAlignment)
CGOPT(bool, StackRealign)
CGOPT(std::string, TrapFuncName)
CGOPT(bool, UseCtors)
@@ -78,9 +78,6 @@ CGOPT_EXP(bool, FunctionSections)
CGOPT(bool, IgnoreXCOFFVisibility)
CGOPT(bool, XCOFFTracebackTable)
CGOPT(std::string, BBSections)
-CGOPT(std::string, StackProtectorGuard)
-CGOPT(unsigned, StackProtectorGuardOffset)
-CGOPT(std::string, StackProtectorGuardReg)
CGOPT(unsigned, TLSSize)
CGOPT(bool, EmulatedTLS)
CGOPT(bool, UniqueSectionNames)
@@ -96,6 +93,7 @@ CGOPT(bool, PseudoProbeForProfiling)
CGOPT(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
+CGOPT(bool, DebugStrictDwarf)
codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
#define CGBINDOPT(NAME) \
@@ -182,16 +180,16 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"Emit nothing, for performance testing")));
CGBINDOPT(FileType);
- static cl::opt<FramePointer::FP> FramePointerUsage(
+ static cl::opt<FramePointerKind> FramePointerUsage(
"frame-pointer",
cl::desc("Specify frame pointer elimination optimization"),
- cl::init(FramePointer::None),
+ cl::init(FramePointerKind::None),
cl::values(
- clEnumValN(FramePointer::All, "all",
+ clEnumValN(FramePointerKind::All, "all",
"Disable frame pointer elimination"),
- clEnumValN(FramePointer::NonLeaf, "non-leaf",
+ clEnumValN(FramePointerKind::NonLeaf, "non-leaf",
"Disable frame pointer elimination for non-leaf frame"),
- clEnumValN(FramePointer::None, "none",
+ clEnumValN(FramePointerKind::None, "none",
"Enable frame pointer elimination")));
CGBINDOPT(FramePointerUsage);
@@ -306,11 +304,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(true));
CGBINDOPT(StackSymbolOrdering);
- static cl::opt<unsigned> OverrideStackAlignment(
- "stack-alignment", cl::desc("Override default stack alignment"),
- cl::init(0));
- CGBINDOPT(OverrideStackAlignment);
-
static cl::opt<bool> StackRealign(
"stackrealign",
cl::desc("Force align the stack to the minimum alignment"),
@@ -364,21 +357,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init("none"));
CGBINDOPT(BBSections);
- static cl::opt<std::string> StackProtectorGuard(
- "stack-protector-guard", cl::desc("Stack protector guard mode"),
- cl::init("none"));
- CGBINDOPT(StackProtectorGuard);
-
- static cl::opt<std::string> StackProtectorGuardReg(
- "stack-protector-guard-reg", cl::desc("Stack protector guard register"),
- cl::init("none"));
- CGBINDOPT(StackProtectorGuardReg);
-
- static cl::opt<unsigned> StackProtectorGuardOffset(
- "stack-protector-guard-offset", cl::desc("Stack protector guard offset"),
- cl::init((unsigned)-1));
- CGBINDOPT(StackProtectorGuardOffset);
-
static cl::opt<unsigned> TLSSize(
"tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0));
CGBINDOPT(TLSSize);
@@ -414,6 +392,7 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::values(
clEnumValN(DebuggerKind::GDB, "gdb", "gdb"),
clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"),
+ clEnumValN(DebuggerKind::DBX, "dbx", "dbx"),
clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)")));
CGBINDOPT(DebuggerTuningOpt);
@@ -469,6 +448,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(XRayOmitFunctionIndex);
+ static cl::opt<bool> DebugStrictDwarf(
+ "strict-dwarf", cl::desc("use strict dwarf"), cl::init(false));
+ CGBINDOPT(DebugStrictDwarf);
+
#undef CGBINDOPT
mc::RegisterMCTargetOptionsFlags();
@@ -495,24 +478,6 @@ codegen::getBBSectionsMode(llvm::TargetOptions &Options) {
}
}
-llvm::StackProtectorGuards
-codegen::getStackProtectorGuardMode(llvm::TargetOptions &Options) {
- if (getStackProtectorGuard() == "tls")
- return StackProtectorGuards::TLS;
- if (getStackProtectorGuard() == "global")
- return StackProtectorGuards::Global;
- if (getStackProtectorGuard() != "none") {
- ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
- MemoryBuffer::getFile(getStackProtectorGuard());
- if (!MBOrErr)
- errs() << "error illegal stack protector guard mode: "
- << MBOrErr.getError().message() << "\n";
- else
- Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
- }
- return StackProtectorGuards::None;
-}
-
// Common utility function tightly tied to the options listed here. Initializes
// a TargetOptions object with CodeGen flags and returns it.
TargetOptions
@@ -537,7 +502,6 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EnableAIXExtendedAltivecABI = getEnableAIXExtendedAltivecABI();
Options.NoZerosInBSS = getDontPlaceZerosInBSS();
Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
- Options.StackAlignmentOverride = getOverrideStackAlignment();
Options.StackSymbolOrdering = getStackSymbolOrdering();
Options.UseInitArray = !getUseCtors();
Options.RelaxELFRelocations = getRelaxELFRelocations();
@@ -549,9 +513,6 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.BBSections = getBBSectionsMode(Options);
Options.UniqueSectionNames = getUniqueSectionNames();
Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames();
- Options.StackProtectorGuard = getStackProtectorGuardMode(Options);
- Options.StackProtectorGuardOffset = getStackProtectorGuardOffset();
- Options.StackProtectorGuardReg = getStackProtectorGuardReg();
Options.TLSSize = getTLSSize();
Options.EmulatedTLS = getEmulatedTLS();
Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
@@ -565,6 +526,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
+ Options.DebugStrictDwarf = getDebugStrictDwarf();
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
@@ -660,11 +622,11 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
}
if (FramePointerUsageView->getNumOccurrences() > 0 &&
!F.hasFnAttribute("frame-pointer")) {
- if (getFramePointerUsage() == FramePointer::All)
+ if (getFramePointerUsage() == FramePointerKind::All)
NewAttrs.addAttribute("frame-pointer", "all");
- else if (getFramePointerUsage() == FramePointer::NonLeaf)
+ else if (getFramePointerUsage() == FramePointerKind::NonLeaf)
NewAttrs.addAttribute("frame-pointer", "non-leaf");
- else if (getFramePointerUsage() == FramePointer::None)
+ else if (getFramePointerUsage() == FramePointerKind::None)
NewAttrs.addAttribute("frame-pointer", "none");
}
if (DisableTailCallsView->getNumOccurrences() > 0)
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 7ae42b010261..c56c8c87734f 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -65,9 +65,8 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
bool IsReturnBlock = BB->isReturnBlock();
// Examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- for (const auto &LI : (*SI)->liveins()) {
+ for (const MachineBasicBlock *Succ : BB->successors())
+ for (const auto &LI : Succ->liveins()) {
for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
@@ -143,17 +142,16 @@ static const SDep *CriticalPathStep(const SUnit *SU) {
const SDep *Next = nullptr;
unsigned NextDepth = 0;
// Find the predecessor edge with the greatest depth.
- for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
- P != PE; ++P) {
- const SUnit *PredSU = P->getSUnit();
- unsigned PredLatency = P->getLatency();
+ for (const SDep &P : SU->Preds) {
+ const SUnit *PredSU = P.getSUnit();
+ unsigned PredLatency = P.getLatency();
unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
// In the case of a latency tie, prefer an anti-dependency edge over
// other types of edges.
if (NextDepth < PredTotalLatency ||
- (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ (NextDepth == PredTotalLatency && P.getKind() == SDep::Anti)) {
NextDepth = PredTotalLatency;
- Next = &*P;
+ Next = &P;
}
}
return Next;
@@ -426,9 +424,8 @@ findSuitableFreeRegister(RegRefIter RegRefBegin,
continue;
// If NewReg overlaps any of the forbidden registers, we can't use it.
bool Forbidden = false;
- for (SmallVectorImpl<unsigned>::iterator it = Forbid.begin(),
- ite = Forbid.end(); it != ite; ++it)
- if (TRI->regsOverlap(NewReg, *it)) {
+ for (unsigned R : Forbid)
+ if (TRI->regsOverlap(NewReg, R)) {
Forbidden = true;
break;
}
@@ -582,11 +579,11 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// Also, if there are dependencies on other SUnits with the
// same register as the anti-dependency, don't attempt to
// break it.
- for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
- PE = CriticalPathSU->Preds.end(); P != PE; ++P)
- if (P->getSUnit() == NextSU ?
- (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
- (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ for (const SDep &P : CriticalPathSU->Preds)
+ if (P.getSUnit() == NextSU
+ ? (P.getKind() != SDep::Anti || P.getReg() != AntiDepReg)
+ : (P.getKind() == SDep::Data &&
+ P.getReg() == AntiDepReg)) {
AntiDepReg = 0;
break;
}
diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp
index afcf014bca40..d38bacdb1aa7 100644
--- a/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -295,7 +295,7 @@ bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
MemoryLocation(Op2.getValue(), Overlapb,
UseTBAA ? Op2.getAAInfo() : AAMDNodes()));
- return AAResult != NoAlias;
+ return AAResult != AliasResult::NoAlias;
}
bool VLIWPacketizerList::alias(const MachineInstr &MI1,
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 93467e9d09b8..6e7db95b5c2a 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -132,10 +132,8 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
// Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
- for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
- E = MBB->succ_end();
- S != E; S++)
- for (const auto &LI : (*S)->liveins())
+ for (const MachineBasicBlock *Succ : MBB->successors())
+ for (const auto &LI : Succ->liveins())
LivePhysRegs.set(LI.PhysReg);
// Now scan the instructions and delete dead ones, tracking physreg
diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 03fe5f155291..1337e57f360b 100644
--- a/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -516,15 +516,17 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
transferDefinedLanesStep(MO, Info.DefinedLanes);
}
- LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0;
- RegIdx < NumVirtRegs;
- ++RegIdx) {
- unsigned Reg = Register::index2VirtReg(RegIdx);
- const VRegInfo &Info = VRegInfos[RegIdx];
- dbgs() << printReg(Reg, nullptr)
- << " Used: " << PrintLaneMask(Info.UsedLanes)
- << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
- } dbgs() << "\n";);
+ LLVM_DEBUG({
+ dbgs() << "Defined/Used lanes:\n";
+ for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
+ unsigned Reg = Register::index2VirtReg(RegIdx);
+ const VRegInfo &Info = VRegInfos[RegIdx];
+ dbgs() << printReg(Reg, nullptr)
+ << " Used: " << PrintLaneMask(Info.UsedLanes)
+ << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
+ }
+ dbgs() << "\n";
+ });
bool Again = false;
// Mark operands as dead/unused.
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 97e0162f35a1..5ca1e91cc5f4 100644
--- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -42,6 +42,12 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfehprepare"
STATISTIC(NumResumesLowered, "Number of resume calls lowered");
+STATISTIC(NumCleanupLandingPadsUnreachable,
+ "Number of cleanup landing pads found unreachable");
+STATISTIC(NumCleanupLandingPadsRemaining,
+ "Number of cleanup landing pads remaining");
+STATISTIC(NumNoUnwind, "Number of functions with nounwind");
+STATISTIC(NumUnwind, "Number of functions with unwind");
namespace {
@@ -153,7 +159,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
BasicBlock *BB = RI->getParent();
new UnreachableInst(Ctx, RI);
RI->eraseFromParent();
- simplifyCFG(BB, *TTI, RequireAndPreserveDomTree ? DTU : nullptr);
+ simplifyCFG(BB, *TTI, DTU);
}
}
Resumes.resize(ResumesLeft);
@@ -163,6 +169,10 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
bool DwarfEHPrepare::InsertUnwindResumeCalls() {
SmallVector<ResumeInst *, 16> Resumes;
SmallVector<LandingPadInst *, 16> CleanupLPads;
+ if (F.doesNotThrow())
+ NumNoUnwind++;
+ else
+ NumUnwind++;
for (BasicBlock &BB : F) {
if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
Resumes.push_back(RI);
@@ -171,6 +181,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
CleanupLPads.push_back(LP);
}
+ NumCleanupLandingPadsRemaining += CleanupLPads.size();
+
if (Resumes.empty())
return false;
@@ -182,8 +194,19 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
LLVMContext &Ctx = F.getContext();
size_t ResumesLeft = Resumes.size();
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOpt::None) {
ResumesLeft = pruneUnreachableResumes(Resumes, CleanupLPads);
+#if LLVM_ENABLE_STATS
+ unsigned NumRemainingLPs = 0;
+ for (BasicBlock &BB : F) {
+ if (auto *LP = BB.getLandingPadInst())
+ if (LP->isCleanup())
+ NumRemainingLPs++;
+ }
+ NumCleanupLandingPadsUnreachable += CleanupLPads.size() - NumRemainingLPs;
+ NumCleanupLandingPadsRemaining -= CleanupLPads.size() - NumRemainingLPs;
+#endif
+ }
if (ResumesLeft == 0)
return true; // We pruned them all.
@@ -242,25 +265,15 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
CI->setDoesNotReturn();
new UnreachableInst(Ctx, UnwindBB);
- if (DTU && RequireAndPreserveDomTree)
+ if (DTU)
DTU->applyUpdates(Updates);
return true;
}
bool DwarfEHPrepare::run() {
- assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) ||
- (DTU &&
- DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
- "Original domtree is invalid?");
-
bool Changed = InsertUnwindResumeCalls();
- assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) ||
- (DTU &&
- DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
- "Original domtree is invalid?");
-
return Changed;
}
@@ -268,7 +281,7 @@ static bool prepareDwarfEH(CodeGenOpt::Level OptLevel,
FunctionCallee &RewindFunction, Function &F,
const TargetLowering &TLI, DominatorTree *DT,
const TargetTransformInfo *TTI) {
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? &DTU : nullptr,
TTI)
@@ -295,8 +308,11 @@ public:
const TargetLowering &TLI = *TM.getSubtargetImpl(F)->getTargetLowering();
DominatorTree *DT = nullptr;
const TargetTransformInfo *TTI = nullptr;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
if (OptLevel != CodeGenOpt::None) {
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ if (!DT)
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
}
return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI);
@@ -308,9 +324,8 @@ public:
if (OptLevel != CodeGenOpt::None) {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (RequireAndPreserveDomTree)
- AU.addPreserved<DominatorTreeWrapperPass>();
}
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
StringRef getPassName() const override {
diff --git a/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/llvm/lib/CodeGen/EHContGuardCatchret.cpp
new file mode 100644
index 000000000000..c18532946bf9
--- /dev/null
+++ b/llvm/lib/CodeGen/EHContGuardCatchret.cpp
@@ -0,0 +1,84 @@
+//===-- EHContGuardCatchret.cpp - Catchret target symbols -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains a machine function pass to insert a symbol before each
+/// valid catchret target and store this in the MachineFunction's
+/// CatchRetTargets vector. This will be used to emit the table of valid targets
+/// used by EHCont Guard.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ehcontguard-catchret"
+
+STATISTIC(EHContGuardCatchretTargets,
+ "Number of EHCont Guard catchret targets");
+
+namespace {
+
+/// MachineFunction pass to insert a symbol before each valid catchret target
+/// and store these in the MachineFunction's CatchRetTargets vector.
+class EHContGuardCatchret : public MachineFunctionPass {
+public:
+ static char ID;
+
+ EHContGuardCatchret() : MachineFunctionPass(ID) {
+ initializeEHContGuardCatchretPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "EH Cont Guard catchret targets";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char EHContGuardCatchret::ID = 0;
+
+INITIALIZE_PASS(EHContGuardCatchret, "EHContGuardCatchret",
+ "Insert symbols at valid catchret targets for /guard:ehcont",
+ false, false)
+FunctionPass *llvm::createEHContGuardCatchretPass() {
+ return new EHContGuardCatchret();
+}
+
+bool EHContGuardCatchret::runOnMachineFunction(MachineFunction &MF) {
+
+ // Skip modules for which the ehcontguard flag is not set.
+ if (!MF.getMMI().getModule()->getModuleFlag("ehcontguard"))
+ return false;
+
+ // Skip functions that do not have catchret
+ if (!MF.hasEHCatchret())
+ return false;
+
+ bool Result = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHCatchretTarget()) {
+ MF.addCatchretTarget(MBB.getEHCatchretSymbol());
+ EHContGuardCatchretTargets++;
+ Result = true;
+ }
+ }
+
+ return Result;
+}
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index cf7d93d6a33a..90883212a275 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -410,9 +410,8 @@ bool SSAIfConv::findInsertionPoint() {
if (!LiveRegUnits.empty()) {
LLVM_DEBUG({
dbgs() << "Would clobber";
- for (SparseSet<unsigned>::const_iterator
- i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
- dbgs() << ' ' << printRegUnit(*i, TRI);
+ for (unsigned LRU : LiveRegUnits)
+ dbgs() << ' ' << printRegUnit(LRU, TRI);
dbgs() << " live before " << *I;
});
continue;
@@ -558,6 +557,52 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
return true;
}
+/// \return true iff the two registers are known to have the same value.
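+/// Illustrative example: if %tval and %fval are defined by two identical,
+/// side-effect-free instructions (say, the same immediate materialization)
+/// with no physreg operands, TII->produceSameValue() lets us treat them as
+/// equal and emit a plain COPY instead of a select.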
+static bool hasSameValue(const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, Register TReg,
+ Register FReg) {
+ if (TReg == FReg)
+ return true;
+
+ if (!TReg.isVirtual() || !FReg.isVirtual())
+ return false;
+
+ const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg);
+ const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg);
+ if (!TDef || !FDef)
+ return false;
+
+ // If there are side-effects, all bets are off.
+ if (TDef->hasUnmodeledSideEffects())
+ return false;
+
+ // If the instruction could modify memory, or there may be some intervening
+ // store between the two, we can't consider them to be equal.
+ if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad(nullptr))
+ return false;
+
+ // We also can't guarantee that they are the same if, for example, the
+ // instructions are both a copy from a physical reg, because some other
+ // instruction may have modified the value in that reg between the two
+ // defining insts.
+ if (any_of(TDef->uses(), [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isPhysical();
+ }))
+ return false;
+
+ // Check whether the two defining instructions produce the same value(s).
+ if (!TII->produceSameValue(*TDef, *FDef, &MRI))
+ return false;
+
+ // Further, check that the two defs come from corresponding operands.
+ int TIdx = TDef->findRegisterDefOperandIdx(TReg);
+ int FIdx = FDef->findRegisterDefOperandIdx(FReg);
+ if (TIdx == -1 || FIdx == -1)
+ return false;
+
+ return TIdx == FIdx;
+}
+
/// replacePHIInstrs - Completely replace PHI instructions with selects.
/// This is possible when the only Tail predecessors are the if-converted
/// blocks.
@@ -572,7 +617,15 @@ void SSAIfConv::replacePHIInstrs() {
PHIInfo &PI = PHIs[i];
LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
Register DstReg = PI.PHI->getOperand(0).getReg();
- TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
+ // We do not need the select instruction if both incoming values are
+ // equal, but we do need a COPY.
+ BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(PI.TReg);
+ } else {
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg,
+ PI.FReg);
+ }
LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
PI.PHI->eraseFromParent();
PI.PHI = nullptr;
@@ -593,7 +646,7 @@ void SSAIfConv::rewritePHIOperands() {
unsigned DstReg = 0;
LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
- if (PI.TReg == PI.FReg) {
+ if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
// We do not need the select instruction if both incoming values are
// equal.
DstReg = PI.TReg;
diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp
index 0b2ffda50a39..3dd354e8ab7e 100644
--- a/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -46,9 +46,8 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
for (const auto &MBB : *MF) {
unsigned OutE = 2 * MBB.getNumber() + 1;
// Join the outgoing bundle with the ingoing bundles of all successors.
- for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end(); SI != SE; ++SI)
- EC.join(OutE, 2 * (*SI)->getNumber());
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ EC.join(OutE, 2 * Succ->getNumber());
}
EC.compress();
if (ViewEdgeBundles)
@@ -69,9 +68,9 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
return false;
}
-/// Specialize WriteGraph, the standard implementation won't work.
namespace llvm {
+/// Specialize WriteGraph, the standard implementation won't work.
template<>
raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
bool ShortNames,
@@ -86,10 +85,9 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
<< "\"\n"
<< "\t\"" << printMBBReference(MBB) << "\" -> " << G.getBundle(BB, true)
<< '\n';
- for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end(); SI != SE; ++SI)
+ for (const MachineBasicBlock *Succ : MBB.successors())
O << "\t\"" << printMBBReference(MBB) << "\" -> \""
- << printMBBReference(**SI) << "\" [ color=lightgray ]\n";
+ << printMBBReference(*Succ) << "\" [ color=lightgray ]\n";
}
O << "}\n";
return O;
diff --git a/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/llvm/lib/CodeGen/ExecutionDomainFix.cpp
index 2cca05ea6f55..9621ad4b1248 100644
--- a/llvm/lib/CodeGen/ExecutionDomainFix.cpp
+++ b/llvm/lib/CodeGen/ExecutionDomainFix.cpp
@@ -380,7 +380,7 @@ void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Finally set all defs and non-collapsed uses to dv. We must iterate through
// all the operators, including imp-def ones.
- for (MachineOperand &mo : mi->operands()) {
+ for (const MachineOperand &mo : mi->operands()) {
if (!mo.isReg())
continue;
for (int rx : regIndices(mo.getReg())) {
@@ -454,16 +454,14 @@ bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) {
// Traverse the basic blocks.
LoopTraversal Traversal;
LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
- for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) {
+ for (const LoopTraversal::TraversedMBBInfo &TraversedMBB : TraversedMBBOrder)
processBasicBlock(TraversedMBB);
- }
- for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) {
- for (DomainValue *OutLiveReg : OutLiveRegs) {
+ for (const LiveRegsDVInfo &OutLiveRegs : MBBOutRegsInfos)
+ for (DomainValue *OutLiveReg : OutLiveRegs)
if (OutLiveReg)
release(OutLiveReg);
- }
- }
+
MBBOutRegsInfos.clear();
Avail.clear();
Allocator.DestroyAll();
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 9f85db9de884..50fdc2114780 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -21,11 +22,13 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -75,6 +78,7 @@ class MemCmpExpansion {
PHINode *PhiRes;
const bool IsUsedForZeroCmp;
const DataLayout &DL;
+ DomTreeUpdater *DTU;
IRBuilder<> Builder;
// Represents the decomposition in blocks of the expansion. For example,
// comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
@@ -123,7 +127,8 @@ class MemCmpExpansion {
public:
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
+ DomTreeUpdater *DTU);
unsigned getNumBlocks();
uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -212,10 +217,12 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
MemCmpExpansion::MemCmpExpansion(
CallInst *const CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
+ DomTreeUpdater *DTU)
: CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
- IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
+ IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
+ Builder(CI) {
assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
@@ -325,13 +332,14 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
unsigned OffsetBytes) {
- Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+ BasicBlock *BB = LoadCmpBlocks[BlockIndex];
+ Builder.SetInsertPoint(BB);
const LoadPair Loads =
getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
Type::getInt32Ty(CI->getContext()), OffsetBytes);
Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
- PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);
+ PhiRes->addIncoming(Diff, BB);
if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
// Early exit branch if difference found to EndBlock. Otherwise, continue to
@@ -340,10 +348,16 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
ConstantInt::get(Diff->getType(), 0));
BranchInst *CmpBr =
BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Insert, BB, EndBlock},
+ {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
Builder.Insert(CmpBr);
} else {
// The last block has an unconditional branch to EndBlock.
BranchInst *CmpBr = BranchInst::Create(EndBlock);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
Builder.Insert(CmpBr);
}
}
@@ -428,8 +442,12 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
: LoadCmpBlocks[BlockIndex + 1];
// Early exit branch if difference found to ResultBlock. Otherwise,
// continue to next LoadCmpBlock or EndBlock.
+ BasicBlock *BB = Builder.GetInsertBlock();
BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
Builder.Insert(CmpBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
+ {DominatorTree::Insert, BB, NextBB}});
// Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
// since early exit to ResultBlock was not taken (no difference was found in
@@ -482,8 +500,12 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
: LoadCmpBlocks[BlockIndex + 1];
// Early exit branch if difference found to ResultBlock. Otherwise, continue
// to next LoadCmpBlock or EndBlock.
+ BasicBlock *BB = Builder.GetInsertBlock();
BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
Builder.Insert(CmpBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
+ {DominatorTree::Insert, BB, ResBlock.BB}});
// Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
// since early exit to ResultBlock was not taken (no difference was found in
@@ -507,6 +529,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
PhiRes->addIncoming(Res, ResBlock.BB);
BranchInst *NewBr = BranchInst::Create(EndBlock);
Builder.Insert(NewBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
return;
}
BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
@@ -519,9 +543,11 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
ConstantInt::get(Builder.getInt32Ty(), 1));
+ PhiRes->addIncoming(Res, ResBlock.BB);
BranchInst *NewBr = BranchInst::Create(EndBlock);
Builder.Insert(NewBr);
- PhiRes->addIncoming(Res, ResBlock.BB);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
}
void MemCmpExpansion::setupResultBlockPHINodes() {
@@ -597,7 +623,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
// Create the basic block framework for a multi-block expansion.
if (getNumBlocks() != 1) {
BasicBlock *StartBlock = CI->getParent();
- EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
+ EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
+ /*MSSAU=*/nullptr, "endblock");
setupEndBlockPHINodes();
createResultBlock();
@@ -610,9 +637,12 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
// Create the number of required load compare basic blocks.
createLoadCmpBlocks();
- // Update the terminator added by splitBasicBlock to branch to the first
+ // Update the terminator added by SplitBlock to branch to the first
// LoadCmpBlock.
StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
+ {DominatorTree::Delete, StartBlock, EndBlock}});
}
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
@@ -707,7 +737,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
const TargetLowering *TLI, const DataLayout *DL,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+ DomTreeUpdater *DTU) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -744,7 +775,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
Options.MaxNumLoads = MaxLoadsPerMemcmp;
- MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
// Don't expand if this will require more loads than desired by the target.
if (Expansion.getNumLoads() == 0) {
@@ -763,8 +794,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
return true;
}
-
-
class ExpandMemCmpPass : public FunctionPass {
public:
static char ID;
@@ -791,7 +820,10 @@ public:
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
nullptr;
- auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI);
+ DominatorTree *DT = nullptr;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
+ auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
return !PA.areAllPreserved();
}
@@ -800,25 +832,28 @@ private:
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
FunctionPass::getAnalysisUsage(AU);
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const TargetLowering* TL,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
+ const TargetLowering *TL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DominatorTree *DT);
// Returns true if a change was made.
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering* TL,
- const DataLayout& DL, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI);
+ const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const DataLayout &DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
};
-bool ExpandMemCmpPass::runOnBlock(
- BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering* TL,
- const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const TargetLowering *TL,
+ const DataLayout &DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI,
+ DomTreeUpdater *DTU) {
for (Instruction& I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI) {
@@ -827,22 +862,27 @@ bool ExpandMemCmpPass::runOnBlock(
LibFunc Func;
if (TLI->getLibFunc(*CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
+ expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) {
return true;
}
}
return false;
}
+PreservedAnalyses
+ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const TargetLowering *TL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DominatorTree *DT) {
+ Optional<DomTreeUpdater> DTU;
+ if (DT)
+ DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
-PreservedAnalyses ExpandMemCmpPass::runImpl(
- Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
- const TargetLowering* TL, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI) {
const DataLayout& DL = F.getParent()->getDataLayout();
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
- if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) {
+ if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI,
+ DTU.hasValue() ? DTU.getPointer() : nullptr)) {
MadeChanges = true;
// If changes were made, restart the function from the beginning, since
// the structure of the function was changed.
@@ -854,7 +894,11 @@ PreservedAnalyses ExpandMemCmpPass::runImpl(
if (MadeChanges)
for (BasicBlock &BB : F)
SimplifyInstructionsInBlock(&BB);
- return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ if (!MadeChanges)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
}
} // namespace
@@ -866,6 +910,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
"Expand memcmp() to load/stores", false, false)
diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index 842211c09134..d909d6aa5b0a 100644
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -188,9 +188,8 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
- for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
- mbbi != mbbe; ++mbbi) {
- for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::iterator mi = MBB.begin(), me = MBB.end();
mi != me;) {
MachineInstr &MI = *mi;
// Advance iterator here because MI may be erased.
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index a4c9f02dc64d..2bcaf750911b 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -154,13 +154,12 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
}
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin: {
- // FIXME: We only expand 'fast' reductions here because the underlying
- // code in createMinMaxOp() assumes that comparisons use 'fast'
- // semantics.
+ // We require "nnan" to use a shuffle reduction; "nsz" is implied by the
+ // semantics of the reduction.
Value *Vec = II->getArgOperand(0);
if (!isPowerOf2_32(
cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
- !FMF.isFast())
+ !FMF.noNaNs())
continue;
Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
new file mode 100644
index 000000000000..a8d4d4ebe8bd
--- /dev/null
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -0,0 +1,469 @@
+//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR expansion for vector predication intrinsics, allowing
+// targets to enable vector predication until just before codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ExpandVectorPredication.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+using VPLegalization = TargetTransformInfo::VPLegalization;
+using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;
+
+// Keep this in sync with TargetTransformInfo::VPLegalization.
+#define VPINTERNAL_VPLEGAL_CASES \
+ VPINTERNAL_CASE(Legal) \
+ VPINTERNAL_CASE(Discard) \
+ VPINTERNAL_CASE(Convert)
+
+#define VPINTERNAL_CASE(X) "|" #X
+
+// Override options.
+static cl::opt<std::string> EVLTransformOverride(
+ "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
+ cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
+ ". If non-empty, ignore "
+ "TargetTransformInfo and "
+ "always use this transformation for the %evl parameter (Used in "
+ "testing)."));
+
+static cl::opt<std::string> MaskTransformOverride(
+ "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
+ cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
+             ". If non-empty, ignore "
+ "TargetTransformInfo and "
+ "always use this transformation for the %mask parameter (Used in "
+ "testing)."));
+
+#undef VPINTERNAL_CASE
+#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
+
+static VPTransform parseOverrideOption(const std::string &TextOpt) {
+ return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
+}
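+// Note on the X-macro above: with VPINTERNAL_CASE(X) redefined to
+// .Case(#X, VPLegalization::X), the return statement expands to
+//   StringSwitch<VPTransform>(TextOpt)
+//       .Case("Legal", VPLegalization::Legal)
+//       .Case("Discard", VPLegalization::Discard)
+//       .Case("Convert", VPLegalization::Convert);
+// while the earlier "|" #X definition builds the "|Legal|Discard|Convert"
+// suffix of the cl::desc strings.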
+
+#undef VPINTERNAL_VPLEGAL_CASES
+
+// Whether any override options are set.
+static bool anyExpandVPOverridesSet() {
+ return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
+}
+
+#define DEBUG_TYPE "expandvp"
+
+STATISTIC(NumFoldedVL, "Number of folded vector length params");
+STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");
+
+///// Helpers {
+
+/// \returns Whether the vector mask \p MaskVal has all lane bits set.
+static bool isAllTrueMask(Value *MaskVal) {
+ auto *ConstVec = dyn_cast<ConstantVector>(MaskVal);
+ return ConstVec && ConstVec->isAllOnesValue();
+}
+
+/// \returns A non-excepting divisor constant for this type.
+static Constant *getSafeDivisor(Type *DivTy) {
+ assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
+ return ConstantInt::get(DivTy, 1u, false);
+}
+
+/// Transfer operation properties from \p OldVPI to \p NewVal.
+static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
+ auto *NewInst = dyn_cast<Instruction>(&NewVal);
+ if (!NewInst || !isa<FPMathOperator>(NewVal))
+ return;
+
+ auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
+ if (!OldFMOp)
+ return;
+
+ NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
+}
+
+/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
+/// \p OldOp gets erased.
+static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
+ transferDecorations(NewOp, OldOp);
+ OldOp.replaceAllUsesWith(&NewOp);
+ OldOp.eraseFromParent();
+}
+
+//// } Helpers
+
+namespace {
+
+// Expansion pass state at function scope.
+struct CachingVPExpander {
+ Function &F;
+ const TargetTransformInfo &TTI;
+
+ /// \returns A (fixed length) vector with ascending integer indices
+ /// (<0, 1, ..., NumElems-1>).
+ /// \p Builder
+ /// Used for instruction creation.
+ /// \p LaneTy
+ /// Integer element type of the result vector.
+ /// \p NumElems
+ /// Number of vector elements.
+ Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
+ unsigned NumElems);
+
+ /// \returns A bitmask that is true where the lane position is less-than \p
+ /// EVLParam
+ ///
+ /// \p Builder
+ /// Used for instruction creation.
+  /// \p EVLParam
+ /// The explicit vector length parameter to test against the lane
+ /// positions.
+ /// \p ElemCount
+ /// Static (potentially scalable) number of vector elements.
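+  ///
+  /// Illustrative example: for a fixed-width <4 x i32> operation with
+  /// %evl == 2 this yields the mask <1, 1, 0, 0>; for scalable vectors the
+  /// comparison is done via llvm.get.active.lane.mask.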
+ Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
+ ElementCount ElemCount);
+
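+  /// Fold the %evl parameter of \p VPI into its %mask parameter: the mask is
+  /// replaced by (%mask & (lane_index < %evl)) and %evl is then discarded.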
+ Value *foldEVLIntoMask(VPIntrinsic &VPI);
+
+ /// "Remove" the %evl parameter of \p PI by setting it to the static vector
+ /// length of the operation.
+ void discardEVLParameter(VPIntrinsic &PI);
+
+  /// \brief Lower this VP binary operator to an unpredicated binary operator.
+ Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
+ VPIntrinsic &PI);
+
+ /// \brief Query TTI and expand the vector predication in \p P accordingly.
+ Value *expandPredication(VPIntrinsic &PI);
+
+ /// \brief Determine how and whether the VPIntrinsic \p VPI shall be
+ /// expanded. This overrides TTI with the cl::opts listed at the top of this
+ /// file.
+ VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
+ bool UsingTTIOverrides;
+
+public:
+ CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
+ : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
+
+ bool expandVectorPredication();
+};
+
+//// CachingVPExpander {
+
+Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
+ unsigned NumElems) {
+ // TODO add caching
+ SmallVector<Constant *, 16> ConstElems;
+
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx)
+ ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));
+
+ return ConstantVector::get(ConstElems);
+}
+
+Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
+ Value *EVLParam,
+ ElementCount ElemCount) {
+ // TODO add caching
+ // Scalable vector %evl conversion.
+ if (ElemCount.isScalable()) {
+ auto *M = Builder.GetInsertBlock()->getModule();
+ Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
+ Function *ActiveMaskFunc = Intrinsic::getDeclaration(
+ M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
+ // `get_active_lane_mask` performs an implicit less-than comparison.
+ Value *ConstZero = Builder.getInt32(0);
+ return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
+ }
+
+ // Fixed vector %evl conversion.
+ Type *LaneTy = EVLParam->getType();
+ unsigned NumElems = ElemCount.getFixedValue();
+ Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
+ Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
+ return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
+}
+
+Value *
+CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
+ VPIntrinsic &VPI) {
+ assert((isSafeToSpeculativelyExecute(&VPI) ||
+ VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
+ assert(Instruction::isBinaryOp(OC));
+
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ Value *Mask = VPI.getMaskParam();
+
+ // Blend in safe operands.
+ if (Mask && !isAllTrueMask(Mask)) {
+ switch (OC) {
+ default:
+ // Can safely ignore the predicate.
+ break;
+
+ // Division operators need a safe divisor on masked-off lanes (1).
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ // 2nd operand must not be zero.
+ Value *SafeDivisor = getSafeDivisor(VPI.getType());
+ Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
+ }
+ }
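+  // Illustrative sketch: for a masked vp.udiv this turns the divisor into
+  // select(%mask, %divisor, 1), so the unpredicated udiv created below cannot
+  // divide by zero on masked-off lanes.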
+
+ Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());
+
+ replaceOperation(*NewBinOp, VPI);
+ return NewBinOp;
+}
+
+void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
+ LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
+
+ if (VPI.canIgnoreVectorLengthParam())
+ return;
+
+ Value *EVLParam = VPI.getVectorLengthParam();
+ if (!EVLParam)
+ return;
+
+ ElementCount StaticElemCount = VPI.getStaticVectorLength();
+ Value *MaxEVL = nullptr;
+ Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
+ if (StaticElemCount.isScalable()) {
+ // TODO add caching
+ auto *M = VPI.getModule();
+ Function *VScaleFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
+ IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
+ Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
+ Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
+ MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
+ /*NUW*/ true, /*NSW*/ false);
+ } else {
+ MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
+ }
+ VPI.setVectorLengthParam(MaxEVL);
+}
+
+Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
+ LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');
+
+ IRBuilder<> Builder(&VPI);
+
+ // Ineffective %evl parameter and so nothing to do here.
+ if (VPI.canIgnoreVectorLengthParam())
+ return &VPI;
+
+ // Only VP intrinsics can have an %evl parameter.
+ Value *OldMaskParam = VPI.getMaskParam();
+ Value *OldEVLParam = VPI.getVectorLengthParam();
+ assert(OldMaskParam && "no mask param to fold the vl param into");
+ assert(OldEVLParam && "no EVL param to fold away");
+
+ LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
+ LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');
+
+ // Convert the %evl predication into vector mask predication.
+ ElementCount ElemCount = VPI.getStaticVectorLength();
+ Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
+ Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
+ VPI.setMaskParam(NewMaskParam);
+
+ // Drop the %evl parameter.
+ discardEVLParameter(VPI);
+ assert(VPI.canIgnoreVectorLengthParam() &&
+ "transformation did not render the evl param ineffective!");
+
+ // Reassess the modified instruction.
+ return &VPI;
+}
+
+Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
+ LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');
+
+ IRBuilder<> Builder(&VPI);
+
+ // Try lowering to a LLVM instruction first.
+ auto OC = VPI.getFunctionalOpcode();
+
+ if (OC && Instruction::isBinaryOp(*OC))
+ return expandPredicationInBinaryOperator(Builder, VPI);
+
+ return &VPI;
+}
+
+//// } CachingVPExpander
+
+struct TransformJob {
+ VPIntrinsic *PI;
+ TargetTransformInfo::VPLegalization Strategy;
+ TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
+ : PI(PI), Strategy(InitStrat) {}
+
+ bool isDone() const { return Strategy.shouldDoNothing(); }
+};
+
+void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
+ // Speculatable instructions do not strictly need predication.
+ if (isSafeToSpeculativelyExecute(&I)) {
+ // Converting a speculatable VP intrinsic means dropping %mask and %evl.
+ // No need to expand %evl into the %mask only to ignore that code.
+ if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
+ LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
+ return;
+ }
+
+ // We have to preserve the predicating effect of %evl for this
+ // non-speculatable VP intrinsic.
+ // 1) Never discard %evl.
+ // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
+ // %evl gets folded into %mask.
+ if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
+ (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
+ LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
+ }
+}
+
+VPLegalization
+CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
+ auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
+ if (LLVM_LIKELY(!UsingTTIOverrides)) {
+ // No overrides - we are in production.
+ return VPStrat;
+ }
+
+  // Overrides set - we are in testing; the following does not need to be
+  // efficient.
+ VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
+ VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
+ return VPStrat;
+}
+
+/// \brief Expand llvm.vp.* intrinsics as requested by \p TTI.
+bool CachingVPExpander::expandVectorPredication() {
+ SmallVector<TransformJob, 16> Worklist;
+
+ // Collect all VPIntrinsics that need expansion and determine their expansion
+ // strategy.
+ for (auto &I : instructions(F)) {
+ auto *VPI = dyn_cast<VPIntrinsic>(&I);
+ if (!VPI)
+ continue;
+ auto VPStrat = getVPLegalizationStrategy(*VPI);
+ sanitizeStrategy(I, VPStrat);
+ if (!VPStrat.shouldDoNothing())
+ Worklist.emplace_back(VPI, VPStrat);
+ }
+ if (Worklist.empty())
+ return false;
+
+ // Transform all VPIntrinsics on the worklist.
+ LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
+ << " instructions ::::\n");
+ for (TransformJob Job : Worklist) {
+ // Transform the EVL parameter.
+ switch (Job.Strategy.EVLParamStrategy) {
+ case VPLegalization::Legal:
+ break;
+ case VPLegalization::Discard:
+ discardEVLParameter(*Job.PI);
+ break;
+ case VPLegalization::Convert:
+ if (foldEVLIntoMask(*Job.PI))
+ ++NumFoldedVL;
+ break;
+ }
+ Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
+
+ // Replace with a non-predicated operation.
+ switch (Job.Strategy.OpStrategy) {
+ case VPLegalization::Legal:
+ break;
+ case VPLegalization::Discard:
+ llvm_unreachable("Invalid strategy for operators.");
+ case VPLegalization::Convert:
+ expandPredication(*Job.PI);
+ ++NumLoweredVPOps;
+ break;
+ }
+ Job.Strategy.OpStrategy = VPLegalization::Legal;
+
+ assert(Job.isDone() && "incomplete transformation");
+ }
+
+ return true;
+}
+class ExpandVectorPredication : public FunctionPass {
+public:
+ static char ID;
+ ExpandVectorPredication() : FunctionPass(ID) {
+ initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ CachingVPExpander VPExpander(F, *TTI);
+ return VPExpander.expandVectorPredication();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+} // namespace
+
+char ExpandVectorPredication::ID;
+INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
+ "Expand vector predication intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
+ "Expand vector predication intrinsics", false, false)
+
+FunctionPass *llvm::createExpandVectorPredicationPass() {
+ return new ExpandVectorPredication();
+}
+
+PreservedAnalyses
+ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
+ const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ CachingVPExpander VPExpander(F, TTI);
+ if (!VPExpander.expandVectorPredication())
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
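A usage sketch for the new pass manager interface added above; the header path and the fact that registerFunctionAnalyses supplies the TargetIRAnalysis result queried by ExpandVectorPredicationPass::run are assumptions based on the usual LLVM layout, not something this diff shows.

    #include "llvm/CodeGen/ExpandVectorPredication.h"
    #include "llvm/Passes/PassBuilder.h"
    using namespace llvm;

    // Run the expansion over a single function with a freshly built analysis
    // manager.
    static void runExpandVP(Function &F) {
      PassBuilder PB;
      FunctionAnalysisManager FAM;
      PB.registerFunctionAnalyses(FAM);

      FunctionPassManager FPM;
      FPM.addPass(ExpandVectorPredicationPass());
      FPM.run(F, FAM);
    }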
diff --git a/llvm/lib/CodeGen/FaultMaps.cpp b/llvm/lib/CodeGen/FaultMaps.cpp
index 23560b4cd136..1d35b194f218 100644
--- a/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/llvm/lib/CodeGen/FaultMaps.cpp
@@ -15,8 +15,6 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -114,39 +112,3 @@ const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) {
return "FaultingStore";
}
}
-
-raw_ostream &llvm::
-operator<<(raw_ostream &OS,
- const FaultMapParser::FunctionFaultInfoAccessor &FFI) {
- OS << "Fault kind: "
- << FaultMaps::faultTypeToString((FaultMaps::FaultKind)FFI.getFaultKind())
- << ", faulting PC offset: " << FFI.getFaultingPCOffset()
- << ", handling PC offset: " << FFI.getHandlerPCOffset();
- return OS;
-}
-
-raw_ostream &llvm::
-operator<<(raw_ostream &OS, const FaultMapParser::FunctionInfoAccessor &FI) {
- OS << "FunctionAddress: " << format_hex(FI.getFunctionAddr(), 8)
- << ", NumFaultingPCs: " << FI.getNumFaultingPCs() << "\n";
- for (unsigned i = 0, e = FI.getNumFaultingPCs(); i != e; ++i)
- OS << FI.getFunctionFaultInfoAt(i) << "\n";
- return OS;
-}
-
-raw_ostream &llvm::operator<<(raw_ostream &OS, const FaultMapParser &FMP) {
- OS << "Version: " << format_hex(FMP.getFaultMapVersion(), 2) << "\n";
- OS << "NumFunctions: " << FMP.getNumFunctions() << "\n";
-
- if (FMP.getNumFunctions() == 0)
- return OS;
-
- FaultMapParser::FunctionInfoAccessor FI;
-
- for (unsigned i = 0, e = FMP.getNumFunctions(); i != e; ++i) {
- FI = (i == 0) ? FMP.getFirstFunctionInfo() : FI.getNextFunctionInfo();
- OS << FI;
- }
-
- return OS;
-}
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index f8f99b7e87f2..e3c4e86d203b 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -484,6 +484,16 @@ public:
MachineOperand &DefMO = MI.getOperand(I);
assert(DefMO.isReg() && DefMO.isDef() && "Expected Reg Def operand");
Register Reg = DefMO.getReg();
+ assert(DefMO.isTied() && "Def is expected to be tied");
+ // We skipped undef uses and did not spill them, so we should not
+ // proceed with defs here.
+ if (MI.getOperand(MI.findTiedOperandIdx(I)).isUndef()) {
+ if (AllowGCPtrInCSR) {
+ NewIndices.push_back(NewMI->getNumOperands());
+ MIB.addReg(Reg, RegState::Define);
+ }
+ continue;
+ }
if (!AllowGCPtrInCSR) {
assert(is_contained(RegsToSpill, Reg));
RegsToReload.push_back(Reg);
diff --git a/llvm/lib/CodeGen/GCMetadata.cpp b/llvm/lib/CodeGen/GCMetadata.cpp
index 7c96d838d992..8fae798b31d9 100644
--- a/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/llvm/lib/CodeGen/GCMetadata.cpp
@@ -12,7 +12,7 @@
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
@@ -122,14 +122,9 @@ bool Printer::runOnFunction(Function &F) {
OS << "\t" << PI->Label->getName() << ": " << "post-call"
<< ", live = {";
- for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
- RE = FD->live_end(PI);
- ;) {
- OS << " " << RI->Num;
- if (++RI == RE)
- break;
- OS << ",";
- }
+ ListSeparator LS(",");
+ for (const GCRoot &R : make_range(FD->live_begin(PI), FD->live_end(PI)))
+ OS << LS << " " << R.Num;
OS << " }\n";
}
diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp
index e2ee0c97f94d..58269e172c57 100644
--- a/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -86,6 +85,7 @@ INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); }
char LowerIntrinsics::ID = 0;
+char &llvm::GCLoweringID = LowerIntrinsics::ID;
LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) {
initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
@@ -105,9 +105,9 @@ void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
bool LowerIntrinsics::doInitialization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration() && I->hasGC())
- MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+ for (Function &F : M)
+ if (!F.isDeclaration() && F.hasGC())
+ MI->getFunctionInfo(F); // Instantiate the GC strategy.
return false;
}
@@ -317,8 +317,8 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
// size, we use UINT64_MAX to represent this.
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- const bool DynamicFrameSize = MFI.hasVarSizedObjects() ||
- RegInfo->needsStackRealignment(MF);
+ const bool DynamicFrameSize =
+ MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF);
FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI.getStackSize());
// Find all safe points.
diff --git a/llvm/lib/CodeGen/GCStrategy.cpp b/llvm/lib/CodeGen/GCStrategy.cpp
deleted file mode 100644
index 43d06b0f82e9..000000000000
--- a/llvm/lib/CodeGen/GCStrategy.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===- GCStrategy.cpp - Garbage Collector Description ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the policy object GCStrategy which describes the
-// behavior of a given garbage collector.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/GCStrategy.h"
-
-using namespace llvm;
-
-LLVM_INSTANTIATE_REGISTRY(GCRegistry)
-
-GCStrategy::GCStrategy() = default;
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 2fa208fbfaaf..f9bfe8518083 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/Error.h"
#define DEBUG_TYPE "cseinfo"
@@ -259,8 +260,17 @@ void GISelCSEInfo::releaseMemory() {
#endif
}
+#ifndef NDEBUG
+static const char *stringify(const MachineInstr *MI, std::string &S) {
+ raw_string_ostream OS(S);
+ OS << *MI;
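+  // str() flushes the stream into the caller-owned string S, so the returned
+  // C string remains valid for the createStringError() calls below.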
+ return OS.str().c_str();
+}
+#endif
+
Error GISelCSEInfo::verify() {
#ifndef NDEBUG
+ std::string S1, S2;
handleRecordedInsts();
// For each instruction in map from MI -> UMI,
// Profile(MI) and make sure UMI is found for that profile.
@@ -273,20 +283,23 @@ Error GISelCSEInfo::verify() {
if (FoundNode != It.second)
return createStringError(std::errc::not_supported,
"CSEMap mismatch, InstrMapping has MIs without "
- "corresponding Nodes in CSEMap");
+ "corresponding Nodes in CSEMap:\n%s",
+ stringify(It.second->MI, S1));
}
// For every node in the CSEMap, make sure that the InstrMapping
// points to it.
- for (auto It = CSEMap.begin(), End = CSEMap.end(); It != End; ++It) {
- const UniqueMachineInstr &UMI = *It;
+ for (const UniqueMachineInstr &UMI : CSEMap) {
if (!InstrMapping.count(UMI.MI))
return createStringError(std::errc::not_supported,
- "Node in CSE without InstrMapping", UMI.MI);
+ "Node in CSE without InstrMapping:\n%s",
+ stringify(UMI.MI, S1));
if (InstrMapping[UMI.MI] != &UMI)
return createStringError(std::make_error_code(std::errc::not_supported),
- "Mismatch in CSE mapping");
+ "Mismatch in CSE mapping:\n%s\n%s",
+ stringify(InstrMapping[UMI.MI]->MI, S1),
+ stringify(UMI.MI, S2));
}
#endif
return Error::success();
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 2c86f06a602d..dd560e8ff145 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -189,7 +189,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid dsts");
if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
SrcOps[1].getReg(), *getMRI()))
- return buildConstant(DstOps[0], Cst->getSExtValue());
+ return buildConstant(DstOps[0], *Cst);
break;
}
case TargetOpcode::G_SEXT_INREG: {
@@ -200,7 +200,17 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
const SrcOp &Src1 = SrcOps[1];
if (auto MaybeCst =
ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI()))
- return buildConstant(Dst, MaybeCst->getSExtValue());
+ return buildConstant(Dst, *MaybeCst);
+ break;
+ }
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ // Try to constant fold these.
+ assert(SrcOps.size() == 1 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ if (Optional<APFloat> Cst = ConstantFoldIntToFloat(
+ Opc, DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getReg(), *getMRI()))
+ return buildFConstant(DstOps[0], *Cst);
break;
}
}
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 803e1527a4f0..d2cda9ece31a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -54,6 +54,8 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
Flags.setReturned();
if (AttrFn(Attribute::SwiftSelf))
Flags.setSwiftSelf();
+ if (AttrFn(Attribute::SwiftAsync))
+ Flags.setSwiftAsync();
if (AttrFn(Attribute::SwiftError))
Flags.setSwiftError();
}
@@ -112,7 +114,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
unsigned i = 0;
unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
for (auto &Arg : CB.args()) {
- ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i),
+ ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i),
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
@@ -133,7 +135,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
else
Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}};
+ Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}};
if (!Info.OrigRet.Ty->isVoidTy())
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
@@ -154,22 +156,42 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const AttributeList &Attrs = FuncInfo.getAttributes();
addArgFlagsFromAttributes(Flags, Attrs, OpIdx);
+ PointerType *PtrTy = dyn_cast<PointerType>(Arg.Ty->getScalarType());
+ if (PtrTy) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(PtrTy->getPointerAddressSpace());
+ }
+
+ Align MemAlign = DL.getABITypeAlign(Arg.Ty);
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
- Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
+ assert(OpIdx >= AttributeList::FirstArgIndex);
+ Type *ElementTy = PtrTy->getElementType();
auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
- Align FrameAlign;
- if (auto ParamAlign = FuncInfo.getParamAlign(OpIdx - 2))
- FrameAlign = *ParamAlign;
+ if (auto ParamAlign =
+ FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex))
+ MemAlign = *ParamAlign;
+ else if ((ParamAlign =
+ FuncInfo.getParamAlign(OpIdx - AttributeList::FirstArgIndex)))
+ MemAlign = *ParamAlign;
else
- FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
- Flags.setByValAlign(FrameAlign);
+ MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
+ } else if (OpIdx >= AttributeList::FirstArgIndex) {
+ if (auto ParamAlign =
+ FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex))
+ MemAlign = *ParamAlign;
}
+ Flags.setMemAlign(MemAlign);
Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
+
+ // Don't try to use the returned attribute if the argument is marked as
+ // swiftself, since it won't be passed in x0.
+ if (Flags.isSwiftSelf())
+ Flags.setReturned(false);
}
template void
@@ -182,96 +204,366 @@ CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
const CallBase &FuncInfo) const;
-Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
- MachineIRBuilder &MIRBuilder) const {
- assert(SrcRegs.size() > 1 && "Nothing to pack");
+void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL,
+ CallingConv::ID CallConv,
+ SmallVectorImpl<uint64_t> *Offsets) const {
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
- const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
- MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ SmallVector<EVT, 4> SplitVTs;
+ ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0);
+
+ if (SplitVTs.size() == 0)
+ return;
+
+ if (SplitVTs.size() == 1) {
+ // No splitting to do, but we want to replace the original type (e.g. [1 x
+ // double] -> double).
+ SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
+ OrigArg.OrigArgIndex, OrigArg.Flags[0],
+ OrigArg.IsFixed, OrigArg.OrigValue);
+ return;
+ }
- LLT PackedLLT = getLLTForType(*PackedTy, DL);
+ // Create one ArgInfo for each virtual register in the original ArgInfo.
+ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
+ bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
+ OrigArg.Ty, CallConv, false, DL);
+ for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+ Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+ SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.OrigArgIndex,
+ OrigArg.Flags[0], OrigArg.IsFixed);
+ if (NeedsRegBlock)
+ SplitArgs.back().Flags[0].setInConsecutiveRegs();
+ }
- SmallVector<LLT, 8> LLTs;
- SmallVector<uint64_t, 8> Offsets;
- computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
- assert(LLTs.size() == SrcRegs.size() && "Regs / types mismatch");
+ SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
+}
+
+/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
+static MachineInstrBuilder
+mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
+ ArrayRef<Register> SrcRegs) {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT LLTy = MRI.getType(DstRegs[0]);
+ LLT PartLLT = MRI.getType(SrcRegs[0]);
+
+ // Deal with v3s16 split into v2s16
+ LLT LCMTy = getLCMType(LLTy, PartLLT);
+ if (LCMTy == LLTy) {
+ // Common case where no padding is needed.
+ assert(DstRegs.size() == 1);
+ return B.buildConcatVectors(DstRegs[0], SrcRegs);
+ }
- Register Dst = MRI->createGenericVirtualRegister(PackedLLT);
- MIRBuilder.buildUndef(Dst);
- for (unsigned i = 0; i < SrcRegs.size(); ++i) {
- Register NewDst = MRI->createGenericVirtualRegister(PackedLLT);
- MIRBuilder.buildInsert(NewDst, Dst, SrcRegs[i], Offsets[i]);
- Dst = NewDst;
+ // We need to create an unmerge to the result registers, which may require
+ // widening the original value.
+ Register UnmergeSrcReg;
+ if (LCMTy != PartLLT) {
+ // e.g. A <3 x s16> value was split to <2 x s16>
+ // %register_value0:_(<2 x s16>)
+ // %register_value1:_(<2 x s16>)
+ // %undef:_(<2 x s16>) = G_IMPLICIT_DEF
+ // %concat:_<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef
+ // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat
+ const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
+ Register Undef = B.buildUndef(PartLLT).getReg(0);
+
+ // Build vector of undefs.
+ SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
+
+ // Replace the first sources with the real registers.
+ std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
+ UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0);
+ } else {
+ // We don't need to widen anything if we're extracting a scalar which was
+ // promoted to a vector e.g. s8 -> v4s8 -> s8
+ assert(SrcRegs.size() == 1);
+ UnmergeSrcReg = SrcRegs[0];
}
- return Dst;
+ int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
+
+ SmallVector<Register, 8> PadDstRegs(NumDst);
+ std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());
+
+ // Create the excess dead defs for the unmerge.
+ for (int I = DstRegs.size(); I != NumDst; ++I)
+ PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
+
+ return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
}
-void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg,
- Type *PackedTy,
- MachineIRBuilder &MIRBuilder) const {
- assert(DstRegs.size() > 1 && "Nothing to unpack");
+/// Create a sequence of instructions to combine register-typed pieces back
+/// into the original IR value. \p OrigRegs contains the destination
+/// value registers of type \p LLTy, and \p Regs contains the legalized pieces
+/// with type \p PartLLT. This is used for incoming values (physregs to vregs).
+static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
+ ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT,
+ const ISD::ArgFlagsTy Flags) {
+ MachineRegisterInfo &MRI = *B.getMRI();
+
+ if (PartLLT == LLTy) {
+ // We should have avoided introducing a new virtual register, and just
+ // directly assigned here.
+ assert(OrigRegs[0] == Regs[0]);
+ return;
+ }
- const DataLayout &DL = MIRBuilder.getDataLayout();
+ if (PartLLT.getSizeInBits() == LLTy.getSizeInBits() && OrigRegs.size() == 1 &&
+ Regs.size() == 1) {
+ B.buildBitcast(OrigRegs[0], Regs[0]);
+ return;
+ }
+
+ // A vector PartLLT needs extending to LLTy's element size.
+ // E.g. <2 x s64> = G_SEXT <2 x s32>.
+ if (PartLLT.isVector() == LLTy.isVector() &&
+ PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() &&
+ (!PartLLT.isVector() ||
+ PartLLT.getNumElements() == LLTy.getNumElements()) &&
+ OrigRegs.size() == 1 && Regs.size() == 1) {
+ Register SrcReg = Regs[0];
+
+ LLT LocTy = MRI.getType(SrcReg);
+
+ if (Flags.isSExt()) {
+ SrcReg = B.buildAssertSExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
+ .getReg(0);
+ } else if (Flags.isZExt()) {
+ SrcReg = B.buildAssertZExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
+ .getReg(0);
+ }
+
+ // Sometimes pointers are passed zero extended.
+ LLT OrigTy = MRI.getType(OrigRegs[0]);
+ if (OrigTy.isPointer()) {
+ LLT IntPtrTy = LLT::scalar(OrigTy.getSizeInBits());
+ B.buildIntToPtr(OrigRegs[0], B.buildTrunc(IntPtrTy, SrcReg));
+ return;
+ }
+
+ B.buildTrunc(OrigRegs[0], SrcReg);
+ return;
+ }
+
+ if (!LLTy.isVector() && !PartLLT.isVector()) {
+ assert(OrigRegs.size() == 1);
+ LLT OrigTy = MRI.getType(OrigRegs[0]);
+
+ unsigned SrcSize = PartLLT.getSizeInBits().getFixedSize() * Regs.size();
+ if (SrcSize == OrigTy.getSizeInBits())
+ B.buildMerge(OrigRegs[0], Regs);
+ else {
+ auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
+ B.buildTrunc(OrigRegs[0], Widened);
+ }
+
+ return;
+ }
+
+ if (PartLLT.isVector()) {
+ assert(OrigRegs.size() == 1);
+ SmallVector<Register> CastRegs(Regs.begin(), Regs.end());
+
+ // If PartLLT is a mismatched vector in both number of elements and element
+ // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
+ // have the same elt type, i.e. v4s32.
+ if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() &&
+ PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
+ Regs.size() == 1) {
+ LLT NewTy = PartLLT.changeElementType(LLTy.getElementType())
+ .changeElementCount(PartLLT.getElementCount() * 2);
+ CastRegs[0] = B.buildBitcast(NewTy, Regs[0]).getReg(0);
+ PartLLT = NewTy;
+ }
+
+ if (LLTy.getScalarType() == PartLLT.getElementType()) {
+ mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
+ } else {
+ unsigned I = 0;
+ LLT GCDTy = getGCDType(LLTy, PartLLT);
+
+ // We are both splitting a vector, and bitcasting its element types. Cast
+ // the source pieces into the appropriate number of pieces with the result
+ // element type.
+ for (Register SrcReg : CastRegs)
+ CastRegs[I++] = B.buildBitcast(GCDTy, SrcReg).getReg(0);
+ mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
+ }
+
+ return;
+ }
+
+ assert(LLTy.isVector() && !PartLLT.isVector());
+
+ LLT DstEltTy = LLTy.getElementType();
+
+ // Pointer information was discarded. We'll need to coerce some register types
+ // to avoid violating type constraints.
+ LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();
+
+ assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());
+
+ if (DstEltTy == PartLLT) {
+ // Vector was trivially scalarized.
+
+ if (RealDstEltTy.isPointer()) {
+ for (Register Reg : Regs)
+ MRI.setType(Reg, RealDstEltTy);
+ }
+
+ B.buildBuildVector(OrigRegs[0], Regs);
+ } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
+ // Deal with vector with 64-bit elements decomposed to 32-bit
+ // registers. Need to create intermediate 64-bit elements.
+ SmallVector<Register, 8> EltMerges;
+ int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
+
+ assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+
+ for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
+ auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt));
+ // Fix the type in case this is really a vector of pointers.
+ MRI.setType(Merge.getReg(0), RealDstEltTy);
+ EltMerges.push_back(Merge.getReg(0));
+ Regs = Regs.drop_front(PartsPerElt);
+ }
- SmallVector<LLT, 8> LLTs;
- SmallVector<uint64_t, 8> Offsets;
- computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
- assert(LLTs.size() == DstRegs.size() && "Regs / types mismatch");
+ B.buildBuildVector(OrigRegs[0], EltMerges);
+ } else {
+ // Vector was split, and elements promoted to a wider type.
+ // FIXME: Should handle floating point promotions.
+ LLT BVType = LLT::fixed_vector(LLTy.getNumElements(), PartLLT);
+ auto BV = B.buildBuildVector(BVType, Regs);
+ B.buildTrunc(OrigRegs[0], BV);
+ }
+}
+
+/// Create a sequence of instructions to expand the value in \p SrcReg (of type
+/// \p SrcTy) to the types in \p DstRegs (of type \p PartTy). \p ExtendOp should
+/// contain the type of scalar value extension if necessary.
+///
+/// This is used for outgoing values (vregs to physregs)
+static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
+ Register SrcReg, LLT SrcTy, LLT PartTy,
+ unsigned ExtendOp = TargetOpcode::G_ANYEXT) {
+ // We could just insert a regular copy, but this is unreachable at the moment.
+ assert(SrcTy != PartTy && "identical part types shouldn't reach here");
+
+ const unsigned PartSize = PartTy.getSizeInBits();
+
+ if (PartTy.isVector() == SrcTy.isVector() &&
+ PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) {
+ assert(DstRegs.size() == 1);
+ B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg});
+ return;
+ }
+
+ if (SrcTy.isVector() && !PartTy.isVector() &&
+ PartSize > SrcTy.getElementType().getSizeInBits()) {
+ // Vector was scalarized, and the elements extended.
+ auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
+ for (int i = 0, e = DstRegs.size(); i != e; ++i)
+ B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
+ return;
+ }
+
+ LLT GCDTy = getGCDType(SrcTy, PartTy);
+ if (GCDTy == PartTy) {
+    // If this is already evenly divisible, we can create a simple unmerge.
+ B.buildUnmerge(DstRegs, SrcReg);
+ return;
+ }
+
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT DstTy = MRI.getType(DstRegs[0]);
+ LLT LCMTy = getLCMType(SrcTy, PartTy);
+
+ const unsigned DstSize = DstTy.getSizeInBits();
+ const unsigned SrcSize = SrcTy.getSizeInBits();
+ unsigned CoveringSize = LCMTy.getSizeInBits();
- for (unsigned i = 0; i < DstRegs.size(); ++i)
- MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]);
+ Register UnmergeSrc = SrcReg;
+
+ if (CoveringSize != SrcSize) {
+ // For scalars, it's common to be able to use a simple extension.
+ if (SrcTy.isScalar() && DstTy.isScalar()) {
+ CoveringSize = alignTo(SrcSize, DstSize);
+ LLT CoverTy = LLT::scalar(CoveringSize);
+ UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0);
+ } else {
+ // Widen to the common type.
+ // FIXME: This should respect the extend type
+ Register Undef = B.buildUndef(SrcTy).getReg(0);
+ SmallVector<Register, 8> MergeParts(1, SrcReg);
+ for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize)
+ MergeParts.push_back(Undef);
+ UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
+ }
+ }
+
+ // Unmerge to the original registers and pad with dead defs.
+ SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
+ for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize;
+ Size += DstSize) {
+ UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
+ }
+
+ B.buildUnmerge(UnmergeResults, UnmergeSrc);
}
-bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
- SmallVectorImpl<ArgInfo> &Args,
- ValueHandler &Handler) const {
+bool CallLowering::determineAndHandleAssignments(
+ ValueHandler &Handler, ValueAssigner &Assigner,
+ SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder,
+ CallingConv::ID CallConv, bool IsVarArg, Register ThisReturnReg) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
- return handleAssignments(CCInfo, ArgLocs, MIRBuilder, Args, Handler);
+
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext());
+ if (!determineAssignments(Assigner, Args, CCInfo))
+ return false;
+
+ return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder,
+ ThisReturnReg);
}
-bool CallLowering::handleAssignments(CCState &CCInfo,
- SmallVectorImpl<CCValAssign> &ArgLocs,
- MachineIRBuilder &MIRBuilder,
- SmallVectorImpl<ArgInfo> &Args,
- ValueHandler &Handler) const {
- MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = MF.getFunction();
- const DataLayout &DL = F.getParent()->getDataLayout();
+static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
+ if (Flags.isSExt())
+ return TargetOpcode::G_SEXT;
+ if (Flags.isZExt())
+ return TargetOpcode::G_ZEXT;
+ return TargetOpcode::G_ANYEXT;
+}
+
+bool CallLowering::determineAssignments(ValueAssigner &Assigner,
+ SmallVectorImpl<ArgInfo> &Args,
+ CCState &CCInfo) const {
+ LLVMContext &Ctx = CCInfo.getContext();
+ const CallingConv::ID CallConv = CCInfo.getCallingConv();
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
EVT CurVT = EVT::getEVT(Args[i].Ty);
- if (CurVT.isSimple() &&
- !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(),
- CCValAssign::Full, Args[i], Args[i].Flags[0],
- CCInfo))
- continue;
- MVT NewVT = TLI->getRegisterTypeForCallingConv(
- F.getContext(), F.getCallingConv(), EVT(CurVT));
+ MVT NewVT = TLI->getRegisterTypeForCallingConv(Ctx, CallConv, CurVT);
// If we need to split the type over multiple regs, check it's a scenario
// we currently support.
- unsigned NumParts = TLI->getNumRegistersForCallingConv(
- F.getContext(), F.getCallingConv(), CurVT);
+ unsigned NumParts =
+ TLI->getNumRegistersForCallingConv(Ctx, CallConv, CurVT);
if (NumParts == 1) {
// Try to use the register type if we couldn't assign the VT.
- if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
- Args[i].Flags[0], CCInfo))
+ if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[0], CCInfo))
return false;
continue;
}
- assert(NumParts > 1);
- // For now only handle exact splits.
- if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
- return false;
-
// For incoming arguments (physregs to vregs), we could have values in
// physregs (or memlocs) which we want to extract and copy to vregs.
// During this, we might have to deal with the LLT being split across
@@ -280,68 +572,49 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
// If we have outgoing args, then we have the opposite case. We have a
// vreg with an LLT which we want to assign to a physical location, and
// we might have to record that the value has to be split later.
- if (Handler.isIncomingArgumentHandler()) {
- // We're handling an incoming arg which is split over multiple regs.
- // E.g. passing an s128 on AArch64.
- ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
- Args[i].OrigRegs.push_back(Args[i].Regs[0]);
- Args[i].Regs.clear();
- Args[i].Flags.clear();
- LLT NewLLT = getLLTForMVT(NewVT);
- // For each split register, create and assign a vreg that will store
- // the incoming component of the larger value. These will later be
- // merged to form the final vreg.
- for (unsigned Part = 0; Part < NumParts; ++Part) {
- Register Reg =
- MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
- ISD::ArgFlagsTy Flags = OrigFlags;
- if (Part == 0) {
- Flags.setSplit();
- } else {
- Flags.setOrigAlign(Align(1));
- if (Part == NumParts - 1)
- Flags.setSplitEnd();
- }
- Args[i].Regs.push_back(Reg);
- Args[i].Flags.push_back(Flags);
- if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
- Args[i].Flags[Part], CCInfo)) {
- // Still couldn't assign this smaller part type for some reason.
- return false;
- }
+
+ // We're handling an incoming arg which is split over multiple regs.
+ // E.g. passing an s128 on AArch64.
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ Args[i].Flags.clear();
+
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (Part == 0) {
+ Flags.setSplit();
+ } else {
+ Flags.setOrigAlign(Align(1));
+ if (Part == NumParts - 1)
+ Flags.setSplitEnd();
}
- } else {
- // This type is passed via multiple registers in the calling convention.
- // We need to extract the individual parts.
- Register LargeReg = Args[i].Regs[0];
- LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
- auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
- assert(Unmerge->getNumOperands() == NumParts + 1);
- ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
- // We're going to replace the regs and flags with the split ones.
- Args[i].Regs.clear();
- Args[i].Flags.clear();
- for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) {
- ISD::ArgFlagsTy Flags = OrigFlags;
- if (PartIdx == 0) {
- Flags.setSplit();
- } else {
- Flags.setOrigAlign(Align(1));
- if (PartIdx == NumParts - 1)
- Flags.setSplitEnd();
- }
- Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
- Args[i].Flags.push_back(Flags);
- if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full,
- Args[i], Args[i].Flags[PartIdx], CCInfo))
- return false;
+
+ Args[i].Flags.push_back(Flags);
+ if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[Part], CCInfo)) {
+ // Still couldn't assign this smaller part type for some reason.
+ return false;
}
}
}
- for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
- assert(j < ArgLocs.size() && "Skipped too many arg locs");
+ return true;
+}
+bool CallLowering::handleAssignments(ValueHandler &Handler,
+ SmallVectorImpl<ArgInfo> &Args,
+ CCState &CCInfo,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ MachineIRBuilder &MIRBuilder,
+ Register ThisReturnReg) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function &F = MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ const unsigned NumArgs = Args.size();
+
+ for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
+ assert(j < ArgLocs.size() && "Skipped too many arg locs");
CCValAssign &VA = ArgLocs[j];
assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
@@ -354,93 +627,131 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
continue;
}
- // FIXME: Pack registers if we have more than one.
- Register ArgReg = Args[i].Regs[0];
+ const MVT ValVT = VA.getValVT();
+ const MVT LocVT = VA.getLocVT();
- EVT OrigVT = EVT::getEVT(Args[i].Ty);
- EVT VAVT = VA.getValVT();
+ const LLT LocTy(LocVT);
+ const LLT ValTy(ValVT);
+ const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
+ const EVT OrigVT = EVT::getEVT(Args[i].Ty);
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
// Expected to be multiple regs for a single incoming arg.
// There should be Regs.size() ArgLocs per argument.
- unsigned NumArgRegs = Args[i].Regs.size();
+ // This should be the same as getNumRegistersForCallingConv
+ const unsigned NumParts = Args[i].Flags.size();
+
+ // Now split the registers into the assigned types.
+ Args[i].OrigRegs.assign(Args[i].Regs.begin(), Args[i].Regs.end());
+
+ if (NumParts != 1 || NewLLT != OrigTy) {
+ // If we can't directly assign the register, we need one or more
+ // intermediate values.
+ Args[i].Regs.resize(NumParts);
- assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part)
+ Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+ }
+
+ assert((j + (NumParts - 1)) < ArgLocs.size() &&
"Too many regs for number of args");
- for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
+
+ // Coerce into outgoing value types before register assignment.
+ if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) {
+ assert(Args[i].OrigRegs.size() == 1);
+ buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
+ ValTy, extendOpFromFlags(Args[i].Flags[0]));
+ }
+
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ Register ArgReg = Args[i].Regs[Part];
// There should be Regs.size() ArgLocs per argument.
VA = ArgLocs[j + Part];
- if (VA.isMemLoc()) {
- // Don't currently support loading/storing a type that needs to be split
- // to the stack. Should be easy, just not implemented yet.
- if (NumArgRegs > 1) {
- LLVM_DEBUG(
- dbgs()
- << "Load/store a split arg to/from the stack not implemented yet\n");
- return false;
- }
+ const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
- // FIXME: Use correct address space for pointer size
- EVT LocVT = VA.getValVT();
- unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize()
- : LocVT.getStoreSize();
- unsigned Offset = VA.getLocMemOffset();
- MachinePointerInfo MPO;
- Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO);
- Handler.assignValueToAddress(Args[i], StackAddr,
- MemSize, MPO, VA);
- continue;
- }
+ if (VA.isMemLoc() && !Flags.isByVal()) {
+ // Individual pieces may have been spilled to the stack and others
+ // passed in registers.
- assert(VA.isRegLoc() && "custom loc should have been handled already");
+ // TODO: The memory size may be larger than the value we need to
+ // store. We may need to adjust the offset for big endian targets.
+ LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
- // GlobalISel does not currently work for scalable vectors.
- if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() ||
- !Handler.isIncomingArgumentHandler()) {
- // This is an argument that might have been split. There should be
- // Regs.size() ArgLocs per argument.
+ MachinePointerInfo MPO;
+ Register StackAddr = Handler.getStackAddress(
+ MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
- // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge
- // to the original register after handling all of the parts.
- Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+ Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
continue;
}
- // This ArgLoc covers multiple pieces, so we need to split it.
- const LLT VATy(VAVT.getSimpleVT());
- Register NewReg =
- MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
- Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
- // If it's a vector type, we either need to truncate the elements
- // or do an unmerge to get the lower block of elements.
- if (VATy.isVector() &&
- VATy.getNumElements() > OrigVT.getVectorNumElements()) {
- // Just handle the case where the VA type is 2 * original type.
- if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
- LLVM_DEBUG(dbgs()
- << "Incoming promoted vector arg has too many elts");
- return false;
+ if (VA.isMemLoc() && Flags.isByVal()) {
+ assert(Args[i].Regs.size() == 1 &&
+ "didn't expect split byval pointer");
+
+ if (Handler.isIncomingArgumentHandler()) {
+ // We just need to copy the frame index value to the pointer.
+ MachinePointerInfo MPO;
+ Register StackAddr = Handler.getStackAddress(
+ Flags.getByValSize(), VA.getLocMemOffset(), MPO, Flags);
+ MIRBuilder.buildCopy(Args[i].Regs[0], StackAddr);
+ } else {
+        // For outgoing byval arguments, insert the implicit copy that byval
+        // implies, so that writes in the callee do not modify the caller's
+        // value.
+ uint64_t MemSize = Flags.getByValSize();
+ int64_t Offset = VA.getLocMemOffset();
+
+ MachinePointerInfo DstMPO;
+ Register StackAddr =
+ Handler.getStackAddress(MemSize, Offset, DstMPO, Flags);
+
+ MachinePointerInfo SrcMPO(Args[i].OrigValue);
+ if (!Args[i].OrigValue) {
+ // We still need to accurately track the stack address space if we
+ // don't know the underlying value.
+ const LLT PtrTy = MRI.getType(StackAddr);
+ SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace());
+ }
+
+ Align DstAlign = std::max(Flags.getNonZeroByValAlign(),
+ inferAlignFromPtrInfo(MF, DstMPO));
+
+ Align SrcAlign = std::max(Flags.getNonZeroByValAlign(),
+ inferAlignFromPtrInfo(MF, SrcMPO));
+
+ Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0],
+ DstMPO, DstAlign, SrcMPO, SrcAlign,
+ MemSize, VA);
}
- auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
- MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
- } else {
- MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
+ continue;
}
- }
- // Now that all pieces have been handled, re-pack any arguments into any
- // wider, original registers.
- if (Handler.isIncomingArgumentHandler()) {
- if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) {
- assert(NumArgRegs >= 2);
+ assert(!VA.needsCustom() && "custom loc should have been handled already");
- // Merge the split registers into the expected larger result vreg
- // of the original call.
- MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
+ if (i == 0 && ThisReturnReg.isValid() &&
+ Handler.isIncomingArgumentHandler() &&
+ isTypeIsValidForThisReturn(ValVT)) {
+ Handler.assignValueToReg(Args[i].Regs[i], ThisReturnReg, VA);
+ continue;
}
+
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
}
- j += NumArgRegs - 1;
+ // Now that all pieces have been assigned, re-pack the register typed values
+ // into the original value typed registers.
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+ // Merge the split registers into the expected larger result vregs of
+ // the original call.
+ buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
+ LocTy, Args[i].Flags[0]);
+ }
+
+ j += NumParts - 1;
}
return true;
@@ -470,7 +781,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
Register Addr;
MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- MRI.getType(VRegs[I]).getSizeInBytes(),
+ MRI.getType(VRegs[I]),
commonAlignment(BaseAlign, Offsets[I]));
MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
}
@@ -501,7 +812,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
Register Addr;
MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- MRI.getType(VRegs[I]).getSizeInBytes(),
+ MRI.getType(VRegs[I]),
commonAlignment(BaseAlign, Offsets[I]));
MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
}
@@ -522,7 +833,8 @@ void CallLowering::insertSRetIncomingArgument(
// NOTE: Assume that a pointer won't get split into more than one VT.
assert(ValueVTs.size() == 1);
- ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()));
+ ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()),
+ ArgInfo::NoArgIndex);
setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F);
DemoteArg.Flags[0].setSRet();
SplitArgs.insert(SplitArgs.begin(), DemoteArg);
@@ -540,7 +852,8 @@ void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false);
Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0);
- ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS));
+ ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS),
+ ArgInfo::NoArgIndex);
setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB);
DemoteArg.Flags[0].setSRet();
@@ -594,23 +907,6 @@ bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
}
-bool CallLowering::analyzeArgInfo(CCState &CCState,
- SmallVectorImpl<ArgInfo> &Args,
- CCAssignFn &AssignFnFixed,
- CCAssignFn &AssignFnVarArg) const {
- for (unsigned i = 0, e = Args.size(); i < e; ++i) {
- MVT VT = MVT::getVT(Args[i].Ty);
- CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg;
- if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) {
- // Bail out on anything we can't handle.
- LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString()
- << " (arg number = " << i << "\n");
- return false;
- }
- }
- return true;
-}
-
bool CallLowering::parametersInCSRMatch(
const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
const SmallVectorImpl<CCValAssign> &OutLocs,
@@ -666,10 +962,8 @@ bool CallLowering::parametersInCSRMatch(
bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
MachineFunction &MF,
SmallVectorImpl<ArgInfo> &InArgs,
- CCAssignFn &CalleeAssignFnFixed,
- CCAssignFn &CalleeAssignFnVarArg,
- CCAssignFn &CallerAssignFnFixed,
- CCAssignFn &CallerAssignFnVarArg) const {
+ ValueAssigner &CalleeAssigner,
+ ValueAssigner &CallerAssigner) const {
const Function &F = MF.getFunction();
CallingConv::ID CalleeCC = Info.CallConv;
CallingConv::ID CallerCC = F.getCallingConv();
@@ -678,15 +972,13 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
return true;
SmallVector<CCValAssign, 16> ArgLocs1;
- CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
- if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed,
- CalleeAssignFnVarArg))
+ CCState CCInfo1(CalleeCC, Info.IsVarArg, MF, ArgLocs1, F.getContext());
+ if (!determineAssignments(CalleeAssigner, InArgs, CCInfo1))
return false;
SmallVector<CCValAssign, 16> ArgLocs2;
- CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
- if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed,
- CalleeAssignFnVarArg))
+ CCState CCInfo2(CallerCC, F.isVarArg(), MF, ArgLocs2, F.getContext());
+ if (!determineAssignments(CallerAssigner, InArgs, CCInfo2))
return false;
// We need the argument locations to match up exactly. If there's more in
@@ -721,11 +1013,58 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
return true;
}
+LLT CallLowering::ValueHandler::getStackValueStoreType(
+ const DataLayout &DL, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const {
+ const MVT ValVT = VA.getValVT();
+ if (ValVT != MVT::iPTR) {
+ LLT ValTy(ValVT);
+
+ // We lost the pointeriness going through CCValAssign, so try to restore it
+ // based on the flags.
+ if (Flags.isPointer()) {
+ LLT PtrTy = LLT::pointer(Flags.getPointerAddrSpace(),
+ ValTy.getScalarSizeInBits());
+ if (ValVT.isVector())
+ return LLT::vector(ValTy.getElementCount(), PtrTy);
+ return PtrTy;
+ }
+
+ return ValTy;
+ }
+
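+  // ValVT is MVT::iPTR here, which carries no concrete size; take the pointer
+  // width for this address space from the DataLayout instead.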
+ unsigned AddrSpace = Flags.getPointerAddrSpace();
+ return LLT::pointer(AddrSpace, DL.getPointerSize(AddrSpace));
+}
+
+void CallLowering::ValueHandler::copyArgumentMemory(
+ const ArgInfo &Arg, Register DstPtr, Register SrcPtr,
+ const MachinePointerInfo &DstPtrInfo, Align DstAlign,
+ const MachinePointerInfo &SrcPtrInfo, Align SrcAlign, uint64_t MemSize,
+ CCValAssign &VA) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *SrcMMO = MF.getMachineMemOperand(
+ SrcPtrInfo,
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable, MemSize,
+ SrcAlign);
+
+ MachineMemOperand *DstMMO = MF.getMachineMemOperand(
+ DstPtrInfo,
+ MachineMemOperand::MOStore | MachineMemOperand::MODereferenceable,
+ MemSize, DstAlign);
+
+ const LLT PtrTy = MRI.getType(DstPtr);
+ const LLT SizeTy = LLT::scalar(PtrTy.getSizeInBits());
+
+ auto SizeConst = MIRBuilder.buildConstant(SizeTy, MemSize);
+ MIRBuilder.buildMemCpy(DstPtr, SrcPtr, SizeConst, *DstMMO, *SrcMMO);
+}
+
Register CallLowering::ValueHandler::extendRegister(Register ValReg,
CCValAssign &VA,
unsigned MaxSizeBits) {
LLT LocTy{VA.getLocVT()};
- LLT ValTy = MRI.getType(ValReg);
+ LLT ValTy{VA.getValVT()};
+
if (LocTy.getSizeInBits() == ValTy.getSizeInBits())
return ValReg;
@@ -735,6 +1074,14 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
LocTy = LLT::scalar(MaxSizeBits);
}
+ const LLT ValRegTy = MRI.getType(ValReg);
+ if (ValRegTy.isPointer()) {
+ // The x32 ABI wants to zero extend 32-bit pointers to 64-bit registers, so
+ // we have to cast to do the extension.
+ LLT IntPtrTy = LLT::scalar(ValRegTy.getSizeInBits());
+ ValReg = MIRBuilder.buildPtrToInt(IntPtrTy, ValReg).getReg(0);
+ }
+
switch (VA.getLocInfo()) {
default: break;
case CCValAssign::Full:
@@ -760,4 +1107,63 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
llvm_unreachable("unable to extend register");
}
-void CallLowering::ValueHandler::anchor() {}
+void CallLowering::ValueAssigner::anchor() {}
+
+Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA,
+ Register SrcReg,
+ LLT NarrowTy) {
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::ZExt: {
+ return MIRBuilder
+ .buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
+ NarrowTy.getScalarSizeInBits())
+ .getReg(0);
+ }
+ case CCValAssign::LocInfo::SExt: {
+ return MIRBuilder
+ .buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
+ NarrowTy.getScalarSizeInBits())
+ .getReg(0);
+ }
+ default:
+ return SrcReg;
+ }
+}
+
+/// Check if we can use a basic COPY instruction between the two types.
+///
+/// We're currently building on top of the infrastructure using MVT, which loses
+/// pointer information in the CCValAssign. We accept copies from physical
+/// registers that have been reported as integers if the destination is an
+/// equivalently sized pointer LLT.
+static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
+ if (SrcTy == DstTy)
+ return true;
+
+ if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+ return false;
+
+ SrcTy = SrcTy.getScalarType();
+ DstTy = DstTy.getScalarType();
+
+  return (SrcTy.isPointer() && DstTy.isScalar()) ||
+         (DstTy.isPointer() && SrcTy.isScalar());
+}
+
+void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
+ Register PhysReg,
+ CCValAssign &VA) {
+ const MVT LocVT = VA.getLocVT();
+ const LLT LocTy(LocVT);
+ const LLT RegTy = MRI.getType(ValVReg);
+
+ if (isCopyCompatibleType(RegTy, LocTy)) {
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ return;
+ }
+
+ auto Copy = MIRBuilder.buildCopy(LocTy, PhysReg);
+ auto Hint = buildExtensionHint(VA, Copy.getReg(0), RegTy);
+ MIRBuilder.buildTrunc(ValVReg, Hint);
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index f1071d96e5a3..6f103bca6892 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -153,8 +153,14 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
MFChanged |= Changed;
} while (Changed);
- assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) &&
- "CSEInfo is not consistent. Likely missing calls to "
- "observer on mutations"));
+#ifndef NDEBUG
+ if (CSEInfo) {
+ if (auto E = CSEInfo->verify()) {
+ errs() << E << '\n';
+ assert(false && "CSEInfo is not consistent. Likely missing calls to "
+ "observer on mutations.");
+ }
+ }
+#endif
return MFChanged;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index df0219fcfa64..06d827de2e96 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6,13 +6,18 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -20,8 +25,10 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include <tuple>
#define DEBUG_TYPE "gi-combiner"
@@ -436,16 +443,13 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
// to find a safe place to sink it) whereas the extend is freely movable.
// It also prevents us from duplicating the load for the volatile case or just
// for performance.
-
- if (MI.getOpcode() != TargetOpcode::G_LOAD &&
- MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
- MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
+ GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
+ if (!LoadMI)
return false;
- auto &LoadValue = MI.getOperand(0);
- assert(LoadValue.isReg() && "Result wasn't a register?");
+ Register LoadReg = LoadMI->getDstReg();
- LLT LoadValueTy = MRI.getType(LoadValue.getReg());
+ LLT LoadValueTy = MRI.getType(LoadReg);
if (!LoadValueTy.isScalar())
return false;
@@ -467,27 +471,29 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
// and emit a variant of (extend (trunc X)) for the others according to the
// relative type sizes. At the same time, pick an extend to use based on the
// extend involved in the chosen type.
- unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
- ? TargetOpcode::G_ANYEXT
- : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
- ? TargetOpcode::G_SEXT
- : TargetOpcode::G_ZEXT;
+ unsigned PreferredOpcode =
+ isa<GLoad>(&MI)
+ ? TargetOpcode::G_ANYEXT
+ : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
Preferred = {LLT(), PreferredOpcode, nullptr};
- for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
+ for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
(UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
+ const auto &MMO = LoadMI->getMMO();
+ // For atomics, only form anyextending loads.
+ if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
+ continue;
// Check for legality.
if (LI) {
LegalityQuery::MemDesc MMDesc;
- const auto &MMO = **MI.memoperands_begin();
- MMDesc.SizeInBits = MMO.getSizeInBits();
+ MMDesc.MemoryTy = MMO.getMemoryType();
MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getOrdering();
+ MMDesc.Ordering = MMO.getSuccessOrdering();
LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
- if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
- LegalizeActions::Legal)
+ LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
+ if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
+ .Action != LegalizeActions::Legal)
continue;
}
Preferred = ChoosePreferredUse(Preferred,
@@ -660,23 +666,22 @@ bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
uint64_t SizeInBits = MI.getOperand(2).getImm();
// If the source is a G_SEXTLOAD from the same bit width, then we don't
// need any extend at all, just a truncate.
- if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
- const auto &MMO = **LoadMI->memoperands_begin();
+ if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
// If truncating more than the original extended value, abort.
- if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
+ auto LoadSizeBits = LoadMI->getMemSizeInBits();
+ if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
return false;
- if (MMO.getSizeInBits() == SizeInBits)
+ if (LoadSizeBits == SizeInBits)
return true;
}
return false;
}
-bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
+void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
Builder.setInstrAndDebugLoc(MI);
Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchSextInRegOfLoad(
@@ -688,20 +693,16 @@ bool CombinerHelper::matchSextInRegOfLoad(
return false;
Register SrcReg = MI.getOperand(1).getReg();
- MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
- if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
+ auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
+ if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
+ !LoadDef->isSimple())
return false;
// If the sign extend extends from a narrower width than the load's width,
// then we can narrow the load width when we combine to a G_SEXTLOAD.
- auto &MMO = **LoadDef->memoperands_begin();
- // Don't do this for non-simple loads.
- if (MMO.isAtomic() || MMO.isVolatile())
- return false;
-
// Avoid widening the load at all.
- unsigned NewSizeBits =
- std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
+ unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
+ LoadDef->getMemSizeInBits());
// Don't generate G_SEXTLOADs with a < 1 byte width.
if (NewSizeBits < 8)
@@ -710,18 +711,17 @@ bool CombinerHelper::matchSextInRegOfLoad(
// anyway for most targets.
if (!isPowerOf2_32(NewSizeBits))
return false;
- MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
+ MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
return true;
}
-bool CombinerHelper::applySextInRegOfLoad(
+void CombinerHelper::applySextInRegOfLoad(
MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
Register LoadReg;
unsigned ScalarSizeBits;
std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
- auto *LoadDef = MRI.getVRegDef(LoadReg);
- assert(LoadDef && "Expected a load reg");
+ GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
// If we have the following:
// %ld = G_LOAD %ptr, (load 2)
@@ -729,15 +729,14 @@ bool CombinerHelper::applySextInRegOfLoad(
// ==>
// %ld = G_SEXTLOAD %ptr (load 1)
- auto &MMO = **LoadDef->memoperands_begin();
- Builder.setInstrAndDebugLoc(MI);
+ auto &MMO = LoadDef->getMMO();
+ Builder.setInstrAndDebugLoc(*LoadDef);
auto &MF = Builder.getMF();
auto PtrInfo = MMO.getPointerInfo();
auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
- LoadDef->getOperand(1).getReg(), *NewMMO);
+ LoadDef->getPointerReg(), *NewMMO);
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
@@ -941,10 +940,104 @@ void CombinerHelper::applyCombineIndexedLoadStore(
LLVM_DEBUG(dbgs() << " Combined to indexed operation");
}
-bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
- if (MI.getOpcode() != TargetOpcode::G_BR)
+bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
+ MachineInstr *&OtherMI) {
+ unsigned Opcode = MI.getOpcode();
+ bool IsDiv, IsSigned;
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV: {
+ IsDiv = true;
+ IsSigned = Opcode == TargetOpcode::G_SDIV;
+ break;
+ }
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM: {
+ IsDiv = false;
+ IsSigned = Opcode == TargetOpcode::G_SREM;
+ break;
+ }
+ }
+
+ Register Src1 = MI.getOperand(1).getReg();
+ unsigned DivOpcode, RemOpcode, DivremOpcode;
+ if (IsSigned) {
+ DivOpcode = TargetOpcode::G_SDIV;
+ RemOpcode = TargetOpcode::G_SREM;
+ DivremOpcode = TargetOpcode::G_SDIVREM;
+ } else {
+ DivOpcode = TargetOpcode::G_UDIV;
+ RemOpcode = TargetOpcode::G_UREM;
+ DivremOpcode = TargetOpcode::G_UDIVREM;
+ }
+
+ if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
return false;
+ // Combine:
+ // %div:_ = G_[SU]DIV %src1:_, %src2:_
+ // %rem:_ = G_[SU]REM %src1:_, %src2:_
+ // into:
+ // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
+
+ // Combine:
+ // %rem:_ = G_[SU]REM %src1:_, %src2:_
+ // %div:_ = G_[SU]DIV %src1:_, %src2:_
+ // into:
+ // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
+
+ for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
+ if (MI.getParent() == UseMI.getParent() &&
+ ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
+ (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
+ matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
+ OtherMI = &UseMI;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
+ MachineInstr *&OtherMI) {
+ unsigned Opcode = MI.getOpcode();
+ assert(OtherMI && "OtherMI shouldn't be empty.");
+
+ Register DestDivReg, DestRemReg;
+ if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
+ DestDivReg = MI.getOperand(0).getReg();
+ DestRemReg = OtherMI->getOperand(0).getReg();
+ } else {
+ DestDivReg = OtherMI->getOperand(0).getReg();
+ DestRemReg = MI.getOperand(0).getReg();
+ }
+
+ bool IsSigned =
+ Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
+
+ // Check which instruction is first in the block so we don't break def-use
+ // deps by "moving" the instruction incorrectly.
+ if (dominates(MI, *OtherMI))
+ Builder.setInstrAndDebugLoc(MI);
+ else
+ Builder.setInstrAndDebugLoc(*OtherMI);
+
+ Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
+ : TargetOpcode::G_UDIVREM,
+ {DestDivReg, DestRemReg},
+ {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+ MI.eraseFromParent();
+ OtherMI->eraseFromParent();
+}
+
+bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
+ MachineInstr *&BrCond) {
+ assert(MI.getOpcode() == TargetOpcode::G_BR);
+
// Try to match the following:
// bb1:
// G_BRCOND %c1, %bb2
@@ -964,21 +1057,20 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
return false;
assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
- MachineInstr *BrCond = &*std::prev(BrIt);
+ BrCond = &*std::prev(BrIt);
if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
return false;
- // Check that the next block is the conditional branch target.
- if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
- return false;
- return true;
+ // Check that the next block is the conditional branch target. Also make sure
+ // that it isn't the same as the G_BR's target (otherwise, this will loop.)
+ MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
+ return BrCondTarget != MI.getOperand(0).getMBB() &&
+ MBB->isLayoutSuccessor(BrCondTarget);
}
-void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
+void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
+ MachineInstr *&BrCond) {
MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
- MachineBasicBlock::iterator BrIt(MI);
- MachineInstr *BrCond = &*std::prev(BrIt);
-
Builder.setInstrAndDebugLoc(*BrCond);
LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
// FIXME: Does int/fp matter for this? If so, we might need to restrict
@@ -1056,7 +1148,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
MVT VT = getMVTForLLT(Ty);
if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
TLI.allowsMisalignedMemoryAccesses(
- VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast)
TySize = Size;
@@ -1117,7 +1209,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
}
bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
- Register Val, unsigned KnownLen,
+ Register Val, uint64_t KnownLen,
Align Alignment, bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -1211,7 +1303,7 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
}
auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
+ MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
Register Ptr = Dst;
if (DstOff != 0) {
@@ -1229,10 +1321,51 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
return true;
}
+bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
+
+ const auto *MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+ bool IsVolatile = MemOp->isVolatile();
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
+ // FIXME: support dynamically sized G_MEMCPY_INLINE
+ assert(LenVRegAndVal.hasValue() &&
+ "inline memcpy with dynamic size is not yet supported");
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return true;
+ }
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ Align DstAlign = DstMMO.getBaseAlign();
+ Align SrcAlign = SrcMMO.getBaseAlign();
+
+ return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+}
+
+bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
+ Register Src, uint64_t KnownLen,
+ Align DstAlign, Align SrcAlign,
+ bool IsVolatile) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+ return optimizeMemcpy(MI, Dst, Src, KnownLen,
+ std::numeric_limits<uint64_t>::max(), DstAlign,
+ SrcAlign, IsVolatile);
+}
+
bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
- Register Src, unsigned KnownLen,
- Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
+ Register Src, uint64_t KnownLen,
+ uint64_t Limit, Align DstAlign,
+ Align SrcAlign, bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
auto &DL = MF.getDataLayout();
@@ -1242,7 +1375,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
Align Alignment = commonAlignment(DstAlign, SrcAlign);
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
@@ -1253,7 +1385,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
// FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
// if the memcpy is in a tail call position.
- unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
std::vector<LLT> MemOps;
const auto &DstMMO = **MI.memoperands_begin();
@@ -1277,7 +1408,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->needsStackRealignment(MF))
+ if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign = NewAlign / 2;
@@ -1336,7 +1467,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
}
bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
- Register Src, unsigned KnownLen,
+ Register Src, uint64_t KnownLen,
Align DstAlign, Align SrcAlign,
bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
@@ -1382,7 +1513,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->needsStackRealignment(MF))
+ if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign = NewAlign / 2;
@@ -1449,10 +1580,6 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
auto MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
- bool IsVolatile = MemOp->isVolatile();
- // Don't try to optimize volatile.
- if (IsVolatile)
- return false;
Align DstAlign = MemOp->getBaseAlign();
Align SrcAlign;
@@ -1470,18 +1597,33 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
if (!LenVRegAndVal)
return false; // Leave it to the legalizer to lower it to a libcall.
- unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
if (KnownLen == 0) {
MI.eraseFromParent();
return true;
}
+ bool IsVolatile = MemOp->isVolatile();
+ if (Opc == TargetOpcode::G_MEMCPY_INLINE)
+ return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+
+ // Don't try to optimize volatile.
+ if (IsVolatile)
+ return false;
+
if (MaxLen && KnownLen > MaxLen)
return false;
- if (Opc == TargetOpcode::G_MEMCPY)
- return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+ if (Opc == TargetOpcode::G_MEMCPY) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+ return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
+ IsVolatile);
+ }
if (Opc == TargetOpcode::G_MEMMOVE)
return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
if (Opc == TargetOpcode::G_MEMSET)
@@ -1540,7 +1682,7 @@ bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
return Cst.hasValue();
}
-bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
Optional<APFloat> &Cst) {
assert(Cst.hasValue() && "Optional is unexpectedly empty!");
Builder.setInstrAndDebugLoc(MI);
@@ -1549,7 +1691,6 @@ bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
Register DstReg = MI.getOperand(0).getReg();
Builder.buildFConstant(DstReg, *FPVal);
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
@@ -1569,6 +1710,13 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
if (!MaybeImmVal)
return false;
+ // Don't do this combine if there are multiple uses of the first PTR_ADD,
+ // since we may be able to compute the second PTR_ADD as an immediate
+ // offset anyway. Folding the first offset into the second may cause us
+ // to go beyond the bounds of our legal addressing modes.
+ if (!MRI.hasOneNonDBGUse(Add2))
+ return false;
+
MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
@@ -1585,7 +1733,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
return true;
}
-bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
+void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
PtrAddChain &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
MachineIRBuilder MIB(MI);
@@ -1595,7 +1743,6 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
MI.getOperand(1).setReg(MatchInfo.Base);
MI.getOperand(2).setReg(NewOffset.getReg(0));
Observer.changedInstr(MI);
- return true;
}
bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
@@ -1643,7 +1790,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
return true;
}
-bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
+void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
RegisterImmPair &MatchInfo) {
unsigned Opcode = MI.getOpcode();
assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
@@ -1661,7 +1808,7 @@ bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
Builder.buildConstant(MI.getOperand(0), 0);
MI.eraseFromParent();
- return true;
+ return;
}
// Arithmetic shift and saturating signed left shift have no effect beyond
// scalar size.
@@ -1674,7 +1821,6 @@ bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
MI.getOperand(1).setReg(MatchInfo.Reg);
MI.getOperand(2).setReg(NewImm);
Observer.changedInstr(MI);
- return true;
}
bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
@@ -1758,7 +1904,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
return true;
}
-bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
+void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
ShiftOfShiftedLogic &MatchInfo) {
unsigned Opcode = MI.getOpcode();
assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
@@ -1790,7 +1936,6 @@ bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
MatchInfo.Logic->eraseFromParent();
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
@@ -1805,7 +1950,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
return (static_cast<int32_t>(ShiftVal) != -1);
}
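+// Illustrative sketch (editorial example, not part of the upstream change):
+//   %y:_(s32) = G_MUL %x, 8
+// has a power-of-two constant operand (8 == 1 << 3), so ShiftVal is 3 and the
+// apply step below rebuilds the instruction as
+//   %y:_(s32) = G_SHL %x, %c3
+// where %c3 is a freshly built G_CONSTANT of 3.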
-bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
+void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
MachineIRBuilder MIB(MI);
@@ -1815,7 +1960,6 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
MI.getOperand(2).setReg(ShiftCst.getReg(0));
Observer.changedInstr(MI);
- return true;
}
// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
@@ -1856,7 +2000,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
return MinLeadingZeros >= ShiftAmt;
}
-bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
+void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
const RegisterImmPair &MatchData) {
Register ExtSrcReg = MatchData.Reg;
int64_t ShiftAmtVal = MatchData.Imm;
@@ -1868,6 +2012,24 @@ bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
Builder.buildZExt(MI.getOperand(0), NarrowShift);
MI.eraseFromParent();
+}
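+// Illustrative sketch (editorial example, not part of the upstream change):
+//   %e:_(s64) = G_ZEXT %x:_(s32)
+//   %s:_(s64) = G_SHL %e, 4
+// fires when %x is known to have at least 4 leading zero bits, and the shift
+// is instead performed in the narrow type:
+//   %n:_(s32) = G_SHL %x, 4
+//   %s:_(s64) = G_ZEXT %n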
+
+bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
+ Register &MatchInfo) {
+ GMerge &Merge = cast<GMerge>(MI);
+ SmallVector<Register, 16> MergedValues;
+ for (unsigned I = 0; I < Merge.getNumSources(); ++I)
+ MergedValues.emplace_back(Merge.getSourceReg(I));
+
+ auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
+ if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
+ return false;
+
+ for (unsigned I = 0; I < MergedValues.size(); ++I)
+ if (MergedValues[I] != Unmerge->getReg(I))
+ return false;
+
+ MatchInfo = Unmerge->getSourceReg();
return true;
}
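+// Editorial example for illustration (not from the upstream change): the
+// merge/unmerge match above recognizes a merge that simply reassembles an
+// unmerge, e.g.
+//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x:_(s64)
+//   %y:_(s64) = G_MERGE_VALUES %lo, %hi
+// and records %x so %y can be replaced with it directly.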
@@ -1906,7 +2068,7 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
return true;
}
-bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
+void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
@@ -1927,7 +2089,6 @@ bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
Builder.buildCast(DstReg, SrcReg);
}
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
@@ -1955,7 +2116,7 @@ bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
return true;
}
-bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
+void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
SmallVectorImpl<APInt> &Csts) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
@@ -1969,7 +2130,6 @@ bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
}
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
@@ -1983,7 +2143,7 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
return true;
}
-bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
Builder.setInstrAndDebugLoc(MI);
Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
// Truncating a vector is going to truncate every single lane,
@@ -2002,7 +2162,6 @@ bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
} else
Builder.buildTrunc(Dst0Reg, SrcReg);
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
@@ -2031,7 +2190,7 @@ bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
}
-bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
+void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
@@ -2063,7 +2222,6 @@ bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
}
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
@@ -2091,7 +2249,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
return ShiftVal >= Size / 2 && ShiftVal < Size;
}
-bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
+void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
const unsigned &ShiftVal) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
@@ -2162,7 +2320,6 @@ bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
}
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
@@ -2185,13 +2342,12 @@ bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
}
-bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
Register DstReg = MI.getOperand(0).getReg();
Builder.setInstr(MI);
Builder.buildCopy(DstReg, Reg);
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
@@ -2200,13 +2356,12 @@ bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
}
-bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
Register DstReg = MI.getOperand(0).getReg();
Builder.setInstr(MI);
Builder.buildZExtOrTrunc(DstReg, Reg);
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineAddP2IToPtrAdd(
@@ -2234,7 +2389,7 @@ bool CombinerHelper::matchCombineAddP2IToPtrAdd(
return false;
}
-bool CombinerHelper::applyCombineAddP2IToPtrAdd(
+void CombinerHelper::applyCombineAddP2IToPtrAdd(
MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
Register Dst = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
@@ -2251,7 +2406,6 @@ bool CombinerHelper::applyCombineAddP2IToPtrAdd(
auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
Builder.buildPtrToInt(Dst, PtrAdd);
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
@@ -2272,7 +2426,7 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
return false;
}
-bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
+void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
int64_t &NewCst) {
assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
Register Dst = MI.getOperand(0).getReg();
@@ -2280,7 +2434,6 @@ bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
Builder.setInstrAndDebugLoc(MI);
Builder.buildConstant(Dst, NewCst);
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
@@ -2292,12 +2445,18 @@ bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
}
-bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
Register DstReg = MI.getOperand(0).getReg();
- MI.eraseFromParent();
- replaceRegWith(MRI, DstReg, Reg);
- return true;
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (mi_match(SrcReg, MRI,
+ m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
+ unsigned DstSize = DstTy.getScalarSizeInBits();
+ unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
+ return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
+ }
+ return false;
}
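+// Editorial example for illustration (not from the upstream change):
+//   %t:_(s8)  = G_TRUNC %x:_(s32)
+//   %z:_(s32) = G_ZEXT %t
+// matches when the top 24 bits of %x are known to be zero, in which case %z
+// can simply be replaced by %x.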
bool CombinerHelper::matchCombineExtOfExt(
@@ -2321,7 +2480,7 @@ bool CombinerHelper::matchCombineExtOfExt(
return false;
}
-bool CombinerHelper::applyCombineExtOfExt(
+void CombinerHelper::applyCombineExtOfExt(
MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
MI.getOpcode() == TargetOpcode::G_SEXT ||
@@ -2336,7 +2495,7 @@ bool CombinerHelper::applyCombineExtOfExt(
Observer.changingInstr(MI);
MI.getOperand(1).setReg(Reg);
Observer.changedInstr(MI);
- return true;
+ return;
}
// Combine:
@@ -2349,13 +2508,10 @@ bool CombinerHelper::applyCombineExtOfExt(
Builder.setInstrAndDebugLoc(MI);
Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
MI.eraseFromParent();
- return true;
}
-
- return false;
}
-bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
+void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
@@ -2365,7 +2521,6 @@ bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
MI.getFlags());
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
@@ -2381,14 +2536,6 @@ bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
}
-bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
- assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
- Register Dst = MI.getOperand(0).getReg();
- MI.eraseFromParent();
- replaceRegWith(MRI, Dst, Src);
- return true;
-}
-
bool CombinerHelper::matchCombineTruncOfExt(
MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
@@ -2403,7 +2550,7 @@ bool CombinerHelper::matchCombineTruncOfExt(
return false;
}
-bool CombinerHelper::applyCombineTruncOfExt(
+void CombinerHelper::applyCombineTruncOfExt(
MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
Register SrcReg = MatchInfo.first;
@@ -2414,7 +2561,7 @@ bool CombinerHelper::applyCombineTruncOfExt(
if (SrcTy == DstTy) {
MI.eraseFromParent();
replaceRegWith(MRI, DstReg, SrcReg);
- return true;
+ return;
}
Builder.setInstrAndDebugLoc(MI);
if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
@@ -2422,7 +2569,6 @@ bool CombinerHelper::applyCombineTruncOfExt(
else
Builder.buildTrunc(DstReg, SrcReg);
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchCombineTruncOfShl(
@@ -2449,7 +2595,7 @@ bool CombinerHelper::matchCombineTruncOfShl(
return false;
}
-bool CombinerHelper::applyCombineTruncOfShl(
+void CombinerHelper::applyCombineTruncOfShl(
MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
Register DstReg = MI.getOperand(0).getReg();
@@ -2463,7 +2609,6 @@ bool CombinerHelper::applyCombineTruncOfShl(
auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
@@ -2662,6 +2807,14 @@ bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
return true;
}
+bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildConstant(MI.getOperand(0), C);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
@@ -2731,7 +2884,7 @@ bool CombinerHelper::matchCombineInsertVecElts(
return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
}
-bool CombinerHelper::applyCombineInsertVecElts(
+void CombinerHelper::applyCombineInsertVecElts(
MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
Builder.setInstr(MI);
Register UndefReg;
@@ -2748,17 +2901,15 @@ bool CombinerHelper::applyCombineInsertVecElts(
}
Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::applySimplifyAddToSub(
+void CombinerHelper::applySimplifyAddToSub(
MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
Builder.setInstr(MI);
Register SubLHS, SubRHS;
std::tie(SubLHS, SubRHS) = MatchInfo;
Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
@@ -2852,7 +3003,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
return true;
}
-bool CombinerHelper::applyBuildInstructionSteps(
+void CombinerHelper::applyBuildInstructionSteps(
MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
assert(MatchInfo.InstrsToBuild.size() &&
"Expected at least one instr to build?");
@@ -2865,7 +3016,6 @@ bool CombinerHelper::applyBuildInstructionSteps(
OperandFn(Instr);
}
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchAshrShlToSextInreg(
@@ -2885,7 +3035,8 @@ bool CombinerHelper::matchAshrShlToSextInreg(
MatchInfo = std::make_tuple(Src, ShlCst);
return true;
}
-bool CombinerHelper::applyAshShlToSextInreg(
+
+void CombinerHelper::applyAshShlToSextInreg(
MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_ASHR);
Register Src;
@@ -2895,6 +3046,32 @@ bool CombinerHelper::applyAshShlToSextInreg(
Builder.setInstrAndDebugLoc(MI);
Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
MI.eraseFromParent();
+}
+
+/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
+bool CombinerHelper::matchOverlappingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ Register R;
+ int64_t C1;
+ int64_t C2;
+ if (!mi_match(
+ Dst, MRI,
+ m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (C1 & C2) {
+ B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
+ return;
+ }
+ auto Zero = B.buildConstant(Ty, 0);
+ replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
+ };
return true;
}
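+// Illustrative sketch (editorial example, not part of the upstream change):
+//   %y = G_AND (G_AND %x, 0xF0), 0x3C
+// folds to G_AND %x, 0x30 because 0xF0 & 0x3C == 0x30, while
+//   %y = G_AND (G_AND %x, 0xF0), 0x0F
+// folds to the constant 0 because the two masks share no bits.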
@@ -3091,7 +3268,7 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI,
return true;
}
-bool CombinerHelper::applyNotCmp(MachineInstr &MI,
+void CombinerHelper::applyNotCmp(MachineInstr &MI,
SmallVectorImpl<Register> &RegsToNegate) {
for (Register Reg : RegsToNegate) {
MachineInstr *Def = MRI.getVRegDef(Reg);
@@ -3121,7 +3298,6 @@ bool CombinerHelper::applyNotCmp(MachineInstr &MI,
replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchXorOfAndWithSameReg(
@@ -3155,7 +3331,7 @@ bool CombinerHelper::matchXorOfAndWithSameReg(
return Y == SharedReg;
}
-bool CombinerHelper::applyXorOfAndWithSameReg(
+void CombinerHelper::applyXorOfAndWithSameReg(
MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
// Fold (xor (and x, y), y) -> (and (not x), y)
Builder.setInstrAndDebugLoc(MI);
@@ -3167,7 +3343,6 @@ bool CombinerHelper::applyXorOfAndWithSameReg(
MI.getOperand(1).setReg(Not->getOperand(0).getReg());
MI.getOperand(2).setReg(Y);
Observer.changedInstr(MI);
- return true;
}
bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
@@ -3188,16 +3363,15 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
return isBuildVectorAllZeros(*VecMI, MRI);
}
-bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
+void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
Builder.setInstrAndDebugLoc(MI);
Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
MI.eraseFromParent();
- return true;
}
/// The second source operand is known to be a power of 2.
-bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
+void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
Register Src0 = MI.getOperand(1).getReg();
Register Pow2Src1 = MI.getOperand(2).getReg();
@@ -3209,7 +3383,6 @@ bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
Builder.buildAnd(DstReg, Src0, Add);
MI.eraseFromParent();
- return true;
}
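+// Editorial example for illustration (not from the upstream change): when the
+// second operand is known to be a power of two, e.g. 8,
+//   %r:_(s32) = G_UREM %x, %c8
+// becomes %r = G_AND %x, (%c8 + (-1)), i.e. %x & 7.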
Optional<SmallVector<Register, 8>>
@@ -3283,7 +3456,7 @@ CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
/// e.g. x[i] << 24
///
/// \returns The load instruction and the byte offset it is moved into.
-static Optional<std::pair<MachineInstr *, int64_t>>
+static Optional<std::pair<GZExtLoad *, int64_t>>
matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
const MachineRegisterInfo &MRI) {
assert(MRI.hasOneNonDBGUse(Reg) &&
@@ -3300,18 +3473,17 @@ matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
return None;
// TODO: Handle other types of loads.
- auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
+ auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
if (!Load)
return None;
- const auto &MMO = **Load->memoperands_begin();
- if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
+ if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
return None;
return std::make_pair(Load, Shift / MemSizeInBits);
}
-Optional<std::pair<MachineInstr *, int64_t>>
+Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
CombinerHelper::findLoadOffsetsForLoadOrCombine(
SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
@@ -3323,7 +3495,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
int64_t LowestIdx = INT64_MAX;
// The load which uses the lowest index.
- MachineInstr *LowestIdxLoad = nullptr;
+ GZExtLoad *LowestIdxLoad = nullptr;
// Keeps track of the load indices we see. We shouldn't see any indices twice.
SmallSet<int64_t, 8> SeenIdx;
@@ -3334,10 +3506,10 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
const MachineMemOperand *MMO = nullptr;
// Earliest instruction-order load in the pattern.
- MachineInstr *EarliestLoad = nullptr;
+ GZExtLoad *EarliestLoad = nullptr;
// Latest instruction-order load in the pattern.
- MachineInstr *LatestLoad = nullptr;
+ GZExtLoad *LatestLoad = nullptr;
// Base pointer which every load should share.
Register BasePtr;
@@ -3352,7 +3524,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
if (!LoadAndPos)
return None;
- MachineInstr *Load;
+ GZExtLoad *Load;
int64_t DstPos;
std::tie(Load, DstPos) = *LoadAndPos;
@@ -3365,10 +3537,10 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
return None;
// Make sure that the MachineMemOperands of every seen load are compatible.
- const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
+ auto &LoadMMO = Load->getMMO();
if (!MMO)
- MMO = LoadMMO;
- if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
+ MMO = &LoadMMO;
+ if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
return None;
// Find out what the base pointer and index for the load is.
@@ -3442,7 +3614,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
return None;
}
- return std::make_pair(LowestIdxLoad, LowestIdx);
+ return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
}
bool CombinerHelper::matchLoadOrCombine(
@@ -3490,13 +3662,13 @@ bool CombinerHelper::matchLoadOrCombine(
// Also verify that each of these ends up putting a[i] into the same memory
// offset as a load into a wide type would.
SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
- MachineInstr *LowestIdxLoad;
+ GZExtLoad *LowestIdxLoad, *LatestLoad;
int64_t LowestIdx;
auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
if (!MaybeLoadInfo)
return false;
- std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+ std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
// We have a bunch of loads being OR'd together. Using the addresses + offsets
// we found before, check if this corresponds to a big or little endian byte
@@ -3530,12 +3702,12 @@ bool CombinerHelper::matchLoadOrCombine(
// We will reuse the pointer from the load which ends up at byte offset 0. It
// may not use index 0.
- Register Ptr = LowestIdxLoad->getOperand(1).getReg();
- const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
+ Register Ptr = LowestIdxLoad->getPointerReg();
+ const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
LegalityQuery::MemDesc MMDesc;
- MMDesc.SizeInBits = WideMemSizeInBits;
+ MMDesc.MemoryTy = Ty;
MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getOrdering();
+ MMDesc.Ordering = MMO.getSuccessOrdering();
if (!isLegalOrBeforeLegalizer(
{TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
return false;
@@ -3551,6 +3723,7 @@ bool CombinerHelper::matchLoadOrCombine(
return false;
MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.setInstrAndDebugLoc(*LatestLoad);
Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
MIB.buildLoad(LoadDst, Ptr, *NewMMO);
if (NeedsBSwap)
@@ -3559,11 +3732,535 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
-bool CombinerHelper::applyLoadOrCombine(
+bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
+ MachineInstr *&ExtMI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PHI);
+
+ Register DstReg = MI.getOperand(0).getReg();
+
+ // TODO: Extending a vector may be expensive, don't do this until heuristics
+ // are better.
+ if (MRI.getType(DstReg).isVector())
+ return false;
+
+ // Try to match a phi, whose only use is an extend.
+ if (!MRI.hasOneNonDBGUse(DstReg))
+ return false;
+ ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
+ switch (ExtMI->getOpcode()) {
+ case TargetOpcode::G_ANYEXT:
+ return true; // G_ANYEXT is usually free.
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ break;
+ default:
+ return false;
+ }
+
+ // If the target is likely to fold this extend away, don't propagate.
+ if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
+ return false;
+
+ // We don't want to propagate the extends unless there's a good chance that
+ // they'll be optimized in some way.
+ // Collect the unique incoming values.
+ SmallPtrSet<MachineInstr *, 4> InSrcs;
+ for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
+ auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
+ switch (DefMI->getOpcode()) {
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_CONSTANT:
+ InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
+ // Don't try to propagate if there are too many places to create new
+ // extends, chances are it'll increase code size.
+ if (InSrcs.size() > 2)
+ return false;
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
+ MachineInstr *&ExtMI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PHI);
+ Register DstReg = ExtMI->getOperand(0).getReg();
+ LLT ExtTy = MRI.getType(DstReg);
+
+ // Propagate the extension into each incoming register's defining block.
+ // Use a SetVector here because PHIs can have duplicate edges, and we want
+ // deterministic iteration order.
+ SmallSetVector<MachineInstr *, 8> SrcMIs;
+ SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
+ for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
+ auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
+ if (!SrcMIs.insert(SrcMI))
+ continue;
+
+ // Build an extend after each src inst.
+ auto *MBB = SrcMI->getParent();
+ MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
+ if (InsertPt != MBB->end() && InsertPt->isPHI())
+ InsertPt = MBB->getFirstNonPHI();
+
+ Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
+ Builder.setDebugLoc(MI.getDebugLoc());
+ auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
+ SrcMI->getOperand(0).getReg());
+ OldToNewSrcMap[SrcMI] = NewExt;
+ }
+
+ // Create a new phi with the extended inputs.
+ Builder.setInstrAndDebugLoc(MI);
+ auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
+ NewPhi.addDef(DstReg);
+ for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
+ auto &MO = MI.getOperand(SrcIdx);
+ if (!MO.isReg()) {
+ NewPhi.addMBB(MO.getMBB());
+ continue;
+ }
+ auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
+ NewPhi.addUse(NewSrc->getOperand(0).getReg());
+ }
+ Builder.insertInstr(NewPhi);
+ ExtMI->eraseFromParent();
+}
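+// Illustrative sketch (editorial example, not part of the upstream change):
+//   bb.2: %p:_(s8) = G_PHI %a:_(s8), %bb.0, %b:_(s8), %bb.1
+//         %e:_(s32) = G_SEXT %p
+// becomes, with the extend rebuilt after each incoming def,
+//   bb.0: %ea:_(s32) = G_SEXT %a
+//   bb.1: %eb:_(s32) = G_SEXT %b
+//   bb.2: %e:_(s32) = G_PHI %ea, %bb.0, %eb, %bb.1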
+
+bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
+ Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
+ // If we have a constant index, look for a G_BUILD_VECTOR source
+ // and find the source register that the index maps to.
+ Register SrcVec = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcVec);
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
+ return false;
+
+ auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
+ return false;
+
+ unsigned VecIdx = Cst->Value.getZExtValue();
+ MachineInstr *BuildVecMI =
+ getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
+ if (!BuildVecMI) {
+ BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
+ if (!BuildVecMI)
+ return false;
+ LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
+ return false;
+ }
+
+ EVT Ty(getMVTForLLT(SrcTy));
+ if (!MRI.hasOneNonDBGUse(SrcVec) &&
+ !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
+ return false;
+
+ Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
+ return true;
+}
+
+void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
+ Register &Reg) {
+ // Check the type of the register, since it may have come from a
+ // G_BUILD_VECTOR_TRUNC.
+ LLT ScalarTy = MRI.getType(Reg);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+ if (ScalarTy != DstTy) {
+ assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
+ Builder.buildTrunc(DstReg, Reg);
+ MI.eraseFromParent();
+ return;
+ }
+ replaceSingleDefInstWithReg(MI, Reg);
+}
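+// Editorial example for illustration (not from the upstream change):
+//   %v:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d
+//   %x:_(s32) = G_EXTRACT_VECTOR_ELT %v, 2
+// records %c in the match, and the apply step replaces %x with %c, inserting
+// a trunc first only if %v came from G_BUILD_VECTOR_TRUNC with wider scalars.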
+
+bool CombinerHelper::matchExtractAllEltsFromBuildVector(
+ MachineInstr &MI,
+ SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ // This combine tries to find build_vector's which have every source element
+ // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
+ // the masked load scalarization is run late in the pipeline. There's already
+ // a combine for a similar pattern starting from the extract, but that
+ // doesn't attempt to do it if there are multiple uses of the build_vector,
+ // which in this case is true. Starting the combine from the build_vector
+ // feels more natural than trying to find sibling nodes of extracts.
+ // E.g.
+ // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
+ // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
+ // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
+ // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
+ // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
+ // ==>
+ // replace ext{1,2,3,4} with %s{1,2,3,4}
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ unsigned NumElts = DstTy.getNumElements();
+
+ SmallBitVector ExtractedElts(NumElts);
+ for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
+ MRI.use_instr_nodbg_end())) {
+ if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
+ return false;
+ auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
+ if (!Cst)
+ return false;
+ unsigned Idx = Cst.getValue().getZExtValue();
+ if (Idx >= NumElts)
+ return false; // Out of range.
+ ExtractedElts.set(Idx);
+ SrcDstPairs.emplace_back(
+ std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
+ }
+ // Match if every element was extracted.
+ return ExtractedElts.all();
+}
+
+void CombinerHelper::applyExtractAllEltsFromBuildVector(
+ MachineInstr &MI,
+ SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ for (auto &Pair : SrcDstPairs) {
+ auto *ExtMI = Pair.second;
+ replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
+ ExtMI->eraseFromParent();
+ }
+ MI.eraseFromParent();
+}
+
+void CombinerHelper::applyBuildFn(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
Builder.setInstrAndDebugLoc(MI);
MatchInfo(Builder);
MI.eraseFromParent();
+}
+
+void CombinerHelper::applyBuildFnNoErase(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ MatchInfo(Builder);
+}
+
+/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
+bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ if (X != Y)
+ return false;
+ unsigned RotateOpc =
+ Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
+ return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
+}
+
+void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
+ bool IsFSHL = Opc == TargetOpcode::G_FSHL;
+ Observer.changingInstr(MI);
+ MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
+ : TargetOpcode::G_ROTR));
+ MI.RemoveOperand(2);
+ Observer.changedInstr(MI);
+}
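+// Illustrative sketch (editorial example, not part of the upstream change):
+// a funnel shift whose two value operands are the same register is a rotate:
+//   %r:_(s32) = G_FSHL %x, %x, %amt
+// becomes
+//   %r:_(s32) = G_ROTL %x, %amt
+// (and G_FSHR with equal operands becomes G_ROTR) when the rotate is legal or
+// we are before the legalizer.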
+
+// Fold (rot x, c) -> (rot x, c % BitSize)
+bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+ MI.getOpcode() == TargetOpcode::G_ROTR);
+ unsigned Bitsize =
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+ Register AmtReg = MI.getOperand(2).getReg();
+ bool OutOfRange = false;
+ auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ OutOfRange |= CI->getValue().uge(Bitsize);
+ return true;
+ };
+ return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
+}
+
+void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+ MI.getOpcode() == TargetOpcode::G_ROTR);
+ unsigned Bitsize =
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+ Builder.setInstrAndDebugLoc(MI);
+ Register Amt = MI.getOperand(2).getReg();
+ LLT AmtTy = MRI.getType(Amt);
+ auto Bits = Builder.buildConstant(AmtTy, Bitsize);
+ Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(Amt);
+ Observer.changedInstr(MI);
+}
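+// Editorial example for illustration (not from the upstream change): for a
+// 32-bit rotate whose amount is the constant 37, the apply step above rewrites
+// the amount operand as G_UREM of the amount and 32, which then constant-folds
+// to 37 % 32 == 5.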
+
+bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
+ int64_t &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
+ auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
+ Optional<bool> KnownVal;
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected G_ICMP predicate?");
+ case CmpInst::ICMP_EQ:
+ KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_NE:
+ KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SGE:
+ KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SGT:
+ KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SLE:
+ KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SLT:
+ KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_UGE:
+ KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_UGT:
+ KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_ULE:
+ KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_ULT:
+ KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
+ break;
+ }
+ if (!KnownVal)
+ return false;
+ MatchInfo =
+ *KnownVal
+ ? getICmpTrueVal(getTargetLowering(),
+ /*IsVector = */
+ MRI.getType(MI.getOperand(0).getReg()).isVector(),
+ /* IsFP = */ false)
+ : 0;
+ return true;
+}
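+// Illustrative sketch (editorial example, not part of the upstream change):
+// if the upper 28 bits of %x are known to be zero, then in
+//   %c:_(s1) = G_ICMP intpred(ult), %x:_(s32), %c16
+// the LHS maximum is 15 < 16, KnownBits::ult returns true, and the compare
+// folds to the target's "true" value.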
+
+/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
+bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(Src);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
+ return false;
+ int64_t Width = MI.getOperand(2).getImm();
+ Register ShiftSrc;
+ int64_t ShiftImm;
+ if (!mi_match(
+ Src, MRI,
+ m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
+ m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
+ return false;
+ if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
+ auto Cst2 = B.buildConstant(ExtractTy, Width);
+ B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
+ };
+ return true;
+}
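+// Editorial example for illustration (not from the upstream change):
+//   %sh:_(s32) = G_ASHR %x, 4
+//   %d:_(s32) = G_SEXT_INREG %sh, 8
+// extracts a signed 8-bit field starting at bit 4 (4 + 8 <= 32), so it can be
+// rewritten as G_SBFX %x, 4 (lsb), 8 (width).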
+
+/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
+bool CombinerHelper::matchBitfieldExtractFromAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
+ TargetOpcode::G_UBFX, Ty, Ty))
+ return false;
+
+ int64_t AndImm, LSBImm;
+ Register ShiftSrc;
+ const unsigned Size = Ty.getScalarSizeInBits();
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
+ m_ICst(AndImm))))
+ return false;
+
+ // The mask is a mask of the low bits iff imm & (imm+1) == 0.
+ auto MaybeMask = static_cast<uint64_t>(AndImm);
+ if (MaybeMask & (MaybeMask + 1))
+ return false;
+
+ // LSB must fit within the register.
+ if (static_cast<uint64_t>(LSBImm) >= Size)
+ return false;
+
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
+ B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
+ };
+ return true;
+}
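+// Illustrative sketch (editorial example, not part of the upstream change):
+//   %d:_(s32) = G_AND (G_LSHR %x, 3), 0x1F
+// has a low-bits mask (0x1F & 0x20 == 0), an in-range LSB of 3, and
+// Width = countTrailingOnes(0x1F) == 5, so it becomes
+//   G_UBFX %x, 3 (lsb), 5 (width).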
+
+bool CombinerHelper::reassociationCanBreakAddressingModePattern(
+ MachineInstr &PtrAdd) {
+ assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
+
+ Register Src1Reg = PtrAdd.getOperand(1).getReg();
+ MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
+ if (!Src1Def)
+ return false;
+
+ Register Src2Reg = PtrAdd.getOperand(2).getReg();
+
+ if (MRI.hasOneNonDBGUse(Src1Reg))
+ return false;
+
+ auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+ if (!C1)
+ return false;
+ auto C2 = getConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ const APInt &C1APIntVal = *C1;
+ const APInt &C2APIntVal = *C2;
+ const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
+
+ for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+ // This combine may end up running before ptrtoint/inttoptr combines
+ // manage to eliminate redundant conversions, so try to look through them.
+ MachineInstr *ConvUseMI = &UseMI;
+ unsigned ConvUseOpc = ConvUseMI->getOpcode();
+ while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
+ ConvUseOpc == TargetOpcode::G_PTRTOINT) {
+ Register DefReg = ConvUseMI->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(DefReg))
+ break;
+ ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
+ ConvUseOpc = ConvUseMI->getOpcode();
+ }
+ auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
+ ConvUseOpc == TargetOpcode::G_STORE;
+ if (!LoadStore)
+ continue;
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ unsigned AS =
+ MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
+ Type *AccessTy =
+ getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
+ PtrAdd.getMF()->getFunction().getContext());
+ const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
+ if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+ AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+ AccessTy, AS))
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchReassocPtrAdd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+ // We're trying to match a few pointer computation patterns here for
+ // re-association opportunities.
+ // 1) Isolating a constant operand to be on the RHS, e.g.:
+ // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+ //
+ // 2) Folding two constants in each sub-tree as long as such folding
+ // doesn't break a legal addressing mode.
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+ Register Src1Reg = MI.getOperand(1).getReg();
+ Register Src2Reg = MI.getOperand(2).getReg();
+ MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
+ MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
+
+ if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
+ // Try to match example 1).
+ if (RHS->getOpcode() != TargetOpcode::G_ADD)
+ return false;
+ auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+ if (!C2)
+ return false;
+
+    MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto NewBase =
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NewBase.getReg(0));
+ MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ } else {
+ // Try to match example 2.
+ Register LHSSrc1 = LHS->getOperand(1).getReg();
+ Register LHSSrc2 = LHS->getOperand(2).getReg();
+ auto C1 = getConstantVRegVal(LHSSrc2, MRI);
+ if (!C1)
+ return false;
+ auto C2 = getConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(LHSSrc1);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ }
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
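Both rewrites rely on nothing more than the associativity of the address arithmetic; a tiny sketch on plain integer addresses (ignoring pointer provenance and wrap-around) spells out the two patterns from the comment above:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Base = 0x1000, X = 0x40, C = 0x8, C1 = 0x10, C2 = 0x20;

  // 1) G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C):
  //    the constant is isolated on the right-hand side.
  assert(Base + (X + C) == (Base + X) + C);

  // 2) G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1 + C2):
  //    applied only when the folded offset is still a legal addressing mode,
  //    as decided by reassociationCanBreakAddressingModePattern().
  assert((Base + C1) + C2 == Base + (C1 + C2));
  return 0;
}
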
+bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
+ if (!MaybeCst)
+ return false;
+ MatchInfo = *MaybeCst;
return true;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 2de20489e1d1..8146a67d4dfb 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Module.h"
#define DEBUG_TYPE "gisel-known-bits"
@@ -87,10 +88,10 @@ LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
<< "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
- << (Known.Zero | Known.One).toString(16, false) << "\n"
- << "[" << Depth << "] Zero: 0x" << Known.Zero.toString(16, false)
+ << toString(Known.Zero | Known.One, 16, false) << "\n"
+ << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
<< "\n"
- << "[" << Depth << "] One: 0x" << Known.One.toString(16, false)
+ << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
<< "\n";
}
@@ -113,6 +114,20 @@ void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
Known = KnownBits::commonBits(Known, Known2);
}
+// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
+// created using Width. Use this function when the inputs are KnownBits
+// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
+static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
+ const KnownBits &OffsetKnown,
+ const KnownBits &WidthKnown) {
+ KnownBits Mask(BitWidth);
+ Mask.Zero = APInt::getBitsSetFrom(
+ BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
+ Mask.One = APInt::getLowBitsSet(
+ BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
+ return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
+}
+
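The Zero/One masks built in extractBits() describe what is known about the value (1 << Width) - 1 when Width itself is only known to lie in a range: every bit at or above the largest possible width is certainly zero, every bit below the smallest possible width is certainly one. A standalone sketch for a 64-bit value, using plain masks instead of llvm::KnownBits:

#include <cstdint>
#include <iostream>

// "Definitely zero" / "definitely one" masks, mirroring llvm::KnownBits.
struct Known64 {
  uint64_t Zero = 0, One = 0;
};

// Known bits of (1 << Width) - 1 when Width is known to lie in
// [MinWidth, MaxWidth], for a 64-bit value.
static Known64 maskForWidth(unsigned MinWidth, unsigned MaxWidth) {
  Known64 Mask;
  Mask.Zero = MaxWidth >= 64 ? 0 : (~0ULL << MaxWidth);
  Mask.One = MinWidth >= 64 ? ~0ULL : ((1ULL << MinWidth) - 1);
  return Mask;
}

int main() {
  // Width known to be in [8, 16]: bits 16..63 are known zero, bits 0..7 are
  // known one; the result of the extract is then lshr(Src, Offset) & Mask.
  Known64 M = maskForWidth(8, 16);
  std::cout << std::hex << M.Zero << ' ' << M.One << '\n'; // ffffffffffff0000 ff
  return 0;
}
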
void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts,
unsigned Depth) {
@@ -129,7 +144,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
return;
}
- unsigned BitWidth = DstTy.getSizeInBits();
+ unsigned BitWidth = DstTy.getScalarSizeInBits();
auto CacheEntry = ComputeKnownBitsCache.find(R);
if (CacheEntry != ComputeKnownBitsCache.end()) {
Known = CacheEntry->second;
@@ -140,9 +155,6 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
Known = KnownBits(BitWidth); // Don't know anything
- if (DstTy.isVector())
- return; // TODO: Handle vectors.
-
// Depth may get bigger than max depth if it gets passed to a different
// GISelKnownBits object.
// This may happen when say a generic part uses a GISelKnownBits object
@@ -164,6 +176,25 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
Depth);
break;
+ case TargetOpcode::G_BUILD_VECTOR: {
+ // Collect the known bits that are shared by every demanded vector element.
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ // Known bits are the values that are shared by every demanded element.
+ Known = KnownBits::commonBits(Known, Known2);
+
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ }
+ break;
+ }
case TargetOpcode::COPY:
case TargetOpcode::G_PHI:
case TargetOpcode::PHI: {
@@ -244,6 +275,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_PTR_ADD: {
+ if (DstTy.isVector())
+ break;
// G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
@@ -284,7 +317,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
Depth + 1);
- Known = KnownBits::computeForMul(Known, Known2);
+ Known = KnownBits::mul(Known, Known2);
break;
}
case TargetOpcode::G_SELECT: {
@@ -332,6 +365,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
+ if (DstTy.isVector())
+ break;
if (TL.getBooleanContents(DstTy.isVector(),
Opcode == TargetOpcode::G_FCMP) ==
TargetLowering::ZeroOrOneBooleanContent &&
@@ -347,6 +382,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = Known.sext(BitWidth);
break;
}
+ case TargetOpcode::G_ASSERT_SEXT:
case TargetOpcode::G_SEXT_INREG: {
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
@@ -368,6 +404,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_ZEXTLOAD: {
+ if (DstTy.isVector())
+ break;
// Everything above the retrieved bits is zero
Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
break;
@@ -401,15 +439,25 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_PTRTOINT:
+ if (DstTy.isVector())
+ break;
// Fall through and handle them the same as zext/trunc.
LLVM_FALLTHROUGH;
+ case TargetOpcode::G_ASSERT_ZEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_TRUNC: {
Register SrcReg = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
- unsigned SrcBitWidth = SrcTy.isPointer()
- ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
- : SrcTy.getSizeInBits();
+ unsigned SrcBitWidth;
+
+ // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
+ if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
+ SrcBitWidth = MI.getOperand(2).getImm();
+ else {
+ SrcBitWidth = SrcTy.isPointer()
+ ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
+ : SrcTy.getSizeInBits();
+ }
assert(SrcBitWidth && "SrcBitWidth can't be zero");
Known = Known.zextOrTrunc(SrcBitWidth);
computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
@@ -431,6 +479,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_UNMERGE_VALUES: {
+ if (DstTy.isVector())
+ break;
unsigned NumOps = MI.getNumOperands();
Register SrcReg = MI.getOperand(NumOps - 1).getReg();
if (MRI.getType(SrcReg).isVector())
@@ -451,13 +501,41 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
case TargetOpcode::G_BSWAP: {
Register SrcReg = MI.getOperand(1).getReg();
computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
- Known.byteSwap();
+ Known = Known.byteSwap();
break;
}
case TargetOpcode::G_BITREVERSE: {
Register SrcReg = MI.getOperand(1).getReg();
computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
- Known.reverseBits();
+ Known = Known.reverseBits();
+ break;
+ }
+ case TargetOpcode::G_UBFX: {
+ KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
+ Depth + 1);
+ Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
+ break;
+ }
+ case TargetOpcode::G_SBFX: {
+ KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
+ Depth + 1);
+ Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
+ // Sign extend the extracted value using shift left and arithmetic shift
+ // right.
+ KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
+ KnownBits ShiftKnown = KnownBits::computeForAddSub(
+ /*Add*/ false, /*NSW*/ false, ExtKnown, WidthKnown);
+ Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
break;
}
}
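The G_SBFX case above sign-extends the extracted field with the classic shift-left-then-arithmetic-shift-right trick, applied to the KnownBits values; on a concrete integer the same trick looks like this:

#include <cassert>
#include <cstdint>

// Sign-extend a `Width`-bit field by moving its top bit into the sign
// position and shifting back arithmetically.
static int64_t signExtend(uint64_t Field, unsigned Width) {
  assert(Width > 0 && Width < 64);
  unsigned Shift = 64 - Width;
  return static_cast<int64_t>(Field << Shift) >> Shift;
}

int main() {
  assert(signExtend(0xA, 4) == -6); // 0b1010 read as a signed 4-bit value
  assert(signExtend(0x5, 4) == 5);  // 0b0101 stays positive
  return 0;
}
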
@@ -523,6 +601,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
}
+ case TargetOpcode::G_ASSERT_SEXT:
case TargetOpcode::G_SEXT_INREG: {
// Max of the input and what this extends.
Register Src = MI.getOperand(1).getReg();
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b97c369b832d..73b763710fdf 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -72,6 +72,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/MemoryOpRemark.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -97,6 +98,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
@@ -164,6 +166,8 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<GISelCSEAnalysisWrapperPass>();
if (OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -253,23 +257,13 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
Align IRTranslator::getMemOpAlign(const Instruction &I) {
if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
return SI->getAlign();
- if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(&I))
return LI->getAlign();
- }
- if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
- // TODO(PR27168): This instruction has no alignment attribute, but unlike
- // the default alignment for load/store, the default here is to assume
- // it has NATURAL alignment, not DataLayout-specified alignment.
- const DataLayout &DL = AI->getModule()->getDataLayout();
- return Align(DL.getTypeStoreSize(AI->getCompareOperand()->getType()));
- }
- if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
- // TODO(PR27168): This instruction has no alignment attribute, but unlike
- // the default alignment for load/store, the default here is to assume
- // it has NATURAL alignment, not DataLayout-specified alignment.
- const DataLayout &DL = AI->getModule()->getDataLayout();
- return Align(DL.getTypeStoreSize(AI->getValOperand()->getType()));
- }
+ if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I))
+ return AI->getAlign();
+ if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I))
+ return AI->getAlign();
+
OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
R << "unable to translate memop: " << ore::NV("Opcode", &I);
reportTranslationError(*MF, *TPC, *ORE, R);
@@ -840,9 +834,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
// For conditional branch lowering, we might try to do something silly like
// emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
// just re-use the existing condition vreg.
- if (CI && CI->getZExtValue() == 1 &&
- MRI->getType(CondLHS).getSizeInBits() == 1 &&
- CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+ if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
+ CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
Cond = CondLHS;
} else {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
@@ -1307,7 +1300,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
AAMDNodes AAMetadata;
LI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
- Ptr, Flags, MRI->getType(Regs[i]).getSizeInBytes(),
+ Ptr, Flags, MRI->getType(Regs[i]),
commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
@@ -1349,7 +1342,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
AAMDNodes AAMetadata;
SI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
- Ptr, Flags, MRI->getType(Vals[i]).getSizeInBytes(),
+ Ptr, Flags, MRI->getType(Vals[i]),
commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
SI.getSyncScopeID(), SI.getOrdering());
MIRBuilder.buildStore(Vals[i], Addr, *MMO);
@@ -1479,14 +1472,22 @@ bool IRTranslator::translateGetElementPtr(const User &U,
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
unsigned VectorWidth = 0;
- if (auto *VT = dyn_cast<VectorType>(U.getType()))
+
+ // True if we should use a splat vector; using VectorWidth alone is not
+ // sufficient.
+ bool WantSplatVector = false;
+ if (auto *VT = dyn_cast<VectorType>(U.getType())) {
VectorWidth = cast<FixedVectorType>(VT)->getNumElements();
+ // We don't produce 1 x N vectors; those are treated as scalars.
+ WantSplatVector = VectorWidth > 1;
+ }
// We might need to splat the base pointer into a vector if the offsets
// are vectors.
- if (VectorWidth && !PtrTy.isVector()) {
+ if (WantSplatVector && !PtrTy.isVector()) {
BaseReg =
- MIRBuilder.buildSplatVector(LLT::vector(VectorWidth, PtrTy), BaseReg)
+ MIRBuilder
+ .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg)
.getReg(0);
PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
PtrTy = getLLTForType(*PtrIRTy, *DL);
@@ -1522,7 +1523,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Register IdxReg = getOrCreateVReg(*Idx);
LLT IdxTy = MRI->getType(IdxReg);
if (IdxTy != OffsetTy) {
- if (!IdxTy.isVector() && VectorWidth) {
+ if (!IdxTy.isVector() && WantSplatVector) {
IdxReg = MIRBuilder.buildSplatVector(
OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
}
@@ -1571,7 +1572,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
Register SrcReg = getOrCreateVReg(**AI);
LLT SrcTy = MRI->getType(SrcReg);
if (SrcTy.isPointer())
- MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
+ MinPtrSize = std::min<unsigned>(SrcTy.getSizeInBits(), MinPtrSize);
SrcRegs.push_back(SrcReg);
}
@@ -1595,6 +1596,9 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
DstAlign = MCI->getDestAlign().valueOrOne();
SrcAlign = MCI->getSourceAlign().valueOrOne();
+ } else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) {
+ DstAlign = MCI->getDestAlign().valueOrOne();
+ SrcAlign = MCI->getSourceAlign().valueOrOne();
} else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
DstAlign = MMI->getDestAlign().valueOrOne();
SrcAlign = MMI->getSourceAlign().valueOrOne();
@@ -1603,10 +1607,12 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
DstAlign = MSI->getDestAlign().valueOrOne();
}
- // We need to propagate the tail call flag from the IR inst as an argument.
- // Otherwise, we have to pessimize and assume later that we cannot tail call
- // any memory intrinsics.
- ICall.addImm(CI.isTailCall() ? 1 : 0);
+ if (Opcode != TargetOpcode::G_MEMCPY_INLINE) {
+ // We need to propagate the tail call flag from the IR inst as an argument.
+ // Otherwise, we have to pessimize and assume later that we cannot tail call
+ // any memory intrinsics.
+ ICall.addImm(CI.isTailCall() ? 1 : 0);
+ }
// Create mem operands to store the alignment and volatile info.
auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
@@ -1633,12 +1639,14 @@ void IRTranslator::getStackGuard(Register DstReg,
if (!Global)
return;
+ unsigned AddrSpace = Global->getType()->getPointerAddressSpace();
+ LLT PtrTy = LLT::pointer(AddrSpace, DL->getPointerSizeInBits(AddrSpace));
+
MachinePointerInfo MPInfo(Global);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
- MachineMemOperand *MemRef =
- MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
- DL->getPointerABIAlignment(0));
+ MachineMemOperand *MemRef = MF->getMachineMemOperand(
+ MPInfo, Flags, PtrTy, DL->getPointerABIAlignment(AddrSpace));
MIB.setMemRefs({MemRef});
}
@@ -1826,6 +1834,16 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
+ if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
+ if (ORE->enabled()) {
+ const Function &F = *MI->getParent()->getParent();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ if (MemoryOpRemark::canHandle(MI, TLI)) {
+ MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI);
+ R.visit(MI);
+ }
+ }
+ }
// If this is a simple intrinsic (that is, we just need to add a def of
// a vreg, and uses for each arg operand, then translate it.
@@ -1924,9 +1942,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
assert(DI.getVariable()->isValidLocationForIntrinsic(
MIRBuilder.getDebugLoc()) &&
"Expected inlined-at fields to agree");
- if (!V) {
- // Currently the optimizer can produce this; insert an undef to
- // help debugging. Probably the optimizer should not do this.
+ if (!V || DI.hasArgList()) {
+ // DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to
+ // terminate any prior location.
MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
} else if (const auto *CI = dyn_cast<Constant>(V)) {
MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
@@ -2027,6 +2045,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getOrCreateVReg(*CI.getArgOperand(0)),
MachineInstr::copyFlagsFromInstruction(CI));
return true;
+ case Intrinsic::memcpy_inline:
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
case Intrinsic::memcpy:
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
case Intrinsic::memmove:
@@ -2063,7 +2083,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
*MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile,
- PtrTy.getSizeInBits() / 8, Align(8)));
+ PtrTy, Align(8)));
return true;
}
case Intrinsic::stacksave: {
@@ -2255,6 +2275,17 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
Args.push_back(getOrCreateVRegs(*Arg));
}
+ if (auto *CI = dyn_cast<CallInst>(&CB)) {
+ if (ORE->enabled()) {
+ const Function &F = *CI->getParent()->getParent();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ if (MemoryOpRemark::canHandle(CI, TLI)) {
+ MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI);
+ R.visit(CI);
+ }
+ }
+ }
+
// We don't set HasCalls on MFI here yet because call lowering may decide to
// optimize into tail calls. Instead, we defer that to selection where a final
// scan is done to check if any instructions are calls.
@@ -2349,10 +2380,11 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
Align Alignment = Info.align.getValueOr(
DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
-
- uint64_t Size = Info.memVT.getStoreSize();
+ LLT MemTy = Info.memVT.isSimple()
+ ? getLLTForMVT(Info.memVT.getSimpleVT())
+ : LLT::scalar(Info.memVT.getStoreSizeInBits());
MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
- Info.flags, Size, Alignment));
+ Info.flags, MemTy, Alignment));
}
return true;
@@ -2423,8 +2455,6 @@ bool IRTranslator::translateInvoke(const User &U,
const BasicBlock *EHPadBB = I.getSuccessor(1);
const Function *Fn = I.getCalledFunction();
- if (I.isInlineAsm())
- return false;
// FIXME: support invoking patchpoint and statepoint intrinsics.
if (Fn && Fn->isIntrinsic())
@@ -2442,12 +2472,37 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
return false;
+ bool LowerInlineAsm = false;
+ if (I.isInlineAsm()) {
+ const InlineAsm *IA = cast<InlineAsm>(I.getCalledOperand());
+ if (!IA->canThrow()) {
+ // Fast path without emitting EH_LABELs.
+
+ if (!translateInlineAsm(I, MIRBuilder))
+ return false;
+
+ MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(),
+ *ReturnMBB = &getMBB(*ReturnBB);
+
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, ReturnMBB, BranchProbability::getOne());
+
+ MIRBuilder.buildBr(*ReturnMBB);
+ return true;
+ } else {
+ LowerInlineAsm = true;
+ }
+ }
+
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
// the region covered by the try.
MCSymbol *BeginSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
- if (!translateCallBase(I, MIRBuilder))
+ if (LowerInlineAsm) {
+ if (!translateInlineAsm(I, MIRBuilder))
+ return false;
+ } else if (!translateCallBase(I, MIRBuilder))
return false;
MCSymbol *EndSymbol = Context.createTempSymbol();
@@ -2695,9 +2750,6 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
auto &TLI = *MF->getSubtarget().getTargetLowering();
auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
- Type *ResType = I.getType();
- Type *ValType = ResType->Type::getStructElementType(0);
-
auto Res = getOrCreateVRegs(I);
Register OldValRes = Res[0];
Register SuccessRes = Res[1];
@@ -2711,9 +2763,9 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
MIRBuilder.buildAtomicCmpXchgWithSuccess(
OldValRes, SuccessRes, Addr, Cmp, NewVal,
*MF->getMachineMemOperand(
- MachinePointerInfo(I.getPointerOperand()), Flags,
- DL->getTypeStoreSize(ValType), getMemOpAlign(I), AAMetadata, nullptr,
- I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering()));
+ MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp),
+ getMemOpAlign(I), AAMetadata, nullptr, I.getSyncScopeID(),
+ I.getSuccessOrdering(), I.getFailureOrdering()));
return true;
}
@@ -2723,8 +2775,6 @@ bool IRTranslator::translateAtomicRMW(const User &U,
auto &TLI = *MF->getSubtarget().getTargetLowering();
auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
- Type *ResType = I.getType();
-
Register Res = getOrCreateVReg(I);
Register Addr = getOrCreateVReg(*I.getPointerOperand());
Register Val = getOrCreateVReg(*I.getValOperand());
@@ -2780,9 +2830,9 @@ bool IRTranslator::translateAtomicRMW(const User &U,
MIRBuilder.buildAtomicRMW(
Opcode, Res, Addr, Val,
*MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
- Flags, DL->getTypeStoreSize(ResType),
- getMemOpAlign(I), AAMetadata, nullptr,
- I.getSyncScopeID(), I.getOrdering()));
+ Flags, MRI->getType(Val), getMemOpAlign(I),
+ AAMetadata, nullptr, I.getSyncScopeID(),
+ I.getOrdering()));
return true;
}
@@ -2853,13 +2903,6 @@ bool IRTranslator::valueIsSplit(const Value &V,
bool IRTranslator::translate(const Instruction &Inst) {
CurBuilder->setDebugLoc(Inst.getDebugLoc());
- // We only emit constants into the entry block from here. To prevent jumpy
- // debug behaviour set the line to 0.
- if (const DebugLoc &DL = Inst.getDebugLoc())
- EntryBuilder->setDebugLoc(DILocation::get(
- Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt()));
- else
- EntryBuilder->setDebugLoc(DebugLoc());
auto &TLI = *MF->getSubtarget().getTargetLowering();
if (TLI.fallBackToDAGISel(Inst))
@@ -2876,6 +2919,13 @@ bool IRTranslator::translate(const Instruction &Inst) {
}
bool IRTranslator::translate(const Constant &C, Register Reg) {
+ // We only emit constants into the entry block from here. To prevent jumpy
+ // debug behaviour set the line to 0.
+ if (auto CurrInstDL = CurBuilder->getDL())
+ EntryBuilder->setDebugLoc(DILocation::get(C.getContext(), 0, 0,
+ CurrInstDL.getScope(),
+ CurrInstDL.getInlinedAt()));
+
if (auto CI = dyn_cast<ConstantInt>(&C))
EntryBuilder->buildConstant(Reg, *CI);
else if (auto CF = dyn_cast<ConstantFP>(&C))
@@ -2887,14 +2937,15 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
else if (auto GV = dyn_cast<GlobalValue>(&C))
EntryBuilder->buildGlobalValue(Reg, GV);
else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
- if (!CAZ->getType()->isVectorTy())
+ if (!isa<FixedVectorType>(CAZ->getType()))
return false;
// Return the scalar if it is a <1 x Ty> vector.
- if (CAZ->getNumElements() == 1)
+ unsigned NumElts = CAZ->getElementCount().getFixedValue();
+ if (NumElts == 1)
return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get());
SmallVector<Register, 4> Ops;
- for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
- Constant &Elt = *CAZ->getElementValue(i);
+ for (unsigned I = 0; I < NumElts; ++I) {
+ Constant &Elt = *CAZ->getElementValue(I);
Ops.push_back(getOrCreateVReg(Elt));
}
EntryBuilder->buildBuildVector(Reg, Ops);
@@ -2968,8 +3019,13 @@ void IRTranslator::finalizeBasicBlock() {
emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
- // FIXME delete this block below?
if (BTB.ContiguousRange && j + 2 == ej) {
+ // We need to record the replacement phi edge here that normally
+ // happens in emitBitTestCase before we delete the case, otherwise the
+ // phi edge will be lost.
+ addMachineCFGPred({BTB.Parent->getBasicBlock(),
+ BTB.Cases[ej - 1].TargetBB->getBasicBlock()},
+ MBB);
// Since we're not going to use the final bit test, remove it.
BTB.Cases.pop_back();
break;
@@ -3079,7 +3135,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
assert(PendingPHIs.empty() && "stale PHIs");
- if (!DL->isLittleEndian()) {
+ // Targets which want to use big endian can enable it using
+ // enableBigEndian()
+ if (!DL->isLittleEndian() && !CLI->enableBigEndian()) {
// Currently we don't properly handle big endian code.
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
@@ -3121,7 +3179,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Make our arguments/constants entry block fallthrough to the IR entry block.
EntryBB->addSuccessor(&getMBB(F.front()));
- if (CLI->fallBackToDAGISel(F)) {
+ if (CLI->fallBackToDAGISel(*MF)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower function: " << ore::NV("Prototype", F.getType());
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 25fae5487187..75a8f03fcb3f 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -11,7 +11,11 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -50,16 +54,29 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
-InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { }
+InstructionSelect::InstructionSelect(CodeGenOpt::Level OL)
+ : MachineFunctionPass(ID), OptLevel(OL) {}
+
+// In order not to crash when calling getAnalysis during testing with -run-pass
+// we use the default opt level here instead of None, so that the addRequired()
+// calls are made in getAnalysisUsage().
+InstructionSelect::InstructionSelect()
+ : MachineFunctionPass(ID), OptLevel(CodeGenOpt::Default) {}
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
- AU.addRequired<GISelKnownBitsAnalysis>();
- AU.addPreserved<GISelKnownBitsAnalysis>();
+ if (OptLevel != CodeGenOpt::None) {
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ }
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -71,13 +88,26 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
return false;
LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
- GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+
+ CodeGenOpt::Level OldOptLevel = OptLevel;
+ auto RestoreOptLevel = make_scope_exit([=]() { OptLevel = OldOptLevel; });
+ OptLevel = MF.getFunction().hasOptNone() ? CodeGenOpt::None
+ : MF.getTarget().getOptLevel();
+
+ GISelKnownBits *KB = nullptr;
+ if (OptLevel != CodeGenOpt::None) {
+ KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ if (PSI && PSI->hasProfileSummary())
+ BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
+ }
+
CodeGenCoverage CoverageInfo;
assert(ISel && "Cannot work without InstructionSelector");
- ISel->setupMF(MF, KB, CoverageInfo);
+ ISel->setupMF(MF, KB, CoverageInfo, PSI, BFI);
// An optimization remark emitter. Used to report failures.
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
@@ -102,6 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
#endif
for (MachineBasicBlock *MBB : post_order(&MF)) {
+ ISel->CurMBB = MBB;
if (MBB->empty())
continue;
@@ -133,6 +164,25 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ // Eliminate hints.
+ if (isPreISelGenericOptimizationHint(MI.getOpcode())) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ // At this point, the destination register class of the hint may have
+ // been decided.
+ //
+ // Propagate that through to the source register.
+ const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg);
+ if (DstRC)
+ MRI.setRegClass(SrcReg, DstRC);
+ assert(canReplaceReg(DstReg, SrcReg, MRI) &&
+ "Must be able to replace dst with src!");
+ MI.eraseFromParent();
+ MRI.replaceRegWith(DstReg, SrcReg);
+ continue;
+ }
+
if (!ISel->select(MI)) {
// FIXME: It would be nice to dump all inserted instructions. It's
// not obvious how, esp. considering select() can insert after MI.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
new file mode 100644
index 000000000000..727d33fe4a40
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
@@ -0,0 +1,383 @@
+//===- lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp - Legalizer ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement an interface to specify and query how an illegal operation on a
+// given type should be expanded.
+//
+// Issues to be resolved:
+// + Make it fast.
+// + Support weird types like i3, <7 x i3>, ...
+// + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...)
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include <map>
+
+using namespace llvm;
+using namespace LegacyLegalizeActions;
+
+#define DEBUG_TYPE "legalizer-info"
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, LegacyLegalizeAction Action) {
+ switch (Action) {
+ case Legal:
+ OS << "Legal";
+ break;
+ case NarrowScalar:
+ OS << "NarrowScalar";
+ break;
+ case WidenScalar:
+ OS << "WidenScalar";
+ break;
+ case FewerElements:
+ OS << "FewerElements";
+ break;
+ case MoreElements:
+ OS << "MoreElements";
+ break;
+ case Bitcast:
+ OS << "Bitcast";
+ break;
+ case Lower:
+ OS << "Lower";
+ break;
+ case Libcall:
+ OS << "Libcall";
+ break;
+ case Custom:
+ OS << "Custom";
+ break;
+ case Unsupported:
+ OS << "Unsupported";
+ break;
+ case NotFound:
+ OS << "NotFound";
+ break;
+ }
+ return OS;
+}
+
+LegacyLegalizerInfo::LegacyLegalizerInfo() : TablesInitialized(false) {
+ // Set defaults.
+ // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the
+ // fundamental load/store Jakob proposed. Once loads & stores are supported.
+ setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}});
+
+ setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
+
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall);
+ setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}});
+}
+
+void LegacyLegalizerInfo::computeTables() {
+ assert(TablesInitialized == false);
+
+ for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) {
+ const unsigned Opcode = FirstOp + OpcodeIdx;
+ for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size();
+ ++TypeIdx) {
+ // 0. Collect information specified through the setAction API, i.e.
+ // for specific bit sizes.
+ // For scalar types:
+ SizeAndActionsVec ScalarSpecifiedActions;
+ // For pointer types:
+ std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions;
+ // For vector types:
+ std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions;
+ for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) {
+ const LLT Type = LLT2Action.first;
+ const LegacyLegalizeAction Action = LLT2Action.second;
+
+ auto SizeAction = std::make_pair(Type.getSizeInBits(), Action);
+ if (Type.isPointer())
+ AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back(
+ SizeAction);
+ else if (Type.isVector())
+ ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()]
+ .push_back(SizeAction);
+ else
+ ScalarSpecifiedActions.push_back(SizeAction);
+ }
+
+ // 1. Handle scalar types
+ {
+ // Decide how to handle bit sizes for which no explicit specification
+ // was given.
+ SizeChangeStrategy S = &unsupportedForDifferentSizes;
+ if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
+ ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+ S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
+ llvm::sort(ScalarSpecifiedActions);
+ checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
+ setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
+ }
+
+ // 2. Handle pointer types
+ for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
+ llvm::sort(PointerSpecifiedActions.second);
+ checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
+        // For pointer types, we assume that there isn't a meaningful way
+        // to change the number of bits used in the pointer.
+ setPointerAction(
+ Opcode, TypeIdx, PointerSpecifiedActions.first,
+ unsupportedForDifferentSizes(PointerSpecifiedActions.second));
+ }
+
+ // 3. Handle vector types
+ SizeAndActionsVec ElementSizesSeen;
+ for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
+ llvm::sort(VectorSpecifiedActions.second);
+ const uint16_t ElementSize = VectorSpecifiedActions.first;
+ ElementSizesSeen.push_back({ElementSize, Legal});
+ checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
+ // For vector types, we assume that the best way to adapt the number
+ // of elements is to the next larger number of elements type for which
+ // the vector type is legal, unless there is no such type. In that case,
+ // legalize towards a vector type with a smaller number of elements.
+ SizeAndActionsVec NumElementsActions;
+ for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) {
+ assert(BitsizeAndAction.first % ElementSize == 0);
+ const uint16_t NumElements = BitsizeAndAction.first / ElementSize;
+ NumElementsActions.push_back({NumElements, BitsizeAndAction.second});
+ }
+ setVectorNumElementAction(
+ Opcode, TypeIdx, ElementSize,
+ moreToWiderTypesAndLessToWidest(NumElementsActions));
+ }
+ llvm::sort(ElementSizesSeen);
+ SizeChangeStrategy VectorElementSizeChangeStrategy =
+ &unsupportedForDifferentSizes;
+ if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
+ VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+ VectorElementSizeChangeStrategy =
+ VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx];
+ setScalarInVectorAction(
+ Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen));
+ }
+ }
+
+ TablesInitialized = true;
+}
+
+// FIXME: inefficient implementation for now. Without ComputeValueVTs we're
+// probably going to need specialized lookup structures for various types before
+// we have any hope of doing well with something like <13 x i3>. Even the common
+// cases should do better than what we have now.
+std::pair<LegacyLegalizeAction, LLT>
+LegacyLegalizerInfo::getAspectAction(const InstrAspect &Aspect) const {
+ assert(TablesInitialized && "backend forgot to call computeTables");
+ // These *have* to be implemented for now, they're the fundamental basis of
+ // how everything else is transformed.
+ if (Aspect.Type.isScalar() || Aspect.Type.isPointer())
+ return findScalarLegalAction(Aspect);
+ assert(Aspect.Type.isVector());
+ return findVectorLegalAction(Aspect);
+}
+
+LegacyLegalizerInfo::SizeAndActionsVec
+LegacyLegalizerInfo::increaseToLargerTypesAndDecreaseToLargest(
+ const SizeAndActionsVec &v, LegacyLegalizeAction IncreaseAction,
+ LegacyLegalizeAction DecreaseAction) {
+ SizeAndActionsVec result;
+ unsigned LargestSizeSoFar = 0;
+ if (v.size() >= 1 && v[0].first != 1)
+ result.push_back({1, IncreaseAction});
+ for (size_t i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ LargestSizeSoFar = v[i].first;
+ if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) {
+ result.push_back({LargestSizeSoFar + 1, IncreaseAction});
+ LargestSizeSoFar = v[i].first + 1;
+ }
+ }
+ result.push_back({LargestSizeSoFar + 1, DecreaseAction});
+ return result;
+}
+
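A quick way to read the helper above is to run it on a small table. The sketch below restates the loop on plain (size, action-name) pairs and checks the padded result for a table that marks only s8 and s16 as Legal, with WidenScalar as the increase action and NarrowScalar as the decrease action:

#include <cassert>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

using Entry = std::pair<unsigned, std::string>;

// Same padding scheme as increaseToLargerTypesAndDecreaseToLargest().
static std::vector<Entry> pad(const std::vector<Entry> &V,
                              const std::string &Increase,
                              const std::string &Decrease) {
  std::vector<Entry> Result;
  unsigned Largest = 0;
  if (!V.empty() && V[0].first != 1)
    Result.push_back({1, Increase});
  for (std::size_t I = 0; I < V.size(); ++I) {
    Result.push_back(V[I]);
    Largest = V[I].first;
    if (I + 1 < V.size() && V[I + 1].first != V[I].first + 1) {
      Result.push_back({Largest + 1, Increase});
      Largest = V[I].first + 1;
    }
  }
  Result.push_back({Largest + 1, Decrease});
  return Result;
}

int main() {
  std::vector<Entry> Out =
      pad({{8, "Legal"}, {16, "Legal"}}, "WidenScalar", "NarrowScalar");
  // Sizes below 8 widen, the gap 9..15 widens to the next legal size (16),
  // and everything above 16 narrows back to the largest legal size.
  std::vector<Entry> Expected = {{1, "WidenScalar"},
                                 {8, "Legal"},
                                 {9, "WidenScalar"},
                                 {16, "Legal"},
                                 {17, "NarrowScalar"}};
  assert(Out == Expected);
  return 0;
}
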
+LegacyLegalizerInfo::SizeAndActionsVec
+LegacyLegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest(
+ const SizeAndActionsVec &v, LegacyLegalizeAction DecreaseAction,
+ LegacyLegalizeAction IncreaseAction) {
+ SizeAndActionsVec result;
+ if (v.size() == 0 || v[0].first != 1)
+ result.push_back({1, IncreaseAction});
+ for (size_t i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) {
+ result.push_back({v[i].first + 1, DecreaseAction});
+ }
+ }
+ return result;
+}
+
+LegacyLegalizerInfo::SizeAndAction
+LegacyLegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
+ assert(Size >= 1);
+ // Find the last element in Vec that has a bitsize equal to or smaller than
+ // the requested bit size.
+ // That is the element just before the first element that is bigger than Size.
+ auto It = partition_point(
+ Vec, [=](const SizeAndAction &A) { return A.first <= Size; });
+ assert(It != Vec.begin() && "Does Vec not start with size 1?");
+ int VecIdx = It - Vec.begin() - 1;
+
+ LegacyLegalizeAction Action = Vec[VecIdx].second;
+ switch (Action) {
+ case Legal:
+ case Bitcast:
+ case Lower:
+ case Libcall:
+ case Custom:
+ return {Size, Action};
+ case FewerElements:
+ // FIXME: is this special case still needed and correct?
+ // Special case for scalarization:
+ if (Vec == SizeAndActionsVec({{1, FewerElements}}))
+ return {1, FewerElements};
+ LLVM_FALLTHROUGH;
+ case NarrowScalar: {
+ // The following needs to be a loop, as for now, we do allow needing to
+ // go over "Unsupported" bit sizes before finding a legalizable bit size.
+ // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). if Size==8,
+ // we need to iterate over s9, and then to s32 to return (s32, Legal).
+ // If we want to get rid of the below loop, we should have stronger asserts
+ // when building the SizeAndActionsVecs, probably not allowing
+ // "Unsupported" unless at the ends of the vector.
+ for (int i = VecIdx - 1; i >= 0; --i)
+ if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+ Vec[i].second != Unsupported)
+ return {Vec[i].first, Action};
+ llvm_unreachable("");
+ }
+ case WidenScalar:
+ case MoreElements: {
+ // See above, the following needs to be a loop, at least for now.
+ for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i)
+ if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+ Vec[i].second != Unsupported)
+ return {Vec[i].first, Action};
+ llvm_unreachable("");
+ }
+ case Unsupported:
+ return {Size, Unsupported};
+ case NotFound:
+ llvm_unreachable("NotFound");
+ }
+ llvm_unreachable("Action has an unknown enum value");
+}
+
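The lookup is a step function over bit sizes: each (Size, Action) entry applies to every width from Size up to the next entry, and for WidenScalar the table is then scanned upward for the first width that needs no further work. A simplified rendering that only models Legal, WidenScalar and Unsupported, run on the example from the comment in findAction():

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

enum Action { Legal, WidenScalar, Unsupported };

static std::pair<unsigned, Action>
lookup(const std::vector<std::pair<unsigned, Action>> &Vec, unsigned Size) {
  // Last entry whose size is <= the requested size.
  auto It = std::partition_point(
      Vec.begin(), Vec.end(),
      [=](const std::pair<unsigned, Action> &A) { return A.first <= Size; });
  assert(It != Vec.begin() && "table must start at size 1");
  unsigned Idx = unsigned(It - Vec.begin()) - 1;
  Action Act = Vec[Idx].second;
  if (Act != WidenScalar)
    return {Size, Act};
  // Walk upward, possibly past Unsupported sizes, to the width to widen to.
  for (unsigned I = Idx + 1; I < Vec.size(); ++I)
    if (Vec[I].second == Legal)
      return {Vec[I].first, Act};
  return {Size, Unsupported};
}

int main() {
  // s8 is WidenScalar, s9 is Unsupported, s32 is Legal: querying s8 walks
  // past s9 and answers "WidenScalar toward 32 bits".
  std::vector<std::pair<unsigned, Action>> Vec = {
      {1, Unsupported}, {8, WidenScalar}, {9, Unsupported}, {32, Legal}};
  auto R = lookup(Vec, 8);
  assert(R.first == 32 && R.second == WidenScalar);
  return 0;
}
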
+std::pair<LegacyLegalizeAction, LLT>
+LegacyLegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {
+ assert(Aspect.Type.isScalar() || Aspect.Type.isPointer());
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+ return {NotFound, LLT()};
+ const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
+ if (Aspect.Type.isPointer() &&
+ AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) ==
+ AddrSpace2PointerActions[OpcodeIdx].end()) {
+ return {NotFound, LLT()};
+ }
+ const SmallVector<SizeAndActionsVec, 1> &Actions =
+ Aspect.Type.isPointer()
+ ? AddrSpace2PointerActions[OpcodeIdx]
+ .find(Aspect.Type.getAddressSpace())
+ ->second
+ : ScalarActions[OpcodeIdx];
+ if (Aspect.Idx >= Actions.size())
+ return {NotFound, LLT()};
+ const SizeAndActionsVec &Vec = Actions[Aspect.Idx];
+ // FIXME: speed up this search, e.g. by using a results cache for repeated
+ // queries?
+ auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits());
+ return {SizeAndAction.second,
+ Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first)
+ : LLT::pointer(Aspect.Type.getAddressSpace(),
+ SizeAndAction.first)};
+}
+
+std::pair<LegacyLegalizeAction, LLT>
+LegacyLegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
+ assert(Aspect.Type.isVector());
+ // First legalize the vector element size, then legalize the number of
+ // lanes in the vector.
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+ return {NotFound, Aspect.Type};
+ const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
+ const unsigned TypeIdx = Aspect.Idx;
+ if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size())
+ return {NotFound, Aspect.Type};
+ const SizeAndActionsVec &ElemSizeVec =
+ ScalarInVectorActions[OpcodeIdx][TypeIdx];
+
+ LLT IntermediateType;
+ auto ElementSizeAndAction =
+ findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits());
+ IntermediateType = LLT::fixed_vector(Aspect.Type.getNumElements(),
+ ElementSizeAndAction.first);
+ if (ElementSizeAndAction.second != Legal)
+ return {ElementSizeAndAction.second, IntermediateType};
+
+ auto i = NumElements2Actions[OpcodeIdx].find(
+ IntermediateType.getScalarSizeInBits());
+ if (i == NumElements2Actions[OpcodeIdx].end()) {
+ return {NotFound, IntermediateType};
+ }
+ const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx];
+ auto NumElementsAndAction =
+ findAction(NumElementsVec, IntermediateType.getNumElements());
+ return {NumElementsAndAction.second,
+ LLT::fixed_vector(NumElementsAndAction.first,
+ IntermediateType.getScalarSizeInBits())};
+}
+
+unsigned LegacyLegalizerInfo::getOpcodeIdxForOpcode(unsigned Opcode) const {
+ assert(Opcode >= FirstOp && Opcode <= LastOp && "Unsupported opcode");
+ return Opcode - FirstOp;
+}
+
+
+LegacyLegalizeActionStep
+LegacyLegalizerInfo::getAction(const LegalityQuery &Query) const {
+ for (unsigned i = 0; i < Query.Types.size(); ++i) {
+ auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]});
+ if (Action.first != Legal) {
+ LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Action="
+ << Action.first << ", " << Action.second << "\n");
+ return {Action.first, i, Action.second};
+ } else
+ LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n");
+ }
+ LLVM_DEBUG(dbgs() << ".. (legacy) Legal\n");
+ return {Legal, 0, LLT{}};
+}
+
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 1993f6033291..7c5e4e52ca3e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -55,7 +55,7 @@ LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
SmallVector<TypePairAndMemDesc, 4> TypesAndMemDesc = TypesAndMemDescInit;
return [=](const LegalityQuery &Query) {
TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
- Query.MMODescrs[MMOIdx].SizeInBits,
+ Query.MMODescrs[MMOIdx].MemoryTy,
Query.MMODescrs[MMOIdx].AlignInBits};
return llvm::any_of(TypesAndMemDesc,
[=](const TypePairAndMemDesc &Entry) -> bool {
@@ -176,7 +176,7 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
- return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8);
+ return !isPowerOf2_32(Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
};
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index f3ba3f080198..fc2570ae4b8e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -69,8 +69,8 @@ LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,
const LLT VecTy = Query.Types[TypeIdx];
unsigned NewNumElements =
std::max(1u << Log2_32_Ceil(VecTy.getNumElements()), Min);
- return std::make_pair(TypeIdx,
- LLT::vector(NewNumElements, VecTy.getElementType()));
+ return std::make_pair(
+ TypeIdx, LLT::fixed_vector(NewNumElements, VecTy.getElementType()));
};
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 5ba9367cac8a..635b1445ee07 100644
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -43,6 +43,13 @@ static cl::opt<bool>
cl::desc("Should enable CSE in Legalizer"),
cl::Optional, cl::init(false));
+// This is a temporary hack, should be removed soon.
+static cl::opt<bool> AllowGInsertAsArtifact(
+ "allow-ginsert-as-artifact",
+ cl::desc("Allow G_INSERT to be considered an artifact. Hack around AMDGPU "
+ "test infinite loops."),
+ cl::Optional, cl::init(true));
+
enum class DebugLocVerifyLevel {
None,
Legalizations,
@@ -103,6 +110,8 @@ static bool isArtifact(const MachineInstr &MI) {
case TargetOpcode::G_BUILD_VECTOR:
case TargetOpcode::G_EXTRACT:
return true;
+ case TargetOpcode::G_INSERT:
+ return AllowGInsertAsArtifact;
}
}
using InstListTy = GISelWorkList<256>;
@@ -230,7 +239,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
}
// Do the legalization for this instruction.
- auto Res = Helper.legalizeInstrStep(MI);
+ auto Res = Helper.legalizeInstrStep(MI, LocObserver);
// Error out if we couldn't legalize this instruction. We may want to
// fall back to DAG ISel instead in the future.
if (Res == LegalizerHelper::UnableToLegalize) {
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e7f40523efaf..c1e0d2549c42 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -16,12 +16,16 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -56,7 +60,8 @@ getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
unsigned EltSize = OrigTy.getScalarSizeInBits();
if (LeftoverSize % EltSize != 0)
return {-1, -1};
- LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+ LeftoverTy = LLT::scalarOrVector(
+ ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
} else {
LeftoverTy = LLT::scalar(LeftoverSize);
}
@@ -100,7 +105,8 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
TLI(*MF.getSubtarget().getTargetLowering()) { }
LegalizerHelper::LegalizeResult
-LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
+LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
+ LostDebugLocObserver &LocObserver) {
LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
MIRBuilder.setInstrAndDebugLoc(MI);
@@ -115,7 +121,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
return AlreadyLegal;
case Libcall:
LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
- return libcall(MI);
+ return libcall(MI, LocObserver);
case NarrowScalar:
LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
return narrowScalar(MI, Step.TypeIdx, Step.NewType);
@@ -173,7 +179,8 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
unsigned EltSize = MainTy.getScalarSizeInBits();
if (LeftoverSize % EltSize != 0)
return false;
- LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+ LeftoverTy = LLT::scalarOrVector(
+ ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
} else {
LeftoverTy = LLT::scalar(LeftoverSize);
}
@@ -215,29 +222,12 @@ void LegalizerHelper::insertParts(Register DstReg,
return;
}
- unsigned PartSize = PartTy.getSizeInBits();
- unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
-
- Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
- MIRBuilder.buildUndef(CurResultReg);
-
- unsigned Offset = 0;
- for (Register PartReg : PartRegs) {
- Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
- MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
- CurResultReg = NewResultReg;
- Offset += PartSize;
- }
-
- for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
- // Use the original output register for the final insert to avoid a copy.
- Register NewResultReg = (I + 1 == E) ?
- DstReg : MRI.createGenericVirtualRegister(ResultTy);
-
- MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
- CurResultReg = NewResultReg;
- Offset += LeftoverPartSize;
- }
+ SmallVector<Register> GCDRegs;
+ LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
+ for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
+ extractGCDType(GCDRegs, GCDTy, PartReg);
+ LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
+ buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
@@ -490,8 +480,9 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
-static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
- MachineInstr &MI) {
+static bool isLibCallInTailPosition(MachineInstr &MI,
+ const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI) {
MachineBasicBlock &MBB = *MI.getParent();
const Function &F = MBB.getParent()->getFunction();
@@ -510,8 +501,47 @@ static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
return false;
- // Only tail call if the following instruction is a standard return.
+ // Only tail call if the following instruction is a standard return or if we
+ // have a `thisreturn` callee, and a sequence like:
+ //
+ // G_MEMCPY %0, %1, %2
+ // $x0 = COPY %0
+ // RET_ReallyLR implicit $x0
auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
+ if (Next != MBB.instr_end() && Next->isCopy()) {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unsupported opcode");
+ case TargetOpcode::G_BZERO:
+ return false;
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET:
+ break;
+ }
+
+ Register VReg = MI.getOperand(0).getReg();
+ if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
+ return false;
+
+ Register PReg = Next->getOperand(0).getReg();
+ if (!PReg.isPhysical())
+ return false;
+
+ auto Ret = next_nodbg(Next, MBB.instr_end());
+ if (Ret == MBB.instr_end() || !Ret->isReturn())
+ return false;
+
+ if (Ret->getNumImplicitOperands() != 1)
+ return false;
+
+ if (PReg != Ret->getOperand(0).getReg())
+ return false;
+
+ // Skip over the COPY that we just validated.
+ Next = Ret;
+ }
+
if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
return false;
@@ -552,16 +582,17 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
Type *OpType) {
auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+ // FIXME: What does the original arg index mean here?
SmallVector<CallLowering::ArgInfo, 3> Args;
for (unsigned i = 1; i < MI.getNumOperands(); i++)
- Args.push_back({MI.getOperand(i).getReg(), OpType});
- return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
- Args);
+ Args.push_back({MI.getOperand(i).getReg(), OpType, 0});
+ return createLibcall(MIRBuilder, Libcall,
+ {MI.getOperand(0).getReg(), OpType, 0}, Args);
}
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstr &MI) {
+ MachineInstr &MI, LostDebugLocObserver &LocObserver) {
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
SmallVector<CallLowering::ArgInfo, 3> Args;
@@ -576,33 +607,47 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
else
OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
- Args.push_back({Reg, OpTy});
+ Args.push_back({Reg, OpTy, 0});
}
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
RTLIB::Libcall RTLibcall;
- switch (MI.getOpcode()) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case TargetOpcode::G_BZERO:
+ RTLibcall = RTLIB::BZERO;
+ break;
case TargetOpcode::G_MEMCPY:
RTLibcall = RTLIB::MEMCPY;
+ Args[0].Flags[0].setReturned();
break;
case TargetOpcode::G_MEMMOVE:
RTLibcall = RTLIB::MEMMOVE;
+ Args[0].Flags[0].setReturned();
break;
case TargetOpcode::G_MEMSET:
RTLibcall = RTLIB::MEMSET;
+ Args[0].Flags[0].setReturned();
break;
default:
- return LegalizerHelper::UnableToLegalize;
+ llvm_unreachable("unsupported opcode");
}
const char *Name = TLI.getLibcallName(RTLibcall);
+ // Unsupported libcall on the target.
+ if (!Name) {
+ LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
+ << MIRBuilder.getTII().getName(Opc) << "\n");
+ return LegalizerHelper::UnableToLegalize;
+ }
+
CallLowering::CallLoweringInfo Info;
Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
Info.Callee = MachineOperand::CreateES(Name);
- Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
+ Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
- isLibCallInTailPosition(MIRBuilder.getTII(), MI);
+ isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
if (!CLI.lowerCall(MIRBuilder, Info))
@@ -610,16 +655,24 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
if (Info.LoweredTailCall) {
assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
+
+ // Check debug locations before removing the return.
+ LocObserver.checkpoint(true);
+
// We must have a return following the call (or debug insts) to get past
// isLibCallInTailPosition.
do {
MachineInstr *Next = MI.getNextNode();
- assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
+ assert(Next &&
+ (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
"Expected instr following MI to be return or debug inst?");
// We lowered a tail call, so the call is now the return from the block.
// Delete the old return.
Next->eraseFromParent();
} while (MI.getNextNode());
+
+ // We expect to lose the debug location from the return.
+ LocObserver.checkpoint(false);
}
return LegalizerHelper::Legalized;
@@ -651,12 +704,13 @@ static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
Type *FromType) {
RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
- return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
- {{MI.getOperand(1).getReg(), FromType}});
+ return createLibcall(MIRBuilder, Libcall,
+ {MI.getOperand(0).getReg(), ToType, 0},
+ {{MI.getOperand(1).getReg(), FromType, 0}});
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::libcall(MachineInstr &MI) {
+LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = LLTy.getSizeInBits();
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
@@ -748,10 +802,14 @@ LegalizerHelper::libcall(MachineInstr &MI) {
return Status;
break;
}
+ case TargetOpcode::G_BZERO:
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE:
case TargetOpcode::G_MEMSET: {
- LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
+ LegalizeResult Result =
+ createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
+ if (Result != Legalized)
+ return Result;
MI.eraseFromParent();
return Result;
}
@@ -783,7 +841,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (SizeOp0 % NarrowSize != 0) {
LLT ImplicitTy = NarrowTy;
if (DstTy.isVector())
- ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
+ ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
@@ -859,74 +917,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
case TargetOpcode::G_FREEZE:
return reduceOperationWidth(MI, TypeIdx, NarrowTy);
-
- case TargetOpcode::G_ADD: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
- // Expand in terms of carry-setting/consuming G_ADDE instructions.
- int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
-
- SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
- extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
-
- Register CarryIn;
- for (int i = 0; i < NumParts; ++i) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
-
- if (i == 0)
- MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
- else {
- MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
- Src2Regs[i], CarryIn);
- }
-
- DstRegs.push_back(DstReg);
- CarryIn = CarryOut;
- }
- Register DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_SUB: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
-
- int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
-
- SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
- extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
-
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
- MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
- {Src1Regs[0], Src2Regs[0]});
- DstRegs.push_back(DstReg);
- Register BorrowIn = BorrowOut;
- for (int i = 1; i < NumParts; ++i) {
- DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
-
- MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
- {Src1Regs[i], Src2Regs[i], BorrowIn});
-
- DstRegs.push_back(DstReg);
- BorrowIn = BorrowOut;
- }
- MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
- MI.eraseFromParent();
- return Legalized;
- }
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_USUBE:
+ return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_MUL:
case TargetOpcode::G_UMULH:
return narrowScalarMul(MI, NarrowTy);
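
The G_ADD/G_SUB expansions deleted above chained G_UADDO/G_UADDE (and the USUBO/USUBE equivalents) across the narrow parts; narrowScalarAddSub now covers the whole add/sub family. A standalone sketch of the same carry chain on two 64-bit limbs, using plain integers rather than the GlobalISel API:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // 128-bit addition as two 64-bit limbs, low limb first, mirroring the
      // G_UADDO (carry out) + G_UADDE (carry in) chain the legalizer builds.
      uint64_t ALo = ~0ull, AHi = 1; // A = 1 * 2^64 + (2^64 - 1)
      uint64_t BLo = 5, BHi = 2;     // B = 2 * 2^64 + 5

      uint64_t SumLo = ALo + BLo;         // low limb add
      uint64_t Carry = SumLo < ALo;       // carry out of the low limb (G_UADDO)
      uint64_t SumHi = AHi + BHi + Carry; // high limb consumes the carry (G_UADDE)

      std::printf("hi=%llu lo=%llu carry=%llu\n",
                  (unsigned long long)SumHi, (unsigned long long)SumLo,
                  (unsigned long long)Carry); // hi=4 lo=4 carry=1
      return 0;
    }
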
@@ -935,53 +936,53 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
case TargetOpcode::G_INSERT:
return narrowScalarInsert(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_LOAD: {
- auto &MMO = **MI.memoperands_begin();
- Register DstReg = MI.getOperand(0).getReg();
+ auto &LoadMI = cast<GLoad>(MI);
+ Register DstReg = LoadMI.getDstReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
return UnableToLegalize;
- if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
+ if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
+ MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
MIRBuilder.buildAnyExt(DstReg, TmpReg);
- MI.eraseFromParent();
+ LoadMI.eraseFromParent();
return Legalized;
}
- return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
+ return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
}
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_SEXTLOAD: {
- bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
- Register DstReg = MI.getOperand(0).getReg();
- Register PtrReg = MI.getOperand(1).getReg();
+ auto &LoadMI = cast<GExtLoad>(MI);
+ Register DstReg = LoadMI.getDstReg();
+ Register PtrReg = LoadMI.getPointerReg();
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- auto &MMO = **MI.memoperands_begin();
+ auto &MMO = LoadMI.getMMO();
unsigned MemSize = MMO.getSizeInBits();
if (MemSize == NarrowSize) {
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
} else if (MemSize < NarrowSize) {
- MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
+ MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
} else if (MemSize > NarrowSize) {
// FIXME: Need to split the load.
return UnableToLegalize;
}
- if (ZExt)
+ if (isa<GZExtLoad>(LoadMI))
MIRBuilder.buildZExt(DstReg, TmpReg);
else
MIRBuilder.buildSExt(DstReg, TmpReg);
- MI.eraseFromParent();
+ LoadMI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_STORE: {
- const auto &MMO = **MI.memoperands_begin();
+ auto &StoreMI = cast<GStore>(MI);
- Register SrcReg = MI.getOperand(0).getReg();
+ Register SrcReg = StoreMI.getValueReg();
LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
return UnableToLegalize;
@@ -992,16 +993,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (SrcTy.isVector() && LeftoverBits != 0)
return UnableToLegalize;
- if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
+ if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- auto &MMO = **MI.memoperands_begin();
MIRBuilder.buildTrunc(TmpReg, SrcReg);
- MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
- MI.eraseFromParent();
+ MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
+ StoreMI.eraseFromParent();
return Legalized;
}
- return reduceLoadStoreWidth(MI, 0, NarrowTy);
+ return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
}
case TargetOpcode::G_SELECT:
return narrowScalarSelect(MI, TypeIdx, NarrowTy);
@@ -1063,6 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+
unsigned NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs(NumParts);
SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
@@ -1100,38 +1105,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
case TargetOpcode::G_ICMP: {
- uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- if (NarrowSize * 2 != SrcSize)
+ Register LHS = MI.getOperand(2).getReg();
+ LLT SrcTy = MRI.getType(LHS);
+ uint64_t SrcSize = SrcTy.getSizeInBits();
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ // TODO: Handle the non-equality case for weird sizes.
+ if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
return UnableToLegalize;
- Observer.changingInstr(MI);
- Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
- Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
+ LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
+ SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
+ if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
+ LHSLeftoverRegs))
+ return UnableToLegalize;
- Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
- Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
+ LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
+ SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
+ if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
+ RHSPartRegs, RHSLeftoverRegs))
+ return UnableToLegalize;
- CmpInst::Predicate Pred =
- static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
- LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
-
- if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
- MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
- MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
- MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
- MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
- MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
+ // We now have the LHS and RHS of the compare split into narrow-type
+ // registers, plus potentially some leftover type.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT ResTy = MRI.getType(Dst);
+ if (ICmpInst::isEquality(Pred)) {
+ // For each part on the LHS and RHS, keep track of the result of XOR-ing
+ // them together. For each equal part, the result should be all 0s. For
+ // each non-equal part, we'll get at least one 1.
+ auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+ SmallVector<Register, 4> Xors;
+ for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
+ auto LHS = std::get<0>(LHSAndRHS);
+ auto RHS = std::get<1>(LHSAndRHS);
+ auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
+ Xors.push_back(Xor);
+ }
+
+ // Build a G_XOR for each leftover register. Each G_XOR must be widened
+ // to the desired narrow type so that we can OR them together later.
+ SmallVector<Register, 4> WidenedXors;
+ for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
+ auto LHS = std::get<0>(LHSAndRHS);
+ auto RHS = std::get<1>(LHSAndRHS);
+ auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
+ LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
+ buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
+ /* PadStrategy = */ TargetOpcode::G_ZEXT);
+ Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
+ }
+
+ // Now, for each part we broke up, we know if they are equal/not equal
+ // based off the G_XOR. We can OR these all together and compare against
+ // 0 to get the result.
+ assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
+ auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
+ for (unsigned I = 2, E = Xors.size(); I < E; ++I)
+ Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
+ MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
} else {
+ // TODO: Handle non-power-of-two types.
+ assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
+ assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
+ Register LHSL = LHSPartRegs[0];
+ Register LHSH = LHSPartRegs[1];
+ Register RHSL = RHSPartRegs[0];
+ Register RHSH = RHSPartRegs[1];
MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpHEQ =
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
- MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
+ MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
}
- Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
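
The equality path added above XORs corresponding narrow parts, ORs all the XOR results together, and compares once against zero. A standalone sketch of the same idea for a value split into 64-bit parts; widths and values are illustrative:

    #include <cstdint>
    #include <cstdio>

    // Equality of two wide values represented as equally sized parts: XOR each
    // pair of parts, OR everything together, and compare the result with zero.
    static bool wideEq(const uint64_t *LHS, const uint64_t *RHS, unsigned NumParts) {
      uint64_t Acc = 0;
      for (unsigned I = 0; I != NumParts; ++I)
        Acc |= LHS[I] ^ RHS[I];
      return Acc == 0;
    }

    int main() {
      uint64_t A[2] = {0x1234, 0xdeadbeef};
      uint64_t B[2] = {0x1234, 0xdeadbeef};
      uint64_t C[2] = {0x1234, 0xdeadbef0};
      std::printf("%d %d\n", wideEq(A, B, 2), wideEq(A, C, 2)); // 1 0
      return 0;
    }
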
@@ -1252,22 +1300,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
}
- case TargetOpcode::G_FPTOUI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_FPTOSI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI:
+ return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_FPEXT:
if (TypeIdx != 0)
return UnableToLegalize;
@@ -1758,22 +1793,68 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
+LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
if (TypeIdx == 1)
return UnableToLegalize; // TODO
- unsigned Op = MI.getOpcode();
- unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO
- ? TargetOpcode::G_ADD
- : TargetOpcode::G_SUB;
- unsigned ExtOpcode =
- Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO
- ? TargetOpcode::G_ZEXT
- : TargetOpcode::G_SEXT;
+
+ unsigned Opcode;
+ unsigned ExtOpcode;
+ Optional<Register> CarryIn = None;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_SADDO:
+ Opcode = TargetOpcode::G_ADD;
+ ExtOpcode = TargetOpcode::G_SEXT;
+ break;
+ case TargetOpcode::G_SSUBO:
+ Opcode = TargetOpcode::G_SUB;
+ ExtOpcode = TargetOpcode::G_SEXT;
+ break;
+ case TargetOpcode::G_UADDO:
+ Opcode = TargetOpcode::G_ADD;
+ ExtOpcode = TargetOpcode::G_ZEXT;
+ break;
+ case TargetOpcode::G_USUBO:
+ Opcode = TargetOpcode::G_SUB;
+ ExtOpcode = TargetOpcode::G_ZEXT;
+ break;
+ case TargetOpcode::G_SADDE:
+ Opcode = TargetOpcode::G_UADDE;
+ ExtOpcode = TargetOpcode::G_SEXT;
+ CarryIn = MI.getOperand(4).getReg();
+ break;
+ case TargetOpcode::G_SSUBE:
+ Opcode = TargetOpcode::G_USUBE;
+ ExtOpcode = TargetOpcode::G_SEXT;
+ CarryIn = MI.getOperand(4).getReg();
+ break;
+ case TargetOpcode::G_UADDE:
+ Opcode = TargetOpcode::G_UADDE;
+ ExtOpcode = TargetOpcode::G_ZEXT;
+ CarryIn = MI.getOperand(4).getReg();
+ break;
+ case TargetOpcode::G_USUBE:
+ Opcode = TargetOpcode::G_USUBE;
+ ExtOpcode = TargetOpcode::G_ZEXT;
+ CarryIn = MI.getOperand(4).getReg();
+ break;
+ }
+
auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
// Do the arithmetic in the larger type.
- auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt});
+ Register NewOp;
+ if (CarryIn) {
+ LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
+ NewOp = MIRBuilder
+ .buildInstr(Opcode, {WideTy, CarryOutTy},
+ {LHSExt, RHSExt, *CarryIn})
+ .getReg(0);
+ } else {
+ NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
+ }
LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
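
The widened add/sub keeps the overflow semantics by truncating the wide result back to the original width, re-extending it, and checking whether that round trip changed the value. A standalone sketch of the check for an 8-bit signed add carried out in 32 bits; the widths are chosen for illustration:

    #include <cstdint>
    #include <cstdio>

    // Signed 8-bit add with overflow detection, computed in a wider type and
    // checked via the truncate/sign-extend round trip used above.
    static bool saddOverflows(int8_t A, int8_t B, int8_t &Out) {
      int32_t Wide = int32_t(A) + int32_t(B); // arithmetic in the wide type
      Out = int8_t(Wide);                     // truncate to the original width
      return int32_t(Out) != Wide;            // re-extend and compare
    }

    int main() {
      int8_t R;
      std::printf("%d\n", saddOverflows(100, 100, R)); // 1: 200 does not fit in i8
      std::printf("%d\n", saddOverflows(50, 20, R));   // 0
      return 0;
    }
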
@@ -1830,10 +1911,105 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx == 1)
+ return UnableToLegalize;
+
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
+ Register Result = MI.getOperand(0).getReg();
+ Register OriginalOverflow = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+ LLT SrcTy = MRI.getType(LHS);
+ LLT OverflowTy = MRI.getType(OriginalOverflow);
+ unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
+
+ // To determine if the result overflowed in the larger type, we extend the
+ // input to the larger type, do the multiply (checking if it overflows),
+ // then also check the high bits of the result to see if overflow happened
+ // there.
+ unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+ auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
+ auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
+
+ auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
+ {LeftOperand, RightOperand});
+ auto Mul = Mulo->getOperand(0);
+ MIRBuilder.buildTrunc(Result, Mul);
+
+ MachineInstrBuilder ExtResult;
+ // Overflow occurred if it occurred in the larger type, or if the high part
+ // of the result does not zero/sign-extend the low part. Check this second
+ // possibility first.
+ if (IsSigned) {
+ // For signed, overflow occurred when the high part does not sign-extend
+ // the low part.
+ ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
+ } else {
+ // Unsigned overflow occurred when the high part does not zero-extend the
+ // low part.
+ ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
+ }
+
+ // Multiplication cannot overflow if the WideTy is >= 2 * original width,
+ // so we don't need to check the overflow result of larger type Mulo.
+ if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
+ auto Overflow =
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
+ // Finally check if the multiplication in the larger type itself overflowed.
+ MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
+ } else {
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
+ }
+ MI.eraseFromParent();
+ return Legalized;
+}
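
widenScalarMulo above decides overflow by checking whether the high half of the wide product is the sign/zero extension of the low half, ORing in the wide multiply's own overflow bit when the wide type is less than twice the source width. A standalone sketch of the unsigned case through a 64-bit product, with plain integers and illustrative widths:

    #include <cstdint>
    #include <cstdio>

    // Unsigned 32-bit multiply-with-overflow via a 64-bit product: overflow
    // happened iff the 64-bit result is not the zero-extension of its low 32 bits.
    static bool umulOverflows(uint32_t A, uint32_t B, uint32_t &Out) {
      uint64_t Wide = uint64_t(A) * uint64_t(B);
      Out = uint32_t(Wide);
      return Wide != uint64_t(Out); // non-zero high bits => overflow
    }

    int main() {
      uint32_t R;
      std::printf("%d\n", umulOverflows(0x10000u, 0x10000u, R)); // 1
      std::printf("%d\n", umulOverflows(123u, 456u, R));         // 0
      return 0;
    }
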
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_ATOMICRMW_XCHG:
+ case TargetOpcode::G_ATOMICRMW_ADD:
+ case TargetOpcode::G_ATOMICRMW_SUB:
+ case TargetOpcode::G_ATOMICRMW_AND:
+ case TargetOpcode::G_ATOMICRMW_OR:
+ case TargetOpcode::G_ATOMICRMW_XOR:
+ case TargetOpcode::G_ATOMICRMW_MIN:
+ case TargetOpcode::G_ATOMICRMW_MAX:
+ case TargetOpcode::G_ATOMICRMW_UMIN:
+ case TargetOpcode::G_ATOMICRMW_UMAX:
+ assert(TypeIdx == 0 && "atomicrmw with second scalar type");
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
+ if (TypeIdx == 0) {
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ assert(TypeIdx == 1 &&
+ "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
case TargetOpcode::G_EXTRACT:
return widenScalarExtract(MI, TypeIdx, WideTy);
case TargetOpcode::G_INSERT:
@@ -1846,7 +2022,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_SSUBO:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO:
- return widenScalarAddoSubo(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_USUBE:
+ return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO:
+ return widenScalarMulo(MI, TypeIdx, WideTy);
case TargetOpcode::G_SADDSAT:
case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_SSHLSAT:
@@ -1943,6 +2126,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_ABS:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
@@ -1959,6 +2149,21 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_SBFX:
+ case TargetOpcode::G_UBFX:
+ Observer.changingInstr(MI);
+
+ if (TypeIdx == 0) {
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ } else {
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
+ }
+
+ Observer.changedInstr(MI);
+ return Legalized;
+
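
G_UBFX/G_SBFX extract a bitfield of `width` bits starting at `lsb`, zero- or sign-extended; the widening above only extends the operands and keeps the semantics. A standalone sketch of both extract flavours on 32-bit values; the helper names and operand order are illustrative, not the MIR encoding:

    #include <cstdint>
    #include <cstdio>

    // Unsigned bitfield extract: bits [Lsb, Lsb+Width) zero-extended.
    static uint32_t ubfx(uint32_t X, unsigned Lsb, unsigned Width) {
      return (X >> Lsb) & ((Width == 32) ? ~0u : ((1u << Width) - 1));
    }

    // Signed bitfield extract: same bits, sign-extended from bit Width-1.
    static int32_t sbfx(uint32_t X, unsigned Lsb, unsigned Width) {
      uint32_t Field = ubfx(X, Lsb, Width);
      uint32_t SignBit = 1u << (Width - 1);
      return (int32_t)((Field ^ SignBit) - SignBit); // fold in the sign bit
    }

    int main() {
      std::printf("%u\n", ubfx(0xABCD1234u, 8, 8));  // 18 (0x12)
      std::printf("%d\n", sbfx(0x0000F000u, 12, 4)); // -1
      return 0;
    }
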
case TargetOpcode::G_SHL:
Observer.changingInstr(MI);
@@ -1986,6 +2191,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_SDIVREM:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+ widenScalarDst(MI, WideTy);
+ widenScalarDst(MI, WideTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
Observer.changingInstr(MI);
@@ -2016,6 +2230,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_UDIVREM:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
+ widenScalarDst(MI, WideTy);
+ widenScalarDst(MI, WideTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+
case TargetOpcode::G_SELECT:
Observer.changingInstr(MI);
if (TypeIdx == 0) {
@@ -2202,9 +2425,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
LLT VecTy = MRI.getType(VecReg);
Observer.changingInstr(MI);
- widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
- WideTy.getSizeInBits()),
- 1, TargetOpcode::G_SEXT);
+ widenScalarSrc(
+ MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
+ TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
@@ -2225,7 +2448,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Register VecReg = MI.getOperand(1).getReg();
LLT VecTy = MRI.getType(VecReg);
- LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
+ LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
@@ -2385,7 +2608,7 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) {
// %3:_(<2 x s8>) = G_BITCAST %2
// %4:_(<2 x s8>) = G_BITCAST %3
// %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
- DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
+ DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
SrcPartTy = SrcEltTy;
} else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
//
@@ -2397,7 +2620,7 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) {
// %3:_(s16) = G_BITCAST %2
// %4:_(s16) = G_BITCAST %3
// %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
- SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
+ SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
DstCastTy = DstEltTy;
}
@@ -2488,7 +2711,8 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
// Type of the intermediate result vector.
const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
- LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
+ LLT MidTy =
+ LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
@@ -2654,125 +2878,168 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
return UnableToLegalize;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerLoad(MachineInstr &MI) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
// Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
- Register DstReg = MI.getOperand(0).getReg();
- Register PtrReg = MI.getOperand(1).getReg();
+ Register DstReg = LoadMI.getDstReg();
+ Register PtrReg = LoadMI.getPointerReg();
LLT DstTy = MRI.getType(DstReg);
- auto &MMO = **MI.memoperands_begin();
+ MachineMemOperand &MMO = LoadMI.getMMO();
+ LLT MemTy = MMO.getMemoryType();
+ MachineFunction &MF = MIRBuilder.getMF();
+ if (MemTy.isVector())
+ return UnableToLegalize;
- if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
- if (MI.getOpcode() == TargetOpcode::G_LOAD) {
- // This load needs splitting into power of 2 sized loads.
- if (DstTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(DstTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
- // Our strategy here is to generate anyextending loads for the smaller
- // types up to next power-2 result type, and then combine the two larger
- // result values together, before truncating back down to the non-pow-2
- // type.
- // E.g. v1 = i24 load =>
- // v2 = i32 zextload (2 byte)
- // v3 = i32 load (1 byte)
- // v4 = i32 shl v3, 16
- // v5 = i32 or v4, v2
- // v1 = i24 trunc v5
- // By doing this we generate the correct truncate which should get
- // combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
- uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
-
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
- &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
-
- LLT PtrTy = MRI.getType(PtrReg);
- unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
- LLT AnyExtTy = LLT::scalar(AnyExtSize);
- Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
- Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
- auto LargeLoad = MIRBuilder.buildLoadInstr(
- TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
-
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
- auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
- *SmallMMO);
-
- auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
- auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
- auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
- MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
- MI.eraseFromParent();
- return Legalized;
+ unsigned MemSizeInBits = MemTy.getSizeInBits();
+ unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
+
+ if (MemSizeInBits != MemStoreSizeInBits) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
+ MachineMemOperand *NewMMO =
+ MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
+
+ Register LoadReg = DstReg;
+ LLT LoadTy = DstTy;
+
+ // If this wasn't already an extending load, we need to widen the result
+ // register to avoid creating a load with a narrower result than the source.
+ if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
+ LoadTy = WideMemTy;
+ LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
}
- MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
- MI.eraseFromParent();
+ if (isa<GSExtLoad>(LoadMI)) {
+ auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
+ MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
+ } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
+ auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from Wide thus automatically gives zext from MemVT.
+ MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
+ } else {
+ MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
+ }
+
+ if (DstTy != LoadTy)
+ MIRBuilder.buildTrunc(DstReg, LoadReg);
+
+ LoadMI.eraseFromParent();
return Legalized;
}
- if (DstTy.isScalar()) {
- Register TmpReg =
- MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
- MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode");
- case TargetOpcode::G_LOAD:
- MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
- break;
- case TargetOpcode::G_SEXTLOAD:
- MIRBuilder.buildSExt(DstReg, TmpReg);
- break;
- case TargetOpcode::G_ZEXTLOAD:
- MIRBuilder.buildZExt(DstReg, TmpReg);
- break;
- }
+ // This load needs splitting into power of 2 sized loads.
+ if (DstTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(MemSizeInBits))
+ return UnableToLegalize; // Don't know what we're being asked to do.
- MI.eraseFromParent();
- return Legalized;
+ // Big endian lowering not implemented.
+ if (MIRBuilder.getDataLayout().isBigEndian())
+ return UnableToLegalize;
+
+ // Our strategy here is to generate anyextending loads for the smaller
+ // types up to next power-2 result type, and then combine the two larger
+ // result values together, before truncating back down to the non-pow-2
+ // type.
+ // E.g. v1 = i24 load =>
+ // v2 = i32 zextload (2 byte)
+ // v3 = i32 load (1 byte)
+ // v4 = i32 shl v3, 16
+ // v5 = i32 or v4, v2
+ // v1 = i24 trunc v5
+ // By doing this we generate the correct truncate which should get
+ // combined away as an artifact with a matching extend.
+ uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
+
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO =
+ MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
+ LLT AnyExtTy = LLT::scalar(AnyExtSize);
+ auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
+ PtrReg, *LargeMMO);
+
+ auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
+ LargeSplitSize / 8);
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
+ auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
+ SmallPtr, *SmallMMO);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+ auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+
+ if (AnyExtTy == DstTy)
+ MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
+ else {
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+ MIRBuilder.buildTrunc(DstReg, {Or});
}
- return UnableToLegalize;
+ LoadMI.eraseFromParent();
+ return Legalized;
}
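
The rewritten lowerLoad keeps the strategy spelled out in its comment: a zero-extending load of the large power-of-two chunk, a small load at a byte offset, shift-and-OR, then truncate to the odd-sized result. A standalone sketch for a 24-bit value; buffer layout and widths are illustrative, and it assumes a little-endian host, matching the patch's own big-endian limitation:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Load a little-endian 24-bit value as a 16-bit zext load plus an 8-bit
    // load, combined with shift+or, mirroring the non-power-of-2 G_LOAD split.
    static uint32_t load_i24_le(const unsigned char *P) {
      uint16_t Large; // the PowerOf2Floor(24) = 16-bit piece at offset 0
      uint8_t Small;  // the remaining 8-bit piece at offset 2
      std::memcpy(&Large, P, 2);
      std::memcpy(&Small, P + 2, 1);
      uint32_t Wide = uint32_t(Large) | (uint32_t(Small) << 16);
      return Wide & 0xFFFFFFu; // truncate back down to 24 bits
    }

    int main() {
      unsigned char Buf[3] = {0x34, 0x12, 0x7F}; // 0x7F1234 little-endian
      std::printf("0x%X\n", (unsigned)load_i24_le(Buf)); // 0x7F1234
      return 0;
    }
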
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerStore(MachineInstr &MI) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
// Lower a non-power of 2 store into multiple pow-2 stores.
// E.g. split an i24 store into an i16 store + i8 store.
// We do this by first extending the stored value to the next largest power
// of 2 type, and then using truncating stores to store the components.
// By doing this, likewise with G_LOAD, generate an extend that can be
// artifact-combined away instead of leaving behind extracts.
- Register SrcReg = MI.getOperand(0).getReg();
- Register PtrReg = MI.getOperand(1).getReg();
+ Register SrcReg = StoreMI.getValueReg();
+ Register PtrReg = StoreMI.getPointerReg();
LLT SrcTy = MRI.getType(SrcReg);
- MachineMemOperand &MMO = **MI.memoperands_begin();
- if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
- return UnableToLegalize;
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand &MMO = **StoreMI.memoperands_begin();
+ LLT MemTy = MMO.getMemoryType();
+
if (SrcTy.isVector())
return UnableToLegalize;
- if (isPowerOf2_32(SrcTy.getSizeInBits()))
+
+ unsigned StoreWidth = MemTy.getSizeInBits();
+ unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
+
+ if (StoreWidth != StoreSizeInBits) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ LLT WideTy = LLT::scalar(StoreSizeInBits);
+
+ if (StoreSizeInBits > SrcTy.getSizeInBits()) {
+ // Avoid creating a store with a narrower source than result.
+ SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
+ SrcTy = WideTy;
+ }
+
+ auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
+
+ MachineMemOperand *NewMMO =
+ MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
+ MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
+ StoreMI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (isPowerOf2_32(MemTy.getSizeInBits()))
return UnableToLegalize; // Don't know what we're being asked to do.
- // Extend to the next pow-2.
- const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
- auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
+ // Extend to the next pow-2. If this store was itself the result of lowering,
+ // e.g. an s56 store being broken into s32 + s24, we might have a stored type
+  // that's wider than the stored size.
+ const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
+ auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
// Obtain the smaller value by shifting away the larger value.
- uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
- uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
- auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
- auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
+ uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
+ uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
+ auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
+ auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
// Generate the PtrAdd and truncating stores.
LLT PtrTy = MRI.getType(PtrReg);
@@ -2780,16 +3047,15 @@ LegalizerHelper::lowerStore(MachineInstr &MI) {
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
- MachineFunction &MF = MIRBuilder.getMF();
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
MachineMemOperand *SmallMMO =
MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
- MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
- MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
- MI.eraseFromParent();
+ MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
+ MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
+ StoreMI.eraseFromParent();
return Legalized;
}
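
lowerStore does the reverse: widen the value, store the large power-of-two piece at the base address, shift the remainder down and store it at the byte offset. A standalone sketch for a 24-bit store under the same little-endian assumption:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Store a 24-bit value as a 16-bit store plus an 8-bit store of the bits
    // shifted past the large piece, mirroring the non-power-of-2 G_STORE split.
    static void store_i24_le(unsigned char *P, uint32_t V) {
      uint16_t Large = uint16_t(V);     // low 16 bits, truncating store
      uint8_t Small = uint8_t(V >> 16); // shift right by LargeSplitSize, store
      std::memcpy(P, &Large, 2);
      std::memcpy(P + 2, &Small, 1);
    }

    int main() {
      unsigned char Buf[3] = {};
      store_i24_le(Buf, 0x7F1234u);
      std::printf("%02X %02X %02X\n", (unsigned)Buf[0], (unsigned)Buf[1],
                  (unsigned)Buf[2]); // 34 12 7F
      return 0;
    }
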
@@ -2799,9 +3065,15 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
case TargetOpcode::G_LOAD: {
if (TypeIdx != 0)
return UnableToLegalize;
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+
+ // Not sure how to interpret a bitcast of an extending load.
+ if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
+ return UnableToLegalize;
Observer.changingInstr(MI);
bitcastDst(MI, CastTy, 0);
+ MMO.setType(CastTy);
Observer.changedInstr(MI);
return Legalized;
}
@@ -2809,8 +3081,15 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
if (TypeIdx != 0)
return UnableToLegalize;
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+
+ // Not sure how to interpret a bitcast of a truncating store.
+ if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
+ return UnableToLegalize;
+
Observer.changingInstr(MI);
bitcastSrc(MI, CastTy, 0);
+ MMO.setType(CastTy);
Observer.changedInstr(MI);
return Legalized;
}
@@ -2980,9 +3259,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
- return lowerLoad(MI);
+ return lowerLoad(cast<GAnyLoad>(MI));
case TargetOpcode::G_STORE:
- return lowerStore(MI);
+ return lowerStore(cast<GStore>(MI));
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
@@ -3132,24 +3411,19 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_SSHLSAT:
case G_USHLSAT:
return lowerShlSat(MI);
- case G_ABS: {
- // Expand %res = G_ABS %a into:
- // %v1 = G_ASHR %a, scalar_size-1
- // %v2 = G_ADD %a, %v1
- // %res = G_XOR %v2, %v1
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- Register OpReg = MI.getOperand(1).getReg();
- auto ShiftAmt =
- MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
- auto Shift =
- MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
- auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
- MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
- MI.eraseFromParent();
- return Legalized;
- }
+ case G_ABS:
+ return lowerAbsToAddXor(MI);
case G_SELECT:
return lowerSelect(MI);
+ case G_SDIVREM:
+ case G_UDIVREM:
+ return lowerDIVREM(MI);
+ case G_FSHL:
+ case G_FSHR:
+ return lowerFunnelShift(MI);
+ case G_ROTL:
+ case G_ROTR:
+ return lowerRotate(MI);
}
}
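
The inline G_ABS expansion deleted above (now routed through lowerAbsToAddXor) relies on the classic arithmetic-shift/add/xor identity. A standalone 32-bit sketch of that identity:

    #include <cstdint>
    #include <cstdio>

    // abs(a) == (a + (a >> 31)) ^ (a >> 31) for 32-bit two's complement values
    // (INT32_MIN aside, as with the generic expansion).
    static int32_t absAddXor(int32_t A) {
      int32_t Mask = A >> 31; // arithmetic shift: 0 for non-negative, -1 otherwise
      return (A + Mask) ^ Mask;
    }

    int main() {
      std::printf("%d %d %d\n", absAddXor(-42), absAddXor(0), absAddXor(7)); // 42 0 7
      return 0;
    }
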
@@ -3248,9 +3522,6 @@ LegalizerHelper::fewerElementsVectorMultiEltType(
return UnableToLegalize;
const LLT NarrowTy0 = NarrowTyArg;
- const unsigned NewNumElts =
- NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
-
const Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT LeftoverTy0;
@@ -3270,7 +3541,9 @@ LegalizerHelper::fewerElementsVectorMultiEltType(
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
Register SrcReg = MI.getOperand(I).getReg();
LLT SrcTyI = MRI.getType(SrcReg);
- LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
+ const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount()
+ : ElementCount::getFixed(1);
+ LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType());
LLT LeftoverTyI;
// Split this operand into the requested typed registers, and any leftover
@@ -3345,7 +3618,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
return UnableToLegalize;
- NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
+ NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType());
} else {
NumParts = DstTy.getNumElements();
NarrowTy1 = SrcTy.getElementType();
@@ -3389,9 +3662,9 @@ LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
NarrowTy0 = NarrowTy;
NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
- NarrowTy1 = NarrowTy.isVector() ?
- LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
- SrcTy.getElementType();
+ NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(),
+ SrcTy.getScalarSizeInBits())
+ : SrcTy.getElementType();
} else {
unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
@@ -3399,8 +3672,8 @@ LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
NarrowTy.getNumElements();
- NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
- DstTy.getScalarSizeInBits());
+ NarrowTy0 =
+ LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits());
NarrowTy1 = NarrowTy;
}
@@ -3471,8 +3744,9 @@ LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
if (CondTy.getNumElements() == NumParts)
NarrowTy1 = CondTy.getElementType();
else
- NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
- CondTy.getScalarSizeInBits());
+ NarrowTy1 =
+ LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts),
+ CondTy.getScalarSizeInBits());
}
} else {
NumParts = CondTy.getNumElements();
@@ -3618,6 +3892,55 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
return Legalized;
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register Result = MI.getOperand(0).getReg();
+ Register Overflow = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+
+ LLT SrcTy = MRI.getType(LHS);
+ if (!SrcTy.isVector())
+ return UnableToLegalize;
+
+ LLT ElementType = SrcTy.getElementType();
+ LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
+ const ElementCount NumResult = SrcTy.getElementCount();
+ LLT GCDTy = getGCDType(SrcTy, NarrowTy);
+
+ // Unmerge the operands to smaller parts of GCD type.
+ auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
+ auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
+
+ const int NumOps = UnmergeLHS->getNumOperands() - 1;
+ const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps);
+ LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
+ LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
+
+ // Perform the operation over unmerged parts.
+ SmallVector<Register, 8> ResultParts;
+ SmallVector<Register, 8> OverflowParts;
+ for (int I = 0; I != NumOps; ++I) {
+ Register Operand1 = UnmergeLHS->getOperand(I).getReg();
+ Register Operand2 = UnmergeRHS->getOperand(I).getReg();
+ auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
+ {Operand1, Operand2});
+ ResultParts.push_back(PartMul->getOperand(0).getReg());
+ OverflowParts.push_back(PartMul->getOperand(1).getReg());
+ }
+
+ LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
+ LLT OverflowLCMTy =
+ LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy);
+
+ // Recombine the pieces to the original result and overflow registers.
+ buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
+ buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
// a vector
//
@@ -3681,7 +4004,11 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
// If the index is a constant, we can really break this down as you would
// expect, and index into the target size pieces.
int64_t IdxVal;
- if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+ auto MaybeCst =
+ getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
+ /*HandleFConstants*/ false);
+ if (MaybeCst) {
+ IdxVal = MaybeCst->Value.getSExtValue();
// Avoid out of bounds indexing the pieces.
if (IdxVal >= VecTy.getNumElements()) {
MIRBuilder.buildUndef(DstReg);
@@ -3731,27 +4058,24 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
+LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
LLT NarrowTy) {
// FIXME: Don't know how to handle secondary types yet.
if (TypeIdx != 0)
return UnableToLegalize;
- MachineMemOperand *MMO = *MI.memoperands_begin();
-
// This implementation doesn't work for atomics. Give up instead of doing
// something invalid.
- if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
- MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
+ if (LdStMI.isAtomic())
return UnableToLegalize;
- bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
- Register ValReg = MI.getOperand(0).getReg();
- Register AddrReg = MI.getOperand(1).getReg();
+ bool IsLoad = isa<GLoad>(LdStMI);
+ Register ValReg = LdStMI.getReg(0);
+ Register AddrReg = LdStMI.getPointerReg();
LLT ValTy = MRI.getType(ValReg);
// FIXME: Do we need a distinct NarrowMemory legalize action?
- if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
+ if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
return UnableToLegalize;
}
@@ -3782,20 +4106,20 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
// is a load, return the new registers in ValRegs. For a store, each element
// of ValRegs should be PartTy. Returns the next offset that needs to be
// handled.
+ auto MMO = LdStMI.getMMO();
auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
unsigned Offset) -> unsigned {
MachineFunction &MF = MIRBuilder.getMF();
unsigned PartSize = PartTy.getSizeInBits();
for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
Offset += PartSize, ++Idx) {
- unsigned ByteSize = PartSize / 8;
unsigned ByteOffset = Offset / 8;
Register NewAddrReg;
MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
MachineMemOperand *NewMMO =
- MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
+ MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
if (IsLoad) {
Register Dst = MRI.createGenericVirtualRegister(PartTy);
@@ -3820,7 +4144,7 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
LeftoverTy, NarrowLeftoverRegs);
}
- MI.eraseFromParent();
+ LdStMI.eraseFromParent();
return Legalized;
}
@@ -3830,28 +4154,32 @@ LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
assert(TypeIdx == 0 && "only one type index expected");
const unsigned Opc = MI.getOpcode();
- const int NumOps = MI.getNumOperands() - 1;
- const Register DstReg = MI.getOperand(0).getReg();
+ const int NumDefOps = MI.getNumExplicitDefs();
+ const int NumSrcOps = MI.getNumOperands() - NumDefOps;
const unsigned Flags = MI.getFlags();
const unsigned NarrowSize = NarrowTy.getSizeInBits();
const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
- assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");
+ assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
+ "result and 1-3 sources or 2 results and "
+ "1-2 sources");
+
+ SmallVector<Register, 2> DstRegs;
+ for (int I = 0; I < NumDefOps; ++I)
+ DstRegs.push_back(MI.getOperand(I).getReg());
// First of all check whether we are narrowing (changing the element type)
// or reducing the vector elements
- const LLT DstTy = MRI.getType(DstReg);
+ const LLT DstTy = MRI.getType(DstRegs[0]);
const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
SmallVector<Register, 8> ExtractedRegs[3];
SmallVector<Register, 8> Parts;
- unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
-
// Break down all the sources into NarrowTy pieces we can operate on. This may
// involve creating merges to a wider type, padded with undef.
- for (int I = 0; I != NumOps; ++I) {
- Register SrcReg = MI.getOperand(I + 1).getReg();
+ for (int I = 0; I != NumSrcOps; ++I) {
+ Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
LLT SrcTy = MRI.getType(SrcReg);
// The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
@@ -3868,7 +4196,9 @@ LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
}
} else {
- OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
+ auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount()
+ : ElementCount::getFixed(1);
+ OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType());
}
LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
@@ -3878,10 +4208,10 @@ LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
TargetOpcode::G_ANYEXT);
}
- SmallVector<Register, 8> ResultRegs;
+ SmallVector<Register, 8> ResultRegs[2];
// Input operands for each sub-instruction.
- SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
+ SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
int NumParts = ExtractedRegs[0].size();
const unsigned DstSize = DstTy.getSizeInBits();
@@ -3903,33 +4233,44 @@ LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
for (int I = 0; I != NumRealParts; ++I) {
// Emit this instruction on each of the split pieces.
- for (int J = 0; J != NumOps; ++J)
+ for (int J = 0; J != NumSrcOps; ++J)
InputRegs[J] = ExtractedRegs[J][I];
- auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
- ResultRegs.push_back(Inst.getReg(0));
+ MachineInstrBuilder Inst;
+ if (NumDefOps == 1)
+ Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
+ else
+ Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
+ Flags);
+
+ for (int J = 0; J != NumDefOps; ++J)
+ ResultRegs[J].push_back(Inst.getReg(J));
}
// Fill out the widened result with undef instead of creating instructions
// with undef inputs.
int NumUndefParts = NumParts - NumRealParts;
- if (NumUndefParts != 0)
- ResultRegs.append(NumUndefParts,
- MIRBuilder.buildUndef(NarrowDstTy).getReg(0));
+ if (NumUndefParts != 0) {
+ Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
+ for (int I = 0; I != NumDefOps; ++I)
+ ResultRegs[I].append(NumUndefParts, Undef);
+ }
// Extract the possibly padded result. Use a scratch register if we need to do
// a final bitcast, otherwise use the original result register.
Register MergeDstReg;
- if (IsNarrow && DstTy.isVector())
- MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
- else
- MergeDstReg = DstReg;
+ for (int I = 0; I != NumDefOps; ++I) {
+ if (IsNarrow && DstTy.isVector())
+ MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
+ else
+ MergeDstReg = DstRegs[I];
- buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);
+ buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
- // Recast to vector if we narrowed a vector
- if (IsNarrow && DstTy.isVector())
- MIRBuilder.buildBitcast(DstReg, MergeDstReg);
+ // Recast to vector if we narrowed a vector
+ if (IsNarrow && DstTy.isVector())
+ MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
+ }
MI.eraseFromParent();
return Legalized;
@@ -4007,10 +4348,13 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_UDIV:
case G_SREM:
case G_UREM:
+ case G_SDIVREM:
+ case G_UDIVREM:
case G_SMIN:
case G_SMAX:
case G_UMIN:
case G_UMAX:
+ case G_ABS:
case G_FMINNUM:
case G_FMAXNUM:
case G_FMINNUM_IEEE:
@@ -4025,6 +4369,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_UADDSAT:
case G_USUBSAT:
return reduceOperationWidth(MI, TypeIdx, NarrowTy);
+ case G_UMULO:
+ case G_SMULO:
+ return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
case G_SHL:
case G_LSHR:
case G_ASHR:
@@ -4071,14 +4418,286 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
case G_LOAD:
case G_STORE:
- return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
+ return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
case G_SEXT_INREG:
return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
+ GISEL_VECREDUCE_CASES_NONSEQ
+ return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
+ case G_SHUFFLE_VECTOR:
+ return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
default:
return UnableToLegalize;
}
}
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
+ MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src1Reg = MI.getOperand(1).getReg();
+ Register Src2Reg = MI.getOperand(2).getReg();
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT Src1Ty = MRI.getType(Src1Reg);
+ LLT Src2Ty = MRI.getType(Src2Reg);
+ // The shuffle should be canonicalized by now.
+ if (DstTy != Src1Ty)
+ return UnableToLegalize;
+ if (DstTy != Src2Ty)
+ return UnableToLegalize;
+
+ if (!isPowerOf2_32(DstTy.getNumElements()))
+ return UnableToLegalize;
+
+ // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
+  // Further legalization attempts will be needed to split it further.
+ NarrowTy =
+ DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
+ unsigned NewElts = NarrowTy.getNumElements();
+
+ SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
+ extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
+ extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
+ Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
+ SplitSrc2Regs[1]};
+
+ Register Hi, Lo;
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ Register &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool UseBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = Mask[FirstMaskIdx + MaskOffset];
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ UseBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (UseBuildVector) {
+ LLT EltTy = NarrowTy.getElementType();
+ SmallVector<Register, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = Mask[FirstMaskIdx + MaskOffset];
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(MIRBuilder
+ .buildExtractVectorElement(
+ EltTy, Inputs[Input],
+ MIRBuilder.buildConstant(LLT::scalar(32), Idx))
+ .getReg(0));
+ }
+
+ // Construct the Lo/Hi output using a G_BUILD_VECTOR.
+ Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
+ } else {
+ Register Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ Register Op1 = InputUsed[1] == -1U
+ ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
+ : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
+ }
+
+ Ops.clear();
+ }
+
+ MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
+ MI.eraseFromParent();
+ return Legalized;
+}
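A minimal standalone sketch of the mask convention the splitting above relies on: indices in [0, NumElts) pick from the first source, indices in [NumElts, 2*NumElts) pick from the second, and -1 marks an undef lane. Names and the fixed width of 4 are illustrative only, not from the patch.

#include <cstddef>

// Evaluate a shuffle mask over two 4-element sources; -1 lanes are left as 0
// here to stand in for undef.
void applyShuffleMask(const int (&A)[4], const int (&B)[4],
                      const int (&Mask)[4], int (&Out)[4]) {
  for (std::size_t I = 0; I != 4; ++I) {
    int Idx = Mask[I];
    if (Idx < 0)
      Out[I] = 0;          // undef lane
    else if (Idx < 4)
      Out[I] = A[Idx];     // first source
    else
      Out[I] = B[Idx - 4]; // second source
  }
}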
+
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
+ MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
+ Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
+ "Sequential reductions not expected");
+
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ // The semantics of the normal non-sequential reductions allow us to freely
+ // re-associate the operation.
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
+ return UnableToLegalize;
+
+ SmallVector<Register> SplitSrcs;
+ const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
+ extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
+ SmallVector<Register> PartialReductions;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ PartialReductions.push_back(
+ MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
+ }
+
+ unsigned ScalarOpc;
+ switch (Opc) {
+ case TargetOpcode::G_VECREDUCE_FADD:
+ ScalarOpc = TargetOpcode::G_FADD;
+ break;
+ case TargetOpcode::G_VECREDUCE_FMUL:
+ ScalarOpc = TargetOpcode::G_FMUL;
+ break;
+ case TargetOpcode::G_VECREDUCE_FMAX:
+ ScalarOpc = TargetOpcode::G_FMAXNUM;
+ break;
+ case TargetOpcode::G_VECREDUCE_FMIN:
+ ScalarOpc = TargetOpcode::G_FMINNUM;
+ break;
+ case TargetOpcode::G_VECREDUCE_ADD:
+ ScalarOpc = TargetOpcode::G_ADD;
+ break;
+ case TargetOpcode::G_VECREDUCE_MUL:
+ ScalarOpc = TargetOpcode::G_MUL;
+ break;
+ case TargetOpcode::G_VECREDUCE_AND:
+ ScalarOpc = TargetOpcode::G_AND;
+ break;
+ case TargetOpcode::G_VECREDUCE_OR:
+ ScalarOpc = TargetOpcode::G_OR;
+ break;
+ case TargetOpcode::G_VECREDUCE_XOR:
+ ScalarOpc = TargetOpcode::G_XOR;
+ break;
+ case TargetOpcode::G_VECREDUCE_SMAX:
+ ScalarOpc = TargetOpcode::G_SMAX;
+ break;
+ case TargetOpcode::G_VECREDUCE_SMIN:
+ ScalarOpc = TargetOpcode::G_SMIN;
+ break;
+ case TargetOpcode::G_VECREDUCE_UMAX:
+ ScalarOpc = TargetOpcode::G_UMAX;
+ break;
+ case TargetOpcode::G_VECREDUCE_UMIN:
+ ScalarOpc = TargetOpcode::G_UMIN;
+ break;
+ default:
+ LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
+ return UnableToLegalize;
+ }
+
+ // If the types involved are powers of 2, we can generate intermediate vector
+ // ops, before generating a final reduction operation.
+ if (isPowerOf2_32(SrcTy.getNumElements()) &&
+ isPowerOf2_32(NarrowTy.getNumElements())) {
+ return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
+ }
+
+ Register Acc = PartialReductions[0];
+ for (unsigned Part = 1; Part < NumParts; ++Part) {
+ if (Part == NumParts - 1) {
+ MIRBuilder.buildInstr(ScalarOpc, {DstReg},
+ {Acc, PartialReductions[Part]});
+ } else {
+ Acc = MIRBuilder
+ .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
+ .getReg(0);
+ }
+ }
+ MI.eraseFromParent();
+ return Legalized;
+}
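A rough scalar sketch (not the builder code itself) of why non-sequential reductions can be split this way: each NarrowTy piece is reduced on its own, and the partial results are folded together with the matching scalar opcode. The function name and the 8-lane width are assumptions for illustration.

// vecreduce_add over 8 lanes, split into two 4-lane partial reductions whose
// results are combined with a plain add; valid because the non-sequential
// reductions are freely re-associable.
int reduceAdd8(const int (&V)[8]) {
  int Lo = V[0] + V[1] + V[2] + V[3]; // partial G_VECREDUCE_ADD, low half
  int Hi = V[4] + V[5] + V[6] + V[7]; // partial G_VECREDUCE_ADD, high half
  return Lo + Hi;                     // scalar G_ADD of the partial results
}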
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
+ LLT SrcTy, LLT NarrowTy,
+ unsigned ScalarOpc) {
+ SmallVector<Register> SplitSrcs;
+ // Split the sources into NarrowTy size pieces.
+ extractParts(SrcReg, NarrowTy,
+ SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
+ // We're going to do a tree reduction using vector operations until we have
+ // one NarrowTy size value left.
+ while (SplitSrcs.size() > 1) {
+ SmallVector<Register> PartialRdxs;
+ for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
+ Register LHS = SplitSrcs[Idx];
+ Register RHS = SplitSrcs[Idx + 1];
+ // Create the intermediate vector op.
+ Register Res =
+ MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
+ PartialRdxs.push_back(Res);
+ }
+ SplitSrcs = std::move(PartialRdxs);
+ }
+ // Finally generate the requested NarrowTy based reduction.
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(SplitSrcs[0]);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
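A sketch of the power-of-two tree step, under the same illustrative assumptions: pairs of NarrowTy-sized pieces are combined with full-width vector ops until a single piece remains, and only that piece feeds the final reduction.

#include <array>

// 16 lanes -> 8 -> 4 with element-wise adds (the intermediate vector ops),
// then a single 4-lane reduction at the end.
int treeReduceAdd16(const std::array<int, 16> &V) {
  std::array<int, 8> Half8;
  for (int I = 0; I != 8; ++I)
    Half8[I] = V[I] + V[I + 8];       // one NarrowTy-wide vector G_ADD
  std::array<int, 4> Half4;
  for (int I = 0; I != 4; ++I)
    Half4[I] = Half8[I] + Half8[I + 4];
  return Half4[0] + Half4[1] + Half4[2] + Half4[3]; // final narrow reduction
}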
+
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
const LLT HalfTy, const LLT AmtTy) {
@@ -4388,11 +5007,56 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
}
case TargetOpcode::G_PHI:
return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
default:
return UnableToLegalize;
}
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
+ unsigned int TypeIdx, LLT MoreTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src1Reg = MI.getOperand(1).getReg();
+ Register Src2Reg = MI.getOperand(2).getReg();
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT Src1Ty = MRI.getType(Src1Reg);
+ LLT Src2Ty = MRI.getType(Src2Reg);
+ unsigned NumElts = DstTy.getNumElements();
+ unsigned WidenNumElts = MoreTy.getNumElements();
+
+ // Expect a canonicalized shuffle.
+ if (DstTy != Src1Ty || DstTy != Src2Ty)
+ return UnableToLegalize;
+
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned I = 0; I != NumElts; ++I) {
+ int Idx = Mask[I];
+ if (Idx < static_cast<int>(NumElts))
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned I = NumElts; I != WidenNumElts; ++I)
+ NewMask.push_back(-1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg(),
+ MI.getOperand(2).getReg(), NewMask);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
ArrayRef<Register> Src1Regs,
ArrayRef<Register> Src2Regs,
@@ -4457,6 +5121,100 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstType = MRI.getType(DstReg);
+ // FIXME: add support for vector types
+ if (DstType.isVector())
+ return UnableToLegalize;
+
+ unsigned Opcode = MI.getOpcode();
+ unsigned OpO, OpE, OpF;
+ switch (Opcode) {
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_ADD:
+ OpO = TargetOpcode::G_UADDO;
+ OpE = TargetOpcode::G_UADDE;
+ OpF = TargetOpcode::G_UADDE;
+ if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
+ OpF = TargetOpcode::G_SADDE;
+ break;
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SUB:
+ OpO = TargetOpcode::G_USUBO;
+ OpE = TargetOpcode::G_USUBE;
+ OpF = TargetOpcode::G_USUBE;
+ if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
+ OpF = TargetOpcode::G_SSUBE;
+ break;
+ default:
+ llvm_unreachable("Unexpected add/sub opcode!");
+ }
+
+ // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
+ unsigned NumDefs = MI.getNumExplicitDefs();
+ Register Src1 = MI.getOperand(NumDefs).getReg();
+ Register Src2 = MI.getOperand(NumDefs + 1).getReg();
+ Register CarryDst, CarryIn;
+ if (NumDefs == 2)
+ CarryDst = MI.getOperand(1).getReg();
+ if (MI.getNumOperands() == NumDefs + 3)
+ CarryIn = MI.getOperand(NumDefs + 2).getReg();
+
+ LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT LeftoverTy, DummyTy;
+ SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
+ extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
+ extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
+
+ int NarrowParts = Src1Regs.size();
+ for (int I = 0, E = Src1Left.size(); I != E; ++I) {
+ Src1Regs.push_back(Src1Left[I]);
+ Src2Regs.push_back(Src2Left[I]);
+ }
+ DstRegs.reserve(Src1Regs.size());
+
+ for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
+ Register DstReg =
+ MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
+ Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ // Forward the final carry-out to the destination register
+ if (i == e - 1 && CarryDst)
+ CarryOut = CarryDst;
+
+ if (!CarryIn) {
+ MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
+ {Src1Regs[i], Src2Regs[i]});
+ } else if (i == e - 1) {
+ MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
+ {Src1Regs[i], Src2Regs[i], CarryIn});
+ } else {
+ MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
+ {Src1Regs[i], Src2Regs[i], CarryIn});
+ }
+
+ DstRegs.push_back(DstReg);
+ CarryIn = CarryOut;
+ }
+ insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
+ makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
+ makeArrayRef(DstRegs).drop_front(NarrowParts));
+
+ MI.eraseFromParent();
+ return Legalized;
+}
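A sketch of the carry chain this produces, with two 64-bit halves standing in for the NarrowTy pieces of a 128-bit add; the helper name is made up for illustration.

#include <cstdint>

// 128-bit addition from 64-bit parts: the lowest parts use an add with
// carry-out (G_UADDO), every later part consumes the previous carry (G_UADDE).
void add128(uint64_t ALo, uint64_t AHi, uint64_t BLo, uint64_t BHi,
            uint64_t &Lo, uint64_t &Hi) {
  Lo = ALo + BLo;                    // G_UADDO
  uint64_t Carry = Lo < ALo ? 1 : 0; // carry-out of the low part
  Hi = AHi + BHi + Carry;            // G_UADDE (final carry-out dropped here)
}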
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
Register DstReg = MI.getOperand(0).getReg();
Register Src1 = MI.getOperand(1).getReg();
@@ -4492,6 +5250,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
+
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+
+ // If all finite floats fit into the narrowed integer type, we can just swap
+ // out the result type. This is practically only useful for conversions from
+ // half to at least 16-bits, so just handle the one case.
+ if (SrcTy.getScalarType() != LLT::scalar(16) ||
+ NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0,
+ IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
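The reasoning, sketched with float standing in for the f16 source: every finite half value has magnitude at most 65504, so it fits in 17 signed (or 16 unsigned) bits, and the wide conversion result is just an extension of the narrow one. The helper name is illustrative.

#include <cstdint>

// G_FPTOSI f16 -> s64 rewritten as G_FPTOSI f16 -> s32 followed by G_SEXT;
// float is only a stand-in for the f16 source type used in the code above.
int64_t fptosiViaNarrow(float HalfVal) {
  int32_t Narrow = static_cast<int32_t>(HalfVal); // narrow G_FPTOSI
  return static_cast<int64_t>(Narrow);            // G_SEXT back to 64 bits
}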
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
if (TypeIdx != 1)
@@ -4565,37 +5348,43 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 0)
return UnableToLegalize;
- uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- uint64_t NarrowSize = NarrowTy.getSizeInBits();
-
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
-
- int NumParts = SizeOp0 / NarrowSize;
-
- SmallVector<Register, 2> SrcRegs, DstRegs;
+ SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
SmallVector<uint64_t, 2> Indexes;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+ LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT LeftoverTy;
+ extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
+ LeftoverRegs);
+
+ for (Register Reg : LeftoverRegs)
+ SrcRegs.push_back(Reg);
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
Register OpReg = MI.getOperand(2).getReg();
uint64_t OpStart = MI.getOperand(3).getImm();
uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstStart = i * NarrowSize;
+ for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
+ unsigned DstStart = I * NarrowSize;
- if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
- // No part of the insert affects this subregister, forward the original.
- DstRegs.push_back(SrcRegs[i]);
- continue;
- } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
// The entire subregister is defined by this insert, forward the new
// value.
DstRegs.push_back(OpReg);
continue;
}
+ Register SrcReg = SrcRegs[I];
+ if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
+ // The leftover reg is smaller than NarrowTy, so we need to extend it.
+ SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
+ }
+
+ if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+ // No part of the insert affects this subregister, forward the original.
+ DstRegs.push_back(SrcReg);
+ continue;
+ }
+
// OpSegStart is where this destination segment would start in OpReg if it
// extended infinitely in both directions.
int64_t ExtractOffset, InsertOffset;
@@ -4619,16 +5408,19 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
}
Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+ MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
DstRegs.push_back(DstReg);
}
- assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+ uint64_t WideSize = DstRegs.size() * NarrowSize;
Register DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
+ if (WideSize > RegTy.getSizeInBits()) {
+ Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
+ MIRBuilder.buildMerge(MergeReg, DstRegs);
+ MIRBuilder.buildTrunc(DstReg, MergeReg);
+ } else
MIRBuilder.buildMerge(DstReg, DstRegs);
+
MI.eraseFromParent();
return Legalized;
}
@@ -5002,6 +5794,209 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
}
}
+// Check that (every element of) Reg is undef or not an exact multiple of BW.
+static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
+ Register Reg, unsigned BW) {
+ return matchUnaryPredicate(
+ MRI, Reg,
+ [=](const Constant *C) {
+ // Null constant here means an undef.
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
+ return !CI || CI->getValue().urem(BW) != 0;
+ },
+ /*AllowUndefs*/ true);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ Register Z = MI.getOperand(3).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShTy = MRI.getType(Z);
+
+ unsigned BW = Ty.getScalarSizeInBits();
+
+ if (!isPowerOf2_32(BW))
+ return UnableToLegalize;
+
+ const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+ unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
+
+ if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
+ // fshl X, Y, Z -> fshr X, Y, -Z
+ // fshr X, Y, Z -> fshl X, Y, -Z
+ auto Zero = MIRBuilder.buildConstant(ShTy, 0);
+ Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
+ } else {
+ // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
+ // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
+ auto One = MIRBuilder.buildConstant(ShTy, 1);
+ if (IsFSHL) {
+ Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
+ X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
+ } else {
+ X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
+ Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
+ }
+
+ Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
+ }
+
+ MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ Register Z = MI.getOperand(3).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShTy = MRI.getType(Z);
+
+ const unsigned BW = Ty.getScalarSizeInBits();
+ const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+
+ Register ShX, ShY;
+ Register ShAmt, InvShAmt;
+
+ // FIXME: Emit optimized urem by constant instead of letting it expand later.
+ if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
+ // fshl: X << C | Y >> (BW - C)
+ // fshr: X << (BW - C) | Y >> C
+ // where C = Z % BW is not zero
+ auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
+ ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
+ InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
+ ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
+ ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
+ } else {
+ // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+ // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
+ auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
+ if (isPowerOf2_32(BW)) {
+ // Z % BW -> Z & (BW - 1)
+ ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
+ // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
+ auto NotZ = MIRBuilder.buildNot(ShTy, Z);
+ InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
+ } else {
+ auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
+ ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
+ InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
+ }
+
+ auto One = MIRBuilder.buildConstant(ShTy, 1);
+ if (IsFSHL) {
+ ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
+ auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
+ ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
+ } else {
+ auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
+ ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
+ ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
+ }
+ }
+
+ MIRBuilder.buildOr(Dst, ShX, ShY);
+ MI.eraseFromParent();
+ return Legalized;
+}
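A standalone 32-bit sketch of the shift expansion above for the power-of-two case, where the masking keeps every shift amount strictly below the bit width.

#include <cstdint>

// fshl X, Y, Z  ->  X << (Z % 32) | Y >> 1 >> (31 - (Z % 32))
uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t ShAmt = Z & 31;     // Z % BW
  uint32_t InvShAmt = ~Z & 31; // (BW - 1) - (Z % BW)
  return (X << ShAmt) | ((Y >> 1) >> InvShAmt);
}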
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
+ // These operations approximately do the following (while avoiding undefined
+ // shifts by BW):
+ // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
+
+ bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+ unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
+
+ // TODO: Use smarter heuristic that accounts for vector legalization.
+ if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
+ return lowerFunnelShiftAsShifts(MI);
+
+ // This only works for powers of 2, fallback to shifts if it fails.
+ LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
+ if (Result == UnableToLegalize)
+ return lowerFunnelShiftAsShifts(MI);
+ return Result;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Amt = MI.getOperand(2).getReg();
+ LLT AmtTy = MRI.getType(Amt);
+ auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
+ bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
+ unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
+ auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
+ MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Amt = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ LLT AmtTy = MRI.getType(Amt);
+
+ unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
+ bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
+
+ MIRBuilder.setInstrAndDebugLoc(MI);
+
+ // If a rotate in the other direction is supported, use it.
+ unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
+ if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
+ isPowerOf2_32(EltSizeInBits))
+ return lowerRotateWithReverseRotate(MI);
+
+ auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
+ unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
+ unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
+ auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
+ Register ShVal;
+ Register RevShiftVal;
+ if (isPowerOf2_32(EltSizeInBits)) {
+ // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
+ // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
+ auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
+ auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
+ ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
+ auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
+ RevShiftVal =
+ MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
+ } else {
+ // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
+ // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
+ auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
+ auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
+ ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
+ auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
+ auto One = MIRBuilder.buildConstant(AmtTy, 1);
+ auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
+ RevShiftVal =
+ MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
+ }
+ MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
+ MI.eraseFromParent();
+ return Legalized;
+}
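The same idea for rotates, sketched for a 32-bit scalar where the element width is a power of two.

#include <cstdint>

// rotl X, C  ->  X << (C & 31) | X >> (-C & 31); both shift amounts stay in
// [0, 31], so nothing is undefined even when C % 32 == 0.
uint32_t rotl32(uint32_t X, uint32_t C) {
  return (X << (C & 31)) | (X >> ((0u - C) & 31));
}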
+
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
@@ -5192,7 +6187,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
- // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
+ // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
@@ -5429,31 +6424,27 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
auto NotSignBitMask = MIRBuilder.buildConstant(
Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
- auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
- MachineInstr *Or;
-
+ Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
+ Register And1;
if (Src0Ty == Src1Ty) {
- auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
- Or = MIRBuilder.buildOr(Dst, And0, And1);
+ And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
} else if (Src0Size > Src1Size) {
auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
- auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
- Or = MIRBuilder.buildOr(Dst, And0, And1);
+ And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
} else {
auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
- auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
- Or = MIRBuilder.buildOr(Dst, And0, And1);
+ And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
}
// Be careful about setting nsz/nnan/ninf on every instruction, since the
// constants are a nan and -0.0, but the final result should preserve
// everything.
- if (unsigned Flags = MI.getFlags())
- Or->setFlags(Flags);
+ unsigned Flags = MI.getFlags();
+ MIRBuilder.buildOr(Dst, And0, And1, Flags);
MI.eraseFromParent();
return Legalized;
@@ -6254,3 +7245,51 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
+ // Split DIVREM into individual instructions.
+ unsigned Opcode = MI.getOpcode();
+
+ MIRBuilder.buildInstr(
+ Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
+ : TargetOpcode::G_UDIV,
+ {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
+ MIRBuilder.buildInstr(
+ Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
+ : TargetOpcode::G_UREM,
+ {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
+ MI.eraseFromParent();
+ return Legalized;
+}
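The split itself is just the obvious pair of operations; as a trivial scalar sketch:

// G_SDIVREM %q, %r, %a, %b becomes %q = G_SDIV %a, %b and %r = G_SREM %a, %b.
void sdivrem(int A, int B, int &Quot, int &Rem) {
  Quot = A / B; // G_SDIV
  Rem = A % B;  // G_SREM
}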
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
+ // Expand %res = G_ABS %a into:
+ // %v1 = G_ASHR %a, scalar_size-1
+ // %v2 = G_ADD %a, %v1
+ // %res = G_XOR %v2, %v1
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register OpReg = MI.getOperand(1).getReg();
+ auto ShiftAmt =
+ MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
+ auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
+ auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
+ MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
+ MI.eraseFromParent();
+ return Legalized;
+}
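A scalar sketch of the add/xor expansion (ignoring INT_MIN, whose absolute value is not representable in the same width anyway):

#include <cstdint>

// M is 0 for non-negative x and -1 for negative x, so (x + M) ^ M yields
// x or ~(x - 1) == -x respectively.
int32_t absAddXor(int32_t X) {
  int32_t M = X >> 31; // G_ASHR by bitwidth - 1 (arithmetic shift)
  return (X + M) ^ M;  // G_ADD, then G_XOR
}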
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
+ // Expand %res = G_ABS %a into:
+ // %v1 = G_CONSTANT 0
+ // %v2 = G_SUB %v1, %a
+ // %res = G_SMAX %a, %v2
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(SrcReg);
+ auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
+ auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
+ MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
+ MI.eraseFromParent();
+ return Legalized;
+}
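And the max/neg form, sketched for targets where G_SMAX is the cheaper primitive (again ignoring INT_MIN):

#include <algorithm>
#include <cstdint>

// abs(x) as smax(x, 0 - x).
int32_t absMaxNeg(int32_t X) { return std::max(X, -X); }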
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 30acac14bc5f..3e3141657e87 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -9,11 +9,6 @@
// Implement an interface to specify and query how an illegal operation on a
// given type should be expanded.
//
-// Issues to be resolved:
-// + Make it fast.
-// + Support weird types like i3, <7 x i3>, ...
-// + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...)
-//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -93,7 +88,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
OS << Opcode << ", MMOs={";
for (const auto &MMODescr : MMODescrs) {
- OS << MMODescr.SizeInBits << ", ";
+ OS << MMODescr.MemoryTy << ", ";
}
OS << "}";
@@ -256,146 +251,6 @@ bool LegalizeRuleSet::verifyImmIdxsCoverage(unsigned NumImmIdxs) const {
#endif
}
-LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
- // Set defaults.
- // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the
- // fundamental load/store Jakob proposed. Once loads & stores are supported.
- setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}});
- setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}});
- setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}});
- setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}});
- setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}});
-
- setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
- setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
-
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall);
-
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall);
- setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}});
-}
-
-void LegalizerInfo::computeTables() {
- assert(TablesInitialized == false);
-
- for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) {
- const unsigned Opcode = FirstOp + OpcodeIdx;
- for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size();
- ++TypeIdx) {
- // 0. Collect information specified through the setAction API, i.e.
- // for specific bit sizes.
- // For scalar types:
- SizeAndActionsVec ScalarSpecifiedActions;
- // For pointer types:
- std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions;
- // For vector types:
- std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions;
- for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) {
- const LLT Type = LLT2Action.first;
- const LegalizeAction Action = LLT2Action.second;
-
- auto SizeAction = std::make_pair(Type.getSizeInBits(), Action);
- if (Type.isPointer())
- AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back(
- SizeAction);
- else if (Type.isVector())
- ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()]
- .push_back(SizeAction);
- else
- ScalarSpecifiedActions.push_back(SizeAction);
- }
-
- // 1. Handle scalar types
- {
- // Decide how to handle bit sizes for which no explicit specification
- // was given.
- SizeChangeStrategy S = &unsupportedForDifferentSizes;
- if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
- ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
- S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
- llvm::sort(ScalarSpecifiedActions);
- checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
- setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
- }
-
- // 2. Handle pointer types
- for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
- llvm::sort(PointerSpecifiedActions.second);
- checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
- // For pointer types, we assume that there isn't a meaningfull way
- // to change the number of bits used in the pointer.
- setPointerAction(
- Opcode, TypeIdx, PointerSpecifiedActions.first,
- unsupportedForDifferentSizes(PointerSpecifiedActions.second));
- }
-
- // 3. Handle vector types
- SizeAndActionsVec ElementSizesSeen;
- for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
- llvm::sort(VectorSpecifiedActions.second);
- const uint16_t ElementSize = VectorSpecifiedActions.first;
- ElementSizesSeen.push_back({ElementSize, Legal});
- checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
- // For vector types, we assume that the best way to adapt the number
- // of elements is to the next larger number of elements type for which
- // the vector type is legal, unless there is no such type. In that case,
- // legalize towards a vector type with a smaller number of elements.
- SizeAndActionsVec NumElementsActions;
- for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) {
- assert(BitsizeAndAction.first % ElementSize == 0);
- const uint16_t NumElements = BitsizeAndAction.first / ElementSize;
- NumElementsActions.push_back({NumElements, BitsizeAndAction.second});
- }
- setVectorNumElementAction(
- Opcode, TypeIdx, ElementSize,
- moreToWiderTypesAndLessToWidest(NumElementsActions));
- }
- llvm::sort(ElementSizesSeen);
- SizeChangeStrategy VectorElementSizeChangeStrategy =
- &unsupportedForDifferentSizes;
- if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
- VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
- VectorElementSizeChangeStrategy =
- VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx];
- setScalarInVectorAction(
- Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen));
- }
- }
-
- TablesInitialized = true;
-}
-
-// FIXME: inefficient implementation for now. Without ComputeValueVTs we're
-// probably going to need specialized lookup structures for various types before
-// we have any hope of doing well with something like <13 x i3>. Even the common
-// cases should do better than what we have now.
-std::pair<LegalizeAction, LLT>
-LegalizerInfo::getAspectAction(const InstrAspect &Aspect) const {
- assert(TablesInitialized && "backend forgot to call computeTables");
- // These *have* to be implemented for now, they're the fundamental basis of
- // how everything else is transformed.
- if (Aspect.Type.isScalar() || Aspect.Type.isPointer())
- return findScalarLegalAction(Aspect);
- assert(Aspect.Type.isVector());
- return findVectorLegalAction(Aspect);
-}
-
/// Helper function to get LLT for the given type index.
static LLT getTypeFromTypeIdx(const MachineInstr &MI,
const MachineRegisterInfo &MRI, unsigned OpIdx,
@@ -446,8 +301,8 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
"Initializer list must have at least two opcodes");
- for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I)
- aliasActionDefinitions(Representative, *I);
+ for (unsigned Op : llvm::drop_begin(Opcodes))
+ aliasActionDefinitions(Representative, Op);
auto &Return = getActionDefinitionsBuilder(Representative);
Return.setIsAliasedByAnother();
@@ -469,23 +324,13 @@ LegalizerInfo::getAction(const LegalityQuery &Query) const {
return Step;
}
- for (unsigned i = 0; i < Query.Types.size(); ++i) {
- auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]});
- if (Action.first != Legal) {
- LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Action="
- << Action.first << ", " << Action.second << "\n");
- return {Action.first, i, Action.second};
- } else
- LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n");
- }
- LLVM_DEBUG(dbgs() << ".. (legacy) Legal\n");
- return {Legal, 0, LLT{}};
+ return getLegacyLegalizerInfo().getAction(Query);
}
LegalizeActionStep
LegalizerInfo::getAction(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const {
- SmallVector<LLT, 2> Types;
+ SmallVector<LLT, 8> Types;
SmallBitVector SeenTypes(8);
const MCOperandInfo *OpInfo = MI.getDesc().OpInfo;
// FIXME: probably we'll need to cache the results here somehow?
@@ -507,8 +352,8 @@ LegalizerInfo::getAction(const MachineInstr &MI,
SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
for (const auto &MMO : MI.memoperands())
- MemDescrs.push_back({8 * MMO->getSize() /* in bits */,
- 8 * MMO->getAlign().value(), MMO->getOrdering()});
+ MemDescrs.push_back({MMO->getMemoryType(), 8 * MMO->getAlign().value(),
+ MMO->getSuccessOrdering()});
return getAction({MI.getOpcode(), Types, MemDescrs});
}
@@ -526,163 +371,6 @@ bool LegalizerInfo::isLegalOrCustom(const MachineInstr &MI,
return Action == Legal || Action == Custom;
}
-LegalizerInfo::SizeAndActionsVec
-LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest(
- const SizeAndActionsVec &v, LegalizeAction IncreaseAction,
- LegalizeAction DecreaseAction) {
- SizeAndActionsVec result;
- unsigned LargestSizeSoFar = 0;
- if (v.size() >= 1 && v[0].first != 1)
- result.push_back({1, IncreaseAction});
- for (size_t i = 0; i < v.size(); ++i) {
- result.push_back(v[i]);
- LargestSizeSoFar = v[i].first;
- if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) {
- result.push_back({LargestSizeSoFar + 1, IncreaseAction});
- LargestSizeSoFar = v[i].first + 1;
- }
- }
- result.push_back({LargestSizeSoFar + 1, DecreaseAction});
- return result;
-}
-
-LegalizerInfo::SizeAndActionsVec
-LegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest(
- const SizeAndActionsVec &v, LegalizeAction DecreaseAction,
- LegalizeAction IncreaseAction) {
- SizeAndActionsVec result;
- if (v.size() == 0 || v[0].first != 1)
- result.push_back({1, IncreaseAction});
- for (size_t i = 0; i < v.size(); ++i) {
- result.push_back(v[i]);
- if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) {
- result.push_back({v[i].first + 1, DecreaseAction});
- }
- }
- return result;
-}
-
-LegalizerInfo::SizeAndAction
-LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
- assert(Size >= 1);
- // Find the last element in Vec that has a bitsize equal to or smaller than
- // the requested bit size.
- // That is the element just before the first element that is bigger than Size.
- auto It = partition_point(
- Vec, [=](const SizeAndAction &A) { return A.first <= Size; });
- assert(It != Vec.begin() && "Does Vec not start with size 1?");
- int VecIdx = It - Vec.begin() - 1;
-
- LegalizeAction Action = Vec[VecIdx].second;
- switch (Action) {
- case Legal:
- case Bitcast:
- case Lower:
- case Libcall:
- case Custom:
- return {Size, Action};
- case FewerElements:
- // FIXME: is this special case still needed and correct?
- // Special case for scalarization:
- if (Vec == SizeAndActionsVec({{1, FewerElements}}))
- return {1, FewerElements};
- LLVM_FALLTHROUGH;
- case NarrowScalar: {
- // The following needs to be a loop, as for now, we do allow needing to
- // go over "Unsupported" bit sizes before finding a legalizable bit size.
- // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). if Size==8,
- // we need to iterate over s9, and then to s32 to return (s32, Legal).
- // If we want to get rid of the below loop, we should have stronger asserts
- // when building the SizeAndActionsVecs, probably not allowing
- // "Unsupported" unless at the ends of the vector.
- for (int i = VecIdx - 1; i >= 0; --i)
- if (!needsLegalizingToDifferentSize(Vec[i].second) &&
- Vec[i].second != Unsupported)
- return {Vec[i].first, Action};
- llvm_unreachable("");
- }
- case WidenScalar:
- case MoreElements: {
- // See above, the following needs to be a loop, at least for now.
- for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i)
- if (!needsLegalizingToDifferentSize(Vec[i].second) &&
- Vec[i].second != Unsupported)
- return {Vec[i].first, Action};
- llvm_unreachable("");
- }
- case Unsupported:
- return {Size, Unsupported};
- case NotFound:
- case UseLegacyRules:
- llvm_unreachable("NotFound");
- }
- llvm_unreachable("Action has an unknown enum value");
-}
-
-std::pair<LegalizeAction, LLT>
-LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {
- assert(Aspect.Type.isScalar() || Aspect.Type.isPointer());
- if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
- return {NotFound, LLT()};
- const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
- if (Aspect.Type.isPointer() &&
- AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) ==
- AddrSpace2PointerActions[OpcodeIdx].end()) {
- return {NotFound, LLT()};
- }
- const SmallVector<SizeAndActionsVec, 1> &Actions =
- Aspect.Type.isPointer()
- ? AddrSpace2PointerActions[OpcodeIdx]
- .find(Aspect.Type.getAddressSpace())
- ->second
- : ScalarActions[OpcodeIdx];
- if (Aspect.Idx >= Actions.size())
- return {NotFound, LLT()};
- const SizeAndActionsVec &Vec = Actions[Aspect.Idx];
- // FIXME: speed up this search, e.g. by using a results cache for repeated
- // queries?
- auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits());
- return {SizeAndAction.second,
- Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first)
- : LLT::pointer(Aspect.Type.getAddressSpace(),
- SizeAndAction.first)};
-}
-
-std::pair<LegalizeAction, LLT>
-LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
- assert(Aspect.Type.isVector());
- // First legalize the vector element size, then legalize the number of
- // lanes in the vector.
- if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
- return {NotFound, Aspect.Type};
- const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
- const unsigned TypeIdx = Aspect.Idx;
- if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size())
- return {NotFound, Aspect.Type};
- const SizeAndActionsVec &ElemSizeVec =
- ScalarInVectorActions[OpcodeIdx][TypeIdx];
-
- LLT IntermediateType;
- auto ElementSizeAndAction =
- findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits());
- IntermediateType =
- LLT::vector(Aspect.Type.getNumElements(), ElementSizeAndAction.first);
- if (ElementSizeAndAction.second != Legal)
- return {ElementSizeAndAction.second, IntermediateType};
-
- auto i = NumElements2Actions[OpcodeIdx].find(
- IntermediateType.getScalarSizeInBits());
- if (i == NumElements2Actions[OpcodeIdx].end()) {
- return {NotFound, IntermediateType};
- }
- const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx];
- auto NumElementsAndAction =
- findAction(NumElementsVec, IntermediateType.getNumElements());
- return {NumElementsAndAction.second,
- LLT::vector(NumElementsAndAction.first,
- IntermediateType.getScalarSizeInBits())};
-}
-
unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const {
return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index 30c00c63f6f4..d45fdae43f01 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -82,8 +82,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
// we start doing CSE across blocks.
auto &MBB = MF.front();
auto &TL = *MF.getSubtarget().getTargetLowering();
- for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) {
- MachineInstr &MI = *RI;
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
if (!TL.shouldLocalize(MI, TTI))
continue;
LLVM_DEBUG(dbgs() << "Should localize: " << MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 67ef02a4e7b2..54ac62793b08 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -240,6 +240,18 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
return buildInstr(TargetOpcode::COPY, Res, Op);
}
+MachineInstrBuilder MachineIRBuilder::buildAssertSExt(const DstOp &Res,
+ const SrcOp &Op,
+ unsigned Size) {
+ return buildInstr(TargetOpcode::G_ASSERT_SEXT, Res, Op).addImm(Size);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildAssertZExt(const DstOp &Res,
+ const SrcOp &Op,
+ unsigned Size) {
+ return buildInstr(TargetOpcode::G_ASSERT_ZEXT, Res, Op).addImm(Size);
+}
+
MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
const ConstantInt &Val) {
LLT Ty = Res.getLLTTy(*getMRI());
@@ -335,10 +347,9 @@ MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr,
MMOFlags |= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);
- uint64_t Size = MemoryLocation::getSizeOrUnknown(
- TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes()));
+ LLT Ty = Dst.getLLTTy(*getMRI());
MachineMemOperand *MMO =
- getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Ty, Alignment, AAInfo);
return buildLoad(Dst, Addr, *MMO);
}
@@ -361,7 +372,7 @@ MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset(
MachineMemOperand &BaseMMO, int64_t Offset) {
LLT LoadTy = Dst.getLLTTy(*getMRI());
MachineMemOperand *OffsetMMO =
- getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy.getSizeInBytes());
+ getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy);
if (Offset == 0) // This may be a size or type changing load.
return buildLoad(Dst, BasePtr, *OffsetMMO);
@@ -394,10 +405,9 @@ MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr,
MMOFlags |= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
- uint64_t Size = MemoryLocation::getSizeOrUnknown(
- TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes()));
+ LLT Ty = Val.getLLTTy(*getMRI());
MachineMemOperand *MMO =
- getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Ty, Alignment, AAInfo);
return buildStore(Val, Addr, *MMO);
}
@@ -474,6 +484,15 @@ MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(const DstOp &Res,
return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op);
}
+MachineInstrBuilder MachineIRBuilder::buildZExtInReg(const DstOp &Res,
+ const SrcOp &Op,
+ int64_t ImmOp) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ auto Mask = buildConstant(
+ ResTy, APInt::getLowBitsSet(ResTy.getScalarSizeInBits(), ImmOp));
+ return buildAnd(Res, Op, Mask);
+}
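buildZExtInReg just masks off the high bits; a scalar sketch of the equivalent operation on a 32-bit value, with an illustrative name:

#include <cstdint>

// Keep the low Bits bits and clear the rest, i.e. G_AND with a low-bits mask.
uint32_t zextInReg32(uint32_t X, unsigned Bits) {
  uint32_t Mask = Bits >= 32 ? ~0u : ((1u << Bits) - 1u);
  return X & Mask;
}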
+
MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
const SrcOp &Src) {
LLT SrcTy = Src.getLLTTy(*getMRI());
@@ -657,10 +676,11 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
assert(DstTy.getElementType() == Src1Ty.getElementType() &&
DstTy.getElementType() == Src2Ty.getElementType());
+ (void)DstTy;
(void)Src1Ty;
(void)Src2Ty;
ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask);
- return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2})
+ return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {Res}, {Src1, Src2})
.addShuffleMask(MaskAlloc);
}
@@ -1095,7 +1115,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
DstOps[0].getLLTTy(*getMRI());
}) &&
"type mismatch in output list");
- assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ assert((TypeSize::ScalarTy)DstOps.size() *
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
"input operands do not cover output register");
break;
@@ -1109,7 +1130,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI());
}) &&
"type mismatch in input list");
- assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ assert((TypeSize::ScalarTy)SrcOps.size() *
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
"input operands do not cover output register");
if (SrcOps.size() == 1)
@@ -1160,7 +1182,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI());
}) &&
"type mismatch in input list");
- assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ assert((TypeSize::ScalarTy)SrcOps.size() *
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
"input scalars do not exactly cover the output vector register");
break;
@@ -1193,7 +1216,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI()));
}) &&
"type mismatch in input list");
- assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ assert((TypeSize::ScalarTy)SrcOps.size() *
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
"input vectors do not exactly cover the output vector register");
break;
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 356e0e437d32..644a81d8021e 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -622,6 +622,23 @@ bool RegBankSelect::applyMapping(
bool RegBankSelect::assignInstr(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "Assign: " << MI);
+
+ unsigned Opc = MI.getOpcode();
+ if (isPreISelGenericOptimizationHint(Opc)) {
+ assert((Opc == TargetOpcode::G_ASSERT_ZEXT ||
+ Opc == TargetOpcode::G_ASSERT_SEXT) &&
+ "Unexpected hint opcode!");
+ // The only correct mapping for these is to always use the source register
+ // bank.
+ const RegisterBank *RB = MRI->getRegBankOrNull(MI.getOperand(1).getReg());
+ // We can assume every instruction above this one has a selected register
+ // bank.
+ assert(RB && "Expected source register to have a register bank?");
+ LLVM_DEBUG(dbgs() << "... Hint always uses source's register bank.\n");
+ MRI->setRegBank(MI.getOperand(0).getReg(), *RB);
+ return true;
+ }
+
// Remember the repairing placement for all the operands.
SmallVector<RepairingPlacement, 4> RepairPts;
@@ -702,6 +719,10 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
if (MI.isDebugInstr())
continue;
+ // Ignore IMPLICIT_DEF which must have a regclass.
+ if (MI.isImplicitDef())
+ continue;
+
if (!assignInstr(MI)) {
reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
"unable to map instruction", MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
index fc9c802693ab..5c4d18ad79c5 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
@@ -98,17 +99,12 @@ void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
return;
assert(ContainedRegClasses.size() == TRI->getNumRegClasses() &&
"TRI does not match the initialization process?");
- bool IsFirst = true;
OS << "Covered register classes:\n";
+ ListSeparator LS;
for (unsigned RCId = 0, End = TRI->getNumRegClasses(); RCId != End; ++RCId) {
const TargetRegisterClass &RC = *TRI->getRegClass(RCId);
- if (!covers(RC))
- continue;
-
- if (!IsFirst)
- OS << ", ";
- OS << TRI->getRegClassName(&RC);
- IsFirst = false;
+ if (covers(RC))
+ OS << LS << TRI->getRegClassName(&RC);
}
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index cd2483224489..f64e41b9dccc 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -199,6 +200,10 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
// Don't delete frame allocation labels.
if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE)
return false;
+ // LIFETIME markers should be preserved even if they seem dead.
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END)
+ return false;
// If we can move an instruction, we can remove it. Otherwise, it has
// a side-effect of some sort.
@@ -360,6 +365,14 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return ValueAndVReg{Val, VReg};
}
+const ConstantInt *llvm::getConstantIntVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
+ MachineInstr *MI = MRI.getVRegDef(VReg);
+ if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return nullptr;
+ return MI->getOperand(1).getCImm();
+}
+
const ConstantFP *
llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
MachineInstr *MI = MRI.getVRegDef(VReg);
@@ -375,13 +388,15 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
if (!DstTy.isValid())
return None;
- while (DefMI->getOpcode() == TargetOpcode::COPY) {
+ unsigned Opc = DefMI->getOpcode();
+ while (Opc == TargetOpcode::COPY || isPreISelGenericOptimizationHint(Opc)) {
Register SrcReg = DefMI->getOperand(1).getReg();
auto SrcTy = MRI.getType(SrcReg);
if (!SrcTy.isValid())
break;
DefMI = MRI.getVRegDef(SrcReg);
DefSrcReg = SrcReg;
+ Opc = DefMI->getOpcode();
}
return DefinitionAndSourceRegister{DefMI, DefSrcReg};
}
@@ -474,6 +489,60 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
return None;
}
+Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI) {
+ const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
+ if (!Op2Cst)
+ return None;
+
+ const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
+ if (!Op1Cst)
+ return None;
+
+ APFloat C1 = Op1Cst->getValueAPF();
+ const APFloat &C2 = Op2Cst->getValueAPF();
+ switch (Opcode) {
+ case TargetOpcode::G_FADD:
+ C1.add(C2, APFloat::rmNearestTiesToEven);
+ return C1;
+ case TargetOpcode::G_FSUB:
+ C1.subtract(C2, APFloat::rmNearestTiesToEven);
+ return C1;
+ case TargetOpcode::G_FMUL:
+ C1.multiply(C2, APFloat::rmNearestTiesToEven);
+ return C1;
+ case TargetOpcode::G_FDIV:
+ C1.divide(C2, APFloat::rmNearestTiesToEven);
+ return C1;
+ case TargetOpcode::G_FREM:
+ C1.mod(C2);
+ return C1;
+ case TargetOpcode::G_FCOPYSIGN:
+ C1.copySign(C2);
+ return C1;
+ case TargetOpcode::G_FMINNUM:
+ return minnum(C1, C2);
+ case TargetOpcode::G_FMAXNUM:
+ return maxnum(C1, C2);
+ case TargetOpcode::G_FMINIMUM:
+ return minimum(C1, C2);
+ case TargetOpcode::G_FMAXIMUM:
+ return maximum(C1, C2);
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ // FIXME: These operations were unfortunately named. fminnum/fmaxnum do not
+ // follow the IEEE behavior for signaling NaNs; they follow libm's fmin/fmax
+ // instead, and there is currently no convenient APFloat wrapper for the
+ // variant with correct sNaN handling.
+ break;
+ default:
+ break;
+ }
+
+ return None;
+}
+
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
bool SNaN) {
const MachineInstr *DefMI = MRI.getVRegDef(Val);
@@ -484,6 +553,42 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
return true;
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) {
+ return !FPVal->getValueAPF().isNaN() ||
+ (SNaN && !FPVal->getValueAPF().isSignaling());
+ }
+
+ if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
+ for (const auto &Op : DefMI->uses())
+ if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN))
+ return false;
+ return true;
+ }
+
+ switch (DefMI->getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE: {
+ if (SNaN)
+ return true;
+ // This can return a NaN if either operand is an sNaN, or if both operands
+ // are NaN.
+ return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) &&
+ isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) ||
+ (isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) &&
+ isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI));
+ }
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM: {
+ // Only one operand needs to be known non-NaN, since that operand will be
+ // returned if the other turns out to be a NaN.
+ return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) ||
+ isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN);
+ }
+ }
+
if (SNaN) {
// FP operations quiet sNaNs. For now, just handle the ones inserted during
// legalization.
@@ -509,6 +614,11 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
MPO.Offset);
}
+ if (const Value *V = MPO.V.dyn_cast<const Value *>()) {
+ const Module *M = MF.getFunction().getParent();
+ return V->getPointerAlignment(M->getDataLayout());
+ }
+
return Align(1);
}
@@ -563,6 +673,19 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
return None;
}
+Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
+ Register Src,
+ const MachineRegisterInfo &MRI) {
+ assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP);
+ if (auto MaybeSrcVal = getConstantVRegVal(Src, MRI)) {
+ APFloat DstVal(getFltSemanticForLLT(DstTy));
+ DstVal.convertFromAPInt(*MaybeSrcVal, Opcode == TargetOpcode::G_SITOFP,
+ APFloat::rmNearestTiesToEven);
+ return DstVal;
+ }
+ return None;
+}
+
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB) {
Optional<DefinitionAndSourceRegister> DefSrcReg =
@@ -599,11 +722,32 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
break;
}
+ case TargetOpcode::G_BUILD_VECTOR: {
+ // TODO: Probably should have a recursion depth guard since you could have
+ // bitcasted vector elements.
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB))
+ return false;
+ }
+
+ return true;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+ // Only handle constants, since we would need to know whether the number of
+ // leading zeros is greater than the truncation amount.
+ const unsigned BitWidth = Ty.getScalarSizeInBits();
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ auto Const = getConstantVRegVal(MI.getOperand(I).getReg(), MRI);
+ if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
+ return false;
+ }
+
+ return true;
+ }
default:
break;
}
- // TODO: Are all operands of a build vector constant powers of two?
if (!KB)
return false;
@@ -642,8 +786,9 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(),
TargetTy.getNumElements());
// Prefer the original element type.
- int Mul = OrigTy.getNumElements() * TargetTy.getNumElements();
- return LLT::vector(Mul / GCDElts, OrigTy.getElementType());
+ ElementCount Mul = OrigTy.getElementCount() * TargetTy.getNumElements();
+ return LLT::vector(Mul.divideCoefficientBy(GCDElts),
+ OrigTy.getElementType());
}
} else {
if (OrigElt.getSizeInBits() == TargetSize)
@@ -651,12 +796,12 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
}
unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
- return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt);
+ return LLT::fixed_vector(LCMSize / OrigElt.getSizeInBits(), OrigElt);
}
if (TargetTy.isVector()) {
unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
- return LLT::vector(LCMSize / OrigSize, OrigTy);
+ return LLT::fixed_vector(LCMSize / OrigSize, OrigTy);
}
unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
@@ -684,7 +829,7 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
TargetTy.getNumElements());
- return LLT::scalarOrVector(GCD, OrigElt);
+ return LLT::scalarOrVector(ElementCount::getFixed(GCD), OrigElt);
}
} else {
// If the source is a vector of pointers, return a pointer element.
@@ -700,7 +845,7 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
// scalar.
if (GCD < OrigElt.getSizeInBits())
return LLT::scalar(GCD);
- return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt);
+ return LLT::fixed_vector(GCD / OrigElt.getSizeInBits(), OrigElt);
}
if (TargetTy.isVector()) {
@@ -789,6 +934,52 @@ bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
return isBuildVectorConstantSplat(MI, MRI, -1);
}
+Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ unsigned Opc = MI.getOpcode();
+ if (!isBuildVectorOp(Opc))
+ return None;
+ if (auto Splat = getBuildVectorConstantSplat(MI, MRI))
+ return RegOrConstant(*Splat);
+ auto Reg = MI.getOperand(1).getReg();
+ if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
+ [&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; }))
+ return None;
+ return RegOrConstant(Reg);
+}
+
+bool llvm::matchUnaryPredicate(
+ const MachineRegisterInfo &MRI, Register Reg,
+ std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {
+
+ const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+ if (AllowUndefs && Def->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+ return Match(nullptr);
+
+ // TODO: Also handle fconstant
+ if (Def->getOpcode() == TargetOpcode::G_CONSTANT)
+ return Match(Def->getOperand(1).getCImm());
+
+ if (Def->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
+ return false;
+
+ for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
+ Register SrcElt = Def->getOperand(I).getReg();
+ const MachineInstr *SrcDef = getDefIgnoringCopies(SrcElt, MRI);
+ if (AllowUndefs && SrcDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) {
+ if (!Match(nullptr))
+ return false;
+ continue;
+ }
+
+ if (SrcDef->getOpcode() != TargetOpcode::G_CONSTANT ||
+ !Match(SrcDef->getOperand(1).getCImm()))
+ return false;
+ }
+
+ return true;
+}
+
bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
bool IsFP) {
switch (TLI.getBooleanContents(IsVector, IsFP)) {
@@ -813,3 +1004,10 @@ int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
}
llvm_unreachable("Invalid boolean contents");
}
+
+bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ const auto &F = MBB.getParent()->getFunction();
+ return F.hasOptSize() || F.hasMinSize() ||
+ llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
+}
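
Before moving on to the next file: the GlobalISel utility additions above (getConstantIntVRegVal, ConstantFoldFPBinOp, ConstantFoldIntToFloat, getVectorSplat, matchUnaryPredicate, shouldOptForSize) are plain helpers, so a short usage sketch may help. The following is illustrative only and not part of the patch; the helper name tryFoldFPBinOp and the MachineIRBuilder plumbing are assumptions, while the ConstantFoldFPBinOp signature is the one added above.

  #include "llvm/ADT/APFloat.h"
  #include "llvm/ADT/Optional.h"
  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  #include "llvm/CodeGen/GlobalISel/Utils.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  using namespace llvm;

  // Hypothetical combine step: if both FP operands fold to constants, replace
  // the instruction with a single G_FCONSTANT computed by ConstantFoldFPBinOp.
  static bool tryFoldFPBinOp(MachineInstr &MI, MachineRegisterInfo &MRI,
                             MachineIRBuilder &B) {
    Register Dst = MI.getOperand(0).getReg();
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    // Returns None unless both operands are constant and the opcode is one of
    // the handled G_F* operations (G_FADD ... G_FMAXIMUM).
    Optional<APFloat> Folded = ConstantFoldFPBinOp(MI.getOpcode(), LHS, RHS, MRI);
    if (!Folded)
      return false;
    B.setInstrAndDebugLoc(MI);
    B.buildFConstant(Dst, *Folded);
    MI.eraseFromParent();
    return true;
  }

The Optional/None spelling is kept to match the release/13.x-era code above.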
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index 810b10c9c82a..4316034371a5 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -232,11 +232,9 @@ bool HardwareLoops::runOnFunction(Function &F) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
M = F.getParent();
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
- Loop *L = *I;
+ for (Loop *L : *LI)
if (L->isOutermost())
TryConvertLoop(L);
- }
return MadeChange;
}
@@ -408,8 +406,8 @@ Value *HardwareLoop::InitLoopCount() {
}
if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) {
- LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount "
- << *TripCount << "\n");
+ LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount
+ << "\n");
return nullptr;
}
@@ -426,9 +424,9 @@ Value *HardwareLoop::InitLoopCount() {
UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
- << " - Expanded Count in " << BB->getName() << "\n"
- << " - Will insert set counter intrinsic into: "
- << BeginBB->getName() << "\n");
+ << " - Expanded Count in " << BB->getName() << "\n"
+ << " - Will insert set counter intrinsic into: "
+ << BeginBB->getName() << "\n");
return Count;
}
@@ -436,25 +434,32 @@ Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
- Intrinsic::ID ID = UseLoopGuard ? Intrinsic::test_set_loop_iterations
- : (UsePhi ? Intrinsic::start_loop_iterations
- : Intrinsic::set_loop_iterations);
+ Intrinsic::ID ID = UseLoopGuard
+ ? (UsePhi ? Intrinsic::test_start_loop_iterations
+ : Intrinsic::test_set_loop_iterations)
+ : (UsePhi ? Intrinsic::start_loop_iterations
+ : Intrinsic::set_loop_iterations);
Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
- Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
+ Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
// Use the return value of the intrinsic to control the entry of the loop.
if (UseLoopGuard) {
assert((isa<BranchInst>(BeginBB->getTerminator()) &&
cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
"Expected conditional branch");
+
+ Value *SetCount =
+ UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
LoopGuard->setCondition(SetCount);
if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
LoopGuard->swapSuccessors();
}
- LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
- << *SetCount << "\n");
- return UseLoopGuard ? LoopCountInit : SetCount;
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
+ << "\n");
+ if (UsePhi && UseLoopGuard)
+ LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
+ return !UsePhi ? LoopCountInit : LoopSetup;
}
void HardwareLoop::InsertLoopDec() {
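
The InsertIterationSetup change above now has to pick between four intrinsics. The selection reduces to the following illustrative sketch (the helper name is hypothetical; the intrinsic IDs are the ones used in the patch). With a loop guard, the test_* forms are used so their result can drive the guard branch; with a counter PHI, the *start_loop_iterations forms are used so the returned count can feed the PHI, and in the combined case the patch extracts element 1 for the guard condition and element 0 for the counter.

  #include "llvm/IR/Intrinsics.h"
  using namespace llvm;

  // Hypothetical helper mirroring the intrinsic selection in
  // HardwareLoop::InsertIterationSetup.
  static Intrinsic::ID pickLoopSetupIntrinsic(bool UseLoopGuard, bool UsePhi) {
    if (UseLoopGuard)
      return UsePhi ? Intrinsic::test_start_loop_iterations
                    : Intrinsic::test_set_loop_iterations;
    return UsePhi ? Intrinsic::start_loop_iterations
                  : Intrinsic::set_loop_iterations;
  }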
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index 37be2eabf5fe..681e2f3dc848 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -300,7 +300,7 @@ namespace {
MachineBasicBlock::iterator TIE = TBBInfo.BB->end();
MachineBasicBlock::iterator FIE = FBBInfo.BB->end();
- unsigned Dups1, Dups2;
+ unsigned Dups1 = 0, Dups2 = 0;
if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
*TBBInfo.BB, *FBBInfo.BB,
/*SkipUnconditionalBranches*/ true))
@@ -742,8 +742,8 @@ bool IfConverter::CountDuplicatedInstructions(
bool SkipUnconditionalBranches) const {
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
- TIB = skipDebugInstructionsForward(TIB, TIE);
- FIB = skipDebugInstructionsForward(FIB, FIE);
+ TIB = skipDebugInstructionsForward(TIB, TIE, false);
+ FIB = skipDebugInstructionsForward(FIB, FIE, false);
if (TIB == TIE || FIB == FIE)
break;
if (!TIB->isIdenticalTo(*FIB))
@@ -785,8 +785,8 @@ bool IfConverter::CountDuplicatedInstructions(
while (RTIE != RTIB && RFIE != RFIB) {
// Skip dbg_value instructions. These do not count.
// Note that these are reverse iterators going forward.
- RTIE = skipDebugInstructionsForward(RTIE, RTIB);
- RFIE = skipDebugInstructionsForward(RFIE, RFIB);
+ RTIE = skipDebugInstructionsForward(RTIE, RTIB, false);
+ RFIE = skipDebugInstructionsForward(RFIE, RFIB, false);
if (RTIE == RTIB || RFIE == RFIB)
break;
if (!RTIE->isIdenticalTo(*RFIE))
@@ -838,8 +838,8 @@ static void verifySameBranchInstructions(
MachineBasicBlock::reverse_iterator E1 = MBB1->rbegin();
MachineBasicBlock::reverse_iterator E2 = MBB2->rbegin();
while (E1 != B1 && E2 != B2) {
- skipDebugInstructionsForward(E1, B1);
- skipDebugInstructionsForward(E2, B2);
+ skipDebugInstructionsForward(E1, B1, false);
+ skipDebugInstructionsForward(E2, B2, false);
if (E1 == B1 && E2 == B2)
break;
@@ -1564,8 +1564,8 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (MRI->tracksLiveness()) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
- Redefs.addLiveIns(CvtMBB);
- Redefs.addLiveIns(NextMBB);
+ Redefs.addLiveInsNoPristines(CvtMBB);
+ Redefs.addLiveInsNoPristines(NextMBB);
}
// Remove the branches from the entry so we can add the contents of the true
@@ -1665,8 +1665,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// predicated instructions.
Redefs.init(*TRI);
if (MRI->tracksLiveness()) {
- Redefs.addLiveIns(CvtMBB);
- Redefs.addLiveIns(NextMBB);
+ Redefs.addLiveInsNoPristines(CvtMBB);
+ Redefs.addLiveInsNoPristines(NextMBB);
}
bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
@@ -1828,14 +1828,14 @@ bool IfConverter::IfConvertDiamondCommon(
// after tracking the BB1 instructions.
Redefs.init(*TRI);
if (MRI->tracksLiveness()) {
- Redefs.addLiveIns(MBB1);
- Redefs.addLiveIns(MBB2);
+ Redefs.addLiveInsNoPristines(MBB1);
+ Redefs.addLiveInsNoPristines(MBB2);
}
// Remove the duplicated instructions at the beginnings of both paths.
// Skip dbg_value instructions.
- MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr();
- MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr();
+ MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr(false);
+ MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr(false);
BBI1->NonPredSize -= NumDups1;
BBI2->NonPredSize -= NumDups1;
diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 5cdaa9b74e80..0882ce366c9c 100644
--- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -353,10 +353,9 @@ ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI,
return AR_MayAlias;
continue;
}
- llvm::AliasResult AAResult = AA->alias(
- MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
- MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo()));
- if (AAResult != NoAlias)
+ if (!AA->isNoAlias(
+ MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
+ MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo())))
return AR_MayAlias;
}
}
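
Since the ImplicitNullChecks hunk above is the only place in this batch that changes an alias query, here is the pattern in isolation as a minimal sketch, assuming an AAResults &AA and two MachineMemOperands like MMO1/MMO2 above; the helper name is invented for illustration.

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/MemoryLocation.h"
  #include "llvm/CodeGen/MachineMemOperand.h"
  using namespace llvm;

  // True only when AA proves the two accesses are disjoint: the same test as
  // the old "alias(...) != NoAlias" form, phrased as a direct predicate.
  static bool provesNoAlias(AAResults &AA, const MachineMemOperand &MMO1,
                            const MachineMemOperand &MMO2) {
    return AA.isNoAlias(
        MemoryLocation::getAfter(MMO1.getValue(), MMO1.getAAInfo()),
        MemoryLocation::getAfter(MMO2.getValue(), MMO2.getAAInfo()));
  }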
diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index 4473a139d3ad..e4606daba352 100644
--- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -28,9 +28,11 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -59,6 +61,10 @@ public:
initializeIndirectBrExpandPassPass(*PassRegistry::getPassRegistry());
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
bool runOnFunction(Function &F) override;
};
@@ -66,8 +72,11 @@ public:
char IndirectBrExpandPass::ID = 0;
-INITIALIZE_PASS(IndirectBrExpandPass, DEBUG_TYPE,
- "Expand indirectbr instructions", false, false)
+INITIALIZE_PASS_BEGIN(IndirectBrExpandPass, DEBUG_TYPE,
+ "Expand indirectbr instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(IndirectBrExpandPass, DEBUG_TYPE,
+ "Expand indirectbr instructions", false, false)
FunctionPass *llvm::createIndirectBrExpandPass() {
return new IndirectBrExpandPass();
@@ -85,6 +94,10 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
return false;
TLI = STI.getTargetLowering();
+ Optional<DomTreeUpdater> DTU;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
+
SmallVector<IndirectBrInst *, 1> IndirectBrs;
// Set of all potential successors for indirectbr instructions.
@@ -158,10 +171,22 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
if (BBs.empty()) {
// There are no blocks whose address is taken, so any indirectbr instruction
// cannot get a valid input and we can replace all of them with unreachable.
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ if (DTU)
+ Updates.reserve(IndirectBrSuccs.size());
for (auto *IBr : IndirectBrs) {
+ if (DTU) {
+ for (BasicBlock *SuccBB : IBr->successors())
+ Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB});
+ }
(void)new UnreachableInst(F.getContext(), IBr);
IBr->eraseFromParent();
}
+ if (DTU) {
+ assert(Updates.size() == IndirectBrSuccs.size() &&
+ "Got unexpected update count.");
+ DTU->applyUpdates(Updates);
+ }
return true;
}
@@ -183,12 +208,22 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr);
};
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+
if (IndirectBrs.size() == 1) {
// If we only have one indirectbr, we can just directly replace it within
// its block.
- SwitchBB = IndirectBrs[0]->getParent();
- SwitchValue = GetSwitchValue(IndirectBrs[0]);
- IndirectBrs[0]->eraseFromParent();
+ IndirectBrInst *IBr = IndirectBrs[0];
+ SwitchBB = IBr->getParent();
+ SwitchValue = GetSwitchValue(IBr);
+ if (DTU) {
+ Updates.reserve(IndirectBrSuccs.size());
+ for (BasicBlock *SuccBB : IBr->successors())
+ Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB});
+ assert(Updates.size() == IndirectBrSuccs.size() &&
+ "Got unexpected update count.");
+ }
+ IBr->eraseFromParent();
} else {
// Otherwise we need to create a new block to hold the switch across BBs,
// jump to that block instead of each indirectbr, and phi together the
@@ -200,9 +235,16 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
// Now replace the indirectbr instructions with direct branches to the
// switch block and fill out the PHI operands.
+ if (DTU)
+ Updates.reserve(IndirectBrs.size() + 2 * IndirectBrSuccs.size());
for (auto *IBr : IndirectBrs) {
SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent());
BranchInst::Create(SwitchBB, IBr);
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, IBr->getParent(), SwitchBB});
+ for (BasicBlock *SuccBB : IBr->successors())
+ Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB});
+ }
IBr->eraseFromParent();
}
}
@@ -215,5 +257,15 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
for (int i : llvm::seq<int>(1, BBs.size()))
SI->addCase(ConstantInt::get(CommonITy, i + 1), BBs[i]);
+ if (DTU) {
+ // If there were multiple indirectbr's, they may have common successors,
+ // but in the dominator tree, we only track unique edges.
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessors(BBs.begin(), BBs.end());
+ Updates.reserve(Updates.size() + UniqueSuccessors.size());
+ for (BasicBlock *BB : UniqueSuccessors)
+ Updates.push_back({DominatorTree::Insert, SwitchBB, BB});
+ DTU->applyUpdates(Updates);
+ }
+
return true;
}
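
The dominator-tree bookkeeping added to IndirectBrExpandPass above follows the usual lazy DomTreeUpdater pattern: collect edge insertions and deletions while the terminators are being rewritten, then hand them to the updater in a single batch. A minimal free-standing sketch, with a hypothetical helper name:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/DomTreeUpdater.h"
  #include "llvm/IR/Dominators.h"
  using namespace llvm;

  // Queue the CFG edge changes caused by retargeting From's terminator from
  // OldSuccs to NewSucc, then submit them. With UpdateStrategy::Lazy the
  // updates are flushed the next time the dominator tree is queried.
  static void updateDomTreeForRetarget(DomTreeUpdater &DTU, BasicBlock *From,
                                       ArrayRef<BasicBlock *> OldSuccs,
                                       BasicBlock *NewSucc) {
    SmallVector<DominatorTree::UpdateType, 8> Updates;
    for (BasicBlock *S : OldSuccs)
      Updates.push_back({DominatorTree::Delete, From, S});
    Updates.push_back({DominatorTree::Insert, From, NewSucc});
    DTU.applyUpdates(Updates);
  }

As in the patch, duplicate edges must be filtered out by the caller before the updates are submitted.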
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 876e1d3f932a..71e91b445d9a 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -173,7 +173,7 @@ class InlineSpiller : public Spiller {
LiveRangeEdit *Edit;
LiveInterval *StackInt;
int StackSlot;
- unsigned Original;
+ Register Original;
// All registers to spill to StackSlot, including the main register.
SmallVector<Register, 8> RegsToSpill;
@@ -191,19 +191,23 @@ class InlineSpiller : public Spiller {
// Object records spills information and does the hoisting.
HoistSpillHelper HSpiller;
+ // Live range weight calculator.
+ VirtRegAuxInfo &VRAI;
+
~InlineSpiller() override = default;
public:
- InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
- : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
- LSS(pass.getAnalysis<LiveStacks>()),
- AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
- MDT(pass.getAnalysis<MachineDominatorTree>()),
- Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
- MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
- TRI(*mf.getSubtarget().getRegisterInfo()),
- MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
- HSpiller(pass, mf, vrm) {}
+ InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM,
+ VirtRegAuxInfo &VRAI)
+ : MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()),
+ LSS(Pass.getAnalysis<LiveStacks>()),
+ AA(&Pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
+ MDT(Pass.getAnalysis<MachineDominatorTree>()),
+ Loops(Pass.getAnalysis<MachineLoopInfo>()), VRM(VRM),
+ MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
+ TRI(*MF.getSubtarget().getRegisterInfo()),
+ MBFI(Pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ HSpiller(Pass, MF, VRM), VRAI(VRAI) {}
void spill(LiveRangeEdit &) override;
void postOptimization() override;
@@ -239,10 +243,10 @@ Spiller::~Spiller() = default;
void Spiller::anchor() {}
-Spiller *llvm::createInlineSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm) {
- return new InlineSpiller(pass, mf, vrm);
+Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
+ MachineFunction &MF, VirtRegMap &VRM,
+ VirtRegAuxInfo &VRAI) {
+ return new InlineSpiller(Pass, MF, VRM, VRAI);
}
//===----------------------------------------------------------------------===//
@@ -1044,7 +1048,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {
// Modify DBG_VALUE now that the value is in a spill slot.
MachineBasicBlock *MBB = MI->getParent();
LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
- buildDbgValueForSpill(*MBB, MI, *MI, StackSlot);
+ buildDbgValueForSpill(*MBB, MI, *MI, StackSlot, Reg);
MBB->erase(MI);
continue;
}
@@ -1200,7 +1204,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
if (!RegsToSpill.empty())
spillAll();
- Edit->calculateRegClassAndHint(MF, Loops, MBFI);
+ Edit->calculateRegClassAndHint(MF, VRAI);
}
/// Optimizations after all the reg selections and spills are done.
@@ -1241,13 +1245,16 @@ bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
/// i.e., there should be a live sibling of OrigReg at the insert point.
bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
MachineBasicBlock &BB, Register &LiveReg) {
- SlotIndex Idx;
+ SlotIndex Idx = IPA.getLastInsertPoint(OrigLI, BB);
+ // The original def could be after the last insert point in the root block,
+ // we can't hoist to here.
+ if (Idx < OrigVNI.def) {
+ // TODO: We could be better here. If LI is not alive in landing pad
+ // we could hoist spill after LIP.
+ LLVM_DEBUG(dbgs() << "can't spill in root block - def after LIP\n");
+ return false;
+ }
Register OrigReg = OrigLI.reg();
- MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB);
- if (MI != BB.end())
- Idx = LIS.getInstructionIndex(*MI);
- else
- Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
SmallSetVector<Register, 16> &Siblings = Virt2SiblingsMap[OrigReg];
assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI");
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index b22e6faeb91c..24a57cc21c57 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -385,8 +385,7 @@ bool InterleavedAccess::lowerInterleavedLoad(
return !Extracts.empty() || BinOpShuffleChanged;
}
- for (auto SVI : Shuffles)
- DeadInsts.push_back(SVI);
+ append_range(DeadInsts, Shuffles);
DeadInsts.push_back(LI);
return true;
@@ -409,8 +408,8 @@ bool InterleavedAccess::replaceBinOpShuffles(
auto *NewSVI2 = new ShuffleVectorInst(
BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
SVI->getName(), SVI);
- Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2,
- BI->getName(), SVI);
+ BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
+ BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI);
SVI->replaceAllUsesWith(NewBI);
LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
<< "\n With : " << *NewSVI1 << "\n And : "
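
The point of the InterleavedAccess change above is that the rebuilt binary operator must keep the poison-generating and fast-math flags of the instruction it replaces, which plain BinaryOperator::Create drops. A small, hedged illustration of the API in isolation (the helper name is invented; CreateWithCopiedFlags is the call used above):

  #include "llvm/IR/InstrTypes.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Rebuild Old's operation over two new operands, copying Old's nuw/nsw/exact
  // and fast-math flags, and insert the result before InsertPt.
  static BinaryOperator *rebuildWithFlags(BinaryOperator *Old, Value *NewLHS,
                                          Value *NewRHS,
                                          Instruction *InsertPt) {
    return BinaryOperator::CreateWithCopiedFlags(Old->getOpcode(), NewLHS,
                                                 NewRHS, Old, Old->getName(),
                                                 InsertPt);
  }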
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index ff3f93d51ea8..71bfb1d87d66 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index f9b7bf613ff6..37c0b44ea2b2 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -64,8 +64,12 @@ void LLVMTargetMachine::initAsmInfo() {
if (Options.BinutilsVersion.first > 0)
TmpAsmInfo->setBinutilsVersion(Options.BinutilsVersion);
- if (Options.DisableIntegratedAS)
+ if (Options.DisableIntegratedAS) {
TmpAsmInfo->setUseIntegratedAssembler(false);
+ // If the integrated assembler has been explicitly disabled, we can't use
+ // it for inline asm either.
+ TmpAsmInfo->setParseInlineAsmUsingAsmParser(false);
+ }
TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments);
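
For reference, the LLVMTargetMachine hunk above amounts to keeping two MCAsmInfo toggles in sync whenever TargetOptions::DisableIntegratedAS is set. A free-standing sketch (the helper is hypothetical; the two setters are the ones called in the patch):

  #include "llvm/MC/MCAsmInfo.h"
  using namespace llvm;

  // When the integrated assembler is disabled (e.g. via -no-integrated-as),
  // also stop routing inline asm through the integrated AsmParser.
  static void disableIntegratedAssembler(MCAsmInfo &MAI) {
    MAI.setUseIntegratedAssembler(false);
    MAI.setParseInlineAsmUsingAsmParser(false);
  }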
diff --git a/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index 8a7a41d0f763..c3e0553418a5 100644
--- a/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -55,9 +55,8 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
/// of SU, return it, otherwise return null.
SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
SUnit *OnlyAvailablePred = nullptr;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- SUnit &Pred = *I->getSUnit();
+ for (const SDep &P : SU->Preds) {
+ SUnit &Pred = *P.getSUnit();
if (!Pred.isScheduled) {
// We found an available, but not scheduled, predecessor. If it's the
// only one we have found, keep track of it... otherwise give up.
@@ -90,10 +89,8 @@ void LatencyPriorityQueue::push(SUnit *SU) {
// single predecessor has a higher priority, since scheduling it will make
// the node available.
void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- AdjustPriorityOfUnscheduledPreds(I->getSUnit());
- }
+ for (const SDep &Succ : SU->Succs)
+ AdjustPriorityOfUnscheduledPreds(Succ.getSUnit());
}
/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
diff --git a/llvm/lib/CodeGen/LexicalScopes.cpp b/llvm/lib/CodeGen/LexicalScopes.cpp
index 8139c2cbb6cd..47c19c3d8ec4 100644
--- a/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -75,6 +75,11 @@ void LexicalScopes::extractLexicalScopes(
const MachineInstr *PrevMI = nullptr;
const DILocation *PrevDL = nullptr;
for (const auto &MInsn : MBB) {
+ // Ignore DBG_VALUE and similar instructions that do not contribute to any
+ // instruction in the output.
+ if (MInsn.isMetaInstruction())
+ continue;
+
// Check if instruction has valid location information.
const DILocation *MIDL = MInsn.getDebugLoc();
if (!MIDL) {
@@ -88,11 +93,6 @@ void LexicalScopes::extractLexicalScopes(
continue;
}
- // Ignore DBG_VALUE and similar instruction that do not contribute to any
- // instruction in the output.
- if (MInsn.isMetaInstruction())
- continue;
-
if (RangeBeginMI) {
// If we have already seen a beginning of an instruction range and
// current instruction scope does not match scope of first instruction
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 18ffe8ba0669..dc9907058340 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -148,6 +148,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -160,6 +161,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
@@ -184,6 +186,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -199,23 +203,16 @@
using namespace llvm;
+// SSAUpdaterImpl sets DEBUG_TYPE, change it.
+#undef DEBUG_TYPE
#define DEBUG_TYPE "livedebugvalues"
-STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
-STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed");
-
// Act more like the VarLoc implementation, by propagating some locations too
// far and ignoring some transfers.
static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,
cl::desc("Act like old LiveDebugValues did"),
cl::init(false));
-// Rely on isStoreToStackSlotPostFE and similar to observe all stack spills.
-static cl::opt<bool>
- ObserveAllStackops("observe-all-stack-ops", cl::Hidden,
- cl::desc("Allow non-kill spill and restores"),
- cl::init(false));
-
namespace {
// The location at which a spilled value resides. It consists of a register and
@@ -959,25 +956,27 @@ public:
class TransferTracker {
public:
const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
/// This machine location tracker is assumed to always contain the up-to-date
/// value mapping for all machine locations. TransferTracker only reads
/// information from it. (XXX make it const?)
MLocTracker *MTracker;
MachineFunction &MF;
+ bool ShouldEmitDebugEntryValues;
/// Record of all changes in variable locations at a block position. Awkwardly
/// we allow inserting either before or after the point: MBB != nullptr
/// indicates it's before, otherwise after.
struct Transfer {
- MachineBasicBlock::iterator Pos; /// Position to insert DBG_VALUes
- MachineBasicBlock *MBB; /// non-null if we should insert after.
+ MachineBasicBlock::instr_iterator Pos; /// Position to insert DBG_VALUEs
+ MachineBasicBlock *MBB; /// non-null if we should insert after.
SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert.
};
- typedef struct {
+ struct LocAndProperties {
LocIdx Loc;
DbgValueProperties Properties;
- } LocAndProperties;
+ };
/// Collection of transfers (DBG_VALUEs) to be inserted.
SmallVector<Transfer, 32> Transfers;
@@ -1027,9 +1026,13 @@ public:
TransferTracker(const TargetInstrInfo *TII, MLocTracker *MTracker,
MachineFunction &MF, const TargetRegisterInfo &TRI,
- const BitVector &CalleeSavedRegs)
+ const BitVector &CalleeSavedRegs, const TargetPassConfig &TPC)
: TII(TII), MTracker(MTracker), MF(MF), TRI(TRI),
- CalleeSavedRegs(CalleeSavedRegs) {}
+ CalleeSavedRegs(CalleeSavedRegs) {
+ TLI = MF.getSubtarget().getTargetLowering();
+ auto &TM = TPC.getTM<TargetMachine>();
+ ShouldEmitDebugEntryValues = TM.Options.ShouldEmitDebugEntryValues();
+ }
/// Load object with live-in variable values. \p mlocs contains the live-in
/// values in each machine location, while \p vlocs the live-in variable
@@ -1097,6 +1100,8 @@ public:
// use-before-def to be resolved as we step through the block.
if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI())
addUseBeforeDef(Var.first, Var.second.Properties, Num);
+ else
+ recoverAsEntryValue(Var.first, Var.second.Properties, Num);
continue;
}
@@ -1152,10 +1157,73 @@ public:
/// Helper to move created DBG_VALUEs into Transfers collection.
void flushDbgValues(MachineBasicBlock::iterator Pos, MachineBasicBlock *MBB) {
- if (PendingDbgValues.size() > 0) {
- Transfers.push_back({Pos, MBB, PendingDbgValues});
- PendingDbgValues.clear();
- }
+ if (PendingDbgValues.size() == 0)
+ return;
+
+ // Pick out the instruction start position.
+ MachineBasicBlock::instr_iterator BundleStart;
+ if (MBB && Pos == MBB->begin())
+ BundleStart = MBB->instr_begin();
+ else
+ BundleStart = getBundleStart(Pos->getIterator());
+
+ Transfers.push_back({BundleStart, MBB, PendingDbgValues});
+ PendingDbgValues.clear();
+ }
+
+ bool isEntryValueVariable(const DebugVariable &Var,
+ const DIExpression *Expr) const {
+ if (!Var.getVariable()->isParameter())
+ return false;
+
+ if (Var.getInlinedAt())
+ return false;
+
+ if (Expr->getNumElements() > 0)
+ return false;
+
+ return true;
+ }
+
+ bool isEntryValueValue(const ValueIDNum &Val) const {
+ // Must be in entry block (block number zero), and be a PHI / live-in value.
+ if (Val.getBlock() || !Val.isPHI())
+ return false;
+
+ // Entry values must enter in a register.
+ if (MTracker->isSpill(Val.getLoc()))
+ return false;
+
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FP = TRI.getFrameRegister(MF);
+ Register Reg = MTracker->LocIdxToLocID[Val.getLoc()];
+ return Reg != SP && Reg != FP;
+ }
+
+ bool recoverAsEntryValue(const DebugVariable &Var, DbgValueProperties &Prop,
+ const ValueIDNum &Num) {
+ // Is this variable location a candidate to be an entry value? First,
+ // should we be trying this at all?
+ if (!ShouldEmitDebugEntryValues)
+ return false;
+
+ // Is the variable appropriate for entry values (i.e., is it a parameter)?
+ if (!isEntryValueVariable(Var, Prop.DIExpr))
+ return false;
+
+ // Is the value assigned to this variable still the entry value?
+ if (!isEntryValueValue(Num))
+ return false;
+
+ // Emit a variable location using an entry value expression.
+ DIExpression *NewExpr =
+ DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue);
+ Register Reg = MTracker->LocIdxToLocID[Num.getLoc()];
+ MachineOperand MO = MachineOperand::CreateReg(Reg, false);
+ MO.setIsDebug(true);
+
+ PendingDbgValues.push_back(emitMOLoc(MO, Var, {NewExpr, Prop.Indirect}));
+ return true;
}
/// Change a variable value after encountering a DBG_VALUE inside a block.
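
The entry-value recovery added above reduces to rewriting the variable's DIExpression with a DW_OP_LLVM_entry_value prefix and pointing the resulting DBG_VALUE at the physical register the parameter arrived in. A minimal sketch of just that construction step, using the same APIs as recoverAsEntryValue (the helper name and the returned pair are illustrative assumptions):

  #include "llvm/CodeGen/MachineOperand.h"
  #include "llvm/IR/DebugInfoMetadata.h"
  #include <utility>
  using namespace llvm;

  // Build the expression/operand pair describing "the value Reg had on entry".
  static std::pair<const DIExpression *, MachineOperand>
  makeEntryValueLoc(const DIExpression *Expr, Register Reg) {
    const DIExpression *EntryExpr =
        DIExpression::prepend(Expr, DIExpression::EntryValue);
    MachineOperand MO = MachineOperand::CreateReg(Reg, /*isDef=*/false);
    MO.setIsDebug(true);
    return {EntryExpr, MO};
  }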
@@ -1224,26 +1292,70 @@ public:
}
}
- /// Explicitly terminate variable locations based on \p mloc. Creates undef
- /// DBG_VALUEs for any variables that were located there, and clears
- /// #ActiveMLoc / #ActiveVLoc tracking information for that location.
- void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos) {
- assert(MTracker->isSpill(MLoc));
+ /// Account for a location \p MLoc being clobbered. Examine the variable
+ /// locations that will be terminated, and try to recover them by using
+ /// another location. Optionally, given \p MakeUndef, emit a DBG_VALUE to
+ /// explicitly terminate a location if it can't be recovered.
+ void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos,
+ bool MakeUndef = true) {
auto ActiveMLocIt = ActiveMLocs.find(MLoc);
if (ActiveMLocIt == ActiveMLocs.end())
return;
+ // What was the old variable value?
+ ValueIDNum OldValue = VarLocs[MLoc.asU64()];
VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue;
+ // Examine the remaining variable locations: if we can find the same value
+ // again, we can recover the location.
+ Optional<LocIdx> NewLoc = None;
+ for (auto Loc : MTracker->locations())
+ if (Loc.Value == OldValue)
+ NewLoc = Loc.Idx;
+
+ // If there is no location, and we weren't asked to make the variable
+ // explicitly undef, then stop here.
+ if (!NewLoc && !MakeUndef) {
+ // Try and recover a few more locations with entry values.
+ for (auto &Var : ActiveMLocIt->second) {
+ auto &Prop = ActiveVLocs.find(Var)->second.Properties;
+ recoverAsEntryValue(Var, Prop, OldValue);
+ }
+ flushDbgValues(Pos, nullptr);
+ return;
+ }
+
+ // Examine all the variables based on this location.
+ DenseSet<DebugVariable> NewMLocs;
for (auto &Var : ActiveMLocIt->second) {
auto ActiveVLocIt = ActiveVLocs.find(Var);
- // Create an undef. We can't feed in a nullptr DIExpression alas,
- // so use the variables last expression. Pass None as the location.
+ // Re-state the variable location: if there's no replacement then NewLoc
+ // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE
+ // identifying the alternative location will be emitted.
const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr;
DbgValueProperties Properties(Expr, false);
- PendingDbgValues.push_back(MTracker->emitLoc(None, Var, Properties));
- ActiveVLocs.erase(ActiveVLocIt);
+ PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties));
+
+ // Update machine locations <=> variable locations maps. Defer updating
+ // ActiveMLocs to avoid invalidating the ActiveMLocIt iterator.
+ if (!NewLoc) {
+ ActiveVLocs.erase(ActiveVLocIt);
+ } else {
+ ActiveVLocIt->second.Loc = *NewLoc;
+ NewMLocs.insert(Var);
+ }
}
+
+ // Commit any deferred ActiveMLoc changes.
+ if (!NewMLocs.empty())
+ for (auto &Var : NewMLocs)
+ ActiveMLocs[*NewLoc].insert(Var);
+
+ // We lazily track what locations have which values; if we've found a new
+ // location for the clobbered value, remember it.
+ if (NewLoc)
+ VarLocs[NewLoc->asU64()] = OldValue;
+
flushDbgValues(Pos, nullptr);
ActiveMLocIt->second.clear();
@@ -1332,6 +1444,7 @@ private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
const TargetFrameLowering *TFI;
+ const MachineFrameInfo *MFI;
BitVector CalleeSavedRegs;
LexicalScopes LS;
TargetPassConfig *TPC;
@@ -1372,6 +1485,23 @@ private:
/// instruction numbers in DBG_INSTR_REFs into machine value numbers.
std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+ /// Record of where we observed a DBG_PHI instruction.
+ class DebugPHIRecord {
+ public:
+ uint64_t InstrNum; ///< Instruction number of this DBG_PHI.
+ MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
+ ValueIDNum ValueRead; ///< The value number read by the DBG_PHI.
+ LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads.
+
+ operator unsigned() const { return InstrNum; }
+ };
+
+ /// Map from instruction numbers defined by DBG_PHIs to a record of what that
+ /// DBG_PHI read and where. Populated and edited during the machine value
+ /// location problem -- we use LLVM's SSA Updater to fix changes by
+ /// optimizations that destroy PHI instructions.
+ SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
+
// Map of overlapping variable fragments.
OverlapMap OverlapFragments;
VarToFragments SeenFragments;
@@ -1398,7 +1528,8 @@ private:
SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
/// Observe a single instruction while stepping through a block.
- void process(MachineInstr &MI);
+ void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
+ ValueIDNum **MLiveIns = nullptr);
/// Examines whether \p MI is a DBG_VALUE and notifies trackers.
/// \returns true if MI was recognized and processed.
@@ -1406,7 +1537,13 @@ private:
/// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
/// \returns true if MI was recognized and processed.
- bool transferDebugInstrRef(MachineInstr &MI);
+ bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns);
+
+ /// Stores value-information about where this PHI occurred, and what
+ /// instruction number is associated with it.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugPHI(MachineInstr &MI);
/// Examines whether \p MI is copy instruction, and notifies trackers.
/// \returns true if MI was recognized and processed.
@@ -1425,6 +1562,18 @@ private:
void accumulateFragmentMap(MachineInstr &MI);
+ /// Determine the machine value number referred to by (potentially several)
+ /// DBG_PHI instructions. Block duplication and tail folding can duplicate
+ /// DBG_PHIs, shifting the position where values in registers merge, and
+ /// forming another mini-ssa problem to solve.
+ /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
+ /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
+ /// \returns The machine value number at position Here, or None.
+ Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
+ ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns, MachineInstr &Here,
+ uint64_t InstrNum);
+
/// Step through the function, recording register definitions and movements
/// in an MLocTracker. Convert the observations into a per-block transfer
/// function in \p MLocTransfer, suitable for using with the machine value
@@ -1527,8 +1676,9 @@ private:
/// right now "order of appearence in function, when explored in RPO", so
/// that we can compare explictly against VarLocBasedImpl.
void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
- ValueIDNum **MInLocs,
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering);
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+ const TargetPassConfig &TPC);
/// Boilerplate computation of some initial sets, artificial blocks and
/// RPOT block ordering.
@@ -1640,7 +1790,9 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
return true;
}
-bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) {
+bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
+ ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns) {
if (!MI.isDebugRef())
return false;
@@ -1669,12 +1821,22 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) {
// Various optimizations may have happened to the value during codegen,
// recorded in the value substitution table. Apply any substitutions to
- // the instruction / operand number in this DBG_INSTR_REF.
- auto Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo));
- while (Sub != MF.DebugValueSubstitutions.end()) {
- InstNo = Sub->second.first;
- OpNo = Sub->second.second;
- Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo));
+ // the instruction / operand number in this DBG_INSTR_REF, and collect
+ // any subregister extractions performed during optimization.
+
+ // Create dummy substitution with Src set, for lookup.
+ auto SoughtSub =
+ MachineFunction::DebugSubstitution({InstNo, OpNo}, {0, 0}, 0);
+
+ SmallVector<unsigned, 4> SeenSubregs;
+ auto LowerBoundIt = llvm::lower_bound(MF.DebugValueSubstitutions, SoughtSub);
+ while (LowerBoundIt != MF.DebugValueSubstitutions.end() &&
+ LowerBoundIt->Src == SoughtSub.Src) {
+ std::tie(InstNo, OpNo) = LowerBoundIt->Dest;
+ SoughtSub.Src = LowerBoundIt->Dest;
+ if (unsigned Subreg = LowerBoundIt->Subreg)
+ SeenSubregs.push_back(Subreg);
+ LowerBoundIt = llvm::lower_bound(MF.DebugValueSubstitutions, SoughtSub);
}
// Default machine value number is <None> -- if no instruction defines
@@ -1682,8 +1844,10 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) {
Optional<ValueIDNum> NewID = None;
// Try to lookup the instruction number, and find the machine value number
- // that it defines.
+ // that it defines. It could be an instruction, or a PHI.
auto InstrIt = DebugInstrNumToInstr.find(InstNo);
+ auto PHIIt = std::lower_bound(DebugPHINumToValue.begin(),
+ DebugPHINumToValue.end(), InstNo);
if (InstrIt != DebugInstrNumToInstr.end()) {
const MachineInstr &TargetInstr = *InstrIt->second.first;
uint64_t BlockNo = TargetInstr.getParent()->getNumber();
@@ -1698,6 +1862,82 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) {
unsigned LocID = MTracker->getLocID(MO.getReg(), false);
LocIdx L = MTracker->LocIDToLocIdx[LocID];
NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ } else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
+ // It's actually a PHI value. Which value it is might not be obvious; use
+ // the resolver helper to find out.
+ NewID = resolveDbgPHIs(*MI.getParent()->getParent(), MLiveOuts, MLiveIns,
+ MI, InstNo);
+ }
+
+ // Apply any subregister extractions, in reverse. We might have seen code
+ // like this:
+ // CALL64 @foo, implicit-def $rax
+ // %0:gr64 = COPY $rax
+ // %1:gr32 = COPY %0.sub_32bit
+ // %2:gr16 = COPY %1.sub_16bit
+ // %3:gr8 = COPY %2.sub_8bit
+ // In which case each copy would have been recorded as a substitution with
+ // a subregister qualifier. Apply those qualifiers now.
+ if (NewID && !SeenSubregs.empty()) {
+ unsigned Offset = 0;
+ unsigned Size = 0;
+
+ // Look at each subregister that we passed through, and progressively
+ // narrow in, accumulating any offsets that occur. Substitutions should
+ // only ever be the same or narrower width than what they read from;
+ // iterate in reverse order so that we go from wide to small.
+ for (unsigned Subreg : reverse(SeenSubregs)) {
+ unsigned ThisSize = TRI->getSubRegIdxSize(Subreg);
+ unsigned ThisOffset = TRI->getSubRegIdxOffset(Subreg);
+ Offset += ThisOffset;
+ Size = (Size == 0) ? ThisSize : std::min(Size, ThisSize);
+ }
+
+ // If that worked, look for an appropriate subregister of the register
+ // where the def happens. Don't look at values that were defined during
+ // a stack write: we can't currently express register locations within
+ // spills.
+ LocIdx L = NewID->getLoc();
+ if (NewID && !MTracker->isSpill(L)) {
+ // Find the register class for the register where this def happened.
+ // FIXME: no index for this?
+ Register Reg = MTracker->LocIdxToLocID[L];
+ const TargetRegisterClass *TRC = nullptr;
+ for (auto *TRCI : TRI->regclasses())
+ if (TRCI->contains(Reg))
+ TRC = TRCI;
+ assert(TRC && "Couldn't find target register class?");
+
+ // If the register we have isn't the right size or in the right place,
+ // Try to find a subregister inside it.
+ unsigned MainRegSize = TRI->getRegSizeInBits(*TRC);
+ if (Size != MainRegSize || Offset) {
+ // Enumerate all subregisters, searching.
+ Register NewReg = 0;
+ for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+ unsigned SubregSize = TRI->getSubRegIdxSize(Subreg);
+ unsigned SubregOffset = TRI->getSubRegIdxOffset(Subreg);
+ if (SubregSize == Size && SubregOffset == Offset) {
+ NewReg = *SRI;
+ break;
+ }
+ }
+
+ // If we didn't find anything: there's no way to express our value.
+ if (!NewReg) {
+ NewID = None;
+ } else {
+ // Re-state the value as being defined within the subregister
+ // that we found.
+ LocIdx NewLoc = MTracker->lookupOrTrackRegister(NewReg);
+ NewID = ValueIDNum(NewID->getBlock(), NewID->getInst(), NewLoc);
+ }
+ }
+ } else {
+ // If we can't handle subregisters, unset the new value.
+ NewID = None;
+ }
}
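
The subregister-qualifier handling just above reduces to a small piece of arithmetic: walk the recorded subreg indices, summing their offsets and keeping the narrowest size, visiting them in reverse so the traversal goes from wide to narrow. Isolated as a sketch (the helper is hypothetical; getSubRegIdxOffset and getSubRegIdxSize are the TRI queries used above):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/TargetRegisterInfo.h"
  #include <algorithm>
  #include <utility>
  using namespace llvm;

  // Return the accumulated bit offset and bit width described by a chain of
  // subregister extractions recorded while following value substitutions.
  static std::pair<unsigned, unsigned>
  accumulateSubregQualifiers(const TargetRegisterInfo &TRI,
                             ArrayRef<unsigned> SeenSubregs) {
    unsigned Offset = 0, Size = 0;
    for (unsigned Subreg : reverse(SeenSubregs)) {
      Offset += TRI.getSubRegIdxOffset(Subreg);
      unsigned ThisSize = TRI.getSubRegIdxSize(Subreg);
      Size = Size ? std::min(Size, ThisSize) : ThisSize;
    }
    return {Offset, Size};
  }

The result is then matched against a subregister of the defining register, as the code above does with getSubRegIndex and MCSubRegIterator.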
// Now we have a value number or None. Tell the variable value tracker about
@@ -1752,6 +1992,55 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) {
MachineInstr *DbgMI = MTracker->emitLoc(FoundLoc, V, Properties);
TTracker->PendingDbgValues.push_back(DbgMI);
TTracker->flushDbgValues(MI.getIterator(), nullptr);
+ return true;
+}
+
+bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
+ if (!MI.isDebugPHI())
+ return false;
+
+ // Analyse these only when solving the machine value location problem.
+ if (VTracker || TTracker)
+ return true;
+
+ // First operand is the value location, either a stack slot or register.
+ // Second is the debug instruction number of the original PHI.
+ const MachineOperand &MO = MI.getOperand(0);
+ unsigned InstrNum = MI.getOperand(1).getImm();
+
+ if (MO.isReg()) {
+ // The value is whatever's currently in the register. Read and record it,
+ // to be analysed later.
+ Register Reg = MO.getReg();
+ ValueIDNum Num = MTracker->readReg(Reg);
+ auto PHIRec = DebugPHIRecord(
+ {InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)});
+ DebugPHINumToValue.push_back(PHIRec);
+ } else {
+ // The value is whatever's in this stack slot.
+ assert(MO.isFI());
+ unsigned FI = MO.getIndex();
+
+ // If the stack slot is dead, then this was optimized away.
+ // FIXME: stack slot colouring should account for slots that get merged.
+ if (MFI->isDeadObjectIndex(FI))
+ return true;
+
+ // Identify this spill slot.
+ Register Base;
+ StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);
+ SpillLoc SL = {Base, Offs};
+ Optional<ValueIDNum> Num = MTracker->readSpill(SL);
+
+ if (!Num)
+ // Nothing ever writes to this slot. Curious, but nothing we can do.
+ return true;
+
+ // Record this DBG_PHI for later analysis.
+ auto DbgPHI = DebugPHIRecord(
+ {InstrNum, MI.getParent(), *Num, *MTracker->getSpillMLoc(SL)});
+ DebugPHINumToValue.push_back(DbgPHI);
+ }
return true;
}
@@ -1803,6 +2092,32 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
for (auto *MO : RegMaskPtrs)
MTracker->writeRegMask(MO, CurBB, CurInst);
+
+ if (!TTracker)
+ return;
+
+ // When committing variable values to locations: tell transfer tracker that
+ // we've clobbered things. It may be able to recover the variable from a
+ // different location.
+
+ // Inform TTracker about any direct clobbers.
+ for (uint32_t DeadReg : DeadRegs) {
+ LocIdx Loc = MTracker->lookupOrTrackRegister(DeadReg);
+ TTracker->clobberMloc(Loc, MI.getIterator(), false);
+ }
+
+ // Look for any clobbers performed by a register mask. Only test locations
+ // that are actually being tracked.
+ for (auto L : MTracker->locations()) {
+ // Stack locations can't be clobbered by regmasks.
+ if (MTracker->isSpill(L.Idx))
+ continue;
+
+ Register Reg = MTracker->LocIdxToLocID[L.Idx];
+ for (auto *MO : RegMaskPtrs)
+ if (MO->clobbersPhysReg(Reg))
+ TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
+ }
}
void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
@@ -1871,47 +2186,9 @@ bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
if (!isSpillInstruction(MI, MF))
return false;
- // XXX FIXME: On x86, isStoreToStackSlotPostFE returns '1' instead of an
- // actual register number.
- if (ObserveAllStackops) {
- int FI;
- Reg = TII->isStoreToStackSlotPostFE(MI, FI);
- return Reg != 0;
- }
-
- auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
- if (!MO.isReg() || !MO.isUse()) {
- Reg = 0;
- return false;
- }
- Reg = MO.getReg();
- return MO.isKill();
- };
-
- for (const MachineOperand &MO : MI.operands()) {
- // In a spill instruction generated by the InlineSpiller the spilled
- // register has its kill flag set.
- if (isKilledReg(MO, Reg))
- return true;
- if (Reg != 0) {
- // Check whether next instruction kills the spilled register.
- // FIXME: Current solution does not cover search for killed register in
- // bundles and instructions further down the chain.
- auto NextI = std::next(MI.getIterator());
- // Skip next instruction that points to basic block end iterator.
- if (MI.getParent()->end() == NextI)
- continue;
- unsigned RegNext;
- for (const MachineOperand &MONext : NextI->operands()) {
- // Return true if we came across the register from the
- // previous spill instruction that is killed in NextI.
- if (isKilledReg(MONext, RegNext) && RegNext == Reg)
- return true;
- }
- }
- }
- // Return false if we didn't find spilled register.
- return false;
+ int FI;
+ Reg = TII->isStoreToStackSlotPostFE(MI, FI);
+ return Reg != 0;
}
Optional<SpillLoc>
@@ -1950,8 +2227,12 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
if (TTracker) {
Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc);
- if (MLoc)
+ if (MLoc) {
+ // Un-set this location before clobbering, so that we don't salvage
+ // the variable location back to the same place.
+ MTracker->setMLoc(*MLoc, ValueIDNum::EmptyValue);
TTracker->clobberMloc(*MLoc, MI.getIterator());
+ }
}
}
@@ -2066,6 +2347,15 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
if (EmulateOldLDV && SrcReg != DestReg)
MTracker->defReg(SrcReg, CurBB, CurInst);
+ // Finally, the copy might have clobbered variables based on the destination
+ // register. Tell TTracker about it, in case a backup location exists.
+ if (TTracker) {
+ for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) {
+ LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI);
+ TTracker->clobberMloc(ClobberedLoc, MI.getIterator(), false);
+ }
+ }
+
return true;
}
@@ -2124,13 +2414,16 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
AllSeenFragments.insert(ThisFragment);
}
-void InstrRefBasedLDV::process(MachineInstr &MI) {
+void InstrRefBasedLDV::process(MachineInstr &MI, ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns) {
// Try to interpret an MI as a debug or transfer instruction. Only if it's
// none of these should we interpret it's register defs as new value
// definitions.
if (transferDebugValue(MI))
return;
- if (transferDebugInstrRef(MI))
+ if (transferDebugInstrRef(MI, MLiveOuts, MLiveIns))
+ return;
+ if (transferDebugPHI(MI))
return;
if (transferRegisterCopy(MI))
return;
@@ -2641,9 +2934,7 @@ std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
auto &ILS = *ILSIt->second;
// Order predecessors by RPOT order, for exploring them in that order.
- SmallVector<MachineBasicBlock *, 8> BlockOrders;
- for (auto p : MBB.predecessors())
- BlockOrders.push_back(p);
+ SmallVector<MachineBasicBlock *, 8> BlockOrders(MBB.predecessors());
auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
return BBToOrder[A] < BBToOrder[B];
@@ -3128,9 +3419,10 @@ void InstrRefBasedLDV::dump_mloc_transfer(
#endif
void InstrRefBasedLDV::emitLocations(
- MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MInLocs,
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
- TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs);
+ MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs, DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+ const TargetPassConfig &TPC) {
+ TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC);
unsigned NumLocs = MTracker->getNumLocs();
// For each block, load in the machine value locations and variable value
@@ -3146,7 +3438,7 @@ void InstrRefBasedLDV::emitLocations(
CurBB = bbnum;
CurInst = 1;
for (auto &MI : MBB) {
- process(MI);
+ process(MI, MOutLocs, MInLocs);
TTracker->checkInstForNewValues(CurInst, MI.getIterator());
++CurInst;
}
@@ -3178,9 +3470,14 @@ void InstrRefBasedLDV::emitLocations(
MBB.insert(P.Pos, MI);
}
} else {
+ // Terminators, like tail calls, can clobber things. Don't try to place
+ // transfers after them.
+ if (P.Pos->isTerminator())
+ continue;
+
MachineBasicBlock &MBB = *P.Pos->getParent();
for (auto *MI : P.Insts) {
- MBB.insertAfter(P.Pos, MI);
+ MBB.insertAfterBundle(P.Pos, MI);
}
}
}
@@ -3201,12 +3498,27 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
// Compute mappings of block <=> RPO order.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
unsigned int RPONumber = 0;
- for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
- OrderToBB[RPONumber] = *RI;
- BBToOrder[*RI] = RPONumber;
- BBNumToRPO[(*RI)->getNumber()] = RPONumber;
+ for (MachineBasicBlock *MBB : RPOT) {
+ OrderToBB[RPONumber] = MBB;
+ BBToOrder[MBB] = RPONumber;
+ BBNumToRPO[MBB->getNumber()] = RPONumber;
++RPONumber;
}
+
+ // Order value substitutions by their "source" operand pair, for quick lookup.
+ llvm::sort(MF.DebugValueSubstitutions);
+
+#ifdef EXPENSIVE_CHECKS
+ // As an expensive check, test whether there are any duplicate substitution
+ // sources in the collection.
+ if (MF.DebugValueSubstitutions.size() > 2) {
+ for (auto It = MF.DebugValueSubstitutions.begin();
+ It != std::prev(MF.DebugValueSubstitutions.end()); ++It) {
+ assert(It->Src != std::next(It)->Src && "Duplicate variable location "
+ "substitution seen");
+ }
+ }
+#endif
}
/// Calculate the liveness information for the given machine function and
@@ -3224,6 +3536,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
TFI->getCalleeSaves(MF, CalleeSavedRegs);
+ MFI = &MF.getFrameInfo();
LS.initialize(MF);
MTracker =
@@ -3266,6 +3579,21 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// dataflow problem.
mlocDataflow(MInLocs, MOutLocs, MLocTransfer);
+ // Patch up debug phi numbers, turning unknown block-live-in values into
+ // either live-through machine values, or PHIs.
+ for (auto &DBG_PHI : DebugPHINumToValue) {
+ // Identify unresolved block-live-ins.
+ ValueIDNum &Num = DBG_PHI.ValueRead;
+ if (!Num.isPHI())
+ continue;
+
+ unsigned BlockNo = Num.getBlock();
+ LocIdx LocNo = Num.getLoc();
+ Num = MInLocs[BlockNo][LocNo.asU64()];
+ }
+ // Later, we'll be looking up ranges of instruction numbers.
+ llvm::sort(DebugPHINumToValue);
+
// Walk back through each block / instruction, collecting DBG_VALUE
// instructions and recording what machine value their operands refer to.
for (auto &OrderPair : OrderToBB) {
@@ -3276,7 +3604,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
MTracker->loadFromArray(MInLocs[CurBB], CurBB);
CurInst = 1;
for (auto &MI : MBB) {
- process(MI);
+ process(MI, MOutLocs, MInLocs);
++CurInst;
}
MTracker->reset();
@@ -3331,7 +3659,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// Using the computed value locations and variable values for each block,
// create the DBG_VALUE instructions representing the extended variable
// locations.
- emitLocations(MF, SavedLiveIns, MInLocs, AllVarsNumbering);
+ emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC);
for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) {
delete[] MOutLocs[Idx];
@@ -3354,6 +3682,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
BBToOrder.clear();
BBNumToRPO.clear();
DebugInstrNumToInstr.clear();
+ DebugPHINumToValue.clear();
return Changed;
}
@@ -3361,3 +3690,389 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
LDVImpl *llvm::makeInstrRefBasedLiveDebugValues() {
return new InstrRefBasedLDV();
}
+
+namespace {
+class LDVSSABlock;
+class LDVSSAUpdater;
+
+// Pick a type to identify incoming block values as we construct SSA. We
+// can't use anything more robust than an integer unfortunately, as SSAUpdater
+// expects to zero-initialize the type.
+typedef uint64_t BlockValueNum;
+
+/// Represents an SSA PHI node for the SSA updater class. Contains the block
+/// this PHI is in, the value number it would have, and the expected incoming
+/// values from parent blocks.
+class LDVSSAPhi {
+public:
+ SmallVector<std::pair<LDVSSABlock *, BlockValueNum>, 4> IncomingValues;
+ LDVSSABlock *ParentBlock;
+ BlockValueNum PHIValNum;
+ LDVSSAPhi(BlockValueNum PHIValNum, LDVSSABlock *ParentBlock)
+ : ParentBlock(ParentBlock), PHIValNum(PHIValNum) {}
+
+ LDVSSABlock *getParent() { return ParentBlock; }
+};
+
+/// Thin wrapper around a block predecessor iterator. The only difference from
+/// a normal block iterator is that it dereferences to an LDVSSABlock.
+class LDVSSABlockIterator {
+public:
+ MachineBasicBlock::pred_iterator PredIt;
+ LDVSSAUpdater &Updater;
+
+ LDVSSABlockIterator(MachineBasicBlock::pred_iterator PredIt,
+ LDVSSAUpdater &Updater)
+ : PredIt(PredIt), Updater(Updater) {}
+
+ bool operator!=(const LDVSSABlockIterator &OtherIt) const {
+ return OtherIt.PredIt != PredIt;
+ }
+
+ LDVSSABlockIterator &operator++() {
+ ++PredIt;
+ return *this;
+ }
+
+ LDVSSABlock *operator*();
+};
+
+/// Thin wrapper around a block for SSA Updater interface. Necessary because
+/// we need to track the PHI value(s) that we may have observed as necessary
+/// in this block.
+class LDVSSABlock {
+public:
+ MachineBasicBlock &BB;
+ LDVSSAUpdater &Updater;
+ using PHIListT = SmallVector<LDVSSAPhi, 1>;
+ /// List of PHIs in this block. There should only ever be one.
+ PHIListT PHIList;
+
+ LDVSSABlock(MachineBasicBlock &BB, LDVSSAUpdater &Updater)
+ : BB(BB), Updater(Updater) {}
+
+ LDVSSABlockIterator succ_begin() {
+ return LDVSSABlockIterator(BB.succ_begin(), Updater);
+ }
+
+ LDVSSABlockIterator succ_end() {
+ return LDVSSABlockIterator(BB.succ_end(), Updater);
+ }
+
+ /// SSAUpdater has requested a PHI: create that within this block record.
+ LDVSSAPhi *newPHI(BlockValueNum Value) {
+ PHIList.emplace_back(Value, this);
+ return &PHIList.back();
+ }
+
+ /// SSAUpdater wishes to know what PHIs already exist in this block.
+ PHIListT &phis() { return PHIList; }
+};
+
+/// Utility class for the SSAUpdater interface: tracks blocks, PHIs and values
+/// while SSAUpdater is exploring the CFG. It's passed as a handle / baton to
+/// SSAUpdaterTraits<LDVSSAUpdater>.
+class LDVSSAUpdater {
+public:
+ /// Map of value numbers to PHI records.
+ DenseMap<BlockValueNum, LDVSSAPhi *> PHIs;
+ /// Map of which blocks generate Undef values -- blocks that are not
+ /// dominated by any Def.
+ DenseMap<MachineBasicBlock *, BlockValueNum> UndefMap;
+ /// Map of machine blocks to our own records of them.
+ DenseMap<MachineBasicBlock *, LDVSSABlock *> BlockMap;
+ /// Machine location where any PHI must occur.
+ LocIdx Loc;
+ /// Table of live-in machine value numbers for blocks / locations.
+ ValueIDNum **MLiveIns;
+
+ LDVSSAUpdater(LocIdx L, ValueIDNum **MLiveIns) : Loc(L), MLiveIns(MLiveIns) {}
+
+ void reset() {
+ for (auto &Block : BlockMap)
+ delete Block.second;
+
+ PHIs.clear();
+ UndefMap.clear();
+ BlockMap.clear();
+ }
+
+ ~LDVSSAUpdater() { reset(); }
+
+ /// For a given MBB, create a wrapper block for it. Stores it in the
+ /// LDVSSAUpdater block map.
+ LDVSSABlock *getSSALDVBlock(MachineBasicBlock *BB) {
+ auto it = BlockMap.find(BB);
+ if (it == BlockMap.end()) {
+ BlockMap[BB] = new LDVSSABlock(*BB, *this);
+ it = BlockMap.find(BB);
+ }
+ return it->second;
+ }
+
+ /// Find the live-in value number for the given block. Looks up the value at
+ /// the PHI location on entry.
+ BlockValueNum getValue(LDVSSABlock *LDVBB) {
+ return MLiveIns[LDVBB->BB.getNumber()][Loc.asU64()].asU64();
+ }
+};
+
+LDVSSABlock *LDVSSABlockIterator::operator*() {
+ return Updater.getSSALDVBlock(*PredIt);
+}
+
+#ifndef NDEBUG
+
+raw_ostream &operator<<(raw_ostream &out, const LDVSSAPhi &PHI) {
+ out << "SSALDVPHI " << PHI.PHIValNum;
+ return out;
+}
+
+#endif
+
+} // namespace
+
+namespace llvm {
+
+/// Template specialization to give SSAUpdater access to CFG and value
+/// information. SSAUpdater calls methods in these traits, passing in the
+/// LDVSSAUpdater object, to learn about blocks and the values they define.
+/// It also provides methods to create PHI nodes and track them.
+template <> class SSAUpdaterTraits<LDVSSAUpdater> {
+public:
+ using BlkT = LDVSSABlock;
+ using ValT = BlockValueNum;
+ using PhiT = LDVSSAPhi;
+ using BlkSucc_iterator = LDVSSABlockIterator;
+
+ // Methods to access block successors -- dereferencing to our wrapper class.
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+ /// Iterator for PHI operands.
+ class PHI_iterator {
+ private:
+ LDVSSAPhi *PHI;
+ unsigned Idx;
+
+ public:
+ explicit PHI_iterator(LDVSSAPhi *P) // begin iterator
+ : PHI(P), Idx(0) {}
+ PHI_iterator(LDVSSAPhi *P, bool) // end iterator
+ : PHI(P), Idx(PHI->IncomingValues.size()) {}
+
+ PHI_iterator &operator++() {
+ Idx++;
+ return *this;
+ }
+ bool operator==(const PHI_iterator &X) const { return Idx == X.Idx; }
+ bool operator!=(const PHI_iterator &X) const { return !operator==(X); }
+
+ BlockValueNum getIncomingValue() { return PHI->IncomingValues[Idx].second; }
+
+ LDVSSABlock *getIncomingBlock() { return PHI->IncomingValues[Idx].first; }
+ };
+
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+ /// vector.
+ static void FindPredecessorBlocks(LDVSSABlock *BB,
+ SmallVectorImpl<LDVSSABlock *> *Preds) {
+ for (MachineBasicBlock::pred_iterator PI = BB->BB.pred_begin(),
+ E = BB->BB.pred_end();
+ PI != E; ++PI)
+ Preds->push_back(BB->Updater.getSSALDVBlock(*PI));
+ }
+
+ /// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new
+ /// register. For LiveDebugValues, represents a block identified as not having
+ /// any DBG_PHI predecessors.
+ static BlockValueNum GetUndefVal(LDVSSABlock *BB, LDVSSAUpdater *Updater) {
+ // Create a value number for this block -- it needs to be unique and in the
+ // "undef" collection, so that we know it's not real. Use a number
+ // representing a PHI into this block.
+ BlockValueNum Num = ValueIDNum(BB->BB.getNumber(), 0, Updater->Loc).asU64();
+ Updater->UndefMap[&BB->BB] = Num;
+ return Num;
+ }
+
+ /// CreateEmptyPHI - Create a (representation of a) PHI in the given block.
+ /// SSAUpdater will populate it with information about incoming values. The
+ /// value number of this PHI is whatever the machine value number problem
+ /// solution determined it to be. This includes non-phi values if SSAUpdater
+ /// tries to create a PHI where the incoming values are identical.
+ static BlockValueNum CreateEmptyPHI(LDVSSABlock *BB, unsigned NumPreds,
+ LDVSSAUpdater *Updater) {
+ BlockValueNum PHIValNum = Updater->getValue(BB);
+ LDVSSAPhi *PHI = BB->newPHI(PHIValNum);
+ Updater->PHIs[PHIValNum] = PHI;
+ return PHIValNum;
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(LDVSSAPhi *PHI, BlockValueNum Val, LDVSSABlock *Pred) {
+ PHI->IncomingValues.push_back(std::make_pair(Pred, Val));
+ }
+
+ /// ValueIsPHI - Check if the instruction that defines the specified value
+ /// is a PHI instruction.
+ static LDVSSAPhi *ValueIsPHI(BlockValueNum Val, LDVSSAUpdater *Updater) {
+ auto PHIIt = Updater->PHIs.find(Val);
+ if (PHIIt == Updater->PHIs.end())
+ return nullptr;
+ return PHIIt->second;
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static LDVSSAPhi *ValueIsNewPHI(BlockValueNum Val, LDVSSAUpdater *Updater) {
+ LDVSSAPhi *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->IncomingValues.size() == 0)
+ return PHI;
+ return nullptr;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static BlockValueNum GetPHIValue(LDVSSAPhi *PHI) { return PHI->PHIValNum; }
+};
+
+} // end namespace llvm
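
The wrapper classes above exist only so a generic algorithm can be retargeted through a traits specialization. A self-contained sketch of that pattern under invented names (TinyGraph, GraphTraitsLike, countEdges; none of these are LLVM APIs):

#include <cstddef>
#include <iostream>
#include <vector>

// Primary template is declared but never defined; each graph type opts in by
// providing an explicit specialization, much as SSAUpdaterTraits<LDVSSAUpdater>
// does for SSAUpdaterImpl above.
template <typename GraphT> struct GraphTraitsLike;

struct TinyGraph {
  std::vector<std::vector<int>> Succs; // adjacency list
};

template <> struct GraphTraitsLike<TinyGraph> {
  static const std::vector<int> &successors(const TinyGraph &G, int Node) {
    return G.Succs[Node];
  }
};

// A "generic pass" that only talks to the traits interface, never to the
// concrete graph type directly.
template <typename GraphT> std::size_t countEdges(const GraphT &G, int NumNodes) {
  std::size_t Edges = 0;
  for (int N = 0; N < NumNodes; ++N)
    Edges += GraphTraitsLike<GraphT>::successors(G, N).size();
  return Edges;
}

int main() {
  TinyGraph G{{{1, 2}, {2}, {}}};
  std::cout << countEdges(G, 3) << '\n'; // prints 3
}
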
+
+Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF,
+ ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns,
+ MachineInstr &Here,
+ uint64_t InstrNum) {
+ // Pick out records of DBG_PHI instructions that have been observed. If there
+ // are none, then we cannot compute a value number.
+ auto RangePair = std::equal_range(DebugPHINumToValue.begin(),
+ DebugPHINumToValue.end(), InstrNum);
+ auto LowerIt = RangePair.first;
+ auto UpperIt = RangePair.second;
+
+ // No DBG_PHI means there can be no location.
+ if (LowerIt == UpperIt)
+ return None;
+
+ // If there's only one DBG_PHI, then that is our value number.
+ if (std::distance(LowerIt, UpperIt) == 1)
+ return LowerIt->ValueRead;
+
+ auto DBGPHIRange = make_range(LowerIt, UpperIt);
+
+ // Pick out the location (physreg, slot) where any PHIs must occur. It's
+ // technically possible for us to merge values in different registers in each
+ // block, but highly unlikely that LLVM will generate such code after register
+ // allocation.
+ LocIdx Loc = LowerIt->ReadLoc;
+
+  // We have several DBG_PHIs, and a use position (the Here inst). All that
+  // each DBG_PHI does is identify a value at a program position. We can treat
+  // each DBG_PHI like it's a Def of a value, and the use position is a Use of
+  // a value, just like SSA. We use the standard LLVM SSA updater class to
+ // determine which Def is used at the Use, and any PHIs that happen along
+ // the way.
+ // Adapted LLVM SSA Updater:
+ LDVSSAUpdater Updater(Loc, MLiveIns);
+ // Map of which Def or PHI is the current value in each block.
+ DenseMap<LDVSSABlock *, BlockValueNum> AvailableValues;
+ // Set of PHIs that we have created along the way.
+ SmallVector<LDVSSAPhi *, 8> CreatedPHIs;
+
+ // Each existing DBG_PHI is a Def'd value under this model. Record these Defs
+ // for the SSAUpdater.
+ for (const auto &DBG_PHI : DBGPHIRange) {
+ LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB);
+ const ValueIDNum &Num = DBG_PHI.ValueRead;
+ AvailableValues.insert(std::make_pair(Block, Num.asU64()));
+ }
+
+ LDVSSABlock *HereBlock = Updater.getSSALDVBlock(Here.getParent());
+ const auto &AvailIt = AvailableValues.find(HereBlock);
+ if (AvailIt != AvailableValues.end()) {
+ // Actually, we already know what the value is -- the Use is in the same
+ // block as the Def.
+ return ValueIDNum::fromU64(AvailIt->second);
+ }
+
+ // Otherwise, we must use the SSA Updater. It will identify the value number
+ // that we are to use, and the PHIs that must happen along the way.
+ SSAUpdaterImpl<LDVSSAUpdater> Impl(&Updater, &AvailableValues, &CreatedPHIs);
+ BlockValueNum ResultInt = Impl.GetValue(Updater.getSSALDVBlock(Here.getParent()));
+ ValueIDNum Result = ValueIDNum::fromU64(ResultInt);
+
+ // We have the number for a PHI, or possibly live-through value, to be used
+ // at this Use. There are a number of things we have to check about it though:
+ // * Does any PHI use an 'Undef' (like an IMPLICIT_DEF) value? If so, this
+ // Use was not completely dominated by DBG_PHIs and we should abort.
+ // * Are the Defs or PHIs clobbered in a block? SSAUpdater isn't aware that
+ // we've left SSA form. Validate that the inputs to each PHI are the
+ // expected values.
+ // * Is a PHI we've created actually a merging of values, or are all the
+ // predecessor values the same, leading to a non-PHI machine value number?
+  //    (SSAUpdater doesn't know that either). Remap validated PHIs into the
+  //    ValidatedValues collection below to sort this out.
+ DenseMap<LDVSSABlock *, ValueIDNum> ValidatedValues;
+
+ // Define all the input DBG_PHI values in ValidatedValues.
+ for (const auto &DBG_PHI : DBGPHIRange) {
+ LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB);
+ const ValueIDNum &Num = DBG_PHI.ValueRead;
+ ValidatedValues.insert(std::make_pair(Block, Num));
+ }
+
+ // Sort PHIs to validate into RPO-order.
+ SmallVector<LDVSSAPhi *, 8> SortedPHIs;
+ for (auto &PHI : CreatedPHIs)
+ SortedPHIs.push_back(PHI);
+
+ std::sort(
+ SortedPHIs.begin(), SortedPHIs.end(), [&](LDVSSAPhi *A, LDVSSAPhi *B) {
+ return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB];
+ });
+
+ for (auto &PHI : SortedPHIs) {
+ ValueIDNum ThisBlockValueNum =
+ MLiveIns[PHI->ParentBlock->BB.getNumber()][Loc.asU64()];
+
+ // Are all these things actually defined?
+ for (auto &PHIIt : PHI->IncomingValues) {
+ // Any undef input means DBG_PHIs didn't dominate the use point.
+ if (Updater.UndefMap.find(&PHIIt.first->BB) != Updater.UndefMap.end())
+ return None;
+
+ ValueIDNum ValueToCheck;
+ ValueIDNum *BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()];
+
+ auto VVal = ValidatedValues.find(PHIIt.first);
+ if (VVal == ValidatedValues.end()) {
+        // We cross a loop, and this is a backedge. LLVM's tail duplication
+ // happens so late that DBG_PHI instructions should not be able to
+ // migrate into loops -- meaning we can only be live-through this
+ // loop.
+ ValueToCheck = ThisBlockValueNum;
+ } else {
+ // Does the block have as a live-out, in the location we're examining,
+ // the value that we expect? If not, it's been moved or clobbered.
+ ValueToCheck = VVal->second;
+ }
+
+ if (BlockLiveOuts[Loc.asU64()] != ValueToCheck)
+ return None;
+ }
+
+ // Record this value as validated.
+ ValidatedValues.insert({PHI->ParentBlock, ThisBlockValueNum});
+ }
+
+ // All the PHIs are valid: we can return what the SSAUpdater said our value
+ // number was.
+ return Result;
+}
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 770c46ec8436..38e803d1abb5 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
/// \file LiveDebugValues.cpp
@@ -33,6 +34,12 @@
using namespace llvm;
+static cl::opt<bool>
+ ForceInstrRefLDV("force-instr-ref-livedebugvalues", cl::Hidden,
+ cl::desc("Use instruction-ref based LiveDebugValues with "
+ "normal DBG_VALUE inputs"),
+ cl::init(false));
+
/// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or
/// InstrRefBasedLDV to perform location propagation, via the LDVImpl
/// base class.
@@ -87,6 +94,9 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
InstrRefBased = TM.Options.ValueTrackingVariableLocations;
}
+ // Allow the user to force selection of InstrRef LDV.
+ InstrRefBased |= ForceInstrRefLDV;
+
if (InstrRefBased)
TheImpl = llvm::makeInstrRefBasedLiveDebugValues();
else
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
index 6b05bc68d74d..9c910f180b9f 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -6,6 +6,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
+
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -30,3 +33,5 @@ public:
extern LDVImpl *makeVarLocBasedLiveDebugValues();
extern LDVImpl *makeInstrRefBasedLiveDebugValues();
} // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index e2daa46fe6b9..1e6d65c18953 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -76,20 +76,23 @@
/// that are not through dataflow.
///
/// Within LiveDebugValues: each variable location is represented by a
-/// VarLoc object that identifies the source variable, its current
-/// machine-location, and the DBG_VALUE inst that specifies the location. Each
-/// VarLoc is indexed in the (function-scope) \p VarLocMap, giving each VarLoc a
-/// unique index. Rather than operate directly on machine locations, the
-/// dataflow analysis in this pass identifies locations by their index in the
-/// VarLocMap, meaning all the variable locations in a block can be described
-/// by a sparse vector of VarLocMap indicies.
+/// VarLoc object that identifies the source variable, the set of
+/// machine-locations that currently describe it (a single location for
+/// DBG_VALUE or multiple for DBG_VALUE_LIST), and the DBG_VALUE inst that
+/// specifies the location. Each VarLoc is indexed in the (function-scope) \p
+/// VarLocMap, giving each VarLoc a set of unique indices, each of which
+/// corresponds to one of the VarLoc's machine-locations and can be used to
+/// look up the VarLoc in the VarLocMap. Rather than operate directly on machine
+/// locations, the dataflow analysis in this pass identifies locations by their
+/// indices in the VarLocMap, meaning all the variable locations in a block can
+/// be described by a sparse vector of VarLocMap indices.
///
/// All the storage for the dataflow analysis is local to the ExtendRanges
/// method and passed down to helper methods. "OutLocs" and "InLocs" record the
/// in and out lattice values for each block. "OpenRanges" maintains a list of
/// variable locations and, with the "process" method, evaluates the transfer
-/// function of each block. "flushPendingLocs" installs DBG_VALUEs for each
-/// live-in location at the start of blocks, while "Transfers" records
+/// function of each block. "flushPendingLocs" installs debug value instructions
+/// for each live-in location at the start of blocks, while "Transfers" records
/// transfers of values between machine-locations.
///
/// We avoid explicitly representing the "Unknown" (\top) lattice value in the
@@ -175,17 +178,6 @@ static cl::opt<unsigned> InputDbgValueLimit(
"Maximum input DBG_VALUE insts supported by debug range extension"),
cl::init(50000), cl::Hidden);
-// If @MI is a DBG_VALUE with debug value described by a defined
-// register, returns the number of this register. In the other case, returns 0.
-static Register isDbgValueDescribedByReg(const MachineInstr &MI) {
- assert(MI.isDebugValue() && "expected a DBG_VALUE");
- assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
- // If location of variable is described using a register (directly
- // or indirectly), this register is always a first operand.
- return MI.getDebugOperand(0).isReg() ? MI.getDebugOperand(0).getReg()
- : Register();
-}
-
/// If \p Op is a stack or frame register return true, otherwise return false.
/// This is used to avoid basing the debug entry values on the registers, since
/// we do not support it at the moment.
@@ -210,6 +202,13 @@ namespace {
// this prevents fallback to std::set::count() operations.
using DefinedRegsSet = SmallSet<Register, 32>;
+// The IDs in this set correspond to MachineLocs in VarLocs, as well as VarLocs
+// that represent Entry Values; every VarLoc in the set will also appear
+// exactly once at Location=0.
+// As a result, each VarLoc may appear more than once in this "set", but each
+// range corresponding to a Reg, SpillLoc, or EntryValue type will still be a
+// "true" set (i.e. each VarLoc may appear only once), and the range Location=0
+// is the set of all VarLocs.
using VarLocSet = CoalescingBitVector<uint64_t>;
/// A type-checked pair of {Register Location (or 0), Index}, used to index
@@ -229,11 +228,19 @@ struct LocIndex {
// here to encode non-register locations.
u32_index_t Index;
- /// The first location greater than 0 that is not reserved for VarLocs of
- /// kind RegisterKind.
+ /// The location that has an entry for every VarLoc in the map.
+ static constexpr u32_location_t kUniversalLocation = 0;
+
+ /// The first location that is reserved for VarLocs with locations of kind
+ /// RegisterKind.
+ static constexpr u32_location_t kFirstRegLocation = 1;
+
+ /// The first location greater than 0 that is not reserved for VarLocs with
+ /// locations of kind RegisterKind.
static constexpr u32_location_t kFirstInvalidRegLocation = 1 << 30;
- /// A special location reserved for VarLocs of kind SpillLocKind.
+ /// A special location reserved for VarLocs with locations of kind
+ /// SpillLocKind.
static constexpr u32_location_t kSpillLocation = kFirstInvalidRegLocation;
/// A special location reserved for VarLocs of kind EntryValueBackupKind and
@@ -258,7 +265,7 @@ struct LocIndex {
/// Get the start of the interval reserved for VarLocs of kind RegisterKind
/// which reside in \p Reg. The end is at rawIndexForReg(Reg+1)-1.
- static uint64_t rawIndexForReg(uint32_t Reg) {
+ static uint64_t rawIndexForReg(Register Reg) {
return LocIndex(Reg, 0).getAsRawInteger();
}
@@ -272,6 +279,13 @@ struct LocIndex {
}
};
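
As a rough illustration of why this layout works, here is a sketch of packing a {Location, Index} pair into a uint64_t so that all IDs for one location form a contiguous interval, which is what rawIndexForReg and the interval scans below rely on; PackedLocIndex is an invented name and the exact packing of the real LocIndex is assumed, not quoted:

#include <cassert>
#include <cstdint>
#include <iostream>

struct PackedLocIndex {
  uint32_t Location; // 0 = universal bucket that holds every VarLoc once
  uint32_t Index;    // position of the VarLoc within that bucket
  uint64_t getAsRawInteger() const {
    return (static_cast<uint64_t>(Location) << 32) | Index;
  }
};

int main() {
  // All IDs for VarLocs living in (say) register 5 fall in the half-open
  // interval [rawIndexForReg(5), rawIndexForReg(6)), so "collect everything
  // in reg 5" is a single contiguous scan of the sorted ID set.
  uint64_t First = PackedLocIndex{5, 0}.getAsRawInteger();
  uint64_t FirstInvalid = PackedLocIndex{6, 0}.getAsRawInteger();
  uint64_t Some = PackedLocIndex{5, 42}.getAsRawInteger();
  assert(Some >= First && Some < FirstInvalid);
  std::cout << std::hex << First << " <= " << Some << " < " << FirstInvalid << '\n';
  return 0;
}
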
+// Simple set for storing all the VarLoc indices at a Location bucket.
+using VarLocsInRange = SmallSet<LocIndex::u32_index_t, 32>;
+// Vector of all `LocIndex`s for a given VarLoc; the same Location should not
+// appear in any two of these, as each VarLoc appears at most once in any
+// Location bucket.
+using LocIndices = SmallVector<LocIndex, 2>;
+
class VarLocBasedLDV : public LDVImpl {
private:
const TargetRegisterInfo *TRI;
@@ -312,51 +326,130 @@ private:
/// is moved.
const MachineInstr &MI;
- enum VarLocKind {
+ enum class MachineLocKind {
InvalidKind = 0,
RegisterKind,
SpillLocKind,
- ImmediateKind,
+ ImmediateKind
+ };
+
+ enum class EntryValueLocKind {
+ NonEntryValueKind = 0,
EntryValueKind,
EntryValueBackupKind,
EntryValueCopyBackupKind
- } Kind = InvalidKind;
+ } EVKind;
/// The value location. Stored separately to avoid repeatedly
/// extracting it from MI.
- union LocUnion {
+ union MachineLocValue {
uint64_t RegNo;
SpillLoc SpillLocation;
uint64_t Hash;
int64_t Immediate;
const ConstantFP *FPImm;
const ConstantInt *CImm;
- LocUnion() : Hash(0) {}
- } Loc;
+ MachineLocValue() : Hash(0) {}
+ };
+
+ /// A single machine location; its Kind is either a register, spill
+ /// location, or immediate value.
+ /// If the VarLoc is not a NonEntryValueKind, then it will use only a
+ /// single MachineLoc of RegisterKind.
+ struct MachineLoc {
+ MachineLocKind Kind;
+ MachineLocValue Value;
+ bool operator==(const MachineLoc &Other) const {
+ if (Kind != Other.Kind)
+ return false;
+ switch (Kind) {
+ case MachineLocKind::SpillLocKind:
+ return Value.SpillLocation == Other.Value.SpillLocation;
+ case MachineLocKind::RegisterKind:
+ case MachineLocKind::ImmediateKind:
+ return Value.Hash == Other.Value.Hash;
+ default:
+ llvm_unreachable("Invalid kind");
+ }
+ }
+ bool operator<(const MachineLoc &Other) const {
+ switch (Kind) {
+ case MachineLocKind::SpillLocKind:
+ return std::make_tuple(
+ Kind, Value.SpillLocation.SpillBase,
+ Value.SpillLocation.SpillOffset.getFixed(),
+ Value.SpillLocation.SpillOffset.getScalable()) <
+ std::make_tuple(
+ Other.Kind, Other.Value.SpillLocation.SpillBase,
+ Other.Value.SpillLocation.SpillOffset.getFixed(),
+ Other.Value.SpillLocation.SpillOffset.getScalable());
+ case MachineLocKind::RegisterKind:
+ case MachineLocKind::ImmediateKind:
+ return std::tie(Kind, Value.Hash) <
+ std::tie(Other.Kind, Other.Value.Hash);
+ default:
+ llvm_unreachable("Invalid kind");
+ }
+ }
+ };
+
+ /// The set of machine locations used to determine the variable's value, in
+ /// conjunction with Expr. Initially populated with MI's debug operands,
+ /// but may be transformed independently afterwards.
+ SmallVector<MachineLoc, 8> Locs;
+ /// Used to map the index of each location in Locs back to the index of its
+ /// original debug operand in MI. Used when multiple location operands are
+ /// coalesced and the original MI's operands need to be accessed while
+ /// emitting a debug value.
+ SmallVector<unsigned, 8> OrigLocMap;
VarLoc(const MachineInstr &MI, LexicalScopes &LS)
: Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt()),
- Expr(MI.getDebugExpression()), MI(MI) {
+ Expr(MI.getDebugExpression()), MI(MI),
+ EVKind(EntryValueLocKind::NonEntryValueKind) {
assert(MI.isDebugValue() && "not a DBG_VALUE");
- assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
- if (int RegNo = isDbgValueDescribedByReg(MI)) {
- Kind = RegisterKind;
- Loc.RegNo = RegNo;
- } else if (MI.getDebugOperand(0).isImm()) {
- Kind = ImmediateKind;
- Loc.Immediate = MI.getDebugOperand(0).getImm();
- } else if (MI.getDebugOperand(0).isFPImm()) {
- Kind = ImmediateKind;
- Loc.FPImm = MI.getDebugOperand(0).getFPImm();
- } else if (MI.getDebugOperand(0).isCImm()) {
- Kind = ImmediateKind;
- Loc.CImm = MI.getDebugOperand(0).getCImm();
+ assert((MI.isDebugValueList() || MI.getNumOperands() == 4) &&
+ "malformed DBG_VALUE");
+ for (const MachineOperand &Op : MI.debug_operands()) {
+ MachineLoc ML = GetLocForOp(Op);
+ auto It = find(Locs, ML);
+ if (It == Locs.end()) {
+ Locs.push_back(ML);
+ OrigLocMap.push_back(MI.getDebugOperandIndex(&Op));
+ } else {
+ // ML duplicates an element in Locs; replace references to Op
+ // with references to the duplicating element.
+ unsigned OpIdx = Locs.size();
+ unsigned DuplicatingIdx = std::distance(Locs.begin(), It);
+ Expr = DIExpression::replaceArg(Expr, OpIdx, DuplicatingIdx);
+ }
}
- // We create the debug entry values from the factory functions rather than
- // from this ctor.
- assert(Kind != EntryValueKind && !isEntryBackupLoc());
+ // We create the debug entry values from the factory functions rather
+ // than from this ctor.
+ assert(EVKind != EntryValueLocKind::EntryValueKind &&
+ !isEntryBackupLoc());
+ }
+
+ static MachineLoc GetLocForOp(const MachineOperand &Op) {
+ MachineLocKind Kind;
+ MachineLocValue Loc;
+ if (Op.isReg()) {
+ Kind = MachineLocKind::RegisterKind;
+ Loc.RegNo = Op.getReg();
+ } else if (Op.isImm()) {
+ Kind = MachineLocKind::ImmediateKind;
+ Loc.Immediate = Op.getImm();
+ } else if (Op.isFPImm()) {
+ Kind = MachineLocKind::ImmediateKind;
+ Loc.FPImm = Op.getFPImm();
+ } else if (Op.isCImm()) {
+ Kind = MachineLocKind::ImmediateKind;
+ Loc.CImm = Op.getCImm();
+ } else
+ llvm_unreachable("Invalid Op kind for MachineLoc.");
+ return {Kind, Loc};
}
/// Take the variable and machine-location in DBG_VALUE MI, and build an
@@ -364,10 +457,11 @@ private:
static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS,
const DIExpression *EntryExpr, Register Reg) {
VarLoc VL(MI, LS);
- assert(VL.Kind == RegisterKind);
- VL.Kind = EntryValueKind;
+ assert(VL.Locs.size() == 1 &&
+ VL.Locs[0].Kind == MachineLocKind::RegisterKind);
+ VL.EVKind = EntryValueLocKind::EntryValueKind;
VL.Expr = EntryExpr;
- VL.Loc.RegNo = Reg;
+ VL.Locs[0].Value.RegNo = Reg;
return VL;
}
@@ -379,8 +473,9 @@ private:
LexicalScopes &LS,
const DIExpression *EntryExpr) {
VarLoc VL(MI, LS);
- assert(VL.Kind == RegisterKind);
- VL.Kind = EntryValueBackupKind;
+ assert(VL.Locs.size() == 1 &&
+ VL.Locs[0].Kind == MachineLocKind::RegisterKind);
+ VL.EVKind = EntryValueLocKind::EntryValueBackupKind;
VL.Expr = EntryExpr;
return VL;
}
@@ -393,32 +488,40 @@ private:
const DIExpression *EntryExpr,
Register NewReg) {
VarLoc VL(MI, LS);
- assert(VL.Kind == RegisterKind);
- VL.Kind = EntryValueCopyBackupKind;
+ assert(VL.Locs.size() == 1 &&
+ VL.Locs[0].Kind == MachineLocKind::RegisterKind);
+ VL.EVKind = EntryValueLocKind::EntryValueCopyBackupKind;
VL.Expr = EntryExpr;
- VL.Loc.RegNo = NewReg;
+ VL.Locs[0].Value.RegNo = NewReg;
return VL;
}
/// Copy the register location in DBG_VALUE MI, updating the register to
/// be NewReg.
- static VarLoc CreateCopyLoc(const MachineInstr &MI, LexicalScopes &LS,
+ static VarLoc CreateCopyLoc(const VarLoc &OldVL, const MachineLoc &OldML,
Register NewReg) {
- VarLoc VL(MI, LS);
- assert(VL.Kind == RegisterKind);
- VL.Loc.RegNo = NewReg;
- return VL;
+ VarLoc VL = OldVL;
+ for (size_t I = 0, E = VL.Locs.size(); I < E; ++I)
+ if (VL.Locs[I] == OldML) {
+ VL.Locs[I].Kind = MachineLocKind::RegisterKind;
+ VL.Locs[I].Value.RegNo = NewReg;
+ return VL;
+ }
+ llvm_unreachable("Should have found OldML in new VarLoc.");
}
- /// Take the variable described by DBG_VALUE MI, and create a VarLoc
+ /// Take the variable described by DBG_VALUE* MI, and create a VarLoc
/// locating it in the specified spill location.
- static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase,
- StackOffset SpillOffset, LexicalScopes &LS) {
- VarLoc VL(MI, LS);
- assert(VL.Kind == RegisterKind);
- VL.Kind = SpillLocKind;
- VL.Loc.SpillLocation = {SpillBase, SpillOffset};
- return VL;
+ static VarLoc CreateSpillLoc(const VarLoc &OldVL, const MachineLoc &OldML,
+ unsigned SpillBase, StackOffset SpillOffset) {
+ VarLoc VL = OldVL;
+ for (int I = 0, E = VL.Locs.size(); I < E; ++I)
+ if (VL.Locs[I] == OldML) {
+ VL.Locs[I].Kind = MachineLocKind::SpillLocKind;
+ VL.Locs[I].Value.SpillLocation = {SpillBase, SpillOffset};
+ return VL;
+ }
+ llvm_unreachable("Should have found OldML in new VarLoc.");
}
/// Create a DBG_VALUE representing this VarLoc in the given function.
@@ -426,79 +529,143 @@ private:
/// inlining information from the original DBG_VALUE instruction, which may
/// have been several transfers ago.
MachineInstr *BuildDbgValue(MachineFunction &MF) const {
+ assert(!isEntryBackupLoc() &&
+ "Tried to produce DBG_VALUE for backup VarLoc");
const DebugLoc &DbgLoc = MI.getDebugLoc();
bool Indirect = MI.isIndirectDebugValue();
const auto &IID = MI.getDesc();
const DILocalVariable *Var = MI.getDebugVariable();
- const DIExpression *DIExpr = MI.getDebugExpression();
NumInserted++;
- switch (Kind) {
- case EntryValueKind:
- // An entry value is a register location -- but with an updated
- // expression. The register location of such DBG_VALUE is always the one
- // from the entry DBG_VALUE, it does not matter if the entry value was
- // copied in to another register due to some optimizations.
- return BuildMI(MF, DbgLoc, IID, Indirect,
- MI.getDebugOperand(0).getReg(), Var, Expr);
- case RegisterKind:
- // Register locations are like the source DBG_VALUE, but with the
- // register number from this VarLoc.
- return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, DIExpr);
- case SpillLocKind: {
- // Spills are indirect DBG_VALUEs, with a base register and offset.
- // Use the original DBG_VALUEs expression to build the spilt location
- // on top of. FIXME: spill locations created before this pass runs
- // are not recognized, and not handled here.
- auto *TRI = MF.getSubtarget().getRegisterInfo();
- auto *SpillExpr = TRI->prependOffsetExpression(
- DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset);
- unsigned Base = Loc.SpillLocation.SpillBase;
- return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr);
- }
- case ImmediateKind: {
- MachineOperand MO = MI.getDebugOperand(0);
- return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr);
- }
- case EntryValueBackupKind:
- case EntryValueCopyBackupKind:
- case InvalidKind:
- llvm_unreachable(
- "Tried to produce DBG_VALUE for invalid or backup VarLoc");
+ const DIExpression *DIExpr = Expr;
+ SmallVector<MachineOperand, 8> MOs;
+ for (unsigned I = 0, E = Locs.size(); I < E; ++I) {
+ MachineLocKind LocKind = Locs[I].Kind;
+ MachineLocValue Loc = Locs[I].Value;
+ const MachineOperand &Orig = MI.getDebugOperand(OrigLocMap[I]);
+ switch (LocKind) {
+ case MachineLocKind::RegisterKind:
+ // An entry value is a register location -- but with an updated
+ // expression. The register location of such DBG_VALUE is always the
+ // one from the entry DBG_VALUE, it does not matter if the entry value
+ // was copied in to another register due to some optimizations.
+ // Non-entry value register locations are like the source
+ // DBG_VALUE, but with the register number from this VarLoc.
+ MOs.push_back(MachineOperand::CreateReg(
+ EVKind == EntryValueLocKind::EntryValueKind ? Orig.getReg()
+ : Register(Loc.RegNo),
+ false));
+ MOs.back().setIsDebug();
+ break;
+ case MachineLocKind::SpillLocKind: {
+ // Spills are indirect DBG_VALUEs, with a base register and offset.
+          // Use the original DBG_VALUE's expression to build the spilled
+          // location on top of. FIXME: spill locations created before this pass runs
+ // are not recognized, and not handled here.
+ unsigned Base = Loc.SpillLocation.SpillBase;
+ auto *TRI = MF.getSubtarget().getRegisterInfo();
+ if (MI.isNonListDebugValue()) {
+ DIExpr =
+ TRI->prependOffsetExpression(DIExpr, DIExpression::ApplyOffset,
+ Loc.SpillLocation.SpillOffset);
+ Indirect = true;
+ } else {
+ SmallVector<uint64_t, 4> Ops;
+ TRI->getOffsetOpcodes(Loc.SpillLocation.SpillOffset, Ops);
+ Ops.push_back(dwarf::DW_OP_deref);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, I);
+ }
+ MOs.push_back(MachineOperand::CreateReg(Base, false));
+ MOs.back().setIsDebug();
+ break;
+ }
+ case MachineLocKind::ImmediateKind: {
+ MOs.push_back(Orig);
+ break;
+ }
+ case MachineLocKind::InvalidKind:
+ llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc");
+ }
}
- llvm_unreachable("Unrecognized VarLocBasedLDV.VarLoc.Kind enum");
+ return BuildMI(MF, DbgLoc, IID, Indirect, MOs, Var, DIExpr);
}
    /// Is the given machine location Kind a constant or constant object?
- bool isConstant() const { return Kind == ImmediateKind; }
+ bool isConstant(MachineLocKind Kind) const {
+ return Kind == MachineLocKind::ImmediateKind;
+ }
    /// Check if this VarLoc is an entry backup location.
bool isEntryBackupLoc() const {
- return Kind == EntryValueBackupKind || Kind == EntryValueCopyBackupKind;
+ return EVKind == EntryValueLocKind::EntryValueBackupKind ||
+ EVKind == EntryValueLocKind::EntryValueCopyBackupKind;
}
- /// If this variable is described by a register holding the entry value,
- /// return it, otherwise return 0.
- unsigned getEntryValueBackupReg() const {
- if (Kind == EntryValueBackupKind)
- return Loc.RegNo;
- return 0;
+ /// If this variable is described by register \p Reg holding the entry
+ /// value, return true.
+ bool isEntryValueBackupReg(Register Reg) const {
+ return EVKind == EntryValueLocKind::EntryValueBackupKind && usesReg(Reg);
}
- /// If this variable is described by a register holding the copy of the
- /// entry value, return it, otherwise return 0.
- unsigned getEntryValueCopyBackupReg() const {
- if (Kind == EntryValueCopyBackupKind)
- return Loc.RegNo;
- return 0;
+ /// If this variable is described by register \p Reg holding a copy of the
+ /// entry value, return true.
+ bool isEntryValueCopyBackupReg(Register Reg) const {
+ return EVKind == EntryValueLocKind::EntryValueCopyBackupKind &&
+ usesReg(Reg);
}
- /// If this variable is described by a register, return it,
- /// otherwise return 0.
- unsigned isDescribedByReg() const {
- if (Kind == RegisterKind)
- return Loc.RegNo;
- return 0;
+ /// If this variable is described in whole or part by \p Reg, return true.
+ bool usesReg(Register Reg) const {
+ MachineLoc RegML;
+ RegML.Kind = MachineLocKind::RegisterKind;
+ RegML.Value.RegNo = Reg;
+ return is_contained(Locs, RegML);
+ }
+
+    /// If this variable is described in whole or part by \p Reg, return the
+    /// index of the MachineLoc in Locs that describes it.
+ unsigned getRegIdx(Register Reg) const {
+ for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)
+ if (Locs[Idx].Kind == MachineLocKind::RegisterKind &&
+ Locs[Idx].Value.RegNo == Reg)
+ return Idx;
+ llvm_unreachable("Could not find given Reg in Locs");
+ }
+
+ /// If this variable is described in whole or part by 1 or more registers,
+ /// add each of them to \p Regs and return true.
+ bool getDescribingRegs(SmallVectorImpl<uint32_t> &Regs) const {
+ bool AnyRegs = false;
+ for (auto Loc : Locs)
+ if (Loc.Kind == MachineLocKind::RegisterKind) {
+ Regs.push_back(Loc.Value.RegNo);
+ AnyRegs = true;
+ }
+ return AnyRegs;
+ }
+
+ bool containsSpillLocs() const {
+ return any_of(Locs, [](VarLoc::MachineLoc ML) {
+ return ML.Kind == VarLoc::MachineLocKind::SpillLocKind;
+ });
+ }
+
+ /// If this variable is described in whole or part by \p SpillLocation,
+ /// return true.
+ bool usesSpillLoc(SpillLoc SpillLocation) const {
+ MachineLoc SpillML;
+ SpillML.Kind = MachineLocKind::SpillLocKind;
+ SpillML.Value.SpillLocation = SpillLocation;
+ return is_contained(Locs, SpillML);
+ }
+
+ /// If this variable is described in whole or part by \p SpillLocation,
+    /// return the index of the MachineLoc in Locs that describes it.
+ unsigned getSpillLocIdx(SpillLoc SpillLocation) const {
+ for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)
+ if (Locs[Idx].Kind == MachineLocKind::SpillLocKind &&
+ Locs[Idx].Value.SpillLocation == SpillLocation)
+ return Idx;
+ llvm_unreachable("Could not find given SpillLoc in Locs");
}
/// Determine whether the lexical scope of this value's debug location
@@ -511,24 +678,26 @@ private:
// TRI can be null.
void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const {
Out << "VarLoc(";
- switch (Kind) {
- case RegisterKind:
- case EntryValueKind:
- case EntryValueBackupKind:
- case EntryValueCopyBackupKind:
- Out << printReg(Loc.RegNo, TRI);
- break;
- case SpillLocKind:
- Out << printReg(Loc.SpillLocation.SpillBase, TRI);
- Out << "[" << Loc.SpillLocation.SpillOffset.getFixed() << " + "
- << Loc.SpillLocation.SpillOffset.getScalable() << "x vscale"
- << "]";
- break;
- case ImmediateKind:
- Out << Loc.Immediate;
- break;
- case InvalidKind:
- llvm_unreachable("Invalid VarLoc in dump method");
+ for (const MachineLoc &MLoc : Locs) {
+ if (Locs.begin() != &MLoc)
+ Out << ", ";
+ switch (MLoc.Kind) {
+ case MachineLocKind::RegisterKind:
+ Out << printReg(MLoc.Value.RegNo, TRI);
+ break;
+ case MachineLocKind::SpillLocKind:
+ Out << printReg(MLoc.Value.SpillLocation.SpillBase, TRI);
+ Out << "[" << MLoc.Value.SpillLocation.SpillOffset.getFixed() << " + "
+ << MLoc.Value.SpillLocation.SpillOffset.getScalable()
+ << "x vscale"
+ << "]";
+ break;
+ case MachineLocKind::ImmediateKind:
+ Out << MLoc.Value.Immediate;
+ break;
+ case MachineLocKind::InvalidKind:
+ llvm_unreachable("Invalid VarLoc in dump method");
+ }
}
Out << ", \"" << Var.getVariable()->getName() << "\", " << *Expr << ", ";
@@ -545,90 +714,76 @@ private:
#endif
bool operator==(const VarLoc &Other) const {
- if (Kind != Other.Kind || !(Var == Other.Var) || Expr != Other.Expr)
- return false;
-
- switch (Kind) {
- case SpillLocKind:
- return Loc.SpillLocation == Other.Loc.SpillLocation;
- case RegisterKind:
- case ImmediateKind:
- case EntryValueKind:
- case EntryValueBackupKind:
- case EntryValueCopyBackupKind:
- return Loc.Hash == Other.Loc.Hash;
- default:
- llvm_unreachable("Invalid kind");
- }
+ return std::tie(EVKind, Var, Expr, Locs) ==
+ std::tie(Other.EVKind, Other.Var, Other.Expr, Other.Locs);
}
/// This operator guarantees that VarLocs are sorted by Variable first.
bool operator<(const VarLoc &Other) const {
- switch (Kind) {
- case SpillLocKind:
- return std::make_tuple(Var, Kind, Loc.SpillLocation.SpillBase,
- Loc.SpillLocation.SpillOffset.getFixed(),
- Loc.SpillLocation.SpillOffset.getScalable(),
- Expr) <
- std::make_tuple(
- Other.Var, Other.Kind, Other.Loc.SpillLocation.SpillBase,
- Other.Loc.SpillLocation.SpillOffset.getFixed(),
- Other.Loc.SpillLocation.SpillOffset.getScalable(),
- Other.Expr);
- case RegisterKind:
- case ImmediateKind:
- case EntryValueKind:
- case EntryValueBackupKind:
- case EntryValueCopyBackupKind:
- return std::tie(Var, Kind, Loc.Hash, Expr) <
- std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr);
- default:
- llvm_unreachable("Invalid kind");
- }
+ return std::tie(Var, EVKind, Locs, Expr) <
+ std::tie(Other.Var, Other.EVKind, Other.Locs, Other.Expr);
}
};
+#ifndef NDEBUG
+ using VarVec = SmallVector<VarLoc, 32>;
+#endif
+
/// VarLocMap is used for two things:
- /// 1) Assigning a unique LocIndex to a VarLoc. This LocIndex can be used to
+ /// 1) Assigning LocIndices to a VarLoc. The LocIndices can be used to
/// virtually insert a VarLoc into a VarLocSet.
/// 2) Given a LocIndex, look up the unique associated VarLoc.
class VarLocMap {
    /// Map a VarLoc to the indices of its entries in the vectors reserved for
    /// its locations within Loc2Vars.
- std::map<VarLoc, LocIndex::u32_index_t> Var2Index;
+ std::map<VarLoc, LocIndices> Var2Indices;
/// Map a location to a vector which holds VarLocs which live in that
/// location.
SmallDenseMap<LocIndex::u32_location_t, std::vector<VarLoc>> Loc2Vars;
- /// Determine the 32-bit location reserved for \p VL, based on its kind.
- static LocIndex::u32_location_t getLocationForVar(const VarLoc &VL) {
- switch (VL.Kind) {
- case VarLoc::RegisterKind:
- assert((VL.Loc.RegNo < LocIndex::kFirstInvalidRegLocation) &&
+ public:
+ /// Retrieve LocIndices for \p VL.
+ LocIndices insert(const VarLoc &VL) {
+ LocIndices &Indices = Var2Indices[VL];
+ // If Indices is not empty, VL is already in the map.
+ if (!Indices.empty())
+ return Indices;
+ SmallVector<LocIndex::u32_location_t, 4> Locations;
+ // LocIndices are determined by EVKind and MLs; each Register has a
+ // unique location, while all SpillLocs use a single bucket, and any EV
+ // VarLocs use only the Backup bucket or none at all (except the
+ // compulsory entry at the universal location index). LocIndices will
+ // always have an index at the universal location index as the last index.
+ if (VL.EVKind == VarLoc::EntryValueLocKind::NonEntryValueKind) {
+ VL.getDescribingRegs(Locations);
+ assert(all_of(Locations,
+ [](auto RegNo) {
+ return RegNo < LocIndex::kFirstInvalidRegLocation;
+ }) &&
"Physreg out of range?");
- return VL.Loc.RegNo;
- case VarLoc::SpillLocKind:
- return LocIndex::kSpillLocation;
- case VarLoc::EntryValueBackupKind:
- case VarLoc::EntryValueCopyBackupKind:
- return LocIndex::kEntryValueBackupLocation;
- default:
- return 0;
+ if (VL.containsSpillLocs()) {
+ LocIndex::u32_location_t Loc = LocIndex::kSpillLocation;
+ Locations.push_back(Loc);
+ }
+ } else if (VL.EVKind != VarLoc::EntryValueLocKind::EntryValueKind) {
+ LocIndex::u32_location_t Loc = LocIndex::kEntryValueBackupLocation;
+ Locations.push_back(Loc);
}
- }
-
- public:
- /// Retrieve a unique LocIndex for \p VL.
- LocIndex insert(const VarLoc &VL) {
- LocIndex::u32_location_t Location = getLocationForVar(VL);
- LocIndex::u32_index_t &Index = Var2Index[VL];
- if (!Index) {
+ Locations.push_back(LocIndex::kUniversalLocation);
+ for (LocIndex::u32_location_t Location : Locations) {
auto &Vars = Loc2Vars[Location];
+ Indices.push_back(
+ {Location, static_cast<LocIndex::u32_index_t>(Vars.size())});
Vars.push_back(VL);
- Index = Vars.size();
}
- return {Location, Index - 1};
+ return Indices;
+ }
+
+ LocIndices getAllIndices(const VarLoc &VL) const {
+ auto IndIt = Var2Indices.find(VL);
+ assert(IndIt != Var2Indices.end() && "VarLoc not tracked");
+ return IndIt->second;
}
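
Sticking with invented names, a compact sketch of the bucketing scheme insert() follows: a value described by several machine locations gets one index per applicable bucket plus a final, compulsory index in the universal bucket 0:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

using Bucket = uint32_t;
using BucketIndex = std::pair<Bucket, uint32_t>;
constexpr Bucket UniversalBucket = 0;

int main() {
  std::map<Bucket, std::vector<std::string>> Bucket2Vars;
  auto InsertAt = [&](Bucket B, const std::string &Name) {
    auto &Vars = Bucket2Vars[B];
    BucketIndex Idx{B, static_cast<uint32_t>(Vars.size())};
    Vars.push_back(Name);
    return Idx;
  };

  // "x" lives in registers 3 and 7: it lands in buckets 3 and 7, and always
  // gets a last entry in the universal bucket so it can be enumerated once.
  std::vector<BucketIndex> Indices;
  for (Bucket B : {Bucket(3), Bucket(7), UniversalBucket})
    Indices.push_back(InsertAt(B, "x"));

  for (const BucketIndex &I : Indices)
    std::cout << "bucket " << I.first << ", index " << I.second << '\n';
  return 0;
}
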
/// Retrieve the unique VarLoc associated with \p ID.
@@ -660,6 +815,17 @@ private:
using VarToFragments =
DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+ /// Collects all VarLocs from \p CollectFrom. Each unique VarLoc is added
+ /// to \p Collected once, in order of insertion into \p VarLocIDs.
+ static void collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected,
+ const VarLocSet &CollectFrom,
+ const VarLocMap &VarLocIDs);
+
+ /// Get the registers which are used by VarLocs of kind RegisterKind tracked
+ /// by \p CollectFrom.
+ void getUsedRegs(const VarLocSet &CollectFrom,
+ SmallVectorImpl<Register> &UsedRegs) const;
+
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
/// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all
@@ -670,39 +836,45 @@ private:
/// we will erase/insert from the EntryValuesBackupVars map, otherwise
/// we perform the operation on the Vars.
class OpenRangesSet {
+ VarLocSet::Allocator &Alloc;
VarLocSet VarLocs;
// Map the DebugVariable to recent primary location ID.
- SmallDenseMap<DebugVariable, LocIndex, 8> Vars;
+ SmallDenseMap<DebugVariable, LocIndices, 8> Vars;
// Map the DebugVariable to recent backup location ID.
- SmallDenseMap<DebugVariable, LocIndex, 8> EntryValuesBackupVars;
+ SmallDenseMap<DebugVariable, LocIndices, 8> EntryValuesBackupVars;
OverlapMap &OverlappingFragments;
public:
OpenRangesSet(VarLocSet::Allocator &Alloc, OverlapMap &_OLapMap)
- : VarLocs(Alloc), OverlappingFragments(_OLapMap) {}
+ : Alloc(Alloc), VarLocs(Alloc), OverlappingFragments(_OLapMap) {}
const VarLocSet &getVarLocs() const { return VarLocs; }
+ // Fetches all VarLocs in \p VarLocIDs and inserts them into \p Collected.
+ // This method is needed to get every VarLoc once, as each VarLoc may have
+ // multiple indices in a VarLocMap (corresponding to each applicable
+ // location), but all VarLocs appear exactly once at the universal location
+ // index.
+ void getUniqueVarLocs(SmallVectorImpl<VarLoc> &Collected,
+ const VarLocMap &VarLocIDs) const {
+ collectAllVarLocs(Collected, VarLocs, VarLocIDs);
+ }
+
/// Terminate all open ranges for VL.Var by removing it from the set.
void erase(const VarLoc &VL);
- /// Terminate all open ranges listed in \c KillSet by removing
- /// them from the set.
- void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs);
+ /// Terminate all open ranges listed as indices in \c KillSet with
+ /// \c Location by removing them from the set.
+ void erase(const VarLocsInRange &KillSet, const VarLocMap &VarLocIDs,
+ LocIndex::u32_location_t Location);
/// Insert a new range into the set.
- void insert(LocIndex VarLocID, const VarLoc &VL);
+ void insert(LocIndices VarLocIDs, const VarLoc &VL);
/// Insert a set of ranges.
- void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) {
- for (uint64_t ID : ToLoad) {
- LocIndex Idx = LocIndex::fromRawInteger(ID);
- const VarLoc &VarL = Map[Idx];
- insert(Idx, VarL);
- }
- }
+ void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map);
- llvm::Optional<LocIndex> getEntryValueBackup(DebugVariable Var);
+ llvm::Optional<LocIndices> getEntryValueBackup(DebugVariable Var);
/// Empty the set.
void clear() {
@@ -725,18 +897,18 @@ private:
getVarLocs().end());
}
- /// Get all set IDs for VarLocs of kind RegisterKind in \p Reg.
+ /// Get all set IDs for VarLocs with MLs of kind RegisterKind in \p Reg.
auto getRegisterVarLocs(Register Reg) const {
return LocIndex::indexRangeForLocation(getVarLocs(), Reg);
}
- /// Get all set IDs for VarLocs of kind SpillLocKind.
+ /// Get all set IDs for VarLocs with MLs of kind SpillLocKind.
auto getSpillVarLocs() const {
return LocIndex::indexRangeForLocation(getVarLocs(),
LocIndex::kSpillLocation);
}
- /// Get all set IDs for VarLocs of kind EntryValueBackupKind or
+ /// Get all set IDs for VarLocs of EVKind EntryValueBackupKind or
/// EntryValueCopyBackupKind.
auto getEntryValueBackupVarLocs() const {
return LocIndex::indexRangeForLocation(
@@ -744,16 +916,14 @@ private:
}
};
- /// Collect all VarLoc IDs from \p CollectFrom for VarLocs of kind
- /// RegisterKind which are located in any reg in \p Regs. Insert collected IDs
- /// into \p Collected.
- void collectIDsForRegs(VarLocSet &Collected, const DefinedRegsSet &Regs,
- const VarLocSet &CollectFrom) const;
-
- /// Get the registers which are used by VarLocs of kind RegisterKind tracked
- /// by \p CollectFrom.
- void getUsedRegs(const VarLocSet &CollectFrom,
- SmallVectorImpl<uint32_t> &UsedRegs) const;
+ /// Collect all VarLoc IDs from \p CollectFrom for VarLocs with MLs of kind
+ /// RegisterKind which are located in any reg in \p Regs. The IDs for each
+  /// VarLoc correspond to entries in the universal location bucket, in which
+  /// every VarLoc has exactly one entry. Insert collected IDs into
+  /// \p Collected.
+ static void collectIDsForRegs(VarLocsInRange &Collected,
+ const DefinedRegsSet &Regs,
+ const VarLocSet &CollectFrom,
+ const VarLocMap &VarLocIDs);
VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, VarLocInMBB &Locs) {
std::unique_ptr<VarLocSet> &VLS = Locs[MBB];
@@ -800,6 +970,7 @@ private:
void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges,
TransferMap &Transfers, VarLocMap &VarLocIDs,
LocIndex OldVarID, TransferKind Kind,
+ const VarLoc::MachineLoc &OldLoc,
Register NewReg = Register());
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
@@ -810,7 +981,7 @@ private:
VarLocMap &VarLocIDs, const VarLoc &EntryVL);
void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers,
- VarLocSet &KillSet);
+ VarLocsInRange &KillSet);
void recordEntryValue(const MachineInstr &MI,
const DefinedRegsSet &DefinedRegs,
OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs);
@@ -871,8 +1042,9 @@ void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) {
auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
auto It = EraseFrom->find(VarToErase);
if (It != EraseFrom->end()) {
- LocIndex ID = It->second;
- VarLocs.reset(ID.getAsRawInteger());
+ LocIndices IDs = It->second;
+ for (LocIndex ID : IDs)
+ VarLocs.reset(ID.getAsRawInteger());
EraseFrom->erase(It);
}
};
@@ -899,26 +1071,46 @@ void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) {
}
}
-void VarLocBasedLDV::OpenRangesSet::erase(const VarLocSet &KillSet,
- const VarLocMap &VarLocIDs) {
- VarLocs.intersectWithComplement(KillSet);
- for (uint64_t ID : KillSet) {
- const VarLoc *VL = &VarLocIDs[LocIndex::fromRawInteger(ID)];
- auto *EraseFrom = VL->isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
- EraseFrom->erase(VL->Var);
+void VarLocBasedLDV::OpenRangesSet::erase(const VarLocsInRange &KillSet,
+ const VarLocMap &VarLocIDs,
+ LocIndex::u32_location_t Location) {
+ VarLocSet RemoveSet(Alloc);
+ for (LocIndex::u32_index_t ID : KillSet) {
+ const VarLoc &VL = VarLocIDs[LocIndex(Location, ID)];
+ auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ EraseFrom->erase(VL.Var);
+ LocIndices VLI = VarLocIDs.getAllIndices(VL);
+ for (LocIndex ID : VLI)
+ RemoveSet.set(ID.getAsRawInteger());
+ }
+ VarLocs.intersectWithComplement(RemoveSet);
+}
+
+void VarLocBasedLDV::OpenRangesSet::insertFromLocSet(const VarLocSet &ToLoad,
+ const VarLocMap &Map) {
+ VarLocsInRange UniqueVarLocIDs;
+ DefinedRegsSet Regs;
+ Regs.insert(LocIndex::kUniversalLocation);
+ collectIDsForRegs(UniqueVarLocIDs, Regs, ToLoad, Map);
+ for (uint64_t ID : UniqueVarLocIDs) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VarL = Map[Idx];
+ const LocIndices Indices = Map.getAllIndices(VarL);
+ insert(Indices, VarL);
}
}
-void VarLocBasedLDV::OpenRangesSet::insert(LocIndex VarLocID,
- const VarLoc &VL) {
+void VarLocBasedLDV::OpenRangesSet::insert(LocIndices VarLocIDs,
+ const VarLoc &VL) {
auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
- VarLocs.set(VarLocID.getAsRawInteger());
- InsertInto->insert({VL.Var, VarLocID});
+ for (LocIndex ID : VarLocIDs)
+ VarLocs.set(ID.getAsRawInteger());
+ InsertInto->insert({VL.Var, VarLocIDs});
}
/// Return the Loc ID of an entry value backup location, if it exists for the
/// variable.
-llvm::Optional<LocIndex>
+llvm::Optional<LocIndices>
VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
auto It = EntryValuesBackupVars.find(Var);
if (It != EntryValuesBackupVars.end())
@@ -927,26 +1119,35 @@ VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
return llvm::None;
}
-void VarLocBasedLDV::collectIDsForRegs(VarLocSet &Collected,
- const DefinedRegsSet &Regs,
- const VarLocSet &CollectFrom) const {
+void VarLocBasedLDV::collectIDsForRegs(VarLocsInRange &Collected,
+ const DefinedRegsSet &Regs,
+ const VarLocSet &CollectFrom,
+ const VarLocMap &VarLocIDs) {
assert(!Regs.empty() && "Nothing to collect");
- SmallVector<uint32_t, 32> SortedRegs;
- for (Register Reg : Regs)
- SortedRegs.push_back(Reg);
+ SmallVector<Register, 32> SortedRegs;
+ append_range(SortedRegs, Regs);
array_pod_sort(SortedRegs.begin(), SortedRegs.end());
auto It = CollectFrom.find(LocIndex::rawIndexForReg(SortedRegs.front()));
auto End = CollectFrom.end();
- for (uint32_t Reg : SortedRegs) {
- // The half-open interval [FirstIndexForReg, FirstInvalidIndex) contains all
- // possible VarLoc IDs for VarLocs of kind RegisterKind which live in Reg.
+ for (Register Reg : SortedRegs) {
+ // The half-open interval [FirstIndexForReg, FirstInvalidIndex) contains
+ // all possible VarLoc IDs for VarLocs with MLs of kind RegisterKind which
+ // live in Reg.
uint64_t FirstIndexForReg = LocIndex::rawIndexForReg(Reg);
uint64_t FirstInvalidIndex = LocIndex::rawIndexForReg(Reg + 1);
It.advanceToLowerBound(FirstIndexForReg);
// Iterate through that half-open interval and collect all the set IDs.
- for (; It != End && *It < FirstInvalidIndex; ++It)
- Collected.set(*It);
+ for (; It != End && *It < FirstInvalidIndex; ++It) {
+ LocIndex ItIdx = LocIndex::fromRawInteger(*It);
+ const VarLoc &VL = VarLocIDs[ItIdx];
+ LocIndices LI = VarLocIDs.getAllIndices(VL);
+ // For now, the back index is always the universal location index.
+ assert(LI.back().Location == LocIndex::kUniversalLocation &&
+ "Unexpected order of LocIndices for VarLoc; was it inserted into "
+ "the VarLocMap correctly?");
+ Collected.insert(LI.back().Index);
+ }
if (It == End)
return;
@@ -954,10 +1155,11 @@ void VarLocBasedLDV::collectIDsForRegs(VarLocSet &Collected,
}
void VarLocBasedLDV::getUsedRegs(const VarLocSet &CollectFrom,
- SmallVectorImpl<uint32_t> &UsedRegs) const {
+ SmallVectorImpl<Register> &UsedRegs) const {
// All register-based VarLocs are assigned indices greater than or equal to
// FirstRegIndex.
- uint64_t FirstRegIndex = LocIndex::rawIndexForReg(1);
+ uint64_t FirstRegIndex =
+ LocIndex::rawIndexForReg(LocIndex::kFirstRegLocation);
uint64_t FirstInvalidIndex =
LocIndex::rawIndexForReg(LocIndex::kFirstInvalidRegLocation);
for (auto It = CollectFrom.find(FirstRegIndex),
@@ -995,9 +1197,10 @@ void VarLocBasedLDV::printVarLocInMBB(const MachineFunction &MF,
const VarLocSet &L = getVarLocsInMBB(&BB, V);
if (L.empty())
continue;
+ SmallVector<VarLoc, 32> VarLocs;
+ collectAllVarLocs(VarLocs, L, VarLocIDs);
Out << "MBB: " << BB.getNumber() << ":\n";
- for (uint64_t VLL : L) {
- const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(VLL)];
+ for (const VarLoc &VL : VarLocs) {
Out << " Var: " << VL.Var.getVariable()->getName();
Out << " MI: ";
VL.dump(TRI, Out);
@@ -1044,11 +1247,11 @@ bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
// If the DBG_VALUE comes from a copy instruction that copies the entry value,
// it means the parameter's value has not changed and we should be able to use
// its entry value.
- bool TrySalvageEntryValue = false;
Register Reg = MI.getDebugOperand(0).getReg();
auto I = std::next(MI.getReverseIterator());
const MachineOperand *SrcRegOp, *DestRegOp;
if (I != MI.getParent()->rend()) {
+
// TODO: Try to keep tracking an entry value if we encounter a propagated
// DBG_VALUE describing the copy of the entry value. (Propagated entry value
// does not indicate the parameter modification.)
@@ -1060,13 +1263,11 @@ bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
DestRegOp = DestSrc->Destination;
if (Reg != DestRegOp->getReg())
return true;
- TrySalvageEntryValue = true;
- }
- if (TrySalvageEntryValue) {
for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
- if (VL.getEntryValueCopyBackupReg() == Reg &&
+ if (VL.isEntryValueCopyBackupReg(Reg) &&
+ // Entry Values should not be variadic.
VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg())
return false;
}
@@ -1095,7 +1296,7 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
// If that is the case, we should stop tracking its entry value.
auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);
if (Var->isParameter() && EntryValBackupID) {
- const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID];
+ const VarLoc &EntryVL = VarLocIDs[EntryValBackupID->back()];
if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) {
LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
MI.print(dbgs(), /*IsStandalone*/ false,
@@ -1105,59 +1306,79 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
}
}
- if (isDbgValueDescribedByReg(MI) || MI.getDebugOperand(0).isImm() ||
- MI.getDebugOperand(0).isFPImm() || MI.getDebugOperand(0).isCImm()) {
+ if (all_of(MI.debug_operands(), [](const MachineOperand &MO) {
+ return (MO.isReg() && MO.getReg()) || MO.isImm() || MO.isFPImm() ||
+ MO.isCImm();
+ })) {
// Use normal VarLoc constructor for registers and immediates.
VarLoc VL(MI, LS);
// End all previous ranges of VL.Var.
OpenRanges.erase(VL);
- LocIndex ID = VarLocIDs.insert(VL);
+ LocIndices IDs = VarLocIDs.insert(VL);
// Add the VarLoc to OpenRanges from this DBG_VALUE.
- OpenRanges.insert(ID, VL);
- } else if (MI.hasOneMemOperand()) {
+ OpenRanges.insert(IDs, VL);
+ } else if (MI.memoperands().size() > 0) {
llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?");
} else {
// This must be an undefined location. If it has an open range, erase it.
- assert(MI.getDebugOperand(0).isReg() &&
- MI.getDebugOperand(0).getReg() == 0 &&
+ assert(MI.isUndefDebugValue() &&
"Unexpected non-undef DBG_VALUE encountered");
VarLoc VL(MI, LS);
OpenRanges.erase(VL);
}
}
+// This should be removed later; it doesn't fit the new design.
+void VarLocBasedLDV::collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected,
+ const VarLocSet &CollectFrom,
+ const VarLocMap &VarLocIDs) {
+ // The half-open interval [FirstIndex, FirstInvalidIndex) contains all
+ // possible VarLoc IDs for VarLocs in the universal location bucket.
+ uint64_t FirstIndex = LocIndex::rawIndexForReg(LocIndex::kUniversalLocation);
+ uint64_t FirstInvalidIndex =
+ LocIndex::rawIndexForReg(LocIndex::kUniversalLocation + 1);
+ // Iterate through that half-open interval and collect all the set IDs.
+ for (auto It = CollectFrom.find(FirstIndex), End = CollectFrom.end();
+ It != End && *It < FirstInvalidIndex; ++It) {
+ LocIndex RegIdx = LocIndex::fromRawInteger(*It);
+ Collected.push_back(VarLocIDs[RegIdx]);
+ }
+}
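As a rough illustration of the bucketed-ID scheme this helper walks, here is a minimal standalone C++ sketch (not LLVM code; rawIndexForReg below is a simplified stand-in that packs the bucket number into the high 32 bits of the raw ID):

#include <cstdint>
#include <iostream>
#include <set>

// Simplified stand-in for LocIndex::rawIndexForReg: the bucket number goes in
// the high 32 bits, the per-bucket index in the low 32 bits.
static uint64_t rawIndexForReg(uint32_t Reg) {
  return static_cast<uint64_t>(Reg) << 32;
}

int main() {
  std::set<uint64_t> VarLocIDs = {rawIndexForReg(0) + 7,  // bucket 0, index 7
                                  rawIndexForReg(5) + 1,  // bucket 5, index 1
                                  rawIndexForReg(5) + 9,  // bucket 5, index 9
                                  rawIndexForReg(6) + 2}; // bucket 6, index 2

  // Collect everything in bucket 5: the half-open interval
  // [rawIndexForReg(5), rawIndexForReg(6)).
  uint32_t Reg = 5;
  auto It = VarLocIDs.lower_bound(rawIndexForReg(Reg));
  auto End = VarLocIDs.lower_bound(rawIndexForReg(Reg + 1));
  for (; It != End; ++It)
    std::cout << "bucket " << Reg << " holds index " << (*It & 0xffffffffu)
              << '\n'; // prints 1 and 9
}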
+
/// Turn the entry value backup locations into primary locations.
void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs,
- TransferMap &Transfers,
- VarLocSet &KillSet) {
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ TransferMap &Transfers,
+ VarLocsInRange &KillSet) {
// Do not insert entry value locations after a terminator.
if (MI.isTerminator())
return;
- for (uint64_t ID : KillSet) {
- LocIndex Idx = LocIndex::fromRawInteger(ID);
+ for (uint32_t ID : KillSet) {
+ // The KillSet IDs are indices for the universal location bucket.
+ LocIndex Idx = LocIndex(LocIndex::kUniversalLocation, ID);
const VarLoc &VL = VarLocIDs[Idx];
if (!VL.Var.getVariable()->isParameter())
continue;
auto DebugVar = VL.Var;
- Optional<LocIndex> EntryValBackupID =
+ Optional<LocIndices> EntryValBackupIDs =
OpenRanges.getEntryValueBackup(DebugVar);
// If the parameter has the entry value backup, it means we should
// be able to use its entry value.
- if (!EntryValBackupID)
+ if (!EntryValBackupIDs)
continue;
- const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID];
- VarLoc EntryLoc =
- VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Loc.RegNo);
- LocIndex EntryValueID = VarLocIDs.insert(EntryLoc);
- Transfers.push_back({&MI, EntryValueID});
- OpenRanges.insert(EntryValueID, EntryLoc);
+ const VarLoc &EntryVL = VarLocIDs[EntryValBackupIDs->back()];
+ VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr,
+ EntryVL.Locs[0].Value.RegNo);
+ LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc);
+ Transfers.push_back({&MI, EntryValueIDs.back()});
+ OpenRanges.insert(EntryValueIDs, EntryLoc);
}
}
@@ -1169,20 +1390,20 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
void VarLocBasedLDV::insertTransferDebugPair(
MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
VarLocMap &VarLocIDs, LocIndex OldVarID, TransferKind Kind,
- Register NewReg) {
- const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
+ const VarLoc::MachineLoc &OldLoc, Register NewReg) {
+ const VarLoc &OldVarLoc = VarLocIDs[OldVarID];
auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) {
- LocIndex LocId = VarLocIDs.insert(VL);
+ LocIndices LocIds = VarLocIDs.insert(VL);
// Close this variable's previous location range.
OpenRanges.erase(VL);
// Record the new location as an open range, and a postponed transfer
// inserting a DBG_VALUE for this location.
- OpenRanges.insert(LocId, VL);
+ OpenRanges.insert(LocIds, VL);
assert(!MI.isTerminator() && "Cannot insert DBG_VALUE after terminator");
- TransferDebugPair MIP = {&MI, LocId};
+ TransferDebugPair MIP = {&MI, LocIds.back()};
Transfers.push_back(MIP);
};
@@ -1194,7 +1415,7 @@ void VarLocBasedLDV::insertTransferDebugPair(
"No register supplied when handling a copy of a debug value");
// Create a DBG_VALUE instruction to describe the Var in its new
// register location.
- VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg);
+ VarLoc VL = VarLoc::CreateCopyLoc(OldVarLoc, OldLoc, NewReg);
ProcessVarLoc(VL);
LLVM_DEBUG({
dbgs() << "Creating VarLoc for register copy:";
@@ -1206,8 +1427,8 @@ void VarLocBasedLDV::insertTransferDebugPair(
// Create a DBG_VALUE instruction to describe the Var in its spilled
// location.
VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
- VarLoc VL = VarLoc::CreateSpillLoc(*DebugInstr, SpillLocation.SpillBase,
- SpillLocation.SpillOffset, LS);
+ VarLoc VL = VarLoc::CreateSpillLoc(
+ OldVarLoc, OldLoc, SpillLocation.SpillBase, SpillLocation.SpillOffset);
ProcessVarLoc(VL);
LLVM_DEBUG({
dbgs() << "Creating VarLoc for spill:";
@@ -1220,7 +1441,7 @@ void VarLocBasedLDV::insertTransferDebugPair(
"No register supplied when handling a restore of a debug value");
// DebugInstr refers to the pre-spill location, therefore we can reuse
// its expression.
- VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg);
+ VarLoc VL = VarLoc::CreateCopyLoc(OldVarLoc, OldLoc, NewReg);
ProcessVarLoc(VL);
LLVM_DEBUG({
dbgs() << "Creating VarLoc for restore:";
@@ -1267,9 +1488,9 @@ void VarLocBasedLDV::transferRegisterDef(
// reasons, it's critical to not iterate over the full set of open VarLocs.
// Iterate over the set of dying/used regs instead.
if (!RegMasks.empty()) {
- SmallVector<uint32_t, 32> UsedRegs;
+ SmallVector<Register, 32> UsedRegs;
getUsedRegs(OpenRanges.getVarLocs(), UsedRegs);
- for (uint32_t Reg : UsedRegs) {
+ for (Register Reg : UsedRegs) {
// Remove ranges of all clobbered registers. Register masks don't usually
// list SP as preserved. Assume that call instructions never clobber SP,
// because some backends (e.g., AArch64) never list SP in the regmask.
@@ -1290,9 +1511,9 @@ void VarLocBasedLDV::transferRegisterDef(
if (DeadRegs.empty())
return;
- VarLocSet KillSet(Alloc);
- collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs());
- OpenRanges.erase(KillSet, VarLocIDs);
+ VarLocsInRange KillSet;
+ collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs(), VarLocIDs);
+ OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kUniversalLocation);
if (TPC) {
auto &TM = TPC->getTM<TargetMachine>();
@@ -1390,14 +1611,14 @@ void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
// First, if there are any DBG_VALUEs pointing at a spill slot that is
// written to, then close the variable location. The value in memory
// will have changed.
- VarLocSet KillSet(Alloc);
+ VarLocsInRange KillSet;
if (isSpillInstruction(MI, MF)) {
Loc = extractSpillBaseRegAndOffset(MI);
for (uint64_t ID : OpenRanges.getSpillVarLocs()) {
LocIndex Idx = LocIndex::fromRawInteger(ID);
const VarLoc &VL = VarLocIDs[Idx];
- assert(VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?");
- if (VL.Loc.SpillLocation == *Loc) {
+ assert(VL.containsSpillLocs() && "Broken VarLocSet?");
+ if (VL.usesSpillLoc(*Loc)) {
// This location is overwritten by the current instruction -- terminate
// the open range, and insert an explicit DBG_VALUE $noreg.
//
@@ -1408,13 +1629,15 @@ void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
//
// At this stage, we already know which DBG_VALUEs are for spills and
// where they are located; it's best to handle overwrites now.
- KillSet.set(ID);
- VarLoc UndefVL = VarLoc::CreateCopyLoc(VL.MI, LS, 0);
- LocIndex UndefLocID = VarLocIDs.insert(UndefVL);
- Transfers.push_back({&MI, UndefLocID});
+ KillSet.insert(ID);
+ unsigned SpillLocIdx = VL.getSpillLocIdx(*Loc);
+ VarLoc::MachineLoc OldLoc = VL.Locs[SpillLocIdx];
+ VarLoc UndefVL = VarLoc::CreateCopyLoc(VL, OldLoc, 0);
+ LocIndices UndefLocIDs = VarLocIDs.insert(UndefVL);
+ Transfers.push_back({&MI, UndefLocIDs.back()});
}
}
- OpenRanges.erase(KillSet, VarLocIDs);
+ OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kSpillLocation);
}
// Try to recognise spill and restore instructions that may create a new
@@ -1441,21 +1664,25 @@ void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
for (uint64_t ID : TransferCandidates) {
LocIndex Idx = LocIndex::fromRawInteger(ID);
const VarLoc &VL = VarLocIDs[Idx];
+ unsigned LocIdx;
if (TKind == TransferKind::TransferSpill) {
- assert(VL.isDescribedByReg() == Reg && "Broken VarLocSet?");
+ assert(VL.usesReg(Reg) && "Broken VarLocSet?");
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
<< VL.Var.getVariable()->getName() << ")\n");
+ LocIdx = VL.getRegIdx(Reg);
} else {
- assert(TKind == TransferKind::TransferRestore &&
- VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?");
- if (VL.Loc.SpillLocation != *Loc)
+ assert(TKind == TransferKind::TransferRestore && VL.containsSpillLocs() &&
+ "Broken VarLocSet?");
+ if (!VL.usesSpillLoc(*Loc))
// The spill location is not the location of a debug value.
continue;
LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
<< VL.Var.getVariable()->getName() << ")\n");
+ LocIdx = VL.getSpillLocIdx(*Loc);
}
+ VarLoc::MachineLoc MLoc = VL.Locs[LocIdx];
insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx, TKind,
- Reg);
+ MLoc, Reg);
// FIXME: A comment should explain why it's correct to return early here,
// if that is in fact correct.
return;
@@ -1504,17 +1731,16 @@ void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI,
for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
LocIndex Idx = LocIndex::fromRawInteger(ID);
const VarLoc &VL = VarLocIDs[Idx];
- if (VL.getEntryValueBackupReg() == SrcReg) {
+ if (VL.isEntryValueBackupReg(SrcReg)) {
LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
VarLoc EntryValLocCopyBackup =
VarLoc::CreateEntryCopyBackupLoc(VL.MI, LS, VL.Expr, DestReg);
-
// Stop tracking the original entry value.
OpenRanges.erase(VL);
// Start tracking the entry value copy.
- LocIndex EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup);
- OpenRanges.insert(EntryValCopyLocID, EntryValLocCopyBackup);
+ LocIndices EntryValCopyLocIDs = VarLocIDs.insert(EntryValLocCopyBackup);
+ OpenRanges.insert(EntryValCopyLocIDs, EntryValLocCopyBackup);
break;
}
}
@@ -1525,9 +1751,12 @@ void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI,
for (uint64_t ID : OpenRanges.getRegisterVarLocs(SrcReg)) {
LocIndex Idx = LocIndex::fromRawInteger(ID);
- assert(VarLocIDs[Idx].isDescribedByReg() == SrcReg && "Broken VarLocSet?");
+ assert(VarLocIDs[Idx].usesReg(SrcReg) && "Broken VarLocSet?");
+ VarLoc::MachineLocValue Loc;
+ Loc.RegNo = SrcReg;
+ VarLoc::MachineLoc MLoc{VarLoc::MachineLocKind::RegisterKind, Loc};
insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx,
- TransferKind::TransferCopy, DestReg);
+ TransferKind::TransferCopy, MLoc, DestReg);
// FIXME: A comment should explain why it's correct to return early here,
// if that is in fact correct.
return;
@@ -1540,12 +1769,14 @@ bool VarLocBasedLDV::transferTerminator(MachineBasicBlock *CurMBB,
VarLocInMBB &OutLocs,
const VarLocMap &VarLocIDs) {
bool Changed = false;
-
- LLVM_DEBUG(for (uint64_t ID
- : OpenRanges.getVarLocs()) {
- // Copy OpenRanges to OutLocs, if not already present.
- dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
- VarLocIDs[LocIndex::fromRawInteger(ID)].dump(TRI);
+ LLVM_DEBUG({
+ VarVec VarLocs;
+ OpenRanges.getUniqueVarLocs(VarLocs, VarLocIDs);
+ for (VarLoc &VL : VarLocs) {
+ // Copy OpenRanges to OutLocs, if not already present.
+ dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
+ VL.dump(TRI);
+ }
});
VarLocSet &VLS = getVarLocsInMBB(CurMBB, OutLocs);
Changed = VLS != OpenRanges.getVarLocs();
@@ -1668,12 +1899,11 @@ bool VarLocBasedLDV::join(
LLVM_DEBUG({
if (!InLocsT.empty()) {
- for (uint64_t ID : InLocsT)
+ VarVec VarLocs;
+ collectAllVarLocs(VarLocs, InLocsT, VarLocIDs);
+ for (const VarLoc &VL : VarLocs)
dbgs() << " gathered candidate incoming var: "
- << VarLocIDs[LocIndex::fromRawInteger(ID)]
- .Var.getVariable()
- ->getName()
- << "\n";
+ << VL.Var.getVariable()->getName() << "\n";
}
});
@@ -1722,10 +1952,12 @@ void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs,
auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first);
VarLocSet &Pending = *Iter.second.get();
- for (uint64_t ID : Pending) {
+ SmallVector<VarLoc, 32> VarLocs;
+ collectAllVarLocs(VarLocs, Pending, VarLocIDs);
+
+ for (VarLoc DiffIt : VarLocs) {
// The ID location is live-in to MBB -- work out what kind of machine
// location it is and create a DBG_VALUE.
- const VarLoc &DiffIt = VarLocIDs[LocIndex::fromRawInteger(ID)];
if (DiffIt.isEntryBackupLoc())
continue;
MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent());
@@ -1810,8 +2042,8 @@ void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,
DIExpression *NewExpr =
DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue);
VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr);
- LocIndex EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup);
- OpenRanges.insert(EntryValLocID, EntryValLocAsBackup);
+ LocIndices EntryValLocIDs = VarLocIDs.insert(EntryValLocAsBackup);
+ OpenRanges.insert(EntryValLocIDs, EntryValLocAsBackup);
}
/// Calculate the liveness information for the given machine function and
@@ -1896,9 +2128,9 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
unsigned int RPONumber = 0;
- for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
- OrderToBB[RPONumber] = *RI;
- BBToOrder[*RI] = RPONumber;
+ for (MachineBasicBlock *MBB : RPOT) {
+ OrderToBB[RPONumber] = MBB;
+ BBToOrder[MBB] = RPONumber;
Worklist.push(RPONumber);
++RPONumber;
}
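The worklist seeding above numbers blocks in reverse post-order; a minimal standalone sketch of that bookkeeping, with an invented four-block CFG and plain std::map in place of the LLVM containers, looks like this:

#include <functional>
#include <iostream>
#include <map>
#include <vector>

int main() {
  // Successor lists for a diamond CFG: block 0 -> {1, 2} -> 3.
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};

  // Depth-first post-order, then reverse it to get RPO.
  std::vector<int> PostOrder;
  std::vector<bool> Seen(Succs.size(), false);
  std::function<void(int)> DFS = [&](int BB) {
    Seen[BB] = true;
    for (int S : Succs[BB])
      if (!Seen[S])
        DFS(S);
    PostOrder.push_back(BB);
  };
  DFS(0);

  std::map<unsigned, int> OrderToBB; // RPO number -> block
  std::map<int, unsigned> BBToOrder; // block -> RPO number
  unsigned RPONumber = 0;
  for (auto It = PostOrder.rbegin(); It != PostOrder.rend(); ++It) {
    OrderToBB[RPONumber] = *It;
    BBToOrder[*It] = RPONumber;
    ++RPONumber;
  }

  for (auto &[Order, BB] : OrderToBB)
    std::cout << "RPO " << Order << " -> block " << BB << '\n';
  // Block 0 is numbered first, block 3 last; 1 and 2 come in DFS order.
}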
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 2325341070a3..54058a547928 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -38,9 +38,11 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
@@ -56,6 +58,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -100,30 +103,134 @@ namespace {
/// with some flags about the original usage of the location.
class DbgVariableValue {
public:
- DbgVariableValue(unsigned LocNo, bool WasIndirect,
- const DIExpression &Expression)
- : LocNo(LocNo), WasIndirect(WasIndirect), Expression(&Expression) {
- assert(getLocNo() == LocNo && "location truncation");
+ DbgVariableValue(ArrayRef<unsigned> NewLocs, bool WasIndirect, bool WasList,
+ const DIExpression &Expr)
+ : WasIndirect(WasIndirect), WasList(WasList), Expression(&Expr) {
+ assert(!(WasIndirect && WasList) &&
+ "DBG_VALUE_LISTs should not be indirect.");
+ SmallVector<unsigned> LocNoVec;
+ for (unsigned LocNo : NewLocs) {
+ auto It = find(LocNoVec, LocNo);
+ if (It == LocNoVec.end())
+ LocNoVec.push_back(LocNo);
+ else {
+ // Loc duplicates an element in LocNos; replace references to Op
+ // with references to the duplicating element.
+ unsigned OpIdx = LocNoVec.size();
+ unsigned DuplicatingIdx = std::distance(LocNoVec.begin(), It);
+ Expression =
+ DIExpression::replaceArg(Expression, OpIdx, DuplicatingIdx);
+ }
+ }
+ // FIXME: Debug values referencing 64+ unique machine locations are rare and
+ // currently unsupported for performance reasons. If we can verify that
+ // performance is acceptable for such debug values, we can increase the
+ // bit-width of LocNoCount to 14 to enable up to 16384 unique machine
+ // locations. We will also need to verify that this does not cause issues
+ // with LiveDebugVariables' use of IntervalMap.
+ if (LocNoVec.size() < 64) {
+ LocNoCount = LocNoVec.size();
+ if (LocNoCount > 0) {
+ LocNos = std::make_unique<unsigned[]>(LocNoCount);
+ std::copy(LocNoVec.begin(), LocNoVec.end(), loc_nos_begin());
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "Found debug value with 64+ unique machine "
+ "locations, dropping...\n");
+ LocNoCount = 1;
+ // Turn this into an undef debug value list; right now, the simplest form
+ // of this is an expression with one arg, and an undef debug operand.
+ Expression =
+ DIExpression::get(Expr.getContext(), {dwarf::DW_OP_LLVM_arg, 0,
+ dwarf::DW_OP_stack_value});
+ if (auto FragmentInfoOpt = Expr.getFragmentInfo())
+ Expression = *DIExpression::createFragmentExpression(
+ Expression, FragmentInfoOpt->OffsetInBits,
+ FragmentInfoOpt->SizeInBits);
+ LocNos = std::make_unique<unsigned[]>(LocNoCount);
+ LocNos[0] = UndefLocNo;
+ }
}
- DbgVariableValue() : LocNo(0), WasIndirect(0) {}
+ DbgVariableValue() : LocNoCount(0), WasIndirect(0), WasList(0) {}
+ DbgVariableValue(const DbgVariableValue &Other)
+ : LocNoCount(Other.LocNoCount), WasIndirect(Other.getWasIndirect()),
+ WasList(Other.getWasList()), Expression(Other.getExpression()) {
+ if (Other.getLocNoCount()) {
+ LocNos.reset(new unsigned[Other.getLocNoCount()]);
+ std::copy(Other.loc_nos_begin(), Other.loc_nos_end(), loc_nos_begin());
+ }
+ }
+
+ DbgVariableValue &operator=(const DbgVariableValue &Other) {
+ if (this == &Other)
+ return *this;
+ if (Other.getLocNoCount()) {
+ LocNos.reset(new unsigned[Other.getLocNoCount()]);
+ std::copy(Other.loc_nos_begin(), Other.loc_nos_end(), loc_nos_begin());
+ } else {
+ LocNos.reset(); // Free any previously owned array.
+ }
+ LocNoCount = Other.getLocNoCount();
+ WasIndirect = Other.getWasIndirect();
+ WasList = Other.getWasList();
+ Expression = Other.getExpression();
+ return *this;
+ }
const DIExpression *getExpression() const { return Expression; }
- unsigned getLocNo() const {
- // Fix up the undef location number, which gets truncated.
- return LocNo == INT_MAX ? UndefLocNo : LocNo;
+ uint8_t getLocNoCount() const { return LocNoCount; }
+ bool containsLocNo(unsigned LocNo) const {
+ return is_contained(loc_nos(), LocNo);
}
bool getWasIndirect() const { return WasIndirect; }
- bool isUndef() const { return getLocNo() == UndefLocNo; }
+ bool getWasList() const { return WasList; }
+ bool isUndef() const { return LocNoCount == 0 || containsLocNo(UndefLocNo); }
+
+ DbgVariableValue decrementLocNosAfterPivot(unsigned Pivot) const {
+ SmallVector<unsigned, 4> NewLocNos;
+ for (unsigned LocNo : loc_nos())
+ NewLocNos.push_back(LocNo != UndefLocNo && LocNo > Pivot ? LocNo - 1
+ : LocNo);
+ return DbgVariableValue(NewLocNos, WasIndirect, WasList, *Expression);
+ }
- DbgVariableValue changeLocNo(unsigned NewLocNo) const {
- return DbgVariableValue(NewLocNo, WasIndirect, *Expression);
+ DbgVariableValue remapLocNos(ArrayRef<unsigned> LocNoMap) const {
+ SmallVector<unsigned> NewLocNos;
+ for (unsigned LocNo : loc_nos())
+ // Undef values don't exist in locations (and thus not in LocNoMap
+ // either) so skip over them. See getLocationNo().
+ NewLocNos.push_back(LocNo == UndefLocNo ? UndefLocNo : LocNoMap[LocNo]);
+ return DbgVariableValue(NewLocNos, WasIndirect, WasList, *Expression);
+ }
+
+ DbgVariableValue changeLocNo(unsigned OldLocNo, unsigned NewLocNo) const {
+ SmallVector<unsigned> NewLocNos;
+ NewLocNos.assign(loc_nos_begin(), loc_nos_end());
+ auto OldLocIt = find(NewLocNos, OldLocNo);
+ assert(OldLocIt != NewLocNos.end() && "Old location must be present.");
+ *OldLocIt = NewLocNo;
+ return DbgVariableValue(NewLocNos, WasIndirect, WasList, *Expression);
+ }
+
+ bool hasLocNoGreaterThan(unsigned LocNo) const {
+ return any_of(loc_nos(),
+ [LocNo](unsigned ThisLocNo) { return ThisLocNo > LocNo; });
+ }
+
+ void printLocNos(llvm::raw_ostream &OS) const {
+ for (const unsigned &Loc : loc_nos())
+ OS << (&Loc == loc_nos_begin() ? " " : ", ") << Loc;
}
friend inline bool operator==(const DbgVariableValue &LHS,
const DbgVariableValue &RHS) {
- return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect &&
- LHS.Expression == RHS.Expression;
+ if (std::tie(LHS.LocNoCount, LHS.WasIndirect, LHS.WasList,
+ LHS.Expression) !=
+ std::tie(RHS.LocNoCount, RHS.WasIndirect, RHS.WasList, RHS.Expression))
+ return false;
+ return std::equal(LHS.loc_nos_begin(), LHS.loc_nos_end(),
+ RHS.loc_nos_begin());
}
friend inline bool operator!=(const DbgVariableValue &LHS,
@@ -131,9 +238,24 @@ public:
return !(LHS == RHS);
}
+ unsigned *loc_nos_begin() { return LocNos.get(); }
+ const unsigned *loc_nos_begin() const { return LocNos.get(); }
+ unsigned *loc_nos_end() { return LocNos.get() + LocNoCount; }
+ const unsigned *loc_nos_end() const { return LocNos.get() + LocNoCount; }
+ ArrayRef<unsigned> loc_nos() const {
+ return ArrayRef<unsigned>(LocNos.get(), LocNoCount);
+ }
+
private:
- unsigned LocNo : 31;
- unsigned WasIndirect : 1;
+ // IntervalMap requires the value object to be very small, to the extent
+ // that we do not have enough room for an std::vector. Using a C-style array
+ // (with a unique_ptr wrapper for convenience) allows us to optimize for this
+ // specific case by packing the array size into only 6 bits (it is highly
+ // unlikely that any debug value will need 64+ locations).
+ std::unique_ptr<unsigned[]> LocNos;
+ uint8_t LocNoCount : 6;
+ bool WasIndirect : 1;
+ bool WasList : 1;
const DIExpression *Expression = nullptr;
};
} // namespace
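A minimal sketch of the size trade-off described in the comment inside the class above, using simplified stand-in types rather than the LLVM ones: the location numbers live behind a unique_ptr and the count plus two flags share one byte of bit-fields, keeping the value small enough to copy into an interval map.

#include <cstdint>
#include <iostream>
#include <memory>

struct SmallLocValue {
  std::unique_ptr<unsigned[]> LocNos; // heap-allocated array of LocNoCount ids
  uint8_t LocNoCount : 6;             // at most 63 locations
  uint8_t WasIndirect : 1;
  uint8_t WasList : 1;
  const void *Expression = nullptr;   // stand-in for the DIExpression pointer
};

int main() {
  // On a typical LP64 target this is three pointer-sized words (commonly 24
  // bytes), versus roughly 40 bytes if a std::vector<unsigned> (itself usually
  // three pointers) were stored alongside the flags and expression pointer.
  std::cout << "sizeof(SmallLocValue) = " << sizeof(SmallLocValue) << '\n';
}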
@@ -145,6 +267,14 @@ using LocMap = IntervalMap<SlotIndex, DbgVariableValue, 4>;
/// Non-spilled locations are not added to the map.
using SpillOffsetMap = DenseMap<unsigned, unsigned>;
+/// Cache of the position that should be used as the starting point when
+/// calling MachineBasicBlock::SkipPHIsLabelsAndDebug for a block.
+/// This prevents MachineBasicBlock::SkipPHIsLabelsAndDebug from
+/// repeatedly searching the same set of PHIs/Labels/Debug instructions
+/// if it is called many times for the same block.
+using BlockSkipInstsMap =
+ DenseMap<MachineBasicBlock *, MachineBasicBlock::iterator>;
+
namespace {
class LDVImpl;
@@ -179,9 +309,11 @@ class UserValue {
/// Insert a DBG_VALUE into MBB at Idx for DbgValue.
void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
SlotIndex StopIdx, DbgVariableValue DbgValue,
- bool Spilled, unsigned SpillOffset, LiveIntervals &LIS,
+ ArrayRef<bool> LocSpills,
+ ArrayRef<unsigned> SpillOffsets, LiveIntervals &LIS,
const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI);
+ const TargetRegisterInfo &TRI,
+ BlockSkipInstsMap &BBSkipInstsMap);
/// Replace OldLocNo ranges with NewRegs ranges where NewRegs
/// is live. Returns true if any changes were made.
@@ -264,17 +396,17 @@ public:
void removeLocationIfUnused(unsigned LocNo) {
// Bail out if LocNo still is used.
for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
- DbgVariableValue DbgValue = I.value();
- if (DbgValue.getLocNo() == LocNo)
+ const DbgVariableValue &DbgValue = I.value();
+ if (DbgValue.containsLocNo(LocNo))
return;
}
// Remove the entry in the locations vector, and adjust all references to
// location numbers above the removed entry.
locations.erase(locations.begin() + LocNo);
for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
- DbgVariableValue DbgValue = I.value();
- if (!DbgValue.isUndef() && DbgValue.getLocNo() > LocNo)
- I.setValueUnchecked(DbgValue.changeLocNo(DbgValue.getLocNo() - 1));
+ const DbgVariableValue &DbgValue = I.value();
+ if (DbgValue.hasLocNoGreaterThan(LocNo))
+ I.setValueUnchecked(DbgValue.decrementLocNosAfterPivot(LocNo));
}
}
@@ -282,16 +414,19 @@ public:
void mapVirtRegs(LDVImpl *LDV);
/// Add a definition point to this user value.
- void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect,
- const DIExpression &Expr) {
- DbgVariableValue DbgValue(getLocationNo(LocMO), IsIndirect, Expr);
+ void addDef(SlotIndex Idx, ArrayRef<MachineOperand> LocMOs, bool IsIndirect,
+ bool IsList, const DIExpression &Expr) {
+ SmallVector<unsigned> Locs;
+ for (MachineOperand Op : LocMOs)
+ Locs.push_back(getLocationNo(Op));
+ DbgVariableValue DbgValue(Locs, IsIndirect, IsList, Expr);
// Add a singular (Idx,Idx) -> value mapping.
LocMap::iterator I = locInts.find(Idx);
if (!I.valid() || I.start() != Idx)
- I.insert(Idx, Idx.getNextSlot(), DbgValue);
+ I.insert(Idx, Idx.getNextSlot(), std::move(DbgValue));
else
// A later DBG_VALUE at the same SlotIndex overrides the old location.
- I.setValue(DbgValue);
+ I.setValue(std::move(DbgValue));
}
/// Extend the current definition as far as possible down.
@@ -304,25 +439,30 @@ public:
///
/// \param Idx Starting point for the definition.
/// \param DbgValue value to propagate.
- /// \param LR Restrict liveness to where LR has the value VNI. May be null.
- /// \param VNI When LR is not null, this is the value to restrict to.
+ /// \param LiveIntervalInfo For each location number key in this map,
+ /// restricts liveness to where the LiveRange has the value equal to the
+ /// VNInfo.
/// \param [out] Kills Append end points of VNI's live range to Kills.
/// \param LIS Live intervals analysis.
- void extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *LR,
- const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
+ void extendDef(SlotIndex Idx, DbgVariableValue DbgValue,
+ SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
+ &LiveIntervalInfo,
+ Optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
LiveIntervals &LIS);
/// The value in LI may be copies to other registers. Determine if
/// any of the copies are available at the kill points, and add defs if
/// possible.
///
- /// \param LI Scan for copies of the value in LI->reg.
/// \param DbgValue Location number of LI->reg, and DIExpression.
- /// \param Kills Points where the range of DbgValue could be extended.
+ /// \param LocIntervals Scan for copies of the value for each location in the
+ /// corresponding LiveInterval->reg.
+ /// \param KilledAt The point where the range of DbgValue could be extended.
/// \param [in,out] NewDefs Append (Idx, DbgValue) of inserted defs here.
void addDefsFromCopies(
- LiveInterval *LI, DbgVariableValue DbgValue,
- const SmallVectorImpl<SlotIndex> &Kills,
+ DbgVariableValue DbgValue,
+ SmallVectorImpl<std::pair<unsigned, LiveInterval *>> &LocIntervals,
+ SlotIndex KilledAt,
SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS);
@@ -348,10 +488,11 @@ public:
void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
- const SpillOffsetMap &SpillOffsets);
+ const SpillOffsetMap &SpillOffsets,
+ BlockSkipInstsMap &BBSkipInstsMap);
/// Return DebugLoc of this UserValue.
- DebugLoc getDebugLoc() { return dl;}
+ const DebugLoc &getDebugLoc() { return dl; }
void print(raw_ostream &, const TargetRegisterInfo *);
};
@@ -365,7 +506,8 @@ class UserLabel {
/// Insert a DBG_LABEL into MBB at Idx.
void insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
- LiveIntervals &LIS, const TargetInstrInfo &TII);
+ LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap);
public:
/// Create a new UserLabel.
@@ -379,10 +521,11 @@ public:
}
/// Recreate DBG_LABEL instruction from data structures.
- void emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII);
+ void emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap);
/// Return DebugLoc of this UserLabel.
- DebugLoc getDebugLoc() { return dl; }
+ const DebugLoc &getDebugLoc() { return dl; }
void print(raw_ostream &, const TargetRegisterInfo *);
};
@@ -395,10 +538,31 @@ class LDVImpl {
LiveIntervals *LIS;
const TargetRegisterInfo *TRI;
- using StashedInstrRef =
- std::tuple<unsigned, unsigned, const DILocalVariable *,
- const DIExpression *, DebugLoc>;
- std::map<SlotIndex, std::vector<StashedInstrRef>> StashedInstrReferences;
+ /// Position and VReg of a PHI instruction during register allocation.
+ struct PHIValPos {
+ SlotIndex SI; /// Slot where this PHI occurs.
+ Register Reg; /// VReg this PHI occurs in.
+ unsigned SubReg; /// Qualifying subregister for Reg.
+ };
+
+ /// Map from debug instruction number to PHI position during allocation.
+ std::map<unsigned, PHIValPos> PHIValToPos;
+ /// For each VReg, an index of which debug instruction numbers and
+ /// corresponding PHIs are sensitive to splitting. Each VReg may have
+ /// multiple PHI defs at different positions.
+ DenseMap<Register, std::vector<unsigned>> RegToPHIIdx;
+
+ /// Record for any debug instructions unlinked from their blocks during
+ /// regalloc. Stores the instr and its location, so that they can be
+ /// re-inserted after regalloc is over.
+ struct InstrPos {
+ MachineInstr *MI; ///< Debug instruction, unlinked from its block.
+ SlotIndex Idx; ///< Slot position where MI should be re-inserted.
+ MachineBasicBlock *MBB; ///< Block that MI was in.
+ };
+
+ /// Collection of stored debug instructions, preserved until after regalloc.
+ SmallVector<InstrPos, 32> StashedDebugInstrs;
/// Whether emitDebugValues is called.
bool EmitDone = false;
@@ -436,15 +600,18 @@ class LDVImpl {
/// \returns True if the DBG_VALUE instruction should be deleted.
bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
- /// Track a DBG_INSTR_REF. This needs to be removed from the MachineFunction
- /// during regalloc -- but there's no need to maintain live ranges, as we
- /// refer to a value rather than a location.
+ /// Track variable location debug instructions while using the instruction
+ /// referencing implementation. Such debug instructions do not need to be
+ /// updated during regalloc because they identify instructions rather than
+ /// register locations. However, they need to be removed from the
+ /// MachineFunction during regalloc, then re-inserted later, to avoid
+ /// disrupting the allocator.
///
- /// \param MI DBG_INSTR_REF instruction
+ /// \param MI Any DBG_VALUE / DBG_INSTR_REF / DBG_PHI instruction
/// \param Idx Last valid SlotIndex before instruction
///
- /// \returns True if the DBG_VALUE instruction should be deleted.
- bool handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx);
+ /// \returns Iterator to continue processing from after unlinking.
+ MachineBasicBlock::iterator handleDebugInstr(MachineInstr &MI, SlotIndex Idx);
/// Add DBG_LABEL instruction to UserLabel.
///
@@ -458,9 +625,11 @@ class LDVImpl {
/// for each instruction.
///
/// \param mf MachineFunction to be scanned.
+ /// \param InstrRef Whether to operate in instruction referencing mode. If
+ /// true, most of LiveDebugVariables doesn't run.
///
/// \returns True if any debug values were found.
- bool collectDebugValues(MachineFunction &mf);
+ bool collectDebugValues(MachineFunction &mf, bool InstrRef);
/// Compute the live intervals of all user values after collecting all
/// their def points.
@@ -469,12 +638,14 @@ class LDVImpl {
public:
LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
- bool runOnMachineFunction(MachineFunction &mf);
+ bool runOnMachineFunction(MachineFunction &mf, bool InstrRef);
/// Release all memory.
void clear() {
MF = nullptr;
- StashedInstrReferences.clear();
+ PHIValToPos.clear();
+ RegToPHIIdx.clear();
+ StashedDebugInstrs.clear();
userValues.clear();
userLabels.clear();
virtRegToEqClass.clear();
@@ -489,6 +660,10 @@ public:
/// Map virtual register to an equivalence class.
void mapVirtReg(Register VirtReg, UserValue *EC);
+ /// Replace any PHI referring to OldReg with its corresponding NewReg, if
+ /// present.
+ void splitPHIRegister(Register OldReg, ArrayRef<Register> NewRegs);
+
/// Replace all references to OldReg with NewRegs.
void splitRegister(Register OldReg, ArrayRef<Register> NewRegs);
@@ -555,11 +730,13 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
OS << " [" << I.start() << ';' << I.stop() << "):";
if (I.value().isUndef())
- OS << "undef";
+ OS << " undef";
else {
- OS << I.value().getLocNo();
+ I.value().printLocNos(OS);
if (I.value().getWasIndirect())
OS << " ind";
+ else if (I.value().getWasList())
+ OS << " list";
}
}
for (unsigned i = 0, e = locations.size(); i != e; ++i) {
@@ -623,11 +800,21 @@ UserValue *LDVImpl::lookupVirtReg(Register VirtReg) {
}
bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
- // DBG_VALUE loc, offset, variable
- if (MI.getNumOperands() != 4 ||
- !(MI.getDebugOffset().isReg() || MI.getDebugOffset().isImm()) ||
- !MI.getDebugVariableOp().isMetadata()) {
- LLVM_DEBUG(dbgs() << "Can't handle " << MI);
+ // DBG_VALUE loc, offset, variable, expr
+ // DBG_VALUE_LIST variable, expr, locs...
+ if (!MI.isDebugValue()) {
+ LLVM_DEBUG(dbgs() << "Can't handle non-DBG_VALUE*: " << MI);
+ return false;
+ }
+ if (!MI.getDebugVariableOp().isMetadata()) {
+ LLVM_DEBUG(dbgs() << "Can't handle DBG_VALUE* with invalid variable: "
+ << MI);
+ return false;
+ }
+ if (MI.isNonListDebugValue() &&
+ (MI.getNumOperands() != 4 ||
+ !(MI.getDebugOffset().isImm() || MI.getDebugOffset().isReg()))) {
+ LLVM_DEBUG(dbgs() << "Can't handle malformed DBG_VALUE: " << MI);
return false;
}
@@ -639,27 +826,28 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// (and if the machine verifier is improved to catch this), then these checks
// could be removed or replaced by asserts.
bool Discard = false;
- if (MI.getDebugOperand(0).isReg() &&
- Register::isVirtualRegister(MI.getDebugOperand(0).getReg())) {
- const Register Reg = MI.getDebugOperand(0).getReg();
- if (!LIS->hasInterval(Reg)) {
- // The DBG_VALUE is described by a virtual register that does not have a
- // live interval. Discard the DBG_VALUE.
- Discard = true;
- LLVM_DEBUG(dbgs() << "Discarding debug info (no LIS interval): " << Idx
- << " " << MI);
- } else {
- // The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg
- // is defined dead at Idx (where Idx is the slot index for the instruction
- // preceding the DBG_VALUE).
- const LiveInterval &LI = LIS->getInterval(Reg);
- LiveQueryResult LRQ = LI.Query(Idx);
- if (!LRQ.valueOutOrDead()) {
- // We have found a DBG_VALUE with the value in a virtual register that
- // is not live. Discard the DBG_VALUE.
+ for (const MachineOperand &Op : MI.debug_operands()) {
+ if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
+ const Register Reg = Op.getReg();
+ if (!LIS->hasInterval(Reg)) {
+ // The DBG_VALUE is described by a virtual register that does not have a
+ // live interval. Discard the DBG_VALUE.
Discard = true;
- LLVM_DEBUG(dbgs() << "Discarding debug info (reg not live): " << Idx
+ LLVM_DEBUG(dbgs() << "Discarding debug info (no LIS interval): " << Idx
<< " " << MI);
+ } else {
+ // The DBG_VALUE is only valid if either Reg is live out from Idx, or
+ // Reg is defined dead at Idx (where Idx is the slot index for the
+ // instruction preceding the DBG_VALUE).
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ LiveQueryResult LRQ = LI.Query(Idx);
+ if (!LRQ.valueOutOrDead()) {
+ // We have found a DBG_VALUE with the value in a virtual register that
+ // is not live. Discard the DBG_VALUE.
+ Discard = true;
+ LLVM_DEBUG(dbgs() << "Discarding debug info (reg not live): " << Idx
+ << " " << MI);
+ }
}
}
}
@@ -669,30 +857,42 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
if (IsIndirect)
assert(MI.getDebugOffset().getImm() == 0 &&
"DBG_VALUE with nonzero offset");
+ bool IsList = MI.isDebugValueList();
const DILocalVariable *Var = MI.getDebugVariable();
const DIExpression *Expr = MI.getDebugExpression();
UserValue *UV = getUserValue(Var, Expr->getFragmentInfo(), MI.getDebugLoc());
if (!Discard)
- UV->addDef(Idx, MI.getDebugOperand(0), IsIndirect, *Expr);
+ UV->addDef(Idx,
+ ArrayRef<MachineOperand>(MI.debug_operands().begin(),
+ MI.debug_operands().end()),
+ IsIndirect, IsList, *Expr);
else {
MachineOperand MO = MachineOperand::CreateReg(0U, false);
MO.setIsDebug();
- UV->addDef(Idx, MO, false, *Expr);
+ // We should still pass a list the same size as MI.debug_operands() even if
+ // all MOs are undef, so that DbgVariableValue can correctly adjust the
+ // expression while removing the duplicated undefs.
+ SmallVector<MachineOperand, 4> UndefMOs(MI.getNumDebugOperands(), MO);
+ UV->addDef(Idx, UndefMOs, false, IsList, *Expr);
}
return true;
}
-bool LDVImpl::handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx) {
- assert(MI.isDebugRef());
- unsigned InstrNum = MI.getOperand(0).getImm();
- unsigned OperandNum = MI.getOperand(1).getImm();
- auto *Var = MI.getDebugVariable();
- auto *Expr = MI.getDebugExpression();
- auto &DL = MI.getDebugLoc();
- StashedInstrRef Stashed =
- std::make_tuple(InstrNum, OperandNum, Var, Expr, DL);
- StashedInstrReferences[Idx].push_back(Stashed);
- return true;
+MachineBasicBlock::iterator LDVImpl::handleDebugInstr(MachineInstr &MI,
+ SlotIndex Idx) {
+ assert(MI.isDebugValue() || MI.isDebugRef() || MI.isDebugPHI());
+
+ // In instruction referencing mode, there should be no DBG_VALUE instructions
+ // that refer to virtual registers. They might still refer to constants.
+ if (MI.isDebugValue())
+ assert(!MI.getOperand(0).isReg() || !MI.getOperand(0).getReg().isVirtual());
+
+ // Unlink the instruction, store it in the debug instructions collection.
+ auto NextInst = std::next(MI.getIterator());
+ auto *MBB = MI.getParent();
+ MI.removeFromParent();
+ StashedDebugInstrs.push_back({&MI, Idx, MBB});
+ return NextInst;
}
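The unlink-and-stash pattern used by handleDebugInstr, reduced to ordinary containers (a standalone sketch with invented instruction strings, not LLVM code): debug entries are pulled out of the block, remembered with a position that stays meaningful while the block changes, and re-inserted afterwards.

#include <iostream>
#include <list>
#include <string>
#include <utility>
#include <vector>

static bool isDebug(const std::string &I) { return I.rfind("DBG_", 0) == 0; }

int main() {
  std::list<std::string> Block = {"add", "DBG_VALUE x", "mul", "DBG_VALUE y"};
  // Each stashed entry remembers how many real instructions precede it, which
  // stays valid even if the block is rewritten in between.
  std::vector<std::pair<std::string, int>> Stashed;

  int RealInstrs = 0;
  for (auto It = Block.begin(); It != Block.end();) {
    if (isDebug(*It)) {
      Stashed.push_back({*It, RealInstrs});
      It = Block.erase(It); // unlink, continue from the next instruction
    } else {
      ++RealInstrs;
      ++It;
    }
  }

  // ... the block may be transformed here without seeing any debug entries ...

  // Re-insert each stashed entry after the recorded number of real
  // instructions.
  for (auto &[Instr, Pos] : Stashed) {
    auto It = Block.begin();
    for (int Seen = 0; It != Block.end() && Seen < Pos; ++It)
      if (!isDebug(*It))
        ++Seen;
    Block.insert(It, Instr);
  }

  for (const auto &I : Block)
    std::cout << I << '\n'; // add, DBG_VALUE x, mul, DBG_VALUE y
}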
bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
@@ -718,62 +918,71 @@ bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
return true;
}
-bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+bool LDVImpl::collectDebugValues(MachineFunction &mf, bool InstrRef) {
bool Changed = false;
- for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
- ++MFI) {
- MachineBasicBlock *MBB = &*MFI;
- for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ for (MachineBasicBlock &MBB : mf) {
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
MBBI != MBBE;) {
// Use the first debug instruction in the sequence to get a SlotIndex
// for following consecutive debug instructions.
- if (!MBBI->isDebugInstr()) {
+ if (!MBBI->isDebugOrPseudoInstr()) {
++MBBI;
continue;
}
// Debug instructions have no slot index. Use the previous
// non-debug instruction's SlotIndex as its SlotIndex.
SlotIndex Idx =
- MBBI == MBB->begin()
- ? LIS->getMBBStartIdx(MBB)
+ MBBI == MBB.begin()
+ ? LIS->getMBBStartIdx(&MBB)
: LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot();
// Handle consecutive debug instructions with the same slot index.
do {
- // Only handle DBG_VALUE in handleDebugValue(). Skip all other
- // kinds of debug instructions.
- if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) ||
- (MBBI->isDebugRef() && handleDebugInstrRef(*MBBI, Idx)) ||
- (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) {
- MBBI = MBB->erase(MBBI);
+ // In instruction referencing mode, pass each instr to handleDebugInstr
+ // to be unlinked. Ignore DBG_VALUE_LISTs -- they refer to vregs, and
+ // need to go through the normal live interval splitting process.
+ if (InstrRef && (MBBI->isNonListDebugValue() || MBBI->isDebugPHI() ||
+ MBBI->isDebugRef())) {
+ MBBI = handleDebugInstr(*MBBI, Idx);
+ Changed = true;
+ // In normal debug mode, use the dedicated DBG_VALUE / DBG_LABEL handler
+ // to track things through register allocation, and erase the instr.
+ } else if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) ||
+ (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) {
+ MBBI = MBB.erase(MBBI);
Changed = true;
} else
++MBBI;
- } while (MBBI != MBBE && MBBI->isDebugInstr());
+ } while (MBBI != MBBE && MBBI->isDebugOrPseudoInstr());
}
}
return Changed;
}
-void UserValue::extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *LR,
- const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
- LiveIntervals &LIS) {
+void UserValue::extendDef(
+ SlotIndex Idx, DbgVariableValue DbgValue,
+ SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
+ &LiveIntervalInfo,
+ Optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
+ LiveIntervals &LIS) {
SlotIndex Start = Idx;
MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
SlotIndex Stop = LIS.getMBBEndIdx(MBB);
LocMap::iterator I = locInts.find(Start);
- // Limit to VNI's live range.
- bool ToEnd = true;
- if (LR && VNI) {
+ // Limit to the intersection of the VNIs' live ranges.
+ for (auto &LII : LiveIntervalInfo) {
+ LiveRange *LR = LII.second.first;
+ assert(LR && LII.second.second && "Missing range info for Idx.");
LiveInterval::Segment *Segment = LR->getSegmentContaining(Start);
- if (!Segment || Segment->valno != VNI) {
- if (Kills)
- Kills->push_back(Start);
- return;
- }
+ assert(Segment && Segment->valno == LII.second.second &&
+ "Invalid VNInfo for Idx given?");
if (Segment->end < Stop) {
Stop = Segment->end;
- ToEnd = false;
+ Kills = {Stop, {LII.first}};
+ } else if (Segment->end == Stop && Kills.hasValue()) {
+ // If multiple locations end at the same place, track all of them in
+ // Kills.
+ Kills->second.push_back(LII.first);
}
}
@@ -781,94 +990,116 @@ void UserValue::extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *L
if (I.valid() && I.start() <= Start) {
// Stop when meeting a different location or an already extended interval.
Start = Start.getNextSlot();
- if (I.value() != DbgValue || I.stop() != Start)
+ if (I.value() != DbgValue || I.stop() != Start) {
+ // Clear `Kills`, as we have a new def available.
+ Kills = None;
return;
+ }
// This is a one-slot placeholder. Just skip it.
++I;
}
// Limited by the next def.
- if (I.valid() && I.start() < Stop)
+ if (I.valid() && I.start() < Stop) {
Stop = I.start();
- // Limited by VNI's live range.
- else if (!ToEnd && Kills)
- Kills->push_back(Stop);
+ // Clear `Kills`, as we have a new def available.
+ Kills = None;
+ }
- if (Start < Stop)
- I.insert(Start, Stop, DbgValue);
+ if (Start < Stop) {
+ DbgVariableValue ExtDbgValue(DbgValue);
+ I.insert(Start, Stop, std::move(ExtDbgValue));
+ }
}
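The intersection-and-kills logic in extendDef boils down to the following standalone sketch (plain C++ with invented numbers, not LLVM types): the extension is clamped to the smallest segment end among the tracked locations, and every location whose segment ends exactly there is recorded as killed at that point.

#include <iostream>
#include <map>
#include <optional>
#include <utility>
#include <vector>

int main() {
  unsigned Start = 10, Stop = 100; // block end is the initial upper bound
  // Location number -> end of the segment containing Start.
  std::map<unsigned, unsigned> SegmentEnd = {{0, 40}, {1, 40}, {2, 80}};

  std::optional<std::pair<unsigned, std::vector<unsigned>>> Kills;
  for (const auto &[LocNo, End] : SegmentEnd) {
    if (End < Stop) {
      Stop = End;
      Kills.emplace(Stop, std::vector<unsigned>{LocNo}); // new, smaller end
    } else if (End == Stop && Kills) {
      Kills->second.push_back(LocNo); // dies at the same point: track it too
    }
  }

  std::cout << "extend [" << Start << ", " << Stop << "), killed locs:";
  if (Kills)
    for (unsigned L : Kills->second)
      std::cout << ' ' << L;
  std::cout << '\n'; // extend [10, 40), killed locs: 0 1
}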
void UserValue::addDefsFromCopies(
- LiveInterval *LI, DbgVariableValue DbgValue,
- const SmallVectorImpl<SlotIndex> &Kills,
+ DbgVariableValue DbgValue,
+ SmallVectorImpl<std::pair<unsigned, LiveInterval *>> &LocIntervals,
+ SlotIndex KilledAt,
SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS) {
- if (Kills.empty())
- return;
// Don't track copies from physregs, there are too many uses.
- if (!Register::isVirtualRegister(LI->reg()))
+ if (any_of(LocIntervals, [](auto LocI) {
+ return !Register::isVirtualRegister(LocI.second->reg());
+ }))
return;
// Collect all the (vreg, valno) pairs that are copies of LI.
- SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
- for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg())) {
- MachineInstr *MI = MO.getParent();
- // Copies of the full value.
- if (MO.getSubReg() || !MI->isCopy())
- continue;
- Register DstReg = MI->getOperand(0).getReg();
+ SmallDenseMap<unsigned,
+ SmallVector<std::pair<LiveInterval *, const VNInfo *>, 4>>
+ CopyValues;
+ for (auto &LocInterval : LocIntervals) {
+ unsigned LocNo = LocInterval.first;
+ LiveInterval *LI = LocInterval.second;
+ for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg())) {
+ MachineInstr *MI = MO.getParent();
+ // Copies of the full value.
+ if (MO.getSubReg() || !MI->isCopy())
+ continue;
+ Register DstReg = MI->getOperand(0).getReg();
- // Don't follow copies to physregs. These are usually setting up call
- // arguments, and the argument registers are always call clobbered. We are
- // better off in the source register which could be a callee-saved register,
- // or it could be spilled.
- if (!Register::isVirtualRegister(DstReg))
- continue;
+ // Don't follow copies to physregs. These are usually setting up call
+ // arguments, and the argument registers are always call clobbered. We are
+ // better off in the source register which could be a callee-saved
+ // register, or it could be spilled.
+ if (!Register::isVirtualRegister(DstReg))
+ continue;
- // Is the value extended to reach this copy? If not, another def may be
- // blocking it, or we are looking at a wrong value of LI.
- SlotIndex Idx = LIS.getInstructionIndex(*MI);
- LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
- if (!I.valid() || I.value() != DbgValue)
- continue;
+ // Is the value extended to reach this copy? If not, another def may be
+ // blocking it, or we are looking at a wrong value of LI.
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
+ LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
+ if (!I.valid() || I.value() != DbgValue)
+ continue;
- if (!LIS.hasInterval(DstReg))
- continue;
- LiveInterval *DstLI = &LIS.getInterval(DstReg);
- const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
- assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
- CopyValues.push_back(std::make_pair(DstLI, DstVNI));
+ if (!LIS.hasInterval(DstReg))
+ continue;
+ LiveInterval *DstLI = &LIS.getInterval(DstReg);
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
+ CopyValues[LocNo].push_back(std::make_pair(DstLI, DstVNI));
+ }
}
if (CopyValues.empty())
return;
- LLVM_DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI
- << '\n');
+#if !defined(NDEBUG)
+ for (auto &LocInterval : LocIntervals)
+ LLVM_DEBUG(dbgs() << "Got " << CopyValues[LocInterval.first].size()
+ << " copies of " << *LocInterval.second << '\n');
+#endif
- // Try to add defs of the copied values for each kill point.
- for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
- SlotIndex Idx = Kills[i];
- for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) {
- LiveInterval *DstLI = CopyValues[j].first;
- const VNInfo *DstVNI = CopyValues[j].second;
- if (DstLI->getVNInfoAt(Idx) != DstVNI)
- continue;
- // Check that there isn't already a def at Idx
- LocMap::iterator I = locInts.find(Idx);
- if (I.valid() && I.start() <= Idx)
+ // Try to add defs of the copied values for the kill point. Check that there
+ // isn't already a def at KilledAt.
+ LocMap::iterator I = locInts.find(KilledAt);
+ if (I.valid() && I.start() <= KilledAt)
+ return;
+ DbgVariableValue NewValue(DbgValue);
+ for (auto &LocInterval : LocIntervals) {
+ unsigned LocNo = LocInterval.first;
+ bool FoundCopy = false;
+ for (auto &LIAndVNI : CopyValues[LocNo]) {
+ LiveInterval *DstLI = LIAndVNI.first;
+ const VNInfo *DstVNI = LIAndVNI.second;
+ if (DstLI->getVNInfoAt(KilledAt) != DstVNI)
continue;
- LLVM_DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
+ LLVM_DEBUG(dbgs() << "Kill at " << KilledAt << " covered by valno #"
<< DstVNI->id << " in " << *DstLI << '\n');
MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
- unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
- DbgVariableValue NewValue = DbgValue.changeLocNo(LocNo);
- I.insert(Idx, Idx.getNextSlot(), NewValue);
- NewDefs.push_back(std::make_pair(Idx, NewValue));
+ unsigned NewLocNo = getLocationNo(CopyMI->getOperand(0));
+ NewValue = NewValue.changeLocNo(LocNo, NewLocNo);
+ FoundCopy = true;
break;
}
+ // If there are any killed locations we can't find a copy for, we can't
+ // extend the variable value.
+ if (!FoundCopy)
+ return;
}
+ I.insert(KilledAt, KilledAt.getNextSlot(), NewValue);
+ NewDefs.push_back(std::make_pair(KilledAt, NewValue));
}
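The all-or-nothing rule at the end of addDefsFromCopies, reduced to a short standalone sketch (invented location numbers, not LLVM code): every killed location must have a known copy at the kill point, otherwise nothing is rewritten and the value is not extended.

#include <iostream>
#include <map>
#include <vector>

int main() {
  // Location numbers killed at this point, and the copies live there.
  std::vector<unsigned> Killed = {1, 3};
  std::map<unsigned, unsigned> CopyAt = {{1, 7}, {3, 9}}; // old -> new LocNo

  std::vector<unsigned> Locs = {0, 1, 3}; // the debug value's location list
  for (unsigned OldLoc : Killed) {
    auto It = CopyAt.find(OldLoc);
    if (It == CopyAt.end()) {
      std::cout << "a killed location has no copy: extend nothing\n";
      return 0;
    }
    for (unsigned &L : Locs)
      if (L == OldLoc)
        L = It->second; // rewrite this operand to the copy's location
  }

  std::cout << "extended with locations:";
  for (unsigned L : Locs)
    std::cout << ' ' << L; // 0 7 9
  std::cout << '\n';
}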
void UserValue::computeIntervals(MachineRegisterInfo &MRI,
@@ -885,34 +1116,54 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
for (unsigned i = 0; i != Defs.size(); ++i) {
SlotIndex Idx = Defs[i].first;
DbgVariableValue DbgValue = Defs[i].second;
- const MachineOperand &LocMO = locations[DbgValue.getLocNo()];
-
- if (!LocMO.isReg()) {
- extendDef(Idx, DbgValue, nullptr, nullptr, nullptr, LIS);
- continue;
- }
-
- // Register locations are constrained to where the register value is live.
- if (Register::isVirtualRegister(LocMO.getReg())) {
+ SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>> LIs;
+ SmallVector<const VNInfo *, 4> VNIs;
+ bool ShouldExtendDef = false;
+ for (unsigned LocNo : DbgValue.loc_nos()) {
+ const MachineOperand &LocMO = locations[LocNo];
+ if (!LocMO.isReg() || !Register::isVirtualRegister(LocMO.getReg())) {
+ ShouldExtendDef |= !LocMO.isReg();
+ continue;
+ }
+ ShouldExtendDef = true;
LiveInterval *LI = nullptr;
const VNInfo *VNI = nullptr;
if (LIS.hasInterval(LocMO.getReg())) {
LI = &LIS.getInterval(LocMO.getReg());
VNI = LI->getVNInfoAt(Idx);
}
- SmallVector<SlotIndex, 16> Kills;
- extendDef(Idx, DbgValue, LI, VNI, &Kills, LIS);
- // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that
- // if the original location for example is %vreg0:sub_hi, and we find a
- // full register copy in addDefsFromCopies (at the moment it only handles
- // full register copies), then we must add the sub1 sub-register index to
- // the new location. However, that is only possible if the new virtual
- // register is of the same regclass (or if there is an equivalent
- // sub-register in that regclass). For now, simply skip handling copies if
- // a sub-register is involved.
- if (LI && !LocMO.getSubReg())
- addDefsFromCopies(LI, DbgValue, Kills, Defs, MRI, LIS);
- continue;
+ if (LI && VNI)
+ LIs[LocNo] = {LI, VNI};
+ }
+ if (ShouldExtendDef) {
+ Optional<std::pair<SlotIndex, SmallVector<unsigned>>> Kills;
+ extendDef(Idx, DbgValue, LIs, Kills, LIS);
+
+ if (Kills) {
+ SmallVector<std::pair<unsigned, LiveInterval *>, 2> KilledLocIntervals;
+ bool AnySubreg = false;
+ for (unsigned LocNo : Kills->second) {
+ const MachineOperand &LocMO = this->locations[LocNo];
+ if (LocMO.getSubReg()) {
+ AnySubreg = true;
+ break;
+ }
+ LiveInterval *LI = &LIS.getInterval(LocMO.getReg());
+ KilledLocIntervals.push_back({LocNo, LI});
+ }
+
+ // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that
+ // if the original location for example is %vreg0:sub_hi, and we find a
+ // full register copy in addDefsFromCopies (at the moment it only
+ // handles full register copies), then we must add the sub1 sub-register
+ // index to the new location. However, that is only possible if the new
+ // virtual register is of the same regclass (or if there is an
+ // equivalent sub-register in that regclass). For now, simply skip
+ // handling copies if a sub-register is involved.
+ if (!AnySubreg)
+ addDefsFromCopies(DbgValue, KilledLocIntervals, Kills->first, Defs,
+ MRI, LIS);
+ }
}
// For physregs, we only mark the start slot idx. DwarfDebug will see it
@@ -927,7 +1178,11 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
// location's lexical scope. In this case, splitting of an interval
// can result in an interval outside of the scope being created,
// causing extra unnecessary DBG_VALUEs to be emitted. To prevent
- // this, trim the intervals to the lexical scope.
+ // this, trim the intervals to the lexical scope in the case of inlined
+ // variables, since heavy inlining may cause production of dramatically big
+ // number of DBG_VALUEs to be generated.
+ if (!dl.getInlinedAt())
+ return;
LexicalScope *Scope = LS.findLexicalScope(dl);
if (!Scope)
@@ -1007,7 +1262,7 @@ void LDVImpl::computeIntervals() {
}
}
-bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) {
clear();
MF = &mf;
LIS = &pass.getAnalysis<LiveIntervals>();
@@ -1015,9 +1270,24 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
<< mf.getName() << " **********\n");
- bool Changed = collectDebugValues(mf);
+ bool Changed = collectDebugValues(mf, InstrRef);
computeIntervals();
LLVM_DEBUG(print(dbgs()));
+
+ // Collect the set of VReg / SlotIndexs where PHIs occur; index the sensitive
+ // VRegs too, for when we're notified of a range split.
+ SlotIndexes *Slots = LIS->getSlotIndexes();
+ for (const auto &PHIIt : MF->DebugPHIPositions) {
+ const MachineFunction::DebugPHIRegallocPos &Position = PHIIt.second;
+ MachineBasicBlock *MBB = Position.MBB;
+ Register Reg = Position.Reg;
+ unsigned SubReg = Position.SubReg;
+ SlotIndex SI = Slots->getMBBStartIdx(MBB);
+ PHIValPos VP = {SI, Reg, SubReg};
+ PHIValToPos.insert(std::make_pair(PHIIt.first, VP));
+ RegToPHIIdx[Reg].push_back(PHIIt.first);
+ }
+
ModifiedMF = Changed;
return Changed;
}
@@ -1041,9 +1311,19 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
removeDebugInstrs(mf);
return false;
}
+
+ // Have we been asked to track variable locations using instruction
+ // referencing?
+ bool InstrRef = false;
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (TPC) {
+ auto &TM = TPC->getTM<TargetMachine>();
+ InstrRef = TM.Options.ValueTrackingVariableLocations;
+ }
+
if (!pImpl)
pImpl = new LDVImpl(this);
- return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+ return static_cast<LDVImpl *>(pImpl)->runOnMachineFunction(mf, InstrRef);
}
void LiveDebugVariables::releaseMemory() {
@@ -1091,7 +1371,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
break;
// Now LII->end > LocMapI.start(). Do we have an overlap?
- if (LocMapI.value().getLocNo() == OldLocNo &&
+ if (LocMapI.value().containsLocNo(OldLocNo) &&
LII->start < LocMapI.stop()) {
// Overlapping correct location. Allocate NewLocNo now.
if (NewLocNo == UndefLocNo) {
@@ -1112,7 +1392,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
LocMapI.setStopUnchecked(LII->end);
// Change the value in the overlap. This may trigger coalescing.
- LocMapI.setValue(OldDbgValue.changeLocNo(NewLocNo));
+ LocMapI.setValue(OldDbgValue.changeLocNo(OldLocNo, NewLocNo));
// Re-insert any removed OldDbgValue ranges.
if (LStart < LocMapI.start()) {
@@ -1176,7 +1456,50 @@ UserValue::splitRegister(Register OldReg, ArrayRef<Register> NewRegs,
return DidChange;
}
+void LDVImpl::splitPHIRegister(Register OldReg, ArrayRef<Register> NewRegs) {
+ auto RegIt = RegToPHIIdx.find(OldReg);
+ if (RegIt == RegToPHIIdx.end())
+ return;
+
+ std::vector<std::pair<Register, unsigned>> NewRegIdxes;
+ // Iterate over all the debug instruction numbers affected by this split.
+ for (unsigned InstrID : RegIt->second) {
+ auto PHIIt = PHIValToPos.find(InstrID);
+ assert(PHIIt != PHIValToPos.end());
+ const SlotIndex &Slot = PHIIt->second.SI;
+ assert(OldReg == PHIIt->second.Reg);
+
+ // Find the new register that covers this position.
+ for (auto NewReg : NewRegs) {
+ const LiveInterval &LI = LIS->getInterval(NewReg);
+ auto LII = LI.find(Slot);
+ if (LII != LI.end() && LII->start <= Slot) {
+ // This new register covers this PHI position, record this for indexing.
+ NewRegIdxes.push_back(std::make_pair(NewReg, InstrID));
+ // Record that this value lives in a different VReg now.
+ PHIIt->second.Reg = NewReg;
+ break;
+ }
+ }
+
+ // If we do not find a new register covering this PHI, then register
+ // allocation has dropped its location, for example because it's not live.
+ // The old VReg will not be mapped to a physreg, and the instruction
+ // number will have been optimized out.
+ }
+
+ // Re-create register index using the new register numbers.
+ RegToPHIIdx.erase(RegIt);
+ for (auto &RegAndInstr : NewRegIdxes)
+ RegToPHIIdx[RegAndInstr.first].push_back(RegAndInstr.second);
+}
+
void LDVImpl::splitRegister(Register OldReg, ArrayRef<Register> NewRegs) {
+ // Consider whether this split range affects any PHI locations.
+ splitPHIRegister(OldReg, NewRegs);
+
+ // Check whether any intervals mapped by a DBG_VALUE were split and need
+ // updating.
bool DidChange = false;
for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
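
When a register is split, each tracked PHI position must be re-homed to whichever new interval covers its slot, and the reverse index rebuilt under the new keys. A rough standalone sketch of the same re-homing logic, with live intervals reduced to sorted [start, end) pairs (all names here are illustrative, not the LLVM API):

#include <cstdint>
#include <map>
#include <unordered_map>
#include <utility>
#include <vector>

using Segment = std::pair<uint64_t, uint64_t>; // [start, end)
using Interval = std::vector<Segment>;         // sorted, disjoint

static bool covers(const Interval &LI, uint64_t Slot) {
  for (const Segment &S : LI)
    if (S.first <= Slot && Slot < S.second)
      return true;
  return false;
}

// OldReg was split into NewRegs; move every PHI that lived in OldReg to the
// new register whose interval covers its slot, then rebuild the reverse index.
void splitPHIRegister(unsigned OldReg, const std::vector<unsigned> &NewRegs,
                      const std::map<unsigned, Interval> &Intervals,
                      std::unordered_map<unsigned, std::pair<uint64_t, unsigned>>
                          &PHIValToPos, // instr-id -> {slot, reg}
                      std::unordered_map<unsigned, std::vector<unsigned>>
                          &RegToPHIIdx) {
  auto RegIt = RegToPHIIdx.find(OldReg);
  if (RegIt == RegToPHIIdx.end())
    return;

  std::vector<std::pair<unsigned, unsigned>> NewIdx; // {new reg, instr-id}
  for (unsigned InstrID : RegIt->second) {
    auto &Pos = PHIValToPos.at(InstrID);
    for (unsigned NewReg : NewRegs) {
      if (covers(Intervals.at(NewReg), Pos.first)) {
        Pos.second = NewReg; // the value now lives in NewReg
        NewIdx.push_back({NewReg, InstrID});
        break;
      }
    }
    // If nothing covers the slot, the location was dropped by regalloc and
    // the PHI simply stops being indexed.
  }

  RegToPHIIdx.erase(RegIt);
  for (auto &P : NewIdx)
    RegToPHIIdx[P.first].push_back(P.second);
}
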
@@ -1269,21 +1592,15 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
// DBG_VALUE intervals with different vregs that were allocated to the same
// physical register.
for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
- DbgVariableValue DbgValue = I.value();
- // Undef values don't exist in locations (and thus not in LocNoMap either)
- // so skip over them. See getLocationNo().
- if (DbgValue.isUndef())
- continue;
- unsigned NewLocNo = LocNoMap[DbgValue.getLocNo()];
- I.setValueUnchecked(DbgValue.changeLocNo(NewLocNo));
+ I.setValueUnchecked(I.value().remapLocNos(LocNoMap));
I.setStart(I.start());
}
}
/// Find an iterator for inserting a DBG_VALUE instruction.
static MachineBasicBlock::iterator
-findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
- LiveIntervals &LIS) {
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, LiveIntervals &LIS,
+ BlockSkipInstsMap &BBSkipInstsMap) {
SlotIndex Start = LIS.getMBBStartIdx(MBB);
Idx = Idx.getBaseIndex();
@@ -1292,7 +1609,29 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
while (!(MI = LIS.getInstructionFromIndex(Idx))) {
// We've reached the beginning of MBB.
if (Idx == Start) {
- MachineBasicBlock::iterator I = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
+ // Retrieve the last PHI/Label/Debug location found the last time
+ // SkipPHIsLabelsAndDebug was called, and start searching from there.
+ //
+ // Note that the iterator kept in BBSkipInstsMap is one step behind the
+ // iterator returned by the previous call to SkipPHIsLabelsAndDebug.
+ // One exception: when SkipPHIsLabelsAndDebug returns MBB->begin(),
+ // BBSkipInstsMap does not save it. This handles the case where new
+ // instructions are inserted at the beginning of the MBB after the last
+ // call to SkipPHIsLabelsAndDebug. If we saved MBB->begin() in
+ // BBSkipInstsMap, then once new non-phi/non-label/non-debug instructions
+ // were inserted at the beginning of the MBB, the iterator in
+ // BBSkipInstsMap would no longer point to the beginning of the MBB.
+ // The next search in SkipPHIsLabelsAndDebug would then skip those newly
+ // added instructions, which is unwanted.
+ MachineBasicBlock::iterator BeginIt;
+ auto MapIt = BBSkipInstsMap.find(MBB);
+ if (MapIt == BBSkipInstsMap.end())
+ BeginIt = MBB->begin();
+ else
+ BeginIt = std::next(MapIt->second);
+ auto I = MBB->SkipPHIsLabelsAndDebug(BeginIt);
+ if (I != BeginIt)
+ BBSkipInstsMap[MBB] = std::prev(I);
return I;
}
Idx = Idx.getPrevIndex();
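
The long comment above boils down to a memoization rule: cache the position one step before the last skip result, and never cache begin() itself, so that instructions inserted later at the very front of the block are still visited by the next search. A small self-contained illustration on a std::list, with made-up "skip" semantics standing in for SkipPHIsLabelsAndDebug:

#include <iostream>
#include <list>
#include <map>

// Toy block: negative entries stand in for PHIs/labels/debug instructions
// that an insertion point must skip past.
using Block = std::list<int>;
using Iter = Block::iterator;

static Iter skipSpecial(Block &B, Iter I) {
  while (I != B.end() && *I < 0)
    ++I;
  return I;
}

// Memoized front-of-block insertion point: remember the element one step
// before the last skip result, and deliberately never remember begin()
// itself, so instructions later inserted at the very front are still seen.
static Iter frontInsertPos(Block &B, std::map<Block *, Iter> &Cache) {
  Iter Begin = B.begin();
  auto It = Cache.find(&B);
  if (It != Cache.end())
    Begin = std::next(It->second);
  Iter I = skipSpecial(B, Begin);
  if (I != Begin)
    Cache[&B] = std::prev(I);
  return I;
}

int main() {
  std::map<Block *, Iter> Cache;
  Block B = {-1, -2, 10, 20};
  std::cout << *frontInsertPos(B, Cache) << "\n"; // 10: skipped -1, -2
  B.push_front(-3);                               // another "label" appears
  std::cout << *frontInsertPos(B, Cache) << "\n"; // still 10, no full rescan
  Block C = {10, 20};                             // no specials at all
  std::cout << *frontInsertPos(C, Cache) << "\n"; // 10; begin() is not cached
  C.push_front(5);                                // a real instruction at front
  std::cout << *frontInsertPos(C, Cache) << "\n"; // 5, correctly not skipped
}
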
@@ -1306,21 +1645,24 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
/// Find an iterator for inserting the next DBG_VALUE instruction
/// (or end if no more insert locations found).
static MachineBasicBlock::iterator
-findNextInsertLocation(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- SlotIndex StopIdx, MachineOperand &LocMO,
- LiveIntervals &LIS,
- const TargetRegisterInfo &TRI) {
- if (!LocMO.isReg())
+findNextInsertLocation(MachineBasicBlock *MBB, MachineBasicBlock::iterator I,
+ SlotIndex StopIdx, ArrayRef<MachineOperand> LocMOs,
+ LiveIntervals &LIS, const TargetRegisterInfo &TRI) {
+ SmallVector<Register, 4> Regs;
+ for (const MachineOperand &LocMO : LocMOs)
+ if (LocMO.isReg())
+ Regs.push_back(LocMO.getReg());
+ if (Regs.empty())
return MBB->instr_end();
- Register Reg = LocMO.getReg();
  // Find the next instruction in the MBB that defines one of the registers.
while (I != MBB->end() && !I->isTerminator()) {
if (!LIS.isNotInMIMap(*I) &&
SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I)))
break;
- if (I->definesRegister(Reg, &TRI))
+ if (any_of(Regs, [&I, &TRI](Register &Reg) {
+ return I->definesRegister(Reg, &TRI);
+ }))
// The insert location is directly after the instruction/bundle.
return std::next(I);
++I;
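
Because a DBG_VALUE_LIST can reference several registers, findNextInsertLocation now restarts after a redefinition of any of them, hence the any_of over the collected Regs. The same scan, reduced to plain containers (toy types, not the MachineInstr API):

#include <algorithm>
#include <cstddef>
#include <vector>

struct ToyInstr {
  std::vector<unsigned> Defs; // registers this instruction defines
};

// Return the index just past the first instruction in [I, End) that defines
// any of Regs, or End if none does (mirroring "insert directly after it").
static std::size_t nextInsertAfterDef(const std::vector<ToyInstr> &Block,
                                      std::size_t I, std::size_t End,
                                      const std::vector<unsigned> &Regs) {
  for (; I < End; ++I) {
    const ToyInstr &MI = Block[I];
    bool DefinesOne = std::any_of(Regs.begin(), Regs.end(), [&](unsigned R) {
      return std::find(MI.Defs.begin(), MI.Defs.end(), R) != MI.Defs.end();
    });
    if (DefinesOne)
      return I + 1;
  }
  return End;
}
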
@@ -1330,23 +1672,30 @@ findNextInsertLocation(MachineBasicBlock *MBB,
void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
SlotIndex StopIdx, DbgVariableValue DbgValue,
- bool Spilled, unsigned SpillOffset,
+ ArrayRef<bool> LocSpills,
+ ArrayRef<unsigned> SpillOffsets,
LiveIntervals &LIS, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ BlockSkipInstsMap &BBSkipInstsMap) {
SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB);
// Only search within the current MBB.
StopIdx = (MBBEndIdx < StopIdx) ? MBBEndIdx : StopIdx;
- MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS);
+ MachineBasicBlock::iterator I =
+ findInsertLocation(MBB, StartIdx, LIS, BBSkipInstsMap);
// Undef values don't exist in locations so create new "noreg" register MOs
// for them. See getLocationNo().
- MachineOperand MO =
- !DbgValue.isUndef()
- ? locations[DbgValue.getLocNo()]
- : MachineOperand::CreateReg(
- /* Reg */ 0, /* isDef */ false, /* isImp */ false,
- /* isKill */ false, /* isDead */ false,
- /* isUndef */ false, /* isEarlyClobber */ false,
- /* SubReg */ 0, /* isDebug */ true);
+ SmallVector<MachineOperand, 8> MOs;
+ if (DbgValue.isUndef()) {
+ MOs.assign(DbgValue.loc_nos().size(),
+ MachineOperand::CreateReg(
+ /* Reg */ 0, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true));
+ } else {
+ for (unsigned LocNo : DbgValue.loc_nos())
+ MOs.push_back(locations[LocNo]);
+ }
++NumInsertedDebugValues;
@@ -1359,32 +1708,45 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// that the original virtual register was a pointer. Also, add the stack slot
// offset for the spilled register to the expression.
const DIExpression *Expr = DbgValue.getExpression();
- uint8_t DIExprFlags = DIExpression::ApplyOffset;
bool IsIndirect = DbgValue.getWasIndirect();
- if (Spilled) {
- if (IsIndirect)
- DIExprFlags |= DIExpression::DerefAfter;
- Expr =
- DIExpression::prepend(Expr, DIExprFlags, SpillOffset);
- IsIndirect = true;
- }
+ bool IsList = DbgValue.getWasList();
+ for (unsigned I = 0, E = LocSpills.size(); I != E; ++I) {
+ if (LocSpills[I]) {
+ if (!IsList) {
+ uint8_t DIExprFlags = DIExpression::ApplyOffset;
+ if (IsIndirect)
+ DIExprFlags |= DIExpression::DerefAfter;
+ Expr = DIExpression::prepend(Expr, DIExprFlags, SpillOffsets[I]);
+ IsIndirect = true;
+ } else {
+ SmallVector<uint64_t, 4> Ops;
+ DIExpression::appendOffset(Ops, SpillOffsets[I]);
+ Ops.push_back(dwarf::DW_OP_deref);
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, I);
+ }
+ }
- assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");
+ assert((!LocSpills[I] || MOs[I].isFI()) &&
+ "a spilled location must be a frame index");
+ }
+ unsigned DbgValueOpcode =
+ IsList ? TargetOpcode::DBG_VALUE_LIST : TargetOpcode::DBG_VALUE;
do {
- BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect, MO, Variable, Expr);
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(DbgValueOpcode), IsIndirect, MOs,
+ Variable, Expr);
- // Continue and insert DBG_VALUES after every redefinition of register
+ // Continue and insert DBG_VALUES after every redefinition of a register
// associated with the debug value within the range
- I = findNextInsertLocation(MBB, I, StopIdx, MO, LIS, TRI);
+ I = findNextInsertLocation(MBB, I, StopIdx, MOs, LIS, TRI);
} while (I != MBB->end());
}
void UserLabel::insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
- LiveIntervals &LIS,
- const TargetInstrInfo &TII) {
- MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap) {
+ MachineBasicBlock::iterator I =
+ findInsertLocation(MBB, Idx, LIS, BBSkipInstsMap);
++NumInsertedDebugLabels;
BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL))
.addMetadata(Label);
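
With variadic debug values, the spill rewrite above is applied per operand: a spilled single-location value gets the offset (and a deref) prepended to its expression, while in the list form each spilled operand gets the offset appended to its own argument, and the opcode switches to DBG_VALUE_LIST. A deliberately simplified standalone model of that per-operand bookkeeping; the "expression" here is just a vector of strings, nothing like a real DIExpression:

#include <string>
#include <vector>

struct ToyLoc {
  bool Spilled = false;
  unsigned SpillOffset = 0;
};

struct ToyDebugValue {
  std::vector<ToyLoc> Locs;
  std::vector<std::string> ExprOps; // stand-in for expression operations
  bool IsList() const { return Locs.size() > 1; }
};

// Fold per-location spill information into the expression: prepend for the
// single-location form, append per-argument ops for the list form.
static void applySpills(ToyDebugValue &DV) {
  for (unsigned I = 0; I < DV.Locs.size(); ++I) {
    if (!DV.Locs[I].Spilled)
      continue;
    std::string Off = "offset(" + std::to_string(DV.Locs[I].SpillOffset) + ")";
    if (!DV.IsList()) {
      DV.ExprOps.insert(DV.ExprOps.begin(), {Off, "deref"});
    } else {
      DV.ExprOps.push_back("arg" + std::to_string(I) + ":" + Off);
      DV.ExprOps.push_back("arg" + std::to_string(I) + ":deref");
    }
  }
}
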
@@ -1393,17 +1755,24 @@ void UserLabel::insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
- const SpillOffsetMap &SpillOffsets) {
+ const SpillOffsetMap &SpillOffsets,
+ BlockSkipInstsMap &BBSkipInstsMap) {
MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
SlotIndex Start = I.start();
SlotIndex Stop = I.stop();
DbgVariableValue DbgValue = I.value();
- auto SpillIt = !DbgValue.isUndef() ? SpillOffsets.find(DbgValue.getLocNo())
- : SpillOffsets.end();
- bool Spilled = SpillIt != SpillOffsets.end();
- unsigned SpillOffset = Spilled ? SpillIt->second : 0;
+
+ SmallVector<bool> SpilledLocs;
+ SmallVector<unsigned> LocSpillOffsets;
+ for (unsigned LocNo : DbgValue.loc_nos()) {
+ auto SpillIt =
+ !DbgValue.isUndef() ? SpillOffsets.find(LocNo) : SpillOffsets.end();
+ bool Spilled = SpillIt != SpillOffsets.end();
+ SpilledLocs.push_back(Spilled);
+ LocSpillOffsets.push_back(Spilled ? SpillIt->second : 0);
+ }
// If the interval start was trimmed to the lexical scope insert the
// DBG_VALUE at the previous index (otherwise it appears after the
@@ -1411,14 +1780,14 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
if (trimmedDefs.count(Start))
Start = Start.getPrevIndex();
- LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop
- << "):" << DbgValue.getLocNo());
+ LLVM_DEBUG(auto &dbg = dbgs(); dbg << "\t[" << Start << ';' << Stop << "):";
+ DbgValue.printLocNos(dbg));
MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, DbgValue, Spilled, SpillOffset, LIS,
- TII, TRI);
+ insertDebugValue(&*MBB, Start, Stop, DbgValue, SpilledLocs, LocSpillOffsets,
+ LIS, TII, TRI, BBSkipInstsMap);
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
while (Stop > MBBEnd) {
@@ -1428,8 +1797,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
break;
MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, DbgValue, Spilled, SpillOffset, LIS,
- TII, TRI);
+ insertDebugValue(&*MBB, Start, Stop, DbgValue, SpilledLocs,
+ LocSpillOffsets, LIS, TII, TRI, BBSkipInstsMap);
}
LLVM_DEBUG(dbgs() << '\n');
if (MBB == MFEnd)
@@ -1439,12 +1808,13 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
}
}
-void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII) {
+void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap) {
LLVM_DEBUG(dbgs() << "\t" << loc);
MachineFunction::iterator MBB = LIS.getMBBFromIndex(loc)->getIterator();
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB));
- insertDebugLabel(&*MBB, loc, LIS, TII);
+ insertDebugLabel(&*MBB, loc, LIS, TII, BBSkipInstsMap);
LLVM_DEBUG(dbgs() << '\n');
}
@@ -1453,41 +1823,111 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
if (!MF)
return;
+
+ BlockSkipInstsMap BBSkipInstsMap;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
SpillOffsetMap SpillOffsets;
for (auto &userValue : userValues) {
LLVM_DEBUG(userValue->print(dbgs(), TRI));
userValue->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
- userValue->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
+ userValue->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets,
+ BBSkipInstsMap);
}
LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG LABELS **********\n");
for (auto &userLabel : userLabels) {
LLVM_DEBUG(userLabel->print(dbgs(), TRI));
- userLabel->emitDebugLabel(*LIS, *TII);
+ userLabel->emitDebugLabel(*LIS, *TII, BBSkipInstsMap);
}
- LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n");
+ LLVM_DEBUG(dbgs() << "********** EMITTING DEBUG PHIS **********\n");
- // Re-insert any DBG_INSTR_REFs back in the position they were. Ordering
- // is preserved by vector.
auto Slots = LIS->getSlotIndexes();
- const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
- for (auto &P : StashedInstrReferences) {
- const SlotIndex &Idx = P.first;
- auto *MBB = Slots->getMBBFromIndex(Idx);
- MachineBasicBlock::iterator insertPos = findInsertLocation(MBB, Idx, *LIS);
- for (auto &Stashed : P.second) {
- auto MIB = BuildMI(*MF, std::get<4>(Stashed), RefII);
- MIB.addImm(std::get<0>(Stashed));
- MIB.addImm(std::get<1>(Stashed));
- MIB.addMetadata(std::get<2>(Stashed));
- MIB.addMetadata(std::get<3>(Stashed));
- MachineInstr *New = MIB;
- MBB->insert(insertPos, New);
+ for (auto &It : PHIValToPos) {
+ // For each ex-PHI, identify its physreg location or stack slot, and emit
+ // a DBG_PHI for it.
+ unsigned InstNum = It.first;
+ auto Slot = It.second.SI;
+ Register Reg = It.second.Reg;
+ unsigned SubReg = It.second.SubReg;
+
+ MachineBasicBlock *OrigMBB = Slots->getMBBFromIndex(Slot);
+ if (VRM->isAssignedReg(Reg) &&
+ Register::isPhysicalRegister(VRM->getPhys(Reg))) {
+ unsigned PhysReg = VRM->getPhys(Reg);
+ if (SubReg != 0)
+ PhysReg = TRI->getSubReg(PhysReg, SubReg);
+
+ auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(),
+ TII->get(TargetOpcode::DBG_PHI));
+ Builder.addReg(PhysReg);
+ Builder.addImm(InstNum);
+ } else if (VRM->getStackSlot(Reg) != VirtRegMap::NO_STACK_SLOT) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
+ unsigned SpillSize, SpillOffset;
+
+ // Test whether this location is legal with the given subreg.
+ bool Success =
+ TII->getStackSlotRange(TRC, SubReg, SpillSize, SpillOffset, *MF);
+
+ if (Success) {
+ auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(),
+ TII->get(TargetOpcode::DBG_PHI));
+ Builder.addFrameIndex(VRM->getStackSlot(Reg));
+ Builder.addImm(InstNum);
+ }
+ }
+ // If there was no mapping for a value ID, it's optimized out. Create no
+ // DBG_PHI, and any variables using this value will become optimized out.
+ }
+ MF->DebugPHIPositions.clear();
+
+ LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n");
+
+ // Re-insert any debug instrs back in the positions they came from. Ordering
+ // is preserved by the vector. We must re-insert in the same order to ensure
+ // that debug instructions don't swap, which could re-order assignments.
+ for (auto &P : StashedDebugInstrs) {
+ SlotIndex Idx = P.Idx;
+
+ // Start block index: find the first non-debug instr in the block, and
+ // insert before it.
+ if (Idx == Slots->getMBBStartIdx(P.MBB)) {
+ MachineBasicBlock::iterator InsertPos =
+ findInsertLocation(P.MBB, Idx, *LIS, BBSkipInstsMap);
+ P.MBB->insert(InsertPos, P.MI);
+ continue;
+ }
+
+ if (MachineInstr *Pos = Slots->getInstructionFromIndex(Idx)) {
+ // Insert at the end of any debug instructions.
+ auto PostDebug = std::next(Pos->getIterator());
+ PostDebug = skipDebugInstructionsForward(PostDebug, P.MBB->instr_end());
+ P.MBB->insert(PostDebug, P.MI);
+ } else {
+ // Insert position disappeared; walk forwards through slots until we
+ // find a new one.
+ SlotIndex End = Slots->getMBBEndIdx(P.MBB);
+ for (; Idx < End; Idx = Slots->getNextNonNullIndex(Idx)) {
+ Pos = Slots->getInstructionFromIndex(Idx);
+ if (Pos) {
+ P.MBB->insert(Pos->getIterator(), P.MI);
+ break;
+ }
+ }
+
+ // We have reached the end of the block and didn't find anywhere to
+ // insert! It's not safe to discard any debug instructions; place them
+ // in front of the first terminator, or in front of end().
+ if (Idx >= End) {
+ auto TermIt = P.MBB->getFirstTerminator();
+ P.MBB->insert(TermIt, P.MI);
+ }
}
}
EmitDone = true;
+ BBSkipInstsMap.clear();
}
void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
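
The re-insertion loop above has three cases: a stashed instruction anchored at the block start goes before the first real instruction; one anchored at a surviving instruction goes just after it (and its trailing debug instructions); and if the anchor vanished, the pass walks forward to the next surviving slot, falling back to the first terminator. A compact standalone sketch of the same decision ladder over toy data (indices instead of SlotIndexes, ints instead of MachineInstrs):

#include <cstddef>
#include <map>
#include <vector>

// Toy model: a block is a vector of instruction ids; SlotToInstr maps each
// still-live slot index to the id of the instruction occupying it.
struct ToyBlock {
  std::vector<int> Instrs;
  std::size_t FirstTerminator; // index into Instrs (== Instrs.size() if none)
};

// Decide where a stashed debug instruction anchored at slot Idx goes:
// 1) anchored at the block start  -> position 0,
// 2) its anchor instruction lives -> directly after it,
// 3) anchor gone                  -> before the next surviving instruction,
//                                    else before the first terminator.
static std::size_t reinsertPos(const ToyBlock &B,
                               const std::map<int, int> &SlotToInstr,
                               int Idx, int BlockStartIdx, int BlockEndIdx) {
  auto indexOf = [&](int InstrId) -> std::size_t {
    for (std::size_t I = 0; I < B.Instrs.size(); ++I)
      if (B.Instrs[I] == InstrId)
        return I;
    return B.Instrs.size();
  };

  if (Idx == BlockStartIdx)
    return 0;

  auto It = SlotToInstr.find(Idx);
  if (It != SlotToInstr.end())
    return indexOf(It->second) + 1; // after the anchor (the real code also
                                    // hops over trailing debug instructions)

  auto Next = SlotToInstr.upper_bound(Idx);
  if (Next != SlotToInstr.end() && Next->first < BlockEndIdx)
    return indexOf(Next->second);   // before the next surviving instruction

  return B.FirstTerminator;         // nothing left: before the terminator
}
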
diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp
index ce0e58772068..1eed0ec5bbbe 100644
--- a/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/llvm/lib/CodeGen/LiveInterval.cpp
@@ -487,7 +487,7 @@ bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
/// by [Start, End).
bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const {
assert(Start < End && "Invalid range");
- const_iterator I = std::lower_bound(begin(), end(), End);
+ const_iterator I = lower_bound(*this, End);
return I != begin() && (--I)->end > Start;
}
@@ -1336,9 +1336,8 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) {
const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
assert(MBB && "Phi-def has no defining MBB");
// Connect to values live out of predecessors.
- for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI)
- if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
+ for (MachineBasicBlock *Pred : MBB->predecessors())
+ if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(Pred)))
EqClass.join(VNI->id, PVNI->id);
} else {
// Normal value defined by an instruction. Check for two-addr redef.
@@ -1361,12 +1360,9 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) {
void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
MachineRegisterInfo &MRI) {
// Rewrite instructions.
- for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg()),
- RE = MRI.reg_end();
- RI != RE;) {
- MachineOperand &MO = *RI;
- MachineInstr *MI = RI->getParent();
- ++RI;
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI.reg_operands(LI.reg()))) {
+ MachineInstr *MI = MO.getParent();
const VNInfo *VNI;
if (MI->isDebugValue()) {
// DBG_VALUE instructions don't have slot indexes, so get the index of
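
The Distribute() rewrite above switches a manual "advance the iterator before using the element" loop to llvm::make_early_inc_range, the usual way to mutate or move operands while walking a use list. The same idiom in plain C++ terms, increment first and only then act on the saved element, assuming a container whose iterators to other elements stay valid under erase:

#include <iostream>
#include <list>

int main() {
  std::list<int> Uses = {1, 2, 3, 4, 5};
  // Early-increment loop: grab the element, advance the iterator, and only
  // then perform an operation that may invalidate the current element.
  for (auto It = Uses.begin(); It != Uses.end();) {
    auto Cur = It++;    // advance before mutating
    if (*Cur % 2 == 0)
      Uses.erase(Cur);  // safe: It already points past the erased node
  }
  for (int U : Uses)
    std::cout << U << ' ';
  std::cout << '\n';    // prints: 1 3 5
}
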
diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 7ccb8df4bc05..dfa523d4bf41 100644
--- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
- return is_contained(InterferingVRegs, VirtReg);
+ return is_contained(*InterferingVRegs, VirtReg);
}
// Collect virtual registers in this union that interfere with this
@@ -126,9 +126,12 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
//
unsigned LiveIntervalUnion::Query::
collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ if (!InterferingVRegs)
+ InterferingVRegs.emplace();
+
// Fast path return if we already have the desired information.
- if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
- return InterferingVRegs.size();
+ if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs)
+ return InterferingVRegs->size();
// Set up iterators on the first call.
if (!CheckedFirstInterference) {
@@ -157,14 +160,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
RecentReg = VReg;
- InterferingVRegs.push_back(VReg);
- if (InterferingVRegs.size() >= MaxInterferingRegs)
- return InterferingVRegs.size();
+ InterferingVRegs->push_back(VReg);
+ if (InterferingVRegs->size() >= MaxInterferingRegs)
+ return InterferingVRegs->size();
}
// This LiveUnion segment is no longer interesting.
if (!(++LiveUnionI).valid()) {
SeenAllInterferences = true;
- return InterferingVRegs.size();
+ return InterferingVRegs->size();
}
}
@@ -185,7 +188,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveUnionI.advanceTo(LRI->start);
}
SeenAllInterferences = true;
- return InterferingVRegs.size();
+ return InterferingVRegs->size();
}
void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
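
The change above makes InterferingVRegs lazily constructed: an Optional that is emplaced on first use, so queries that never ask for the interfering set pay nothing for it. The same pattern with std::optional (illustrative names only, not the LiveIntervalUnion API):

#include <iostream>
#include <optional>
#include <vector>

class InterferenceQuery {
  std::optional<std::vector<int>> Interfering; // built only when requested

public:
  unsigned collect(unsigned MaxRegs) {
    if (!Interfering)
      Interfering.emplace();            // first request: materialize storage
    if (Interfering->size() >= MaxRegs) // fast path once enough is known
      return Interfering->size();
    // ... the real code would scan the live interval union here ...
    Interfering->push_back(42);         // pretend we found one interference
    return Interfering->size();
  }
};

int main() {
  InterferenceQuery Q;
  std::cout << Q.collect(4) << '\n'; // 1
}
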
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index a32b486240c8..23036c2b115f 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -47,6 +48,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/StackMaps.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -473,7 +475,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Visit all instructions reading li->reg().
Register Reg = li->reg();
for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) {
- if (UseMI.isDebugValue() || !UseMI.readsVirtualRegister(Reg))
+ if (UseMI.isDebugInstr() || !UseMI.readsVirtualRegister(Reg))
continue;
SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
LiveQueryResult LRQ = li->Query(Idx);
@@ -702,9 +704,6 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Keep track of regunit ranges.
SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
- // Keep track of subregister ranges.
- SmallVector<std::pair<const LiveInterval::SubRange*,
- LiveRange::const_iterator>, 4> SRs;
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
Register Reg = Register::index2VirtReg(i);
@@ -714,24 +713,21 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
if (LI.empty())
continue;
+ // The target may not have allocated this register yet.
+ Register PhysReg = VRM->getPhys(Reg);
+ if (!PhysReg)
+ continue;
+
// Find the regunit intervals for the assigned register. They may overlap
// the virtual register live range, cancelling any kills.
RU.clear();
- for (MCRegUnitIterator Unit(VRM->getPhys(Reg), TRI); Unit.isValid();
+ for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid();
++Unit) {
const LiveRange &RURange = getRegUnit(*Unit);
if (RURange.empty())
continue;
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
}
-
- if (MRI->subRegLivenessEnabled()) {
- SRs.clear();
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
- }
- }
-
// Every instruction that kills Reg corresponds to a segment range end
// point.
for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
@@ -776,20 +772,18 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// are actually never written by %2. After assignment the <kill>
// flag at the read instruction is invalid.
LaneBitmask DefinedLanesMask;
- if (!SRs.empty()) {
+ if (LI.hasSubRanges()) {
// Compute a mask of lanes that are defined.
DefinedLanesMask = LaneBitmask::getNone();
- for (auto &SRP : SRs) {
- const LiveInterval::SubRange &SR = *SRP.first;
- LiveRange::const_iterator &I = SRP.second;
- if (I == SR.end())
- continue;
- I = SR.advanceTo(I, RI->end);
- if (I == SR.end() || I->start >= RI->end)
- continue;
- // I is overlapping RI
- DefinedLanesMask |= SR.LaneMask;
- }
+ for (const LiveInterval::SubRange &SR : LI.subranges())
+ for (const LiveRange::Segment &Segment : SR.segments) {
+ if (Segment.start >= RI->end)
+ break;
+ if (Segment.end == RI->end) {
+ DefinedLanesMask |= SR.LaneMask;
+ break;
+ }
+ }
} else
DefinedLanesMask = LaneBitmask::getAll();
@@ -799,7 +793,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
continue;
if (MO.isUse()) {
// Reading any undefined lanes?
- LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask UseMask = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
if ((UseMask & ~DefinedLanesMask).any())
goto CancelKill;
} else if (MO.getSubReg() == 0) {
@@ -897,6 +893,23 @@ LiveIntervals::addSegmentToEndOfBlock(Register Reg, MachineInstr &startInst) {
//===----------------------------------------------------------------------===//
// Register mask functions
//===----------------------------------------------------------------------===//
+/// Check whether the use of Reg in MI is live-through. Live-through means
+/// that the value is alive on exit from the machine instruction. An example
+/// of such a use is a deopt value in a statepoint instruction.
+static bool hasLiveThroughUse(const MachineInstr *MI, Register Reg) {
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT)
+ return false;
+ StatepointOpers SO(MI);
+ if (SO.getFlags() & (uint64_t)StatepointFlags::DeoptLiveIn)
+ return false;
+ for (unsigned Idx = SO.getNumDeoptArgsIdx(), E = SO.getNumGCPtrIdx(); Idx < E;
+ ++Idx) {
+ const MachineOperand &MO = MI->getOperand(Idx);
+ if (MO.isReg() && MO.getReg() == Reg)
+ return true;
+ }
+ return false;
+}
bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
BitVector &UsableRegs) {
@@ -925,11 +938,8 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
return false;
bool Found = false;
- while (true) {
- assert(*SlotI >= LiveI->start);
- // Loop over all slots overlapping this segment.
- while (*SlotI < LiveI->end) {
- // *SlotI overlaps LI. Collect mask bits.
+ // Utility to union regmasks.
+ auto unionBitMask = [&](unsigned Idx) {
if (!Found) {
// This is the first overlap. Initialize UsableRegs to all ones.
UsableRegs.clear();
@@ -937,14 +947,28 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
Found = true;
}
// Remove usable registers clobbered by this mask.
- UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+ UsableRegs.clearBitsNotInMask(Bits[Idx]);
+ };
+ while (true) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ unionBitMask(SlotI - Slots.begin());
if (++SlotI == SlotE)
return Found;
}
+ // If the segment ends with a live-through use, we need to collect its regmask.
+ if (*SlotI == LiveI->end)
+ if (MachineInstr *MI = getInstructionFromIndex(*SlotI))
+ if (hasLiveThroughUse(MI, LI.reg()))
+ unionBitMask(SlotI++ - Slots.begin());
// *SlotI is beyond the current LI segment.
- LiveI = LI.advanceTo(LiveI, *SlotI);
- if (LiveI == LiveE)
+ // Special advance implementation so we don't miss the next LiveI->end.
+ if (++LiveI == LiveE || SlotI == SlotE || *SlotI > LI.endIndex())
return Found;
+ while (LiveI->end < *SlotI)
+ ++LiveI;
// Advance SlotI until it overlaps.
while (*SlotI < LiveI->start)
if (++SlotI == SlotE)
@@ -1465,7 +1489,7 @@ private:
MachineBasicBlock::iterator Begin = MBB->begin();
while (MII != Begin) {
- if ((--MII)->isDebugInstr())
+ if ((--MII)->isDebugOrPseudoInstr())
continue;
SlotIndex Idx = Indexes->getInstructionIndex(*MII);
@@ -1560,7 +1584,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
- if (MI.isDebugInstr())
+ if (MI.isDebugOrPseudoInstr())
continue;
SlotIndex instrIdx = getInstructionIndex(MI);
@@ -1657,7 +1681,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
- if (MI.isDebugInstr())
+ if (MI.isDebugOrPseudoInstr())
continue;
for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
MOE = MI.operands_end();
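
The checkRegMaskInterference rework earlier in this file factors the mask collection into a lambda and additionally takes the regmask of an instruction sitting exactly at a segment's end when that instruction has a live-through use. A much-simplified standalone analogue of that two-pointer scan; the real code tracks a Found flag and clears bits not in each mask, while here everything is a 32-bit mask ANDed together:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

struct Seg { uint64_t Start, End; }; // [Start, End)

// ANDs together the masks of all regmask slots that overlap any segment, and
// also takes the slot sitting exactly on a segment's End when the predicate
// says the value is live-through that instruction.
static uint32_t
collectMasks(const std::vector<Seg> &Segs, const std::vector<uint64_t> &Slots,
             const std::vector<uint32_t> &Masks,
             const std::function<bool(uint64_t)> &LiveThroughAt) {
  uint32_t Usable = ~0u;
  std::size_t SI = 0;
  for (const Seg &S : Segs) {
    while (SI < Slots.size() && Slots[SI] < S.Start)
      ++SI;                                   // skip slots before the segment
    while (SI < Slots.size() && Slots[SI] < S.End)
      Usable &= Masks[SI++];                  // slots inside the segment
    if (SI < Slots.size() && Slots[SI] == S.End && LiveThroughAt(Slots[SI]))
      Usable &= Masks[SI++];                  // live-through use at End
  }
  return Usable;
}
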
diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp
index 547970e7ab5d..c0c7848139e4 100644
--- a/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -125,8 +125,8 @@ void LivePhysRegs::print(raw_ostream &OS) const {
return;
}
- for (const_iterator I = begin(), E = end(); I != E; ++I)
- OS << " " << printReg(*I, TRI);
+ for (MCPhysReg R : *this)
+ OS << " " << printReg(R, TRI);
OS << "\n";
}
@@ -239,6 +239,10 @@ void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
addBlockLiveIns(MBB);
}
+void LivePhysRegs::addLiveInsNoPristines(const MachineBasicBlock &MBB) {
+ addBlockLiveIns(MBB);
+}
+
void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp
index e9c9b70d29a9..3ef28042acb0 100644
--- a/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -158,8 +158,7 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
// If LR has a segment S that starts at the next block, i.e. [End, ...),
// std::upper_bound will return the segment following S. Instead,
// S should be treated as the first segment that does not overlap B.
- LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(),
- End.getPrevSlot());
+ LiveRange::iterator UB = upper_bound(LR, End.getPrevSlot());
if (UB != LR.begin()) {
LiveRange::Segment &Seg = *std::prev(UB);
if (Seg.end > Begin) {
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 037cb5426235..64a2dd275643 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -113,9 +113,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
- // We can't remat physreg uses, unless it is a constant.
+ // We can't remat physreg uses, unless the register is constant or the
+ // target wants to ignore this use.
if (Register::isPhysicalRegister(MO.getReg())) {
- if (MRI.isConstantPhysReg(MO.getReg()))
+ if (MRI.isConstantPhysReg(MO.getReg()) || TII.isIgnorableUse(MO))
continue;
return false;
}
@@ -458,11 +459,8 @@ LiveRangeEdit::MRI_NoteNewVirtualRegister(Register VReg) {
NewRegs.push_back(VReg);
}
-void
-LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
- const MachineLoopInfo &Loops,
- const MachineBlockFrequencyInfo &MBFI) {
- VirtRegAuxInfo VRAI(MF, LIS, *VRM, Loops, MBFI);
+void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
+ VirtRegAuxInfo &VRAI) {
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
if (MRI.recomputeRegClass(LI.reg()))
diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 26439a656917..054f4370b609 100644
--- a/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -130,7 +130,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
MachineInstr &MI = *Next;
++Next;
- if (MI.isPHI() || MI.isDebugInstr())
+ if (MI.isPHI() || MI.isDebugOrPseudoInstr())
continue;
if (MI.mayStore())
SawStore = true;
@@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
// If MI has side effects, it should become a barrier for code motion.
      // IOM is rebuilt from the next instruction to prevent later
// instructions from being moved before this MI.
- if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
+ if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() &&
+ Next != MBB.end()) {
BuildInstOrderMap(Next, IOM);
SawStore = false;
}
@@ -218,7 +219,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
MachineBasicBlock::iterator I = std::next(Insert->getIterator());
// Skip all the PHI and debug instructions.
- while (I != MBB.end() && (I->isPHI() || I->isDebugInstr()))
+ while (I != MBB.end() && (I->isPHI() || I->isDebugOrPseudoInstr()))
I = std::next(I);
if (I == MI.getIterator())
continue;
@@ -234,7 +235,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
if (MI.getOperand(0).isReg())
for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
- EndIter->getDebugOperandForReg(MI.getOperand(0).getReg());
+ EndIter->hasDebugOperandForReg(MI.getOperand(0).getReg());
++EndIter, ++Next)
IOM[&*EndIter] = NewOrder;
MBB.splice(I, &MBB, MI.getIterator(), EndIter);
diff --git a/llvm/lib/CodeGen/LiveRangeUtils.h b/llvm/lib/CodeGen/LiveRangeUtils.h
index 0e6bfeb0d4a5..dace05f1ad95 100644
--- a/llvm/lib/CodeGen/LiveRangeUtils.h
+++ b/llvm/lib/CodeGen/LiveRangeUtils.h
@@ -5,9 +5,9 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
+/// \file
/// This file contains helper functions to modify live ranges.
-//
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_CODEGEN_LIVERANGEUTILS_H
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index a69aa6557e46..4c0172a930b5 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -216,7 +216,21 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
// Check for interference with that segment
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- if (query(LR, *Units).checkInterference())
+ // LR is stack-allocated. LiveRegMatrix caches queries by a key that
+ // includes the address of the live range. If (for the same reg unit) this
+ // checkInterference overload is called twice, without any other query()
+ // calls in between (on heap-allocated LiveRanges) - which would invalidate
+ // the cached query - the LR address seen the second time may well be the
+ // same as that seen the first time, while the Start/End/valno may not - yet
+ // the same cached result would be fetched. To avoid that, we don't cache
+ // this query.
+ //
+ // FIXME: the usability of the Query API needs to be improved to avoid
+ // subtle bugs due to query identity. Avoiding caching, for example, would
+ // greatly simplify things.
+ LiveIntervalUnion::Query Q;
+ Q.reset(UserTag, LR, Matrix[*Units]);
+ if (Q.checkInterference())
return true;
}
return false;
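
The comment above describes a classic pitfall of memoizing by object address: a different stack-allocated LiveRange can reappear at the same address, so the cached answer no longer matches the query. The fix is to build a fresh, uncached Query locally. A tiny standalone demonstration of the pitfall and the bypass (Range and the cache are made up for illustration):

#include <iostream>
#include <map>

struct Range { int Start, End; };

// A cache keyed by object address: dangerous when callers reuse storage,
// because a *different* Range can reappear at the same address.
static std::map<const Range *, bool> Cache;

static bool cachedOverlapsZero(const Range *R) {
  auto It = Cache.find(R);
  if (It != Cache.end())
    return It->second;                   // may be stale for a new Range!
  return Cache[R] = (R->Start <= 0 && 0 < R->End);
}

static bool freshOverlapsZero(const Range &R) { // no cache, always correct
  return R.Start <= 0 && 0 < R.End;
}

int main() {
  Range Buf{-5, 5};
  std::cout << cachedOverlapsZero(&Buf) << '\n'; // 1, now cached for &Buf
  Buf = Range{10, 20};                           // new range, same address
  std::cout << cachedOverlapsZero(&Buf) << '\n'; // 1 again: stale cache hit
  std::cout << freshOverlapsZero(Buf) << '\n';   // 0: fresh, uncached query
}
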
diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp
index ea2075bc139d..d8d8bd5d61a2 100644
--- a/llvm/lib/CodeGen/LiveRegUnits.cpp
+++ b/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -81,8 +81,17 @@ static void addBlockLiveIns(LiveRegUnits &LiveUnits,
static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
- LiveUnits.addReg(*CSR);
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) {
+ const unsigned N = *CSR;
+
+ const auto &CSI = MFI.getCalleeSavedInfo();
+ auto Info =
+ llvm::find_if(CSI, [N](auto Info) { return Info.getReg() == N; });
+ // If we have no info for this callee-saved register, assume it is live-out.
+ if (Info == CSI.end() || Info->isRestored())
+ LiveUnits.addReg(N);
+ }
}
void LiveRegUnits::addPristines(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index 49b880c30936..7181dbc9c870 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -67,9 +67,8 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
dbgs() << " Alive in blocks: ";
- for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
- E = AliveBlocks.end(); I != E; ++I)
- dbgs() << *I << ", ";
+ for (unsigned AB : AliveBlocks)
+ dbgs() << AB << ", ";
dbgs() << "\n Killed by:";
if (Kills.empty())
dbgs() << " No instructions.\n";
@@ -173,9 +172,8 @@ void LiveVariables::HandleVirtRegUse(Register Reg, MachineBasicBlock *MBB,
VRInfo.Kills.push_back(&MI);
// Update all dominating blocks to mark them as "known live".
- for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
- E = MBB->pred_end(); PI != E; ++PI)
- MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(Reg)->getParent(), *PI);
+ for (MachineBasicBlock *Pred : MBB->predecessors())
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(Reg)->getParent(), Pred);
}
void LiveVariables::HandleVirtRegDef(Register Reg, MachineInstr &MI) {
@@ -499,7 +497,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
void LiveVariables::runOnInstr(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
- assert(!MI.isDebugInstr());
+ assert(!MI.isDebugOrPseudoInstr());
// Process all of the operands of the instruction...
unsigned NumOperandsToProcess = MI.getNumOperands();
@@ -574,7 +572,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
DistanceMap.clear();
unsigned Dist = 0;
for (MachineInstr &MI : *MBB) {
- if (MI.isDebugInstr())
+ if (MI.isDebugOrPseudoInstr())
continue;
DistanceMap.insert(std::make_pair(&MI, Dist++));
@@ -588,19 +586,16 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
if (!PHIVarInfo[MBB->getNumber()].empty()) {
SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()];
- for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(),
- E = VarInfoVec.end(); I != E; ++I)
+ for (unsigned I : VarInfoVec)
// Mark it alive only in the block we are representing.
- MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MarkVirtRegAliveInBlock(getVarInfo(I), MRI->getVRegDef(I)->getParent(),
MBB);
}
// MachineCSE may CSE instructions which write to non-allocatable physical
// registers across MBBs. Remember if any reserved register is liveout.
SmallSet<unsigned, 4> LiveOuts;
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
+ for (const MachineBasicBlock *SuccMBB : MBB->successors()) {
if (SuccMBB->isEHPad())
continue;
for (const auto &LI : SuccMBB->liveins()) {
@@ -665,8 +660,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// function. If so, it is due to a bug in the instruction selector or some
// other part of the code generator if this happens.
#ifndef NDEBUG
- for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
- assert(Visited.contains(&*i) && "unreachable basic block found");
+ for (const MachineBasicBlock &MBB : *MF)
+ assert(Visited.contains(&MBB) && "unreachable basic block found");
#endif
PhysRegDef.clear();
@@ -779,13 +774,12 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
// Record all vreg defs and kills of all instructions in SuccBB.
for (; BBI != BBE; ++BBI) {
- for (MachineInstr::mop_iterator I = BBI->operands_begin(),
- E = BBI->operands_end(); I != E; ++I) {
- if (I->isReg() && Register::isVirtualRegister(I->getReg())) {
- if (I->isDef())
- Defs.insert(I->getReg());
- else if (I->isKill())
- Kills.insert(I->getReg());
+ for (const MachineOperand &Op : BBI->operands()) {
+ if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
+ if (Op.isDef())
+ Defs.insert(Op.getReg());
+ else if (Op.isKill())
+ Kills.insert(Op.getReg());
}
}
}
@@ -817,8 +811,8 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
const unsigned NumNew = BB->getNumber();
SparseBitVector<> &BV = LiveInSets[SuccBB->getNumber()];
- for (auto R = BV.begin(), E = BV.end(); R != E; R++) {
- Register VirtReg = Register::index2VirtReg(*R);
+ for (unsigned R : BV) {
+ Register VirtReg = Register::index2VirtReg(R);
LiveVariables::VarInfo &VI = getVarInfo(VirtReg);
VI.AliveBlocks.set(NumNew);
}
diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index ec6e693e8a46..2e99c8595cbd 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -176,9 +176,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(
const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs,
MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset,
Align &MaxAlign) {
- for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
- E = UnassignedObjs.end(); I != E; ++I) {
- int i = *I;
+ for (int i : UnassignedObjs) {
AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
ProtectedObjs.insert(i);
}
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
index 2bda586db8c7..62e9c6b629d3 100644
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -20,11 +20,11 @@ using namespace llvm;
LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
if (auto VTy = dyn_cast<VectorType>(&Ty)) {
- auto NumElements = cast<FixedVectorType>(VTy)->getNumElements();
+ auto EC = VTy->getElementCount();
LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
- if (NumElements == 1)
+ if (EC.isScalar())
return ScalarTy;
- return LLT::vector(NumElements, ScalarTy);
+ return LLT::vector(EC, ScalarTy);
}
if (auto PTy = dyn_cast<PointerType>(&Ty)) {
@@ -56,8 +56,8 @@ LLT llvm::getLLTForMVT(MVT Ty) {
if (!Ty.isVector())
return LLT::scalar(Ty.getSizeInBits());
- return LLT::vector(Ty.getVectorNumElements(),
- Ty.getVectorElementType().getSizeInBits());
+ return LLT::scalarOrVector(Ty.getVectorElementCount(),
+ Ty.getVectorElementType().getSizeInBits());
}
const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
diff --git a/llvm/lib/CodeGen/MBFIWrapper.cpp b/llvm/lib/CodeGen/MBFIWrapper.cpp
index 4755defec793..efebb18c9908 100644
--- a/llvm/lib/CodeGen/MBFIWrapper.cpp
+++ b/llvm/lib/CodeGen/MBFIWrapper.cpp
@@ -11,8 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
new file mode 100644
index 000000000000..bf78594e9b23
--- /dev/null
+++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -0,0 +1,137 @@
+//===-------- MIRFSDiscriminator.cpp: Flow Sensitive Discriminator --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of a machine pass that adds the flow
+// sensitive discriminator to the instruction debug information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRFSDiscriminator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+#include <unordered_map>
+
+using namespace llvm;
+using namespace sampleprof;
+using namespace sampleprofutil;
+
+#define DEBUG_TYPE "mirfs-discriminators"
+
+char MIRAddFSDiscriminators::ID = 0;
+
+INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE,
+ "Add MIR Flow Sensitive Discriminators",
+ /* cfg = */ false, /* is_analysis = */ false)
+
+char &llvm::MIRAddFSDiscriminatorsID = MIRAddFSDiscriminators::ID;
+
+FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(FSDiscriminatorPass P) {
+ return new MIRAddFSDiscriminators(P);
+}
+
+// Compute a hash value using the debug line number and the line numbers from
+// the inline stack.
+static uint64_t getCallStackHash(const MachineBasicBlock &BB,
+ const MachineInstr &MI,
+ const DILocation *DIL) {
+ auto updateHash = [](const StringRef &Str) -> uint64_t {
+ if (Str.empty())
+ return 0;
+ return MD5Hash(Str);
+ };
+ uint64_t Ret = updateHash(std::to_string(DIL->getLine()));
+ Ret ^= updateHash(BB.getName());
+ Ret ^= updateHash(DIL->getScope()->getSubprogram()->getLinkageName());
+ for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
+ Ret ^= updateHash(std::to_string(DIL->getLine()));
+ Ret ^= updateHash(DIL->getScope()->getSubprogram()->getLinkageName());
+ }
+ return Ret;
+}
+
+// Traverse the CFG and assign FS discriminators. If two instructions
+// have the same lineno and discriminator but reside in different BBs,
+// the latter instruction will get a new discriminator value. The new
+// discriminator keeps the existing discriminator value but sets new bits
+// between LowBit and HighBit.
+bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
+ if (!EnableFSDiscriminator)
+ return false;
+
+ bool Changed = false;
+ using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>;
+ using BBSet = DenseSet<const MachineBasicBlock *>;
+ using LocationDiscriminatorBBMap = DenseMap<LocationDiscriminator, BBSet>;
+ using LocationDiscriminatorCurrPassMap =
+ DenseMap<LocationDiscriminator, unsigned>;
+
+ LocationDiscriminatorBBMap LDBM;
+ LocationDiscriminatorCurrPassMap LDCM;
+
+ // Mask of discriminators before this pass.
+ unsigned BitMaskBefore = getN1Bits(LowBit);
+ // Mask of discriminators including this pass.
+ unsigned BitMaskNow = getN1Bits(HighBit);
+ // Mask of discriminators for bits specific to this pass.
+ unsigned BitMaskThisPass = BitMaskNow ^ BitMaskBefore;
+ unsigned NumNewD = 0;
+
+ LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: "
+ << MF.getFunction().getName() << "\n");
+ for (MachineBasicBlock &BB : MF) {
+ for (MachineInstr &I : BB) {
+ const DILocation *DIL = I.getDebugLoc().get();
+ if (!DIL)
+ continue;
+ unsigned LineNo = DIL->getLine();
+ if (LineNo == 0)
+ continue;
+ unsigned Discriminator = DIL->getDiscriminator();
+ LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator};
+ auto &BBMap = LDBM[LD];
+ auto R = BBMap.insert(&BB);
+ if (BBMap.size() == 1)
+ continue;
+
+ unsigned DiscriminatorCurrPass;
+ DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD];
+ DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit;
+ DiscriminatorCurrPass += getCallStackHash(BB, I, DIL);
+ DiscriminatorCurrPass &= BitMaskThisPass;
+ unsigned NewD = Discriminator | DiscriminatorCurrPass;
+ const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
+ << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " "
+ << I << "\n");
+ continue;
+ }
+
+ I.setDebugLoc(NewDIL);
+ NumNewD++;
+ LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ": add FS discriminator, from "
+ << Discriminator << " -> " << NewD << "\n");
+ Changed = true;
+ }
+ }
+
+ if (Changed) {
+ createFSDiscriminatorVariable(MF.getFunction().getParent());
+ LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n");
+ }
+
+ return Changed;
+}
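
MIRAddFSDiscriminators packs a per-pass value into the discriminator bits between LowBit and HighBit: it keeps the old low bits and ORs in a counter-plus-hash value masked to this pass's bit window. A standalone sketch of just that bit arithmetic; lowBits is a local helper with the N low bits set, standing in for the pass's getN1Bits (whose exact semantics may differ slightly), and the window values are made up:

#include <cstdint>
#include <iostream>

// N low bits set, an approximation of the pass's getN1Bits helper.
static unsigned lowBits(unsigned N) {
  return N >= 32 ? ~0u : (1u << N) - 1;
}

// Combine an existing discriminator with a value specific to this pass,
// confined to the bit window between LowBit and HighBit.
static unsigned addPassBits(unsigned OldDiscriminator, unsigned PassValue,
                            uint64_t CallStackHash, unsigned LowBit,
                            unsigned HighBit) {
  unsigned MaskBefore = lowBits(LowBit);
  unsigned MaskNow = lowBits(HighBit);
  unsigned MaskThisPass = MaskNow ^ MaskBefore; // bits owned by this pass

  unsigned ThisPass =
      (PassValue << LowBit) + static_cast<unsigned>(CallStackHash);
  ThisPass &= MaskThisPass;        // keep only this pass's bits
  return OldDiscriminator | ThisPass;
}

int main() {
  // E.g. a pass owning bits [7, 12): the old value 3 survives in the low bits.
  std::cout << addPassBits(/*Old=*/3, /*PassValue=*/2, /*Hash=*/0x1234,
                           /*LowBit=*/7, /*HighBit=*/12)
            << '\n';
}
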
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index b86fd6b41318..87fde7d39a60 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -226,6 +226,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset)
.Case("escape", MIToken::kw_cfi_escape)
.Case("def_cfa", MIToken::kw_cfi_def_cfa)
+ .Case("llvm_def_aspace_cfa", MIToken::kw_cfi_llvm_def_aspace_cfa)
.Case("remember_state", MIToken::kw_cfi_remember_state)
.Case("restore", MIToken::kw_cfi_restore)
.Case("restore_state", MIToken::kw_cfi_restore_state)
@@ -271,6 +272,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker)
.Case("bbsections", MIToken::kw_bbsections)
.Case("unknown-size", MIToken::kw_unknown_size)
+ .Case("unknown-address", MIToken::kw_unknown_address)
+ .Case("distinct", MIToken::kw_distinct)
.Default(MIToken::Identifier);
}
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index 452eda721331..68425b41c3fb 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -83,6 +83,7 @@ struct MIToken {
kw_cfi_adjust_cfa_offset,
kw_cfi_escape,
kw_cfi_def_cfa,
+ kw_cfi_llvm_def_aspace_cfa,
kw_cfi_register,
kw_cfi_remember_state,
kw_cfi_restore,
@@ -126,6 +127,10 @@ struct MIToken {
kw_heap_alloc_marker,
kw_bbsections,
kw_unknown_size,
+ kw_unknown_address,
+
+ // Metadata types.
+ kw_distinct,
// Named metadata keywords
md_tbaa,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index fe979b981886..34e1f9225d42 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -395,6 +395,7 @@ class MIParser {
MachineFunction &MF;
SMDiagnostic &Error;
StringRef Source, CurrentSource;
+ SMRange SourceRange;
MIToken Token;
PerFunctionMIParsingState &PFS;
/// Maps from slot numbers to function's unnamed basic blocks.
@@ -403,6 +404,8 @@ class MIParser {
public:
MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
StringRef Source);
+ MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source, SMRange SourceRange);
/// \p SkipChar gives the number of characters to skip before looking
/// for the next token.
@@ -428,6 +431,10 @@ public:
bool parseStandaloneRegister(Register &Reg);
bool parseStandaloneStackObject(int &FI);
bool parseStandaloneMDNode(MDNode *&Node);
+ bool parseMachineMetadata();
+ bool parseMDTuple(MDNode *&MD, bool IsDistinct);
+ bool parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts);
+ bool parseMetadata(Metadata *&MD);
bool
parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
@@ -472,6 +479,7 @@ public:
bool parseMetadataOperand(MachineOperand &Dest);
bool parseCFIOffset(int &Offset);
bool parseCFIRegister(Register &Reg);
+ bool parseCFIAddressSpace(unsigned &AddressSpace);
bool parseCFIEscapeValues(std::string& Values);
bool parseCFIOperand(MachineOperand &Dest);
bool parseIRBlock(BasicBlock *&BB, const Function &F);
@@ -549,6 +557,10 @@ private:
/// parseStringConstant
/// ::= StringConstant
bool parseStringConstant(std::string &Result);
+
+ /// Map the location in the MI string to the corresponding location specified
+ /// in `SourceRange`.
+ SMLoc mapSMLoc(StringRef::iterator Loc);
};
} // end anonymous namespace
@@ -558,6 +570,11 @@ MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
: MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS)
{}
+MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source, SMRange SourceRange)
+ : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source),
+ SourceRange(SourceRange), PFS(PFS) {}
+
void MIParser::lex(unsigned SkipChar) {
CurrentSource = lexMIToken(
CurrentSource.slice(SkipChar, StringRef::npos), Token,
@@ -583,6 +600,13 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
return true;
}
+SMLoc MIParser::mapSMLoc(StringRef::iterator Loc) {
+ assert(SourceRange.isValid() && "Invalid source range");
+ assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
+ return SMLoc::getFromPointer(SourceRange.Start.getPointer() +
+ (Loc - Source.data()));
+}
+
typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
ErrorCallbackType;
@@ -987,7 +1011,9 @@ bool MIParser::parse(MachineInstr *&MI) {
Optional<unsigned> TiedDefIdx;
if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))
return true;
- if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg())
+ if ((OpCode == TargetOpcode::DBG_VALUE ||
+ OpCode == TargetOpcode::DBG_VALUE_LIST) &&
+ MO.isReg())
MO.setIsDebug();
Operands.push_back(
ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
@@ -1168,6 +1194,130 @@ bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
return false;
}
+bool MIParser::parseMachineMetadata() {
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node");
+
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected metadata id after '!'");
+ unsigned ID = 0;
+ if (getUnsigned(ID))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::equal))
+ return true;
+ bool IsDistinct = Token.is(MIToken::kw_distinct);
+ if (IsDistinct)
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node");
+ lex();
+
+ MDNode *MD;
+ if (parseMDTuple(MD, IsDistinct))
+ return true;
+
+ auto FI = PFS.MachineForwardRefMDNodes.find(ID);
+ if (FI != PFS.MachineForwardRefMDNodes.end()) {
+ FI->second.first->replaceAllUsesWith(MD);
+ PFS.MachineForwardRefMDNodes.erase(FI);
+
+ assert(PFS.MachineMetadataNodes[ID] == MD && "Tracking VH didn't work");
+ } else {
+ if (PFS.MachineMetadataNodes.count(ID))
+ return error("Metadata id is already used");
+ PFS.MachineMetadataNodes[ID].reset(MD);
+ }
+
+ return false;
+}
+
+bool MIParser::parseMDTuple(MDNode *&MD, bool IsDistinct) {
+ SmallVector<Metadata *, 16> Elts;
+ if (parseMDNodeVector(Elts))
+ return true;
+ MD = (IsDistinct ? MDTuple::getDistinct
+ : MDTuple::get)(MF.getFunction().getContext(), Elts);
+ return false;
+}
+
+bool MIParser::parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts) {
+ if (Token.isNot(MIToken::lbrace))
+ return error("expected '{' here");
+ lex();
+
+ if (Token.is(MIToken::rbrace)) {
+ lex();
+ return false;
+ }
+
+ do {
+ Metadata *MD;
+ if (parseMetadata(MD))
+ return true;
+
+ Elts.push_back(MD);
+
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ } while (true);
+
+ if (Token.isNot(MIToken::rbrace))
+ return error("expected end of metadata node");
+ lex();
+
+ return false;
+}
+
+// ::= !42
+// ::= !"string"
+bool MIParser::parseMetadata(Metadata *&MD) {
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected '!' here");
+ lex();
+
+ if (Token.is(MIToken::StringConstant)) {
+ std::string Str;
+ if (parseStringConstant(Str))
+ return true;
+ MD = MDString::get(MF.getFunction().getContext(), Str);
+ return false;
+ }
+
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected metadata id after '!'");
+
+ SMLoc Loc = mapSMLoc(Token.location());
+
+ unsigned ID = 0;
+ if (getUnsigned(ID))
+ return true;
+ lex();
+
+ auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID);
+ if (NodeInfo != PFS.IRSlots.MetadataNodes.end()) {
+ MD = NodeInfo->second.get();
+ return false;
+ }
+ // Check machine metadata.
+ NodeInfo = PFS.MachineMetadataNodes.find(ID);
+ if (NodeInfo != PFS.MachineMetadataNodes.end()) {
+ MD = NodeInfo->second.get();
+ return false;
+ }
+ // Forward reference.
+ auto &FwdRef = PFS.MachineForwardRefMDNodes[ID];
+ FwdRef = std::make_pair(
+ MDTuple::getTemporary(MF.getFunction().getContext(), None), Loc);
+ PFS.MachineMetadataNodes[ID].reset(FwdRef.first.get());
+ MD = FwdRef.first.get();
+
+ return false;
+}
+
static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
assert(MO.isImplicit());
return MO.isDef() ? "implicit-def" : "implicit";
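
parseMetadata above resolves a '!N' reference against the IR slots, then the machine metadata table, and if neither knows the id yet it plants a temporary node that a later '!N = !{...}' definition replaces. A stripped-down standalone model of that forward-reference bookkeeping, with string placeholders instead of temporary MDNodes and none of the error handling:

#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Node {
  std::string Text;               // payload once defined, "<temp>" before that
  std::vector<Node *> Operands;
};

struct MDTable {
  std::map<unsigned, std::unique_ptr<Node>> Defined;
  std::map<unsigned, Node *> ForwardRefs;

  // Reference: return the node for !ID, creating a temporary placeholder if
  // the definition has not been parsed yet.
  Node *ref(unsigned ID) {
    auto It = Defined.find(ID);
    if (It != Defined.end())
      return It->second.get();
    auto Temp = std::make_unique<Node>();
    Temp->Text = "<temp>";
    Node *Raw = Temp.get();
    ForwardRefs[ID] = Raw;
    Defined[ID] = std::move(Temp); // later refs reuse the same placeholder
    return Raw;
  }

  // Definition: !ID = !{...}. Fill in the placeholder if one exists.
  void define(unsigned ID, std::string Text) {
    auto FR = ForwardRefs.find(ID);
    if (FR != ForwardRefs.end()) {
      FR->second->Text = std::move(Text); // "replaceAllUsesWith" in spirit
      ForwardRefs.erase(FR);
      return;
    }
    Defined[ID] = std::make_unique<Node>(Node{std::move(Text), {}});
  }
};

int main() {
  MDTable T;
  Node *Use = T.ref(7);           // !7 referenced before it is defined
  T.define(7, "!{alias.scope}");
  std::cout << Use->Text << '\n'; // the earlier use now sees the definition
}
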
@@ -1726,7 +1876,7 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
return error(Loc, "expected <M x sN> or <M x pA> for vector type");
lex();
- Ty = LLT::vector(NumElements, Ty);
+ Ty = LLT::fixed_vector(NumElements, Ty);
return false;
}
@@ -2010,8 +2160,11 @@ bool MIParser::parseMDNode(MDNode *&Node) {
if (getUnsigned(ID))
return true;
auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID);
- if (NodeInfo == PFS.IRSlots.MetadataNodes.end())
- return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'");
+ if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) {
+ NodeInfo = PFS.MachineMetadataNodes.find(ID);
+ if (NodeInfo == PFS.MachineMetadataNodes.end())
+ return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'");
+ }
lex();
Node = NodeInfo->second.get();
return false;
@@ -2205,6 +2358,16 @@ bool MIParser::parseCFIRegister(Register &Reg) {
return false;
}
+bool MIParser::parseCFIAddressSpace(unsigned &AddressSpace) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected a cfi address space literal");
+ if (Token.integerValue().isSigned())
+ return error("expected an unsigned integer (cfi address space)");
+ AddressSpace = Token.integerValue().getZExtValue();
+ lex();
+ return false;
+}
+
bool MIParser::parseCFIEscapeValues(std::string &Values) {
do {
if (Token.isNot(MIToken::HexLiteral))
@@ -2225,6 +2388,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
lex();
int Offset;
Register Reg;
+ unsigned AddressSpace;
unsigned CFIIndex;
switch (Kind) {
case MIToken::kw_cfi_same_value:
@@ -2271,6 +2435,14 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
CFIIndex =
MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, Offset));
break;
+ case MIToken::kw_cfi_llvm_def_aspace_cfa:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset) || expectAndConsume(MIToken::comma) ||
+ parseCFIAddressSpace(AddressSpace))
+ return true;
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMDefAspaceCfa(
+ nullptr, Reg, Offset, AddressSpace));
+ break;
case MIToken::kw_cfi_remember_state:
CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
break;
@@ -2618,6 +2790,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
case MIToken::kw_cfi_adjust_cfa_offset:
case MIToken::kw_cfi_escape:
case MIToken::kw_cfi_def_cfa:
+ case MIToken::kw_cfi_llvm_def_aspace_cfa:
case MIToken::kw_cfi_register:
case MIToken::kw_cfi_remember_state:
case MIToken::kw_cfi_restore:
@@ -2788,6 +2961,9 @@ static bool parseIRValue(const MIToken &Token, PerFunctionMIParsingState &PFS,
V = C;
break;
}
+ case MIToken::kw_unknown_address:
+ V = nullptr;
+ return false;
default:
llvm_unreachable("The current token should be an IR block reference");
}
@@ -2948,12 +3124,13 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) &&
Token.isNot(MIToken::GlobalValue) &&
Token.isNot(MIToken::NamedGlobalValue) &&
- Token.isNot(MIToken::QuotedIRValue))
+ Token.isNot(MIToken::QuotedIRValue) &&
+ Token.isNot(MIToken::kw_unknown_address))
return error("expected an IR value reference");
const Value *V = nullptr;
if (parseIRValue(V))
return true;
- if (!V->getType()->isPointerTy())
+ if (V && !V->getType()->isPointerTy())
return error("expected a pointer IR value");
lex();
int64_t Offset = 0;
@@ -3041,18 +3218,34 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseOptionalAtomicOrdering(FailureOrder))
return true;
+ LLT MemoryType;
if (Token.isNot(MIToken::IntegerLiteral) &&
- Token.isNot(MIToken::kw_unknown_size))
- return error("expected the size integer literal or 'unknown-size' after "
+ Token.isNot(MIToken::kw_unknown_size) &&
+ Token.isNot(MIToken::lparen))
+ return error("expected memory LLT, the size integer literal or 'unknown-size' after "
"memory operation");
- uint64_t Size;
+
+ uint64_t Size = MemoryLocation::UnknownSize;
if (Token.is(MIToken::IntegerLiteral)) {
if (getUint64(Size))
return true;
+
+ // Convert from bytes to bits for storage.
+ MemoryType = LLT::scalar(8 * Size);
+ lex();
} else if (Token.is(MIToken::kw_unknown_size)) {
Size = MemoryLocation::UnknownSize;
+ lex();
+ } else {
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (parseLowLevelType(Token.location(), MemoryType))
+ return true;
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ Size = MemoryType.getSizeInBytes();
}
- lex();
MachinePointerInfo Ptr = MachinePointerInfo();
if (Token.is(MIToken::Identifier)) {
@@ -3068,7 +3261,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseMachinePointerInfo(Ptr))
return true;
}
- unsigned BaseAlignment = (Size != MemoryLocation::UnknownSize ? Size : 1);
+ unsigned BaseAlignment =
+ (Size != MemoryLocation::UnknownSize ? PowerOf2Ceil(Size) : 1);
AAMDNodes AAInfo;
MDNode *Range = nullptr;
while (consumeIfPresent(MIToken::comma)) {
@@ -3115,8 +3309,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
}
if (expectAndConsume(MIToken::rparen))
return true;
- Dest = MF.getMachineMemOperand(Ptr, Flags, Size, Align(BaseAlignment), AAInfo,
- Range, SSID, Order, FailureOrder);
+ Dest = MF.getMachineMemOperand(Ptr, Flags, MemoryType, Align(BaseAlignment),
+ AAInfo, Range, SSID, Order, FailureOrder);
return false;
}
@@ -3252,6 +3446,11 @@ bool llvm::parseMDNode(PerFunctionMIParsingState &PFS,
return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node);
}
+bool llvm::parseMachineMetadata(PerFunctionMIParsingState &PFS, StringRef Src,
+ SMRange SrcRange, SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src, SrcRange).parseMachineMetadata();
+}
+
bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF,
PerFunctionMIParsingState &PFS, const Value *&V,
ErrorCallbackType ErrorCallback) {
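The parser above resolves forward references to machine metadata by creating a temporary MDTuple for a not-yet-defined '!N' and replacing all of its uses once the real definition arrives. A minimal sketch of that Metadata API pattern, not taken from the patch, assuming llvm/IR/Metadata.h is included and MF is the current MachineFunction; the node contents are made up:

    LLVMContext &Ctx = MF.getFunction().getContext();
    // Placeholder created when "!1" is referenced before it is defined.
    TempMDTuple Temp = MDTuple::getTemporary(Ctx, None);
    // The real definition parsed later, e.g. !1 = !{!"made-up scope"}.
    Metadata *Str = MDString::get(Ctx, "made-up scope");
    MDNode *Real = MDTuple::get(Ctx, Str);
    // Patch every user of the placeholder, as parseMachineMetadata does above.
    Temp->replaceAllUsesWith(Real);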
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index ffa9aeb21edb..d77104752880 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -51,9 +51,9 @@ namespace llvm {
/// file.
class MIRParserImpl {
SourceMgr SM;
+ LLVMContext &Context;
yaml::Input In;
StringRef Filename;
- LLVMContext &Context;
SlotMapping IRSlots;
std::unique_ptr<PerTargetMIParsingState> Target;
@@ -143,6 +143,10 @@ public:
bool initializeJumpTableInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineJumpTable &YamlJTI);
+ bool parseMachineMetadataNodes(PerFunctionMIParsingState &PFS,
+ MachineFunction &MF,
+ const yaml::MachineFunction &YMF);
+
private:
bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node,
const yaml::StringValue &Source);
@@ -151,6 +155,9 @@ private:
MachineBasicBlock *&MBB,
const yaml::StringValue &Source);
+ bool parseMachineMetadata(PerFunctionMIParsingState &PFS,
+ const yaml::StringValue &Source);
+
/// Return a MIR diagnostic converted from an MI string diagnostic.
SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
SMRange SourceRange);
@@ -176,10 +183,11 @@ MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
StringRef Filename, LLVMContext &Context,
std::function<void(Function &)> Callback)
: SM(),
+ Context(Context),
In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))
->getBuffer(),
nullptr, handleYAMLDiag, this),
- Filename(Filename), Context(Context), ProcessIRFunction(Callback) {
+ Filename(Filename), ProcessIRFunction(Callback) {
In.setContext(&In);
}
@@ -417,8 +425,8 @@ void MIRParserImpl::setupDebugValueTracking(
// Load any substitutions.
for (auto &Sub : YamlMF.DebugValueSubstitutions) {
- MF.makeDebugValueSubstitution(std::make_pair(Sub.SrcInst, Sub.SrcOp),
- std::make_pair(Sub.DstInst, Sub.DstOp));
+ MF.makeDebugValueSubstitution({Sub.SrcInst, Sub.SrcOp},
+ {Sub.DstInst, Sub.DstOp}, Sub.Subreg);
}
}
@@ -456,6 +464,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (initializeConstantPool(PFS, *ConstantPool, YamlMF))
return true;
}
+ if (!YamlMF.MachineMetadataNodes.empty() &&
+ parseMachineMetadataNodes(PFS, MF, YamlMF))
+ return true;
StringRef BlockStr = YamlMF.Body.Value.Value;
SMDiagnostic Error;
@@ -646,10 +657,9 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
}
};
- for (auto I = PFS.VRegInfosNamed.begin(), E = PFS.VRegInfosNamed.end();
- I != E; I++) {
- const VRegInfo &Info = *I->second;
- populateVRegInfo(Info, Twine(I->first()));
+ for (const auto &P : PFS.VRegInfosNamed) {
+ const VRegInfo &Info = *P.second;
+ populateVRegInfo(Info, Twine(P.first()));
}
for (auto P : PFS.VRegInfos) {
@@ -700,6 +710,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
MFI.setHasVAStart(YamlMFI.HasVAStart);
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
+ MFI.setHasTailCall(YamlMFI.HasTailCall);
MFI.setLocalFrameSize(YamlMFI.LocalFrameSize);
if (!YamlMFI.SavePoint.Value.empty()) {
MachineBasicBlock *MBB = nullptr;
@@ -919,6 +930,29 @@ bool MIRParserImpl::parseMBBReference(PerFunctionMIParsingState &PFS,
return false;
}
+bool MIRParserImpl::parseMachineMetadata(PerFunctionMIParsingState &PFS,
+ const yaml::StringValue &Source) {
+ SMDiagnostic Error;
+ if (llvm::parseMachineMetadata(PFS, Source.Value, Source.SourceRange, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
+bool MIRParserImpl::parseMachineMetadataNodes(
+ PerFunctionMIParsingState &PFS, MachineFunction &MF,
+ const yaml::MachineFunction &YMF) {
+ for (auto &MDS : YMF.MachineMetadataNodes) {
+ if (parseMachineMetadata(PFS, MDS))
+ return true;
+ }
+  // Report missing definitions from forward-referenced nodes.
+ if (!PFS.MachineForwardRefMDNodes.empty())
+ return error(PFS.MachineForwardRefMDNodes.begin()->second.second,
+ "use of undefined metadata '!" +
+ Twine(PFS.MachineForwardRefMDNodes.begin()->first) + "'");
+ return false;
+}
+
SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
SMRange SourceRange) {
assert(SourceRange.isValid() && "Invalid source range");
@@ -983,7 +1017,7 @@ bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(
StringRef Filename, SMDiagnostic &Error, LLVMContext &Context,
std::function<void(Function &)> ProcessIRFunction) {
- auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
+ auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
if (std::error_code EC = FileOrErr.getError()) {
Error = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + EC.message());
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index eae174019b56..2a78bb62762a 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -29,13 +29,14 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleSlotTracker.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -135,6 +136,9 @@ public:
void convertCallSiteObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF,
ModuleSlotTracker &MST);
+ void convertMachineMetadataNodes(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ MachineModuleSlotTracker &MST);
private:
void initRegisterMaskIds(const MachineFunction &MF);
@@ -215,15 +219,19 @@ void MIRPrinter::print(const MachineFunction &MF) {
MachineFunctionProperties::Property::FailedISel);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
- ModuleSlotTracker MST(MF.getFunction().getParent());
+ MachineModuleSlotTracker MST(&MF);
MST.incorporateFunction(MF.getFunction());
convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
convertStackObjects(YamlMF, MF, MST);
convertCallSiteObjects(YamlMF, MF, MST);
- for (auto &Sub : MF.DebugValueSubstitutions)
- YamlMF.DebugValueSubstitutions.push_back({Sub.first.first, Sub.first.second,
- Sub.second.first,
- Sub.second.second});
+ for (const auto &Sub : MF.DebugValueSubstitutions) {
+ const auto &SubSrc = Sub.Src;
+ const auto &SubDest = Sub.Dest;
+ YamlMF.DebugValueSubstitutions.push_back({SubSrc.first, SubSrc.second,
+ SubDest.first,
+ SubDest.second,
+ Sub.Subreg});
+ }
if (const auto *ConstantPool = MF.getConstantPool())
convert(YamlMF, *ConstantPool);
if (const auto *JumpTableInfo = MF.getJumpTableInfo())
@@ -243,6 +251,10 @@ void MIRPrinter::print(const MachineFunction &MF) {
IsNewlineNeeded = true;
}
StrOS.flush();
+  // Convert the machine metadata collected while printing the machine
+  // function.
+ convertMachineMetadataNodes(YamlMF, MF, MST);
+
yaml::Output Out(OS);
if (!SimplifyMIR)
Out.setWriteDefaultValues(true);
@@ -351,6 +363,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
YamlMFI.HasVAStart = MFI.hasVAStart();
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
+ YamlMFI.HasTailCall = MFI.hasTailCall();
YamlMFI.LocalFrameSize = MFI.getLocalFrameSize();
if (MFI.getSavePoint()) {
raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
@@ -524,6 +537,19 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF,
});
}
+void MIRPrinter::convertMachineMetadataNodes(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ MachineModuleSlotTracker &MST) {
+ MachineModuleSlotTracker::MachineMDNodeListType MDList;
+ MST.collectMachineMDNodes(MDList);
+ for (auto &MD : MDList) {
+ std::string NS;
+ raw_string_ostream StrOS(NS);
+ MD.second->print(StrOS, MST, MF.getFunction().getParent());
+ YMF.MachineMetadataNodes.push_back(StrOS.str());
+ }
+}
+
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineConstantPool &ConstantPool) {
unsigned ID = 0;
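The debugValueSubstitutions entries printed above now carry a subregister qualifier. A sketch of how such an entry is recorded on the MachineFunction side, with made-up instruction numbers (4 and 7) that are assumed to already be assigned:

    // {SrcInst, SrcOp} -> {DstInst, DstOp}, qualified by a subregister index
    // (0 meaning the whole value); this is the triple the printer emits above.
    MF.makeDebugValueSubstitution({/*SrcInst=*/4, /*SrcOp=*/0},
                                  {/*DstInst=*/7, /*DstOp=*/0},
                                  /*Subreg=*/0);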
diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index 3d4f66f31174..5862504109f0 100644
--- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -125,7 +125,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
MIOperands.push_back((unsigned)Op->getSize());
MIOperands.push_back((unsigned)Op->getFlags());
MIOperands.push_back((unsigned)Op->getOffset());
- MIOperands.push_back((unsigned)Op->getOrdering());
+ MIOperands.push_back((unsigned)Op->getSuccessOrdering());
MIOperands.push_back((unsigned)Op->getAddrSpace());
MIOperands.push_back((unsigned)Op->getSyncScopeID());
MIOperands.push_back((unsigned)Op->getBaseAlign().value());
diff --git a/llvm/lib/CodeGen/MIRYamlMapping.cpp b/llvm/lib/CodeGen/MIRYamlMapping.cpp
new file mode 100644
index 000000000000..b1a538cad8a0
--- /dev/null
+++ b/llvm/lib/CodeGen/MIRYamlMapping.cpp
@@ -0,0 +1,43 @@
+//===- MIRYamlMapping.cpp - Describe mapping between MIR and YAML --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mapping between various MIR data structures and
+// their corresponding YAML representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace llvm;
+using namespace llvm::yaml;
+
+FrameIndex::FrameIndex(int FI, const llvm::MachineFrameInfo &MFI) {
+ IsFixed = MFI.isFixedObjectIndex(FI);
+ if (IsFixed)
+ FI -= MFI.getObjectIndexBegin();
+ this->FI = FI;
+}
+
+// Returns the real frame index, or an error if the stored index is out of range.
+Expected<int> FrameIndex::getFI(const llvm::MachineFrameInfo &MFI) const {
+ int FI = this->FI;
+ if (IsFixed) {
+ if (unsigned(FI) >= MFI.getNumFixedObjects())
+ return make_error<StringError>(
+ formatv("invalid fixed frame index {0}", FI).str(),
+ inconvertibleErrorCode());
+ FI += MFI.getObjectIndexBegin();
+ }
+ if (unsigned(FI + MFI.getNumFixedObjects()) >= MFI.getNumObjects())
+ return make_error<StringError>(formatv("invalid frame index {0}", FI).str(),
+ inconvertibleErrorCode());
+ return FI;
+}
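A round-trip sketch for the FrameIndex helper defined above, assuming MF is in scope and its frame has at least one fixed stack object; errors come back through Expected<int> exactly as in getFI():

    const MachineFrameInfo &MFI = MF.getFrameInfo();
    // Encode a fixed object (negative index) into its YAML form ...
    yaml::FrameIndex YFI(/*FI=*/MFI.getObjectIndexBegin(), MFI);
    // ... and decode it again, checking for out-of-range values.
    Expected<int> FIOrErr = YFI.getFI(MFI);
    if (!FIOrErr)
      report_fatal_error(Twine(toString(FIOrErr.takeError())));
    int RealFI = *FIOrErr;
    (void)RealFI;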
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index b4187af02975..c6914dcd0e54 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
@@ -87,6 +88,17 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
+MCSymbol *MachineBasicBlock::getEHCatchretSymbol() const {
+ if (!CachedEHCatchretMCSymbol) {
+ const MachineFunction *MF = getParent();
+ SmallString<128> SymbolName;
+ raw_svector_ostream(SymbolName)
+ << "$ehgcr_" << MF->getFunctionNumber() << '_' << getNumber();
+ CachedEHCatchretMCSymbol = MF->getContext().getOrCreateSymbol(SymbolName);
+ }
+ return CachedEHCatchretMCSymbol;
+}
+
MCSymbol *MachineBasicBlock::getEndSymbol() const {
if (!CachedEndMCSymbol) {
const MachineFunction *MF = getParent();
@@ -210,11 +222,13 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
}
MachineBasicBlock::iterator
-MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
+MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I,
+ bool SkipPseudoOp) {
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
iterator E = end();
while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() ||
+ (SkipPseudoOp && I->isPseudoProbe()) ||
TII->isBasicBlockPrologue(*I)))
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
@@ -243,12 +257,14 @@ MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
return I;
}
-MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() {
+MachineBasicBlock::iterator
+MachineBasicBlock::getFirstNonDebugInstr(bool SkipPseudoOp) {
// Skip over begin-of-block dbg_value instructions.
- return skipDebugInstructionsForward(begin(), end());
+ return skipDebugInstructionsForward(begin(), end(), SkipPseudoOp);
}
-MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+MachineBasicBlock::iterator
+MachineBasicBlock::getLastNonDebugInstr(bool SkipPseudoOp) {
// Skip over end-of-block dbg_value instructions.
instr_iterator B = instr_begin(), I = instr_end();
while (I != B) {
@@ -256,6 +272,8 @@ MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
// Return instruction that starts a bundle.
if (I->isDebugInstr() || I->isInsideBundle())
continue;
+ if (SkipPseudoOp && I->isPseudoProbe())
+ continue;
return I;
}
// The block is all debug values.
@@ -1075,10 +1093,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
I != E; ++I)
NewTerminators.push_back(&*I);
- for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
- E = Terminators.end(); I != E; ++I) {
- if (!is_contained(NewTerminators, *I))
- Indexes->removeMachineInstrFromMaps(**I);
+ for (MachineInstr *Terminator : Terminators) {
+ if (!is_contained(NewTerminators, Terminator))
+ Indexes->removeMachineInstrFromMaps(*Terminator);
}
}
@@ -1361,6 +1378,14 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
return {};
}
+DebugLoc MachineBasicBlock::rfindDebugLoc(reverse_instr_iterator MBBI) {
+  // Skip debug declarations; we don't want a DebugLoc from them.
+ MBBI = skipDebugInstructionsBackward(MBBI, instr_rbegin());
+ if (!MBBI->isDebugInstr())
+ return MBBI->getDebugLoc();
+ return {};
+}
+
/// Find the previous valid DebugLoc preceding MBBI, skipping any DBG_VALUE
/// instructions. Return UnknownLoc if there is none.
DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) {
@@ -1371,6 +1396,16 @@ DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) {
return {};
}
+DebugLoc MachineBasicBlock::rfindPrevDebugLoc(reverse_instr_iterator MBBI) {
+ if (MBBI == instr_rend())
+ return {};
+  // Skip debug declarations; we don't want a DebugLoc from them.
+ MBBI = next_nodbg(MBBI, instr_rend());
+ if (MBBI != instr_rend())
+ return MBBI->getDebugLoc();
+ return {};
+}
+
/// Find and return the merged DebugLoc of the branch instructions of the block.
/// Return UnknownLoc if there is none.
DebugLoc
@@ -1455,7 +1490,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// Try searching forwards from Before, looking for reads or defs.
const_iterator I(Before);
for (; I != end() && N > 0; ++I) {
- if (I->isDebugInstr())
+ if (I->isDebugOrPseudoInstr())
continue;
--N;
@@ -1493,7 +1528,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
do {
--I;
- if (I->isDebugInstr())
+ if (I->isDebugOrPseudoInstr())
continue;
--N;
@@ -1527,7 +1562,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// If all the instructions before this in the block are debug instructions,
// skip over them.
- while (I != begin() && std::prev(I)->isDebugInstr())
+ while (I != begin() && std::prev(I)->isDebugOrPseudoInstr())
--I;
// Did we get to the start of the block?
@@ -1569,6 +1604,23 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
return LiveIns.begin();
}
+MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const {
+ const MachineFunction &MF = *getParent();
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness) &&
+ "Liveness information is accurate");
+
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ MCPhysReg ExceptionPointer = 0, ExceptionSelector = 0;
+ if (MF.getFunction().hasPersonalityFn()) {
+ auto PersonalityFn = MF.getFunction().getPersonalityFn();
+ ExceptionPointer = TLI.getExceptionPointerRegister(PersonalityFn);
+ ExceptionSelector = TLI.getExceptionSelectorRegister(PersonalityFn);
+ }
+
+ return liveout_iterator(*this, ExceptionPointer, ExceptionSelector, false);
+}
+
const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold);
const MBBSectionID
MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception);
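Several iteration helpers above gained a SkipPseudoOp parameter so pseudo probes can be stepped over like debug instructions. A small usage sketch, assuming MBB is a MachineBasicBlock in scope and llvm/Support/Debug.h is included:

    // First instruction that is neither a debug instruction nor a pseudo probe.
    MachineBasicBlock::iterator FirstReal =
        MBB.getFirstNonDebugInstr(/*SkipPseudoOp=*/true);
    if (FirstReal != MBB.end())
      dbgs() << "first real instruction: " << *FirstReal;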
diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 54e0a14e0555..c569f0350366 100644
--- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -29,6 +29,7 @@ using namespace llvm;
#define DEBUG_TYPE "machine-block-freq"
+namespace llvm {
static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"view-machine-block-freq-propagation-dags", cl::Hidden,
cl::desc("Pop up a window to show a dag displaying how machine block "
@@ -75,6 +76,7 @@ static cl::opt<bool> PrintMachineBlockFreq(
// Command line option to specify the name of the function for block frequency
// dump. Defined in Analysis/BlockFrequencyInfo.cpp.
extern cl::opt<std::string> PrintBlockFreqFuncName;
+} // namespace llvm
static GVDAGType getGVDT() {
if (ViewBlockLayoutWithBFI != GVDT_None)
@@ -231,14 +233,20 @@ MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const {
Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
const MachineBasicBlock *MBB) const {
+ if (!MBFI)
+ return None;
+
const Function &F = MBFI->getFunction()->getFunction();
- return MBFI ? MBFI->getBlockProfileCount(F, MBB) : None;
+ return MBFI->getBlockProfileCount(F, MBB);
}
Optional<uint64_t>
MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
+ if (!MBFI)
+ return None;
+
const Function &F = MBFI->getFunction()->getFunction();
- return MBFI ? MBFI->getProfileCountFromFreq(F, Freq) : None;
+ return MBFI->getProfileCountFromFreq(F, Freq);
}
bool MachineBlockFrequencyInfo::isIrrLoopHeader(
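With the null guard added above, both profile-count queries simply return None when the analysis has not been computed. A usage sketch, assuming MBFI is a computed MachineBlockFrequencyInfo and MBB is a block of the same function:

    if (Optional<uint64_t> Count = MBFI.getBlockProfileCount(&MBB))
      dbgs() << "profile count: " << *Count << '\n';
    else
      dbgs() << "no profile data for this block\n";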
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 048baa460e49..f61142d202eb 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -193,6 +193,7 @@ static cl::opt<unsigned> TriangleChainCount(
cl::init(2),
cl::Hidden);
+namespace llvm {
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
@@ -204,6 +205,7 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
// Command line option to specify the name of the function for CFG dump
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+} // namespace llvm
namespace {
@@ -3337,6 +3339,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TailDupSize = TailDupPlacementAggressiveThreshold;
}
+ // If there's no threshold provided through options, query the target
+ // information for a threshold instead.
+ if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
+ (PassConfig->getOptLevel() < CodeGenOpt::Aggressive ||
+ TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
+ TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
+
if (allowTailDupPlacement()) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
bool OptForSize = MF.getFunction().hasOptSize() ||
diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index f1d68c79a212..c9f762f9a6e7 100644
--- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -25,6 +25,7 @@ INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
"Machine Branch Probability Analysis", false, true)
+namespace llvm {
cl::opt<unsigned>
StaticLikelyProb("static-likely-prob",
cl::desc("branch probability threshold in percentage"
@@ -36,6 +37,7 @@ cl::opt<unsigned> ProfileLikelyProb(
cl::desc("branch probability threshold in percentage to be considered"
" very likely when profile is available"),
cl::init(51), cl::Hidden);
+} // namespace llvm
char MachineBranchProbabilityInfo::ID = 0;
@@ -66,26 +68,6 @@ bool MachineBranchProbabilityInfo::isEdgeHot(
return getEdgeProbability(Src, Dst) > HotProb;
}
-MachineBasicBlock *
-MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
- auto MaxProb = BranchProbability::getZero();
- MachineBasicBlock *MaxSucc = nullptr;
- for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I) {
- auto Prob = getEdgeProbability(MBB, I);
- if (Prob > MaxProb) {
- MaxProb = Prob;
- MaxSucc = *I;
- }
- }
-
- BranchProbability HotProb(StaticLikelyProb, 100);
- if (getEdgeProbability(MBB, MaxSucc) >= HotProb)
- return MaxSucc;
-
- return nullptr;
-}
-
raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability(
raw_ostream &OS, const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const {
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index 199fe2dc6454..cb2e18e8c813 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -588,6 +588,23 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Examining: " << *MI);
LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+ // Prevent CSE-ing non-local convergent instructions.
+ // LLVM's current definition of `isConvergent` does not necessarily prove
+ // that non-local CSE is illegal. The following check extends the definition
+ // of `isConvergent` to assume a convergent instruction is dependent not
+ // only on additional conditions, but also on fewer conditions. LLVM does
+ // not have a MachineInstr attribute which expresses this extended
+ // definition, so it's necessary to use `isConvergent` to prevent illegally
+ // CSE-ing the subset of `isConvergent` instructions which do fall into this
+ // extended definition.
+ if (MI->isConvergent() && MI->getParent() != CSMI->getParent()) {
+ LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
+ "different BBs, avoid CSE!\n");
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ continue;
+ }
+
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
unsigned NumDefs = MI->getNumDefs();
@@ -820,6 +837,15 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
if (BB != nullptr && BB1 != nullptr &&
(isPotentiallyReachable(BB1, BB) ||
isPotentiallyReachable(BB, BB1))) {
+ // The following check extends the definition of `isConvergent` to
+ // assume a convergent instruction is dependent not only on additional
+ // conditions, but also on fewer conditions. LLVM does not have a
+ // MachineInstr attribute which expresses this extended definition, so
+ // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
+ // subset of `isConvergent` instructions which do fall into this
+ // extended definition.
+ if (MI->isConvergent() && CMBB != MBB)
+ continue;
assert(MI->getOperand(0).isDef() &&
"First operand of instr with one explicit def must be this def");
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index d8659c1c7853..10b74f5f47f5 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -294,7 +294,7 @@ private:
SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
/// Multimap tracking debug users in current BB
- DenseMap<MachineInstr*, SmallVector<MachineInstr*, 2>> CopyDbgUsers;
+ DenseMap<MachineInstr *, SmallSet<MachineInstr *, 2>> CopyDbgUsers;
CopyTracker Tracker;
@@ -321,7 +321,7 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
MaybeDeadCopies.remove(Copy);
} else {
- CopyDbgUsers[Copy].push_back(&Reader);
+ CopyDbgUsers[Copy].insert(&Reader);
}
}
}
@@ -734,7 +734,11 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Update matching debug values, if any.
assert(MaybeDead->isCopy());
Register SrcReg = MaybeDead->getOperand(1).getReg();
- MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]);
+ Register DestReg = MaybeDead->getOperand(0).getReg();
+ SmallVector<MachineInstr *> MaybeDeadDbgUsers(
+ CopyDbgUsers[MaybeDead].begin(), CopyDbgUsers[MaybeDead].end());
+ MRI->updateDbgUsersToReg(DestReg.asMCReg(), SrcReg.asMCReg(),
+ MaybeDeadDbgUsers);
MaybeDead->eraseFromParent();
Changed = true;
@@ -866,12 +870,32 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
if (MO.isDef())
Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
- if (MO.readsReg())
- Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
+ if (MO.readsReg()) {
+ if (MO.isDebug()) {
+ // Check if the register in the debug instruction is utilized
+ // in a copy instruction, so we can update the debug info if the
+ // register is changed.
+ for (MCRegUnitIterator RUI(MO.getReg().asMCReg(), TRI); RUI.isValid();
+ ++RUI) {
+ if (auto *Copy = Tracker.findCopyDefViaUnit(*RUI, *TRI)) {
+ CopyDbgUsers[Copy].insert(MI);
+ }
+ }
+ } else {
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
+ }
+ }
}
}
for (auto *Copy : MaybeDeadCopies) {
+
+ Register Src = Copy->getOperand(1).getReg();
+ Register Def = Copy->getOperand(0).getReg();
+ SmallVector<MachineInstr *> MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(),
+ CopyDbgUsers[Copy].end());
+
+ MRI->updateDbgUsersToReg(Src.asMCReg(), Def.asMCReg(), MaybeDeadDbgUsers);
Copy->eraseFromParent();
++NumDeletes;
}
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 7ba27ff1c856..ca5936a14779 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -173,7 +173,7 @@ uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
// value.
Align StackAlign;
if (adjustsStack() || hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ (RegInfo->hasStackRealignment(MF) && getObjectIndexEnd() != 0))
StackAlign = TFI->getStackAlign();
else
StackAlign = TFI->getTransientStackAlign();
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 3f44578b1a2c..0a454b68aca3 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -438,15 +438,34 @@ MachineMemOperand *MachineFunction::getMachineMemOperand(
}
MachineMemOperand *MachineFunction::getMachineMemOperand(
- const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) {
- return new (Allocator) MachineMemOperand(
- PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(), nullptr,
- MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering());
+ MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy,
+ Align base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
+ SyncScope::ID SSID, AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering) {
+ return new (Allocator)
+ MachineMemOperand(PtrInfo, f, MemTy, base_alignment, AAInfo, Ranges, SSID,
+ Ordering, FailureOrdering);
+}
+
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, uint64_t Size) {
+ return new (Allocator)
+ MachineMemOperand(PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(),
+ AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ MMO->getSuccessOrdering(), MMO->getFailureOrdering());
+}
+
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, LLT Ty) {
+ return new (Allocator)
+ MachineMemOperand(PtrInfo, MMO->getFlags(), Ty, MMO->getBaseAlign(),
+ AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ MMO->getSuccessOrdering(), MMO->getFailureOrdering());
}
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
- int64_t Offset, uint64_t Size) {
+ int64_t Offset, LLT Ty) {
const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
// If there is no pointer value, the offset isn't tracked so we need to adjust
@@ -457,10 +476,10 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
// Do not preserve ranges, since we don't necessarily know what the high bits
// are anymore.
- return new (Allocator)
- MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size,
- Alignment, MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(),
- MMO->getOrdering(), MMO->getFailureOrdering());
+ return new (Allocator) MachineMemOperand(
+ PtrInfo.getWithOffset(Offset), MMO->getFlags(), Ty, Alignment,
+ MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(),
+ MMO->getSuccessOrdering(), MMO->getFailureOrdering());
}
MachineMemOperand *
@@ -472,7 +491,7 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
return new (Allocator) MachineMemOperand(
MPI, MMO->getFlags(), MMO->getSize(), MMO->getBaseAlign(), AAInfo,
- MMO->getRanges(), MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getRanges(), MMO->getSyncScopeID(), MMO->getSuccessOrdering(),
MMO->getFailureOrdering());
}
@@ -482,7 +501,7 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
return new (Allocator) MachineMemOperand(
MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlign(),
MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
- MMO->getOrdering(), MMO->getFailureOrdering());
+ MMO->getSuccessOrdering(), MMO->getFailureOrdering());
}
MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo(
@@ -850,9 +869,8 @@ int MachineFunction::getFilterIDFor(std::vector<unsigned> &TyIds) {
// If the new filter coincides with the tail of an existing filter, then
// re-use the existing filter. Folding filters more than this requires
// re-ordering filters and/or their elements - probably not worth it.
- for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
- E = FilterEnds.end(); I != E; ++I) {
- unsigned i = *I, j = TyIds.size();
+ for (unsigned i : FilterEnds) {
+ unsigned j = TyIds.size();
while (i && j)
if (FilterIds[--i] != TyIds[--j])
@@ -951,10 +969,11 @@ void MachineFunction::setDebugInstrNumberingCount(unsigned Num) {
}
void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A,
- DebugInstrOperandPair B) {
- auto Result = DebugValueSubstitutions.insert(std::make_pair(A, B));
- (void)Result;
- assert(Result.second && "Substitution for an already substituted value?");
+ DebugInstrOperandPair B,
+ unsigned Subreg) {
+ // Catch any accidental self-loops.
+ assert(A.first != B.first);
+ DebugValueSubstitutions.push_back({A, B, Subreg});
}
void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old,
@@ -971,7 +990,7 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old,
// MIR output.
// Examine all the operands, or the first N specified by the caller.
MaxOperand = std::min(MaxOperand, Old.getNumOperands());
- for (unsigned int I = 0; I < Old.getNumOperands(); ++I) {
+ for (unsigned int I = 0; I < MaxOperand; ++I) {
const auto &OldMO = Old.getOperand(I);
auto &NewMO = New.getOperand(I);
(void)NewMO;
@@ -986,6 +1005,222 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old,
}
}
+auto MachineFunction::salvageCopySSA(MachineInstr &MI)
+ -> DebugInstrOperandPair {
+ MachineRegisterInfo &MRI = getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ const TargetInstrInfo &TII = *getSubtarget().getInstrInfo();
+
+ // Chase the value read by a copy-like instruction back to the instruction
+ // that ultimately _defines_ that value. This may pass:
+ // * Through multiple intermediate copies, including subregister moves /
+ // copies,
+ // * Copies from physical registers that must then be traced back to the
+ // defining instruction,
+ // * Or, physical registers may be live-in to (only) the entry block, which
+ // requires a DBG_PHI to be created.
+ // We can pursue this problem in that order: trace back through copies,
+ // optionally through a physical register, to a defining instruction. We
+ // should never move from physreg to vreg. As we're still in SSA form, no need
+ // to worry about partial definitions of registers.
+
+ // Helper lambda to interpret a copy-like instruction. Takes instruction,
+ // returns the register read and any subregister identifying which part is
+ // read.
+ auto GetRegAndSubreg =
+ [&](const MachineInstr &Cpy) -> std::pair<Register, unsigned> {
+ Register NewReg, OldReg;
+ unsigned SubReg;
+ if (Cpy.isCopy()) {
+ OldReg = Cpy.getOperand(0).getReg();
+ NewReg = Cpy.getOperand(1).getReg();
+ SubReg = Cpy.getOperand(1).getSubReg();
+ } else if (Cpy.isSubregToReg()) {
+ OldReg = Cpy.getOperand(0).getReg();
+ NewReg = Cpy.getOperand(2).getReg();
+ SubReg = Cpy.getOperand(3).getImm();
+ } else {
+ auto CopyDetails = *TII.isCopyInstr(Cpy);
+ const MachineOperand &Src = *CopyDetails.Source;
+ const MachineOperand &Dest = *CopyDetails.Destination;
+ OldReg = Dest.getReg();
+ NewReg = Src.getReg();
+ SubReg = Src.getSubReg();
+ }
+
+ return {NewReg, SubReg};
+ };
+
+ // First seek either the defining instruction, or a copy from a physreg.
+ // During search, the current state is the current copy instruction, and which
+ // register we've read. Accumulate qualifying subregisters into SubregsSeen;
+ // deal with those later.
+ auto State = GetRegAndSubreg(MI);
+ auto CurInst = MI.getIterator();
+ SmallVector<unsigned, 4> SubregsSeen;
+ while (true) {
+ // If we've found a copy from a physreg, first portion of search is over.
+ if (!State.first.isVirtual())
+ break;
+
+ // Record any subregister qualifier.
+ if (State.second)
+ SubregsSeen.push_back(State.second);
+
+ assert(MRI.hasOneDef(State.first));
+ MachineInstr &Inst = *MRI.def_begin(State.first)->getParent();
+ CurInst = Inst.getIterator();
+
+ // Any non-copy instruction is the defining instruction we're seeking.
+ if (!Inst.isCopyLike() && !TII.isCopyInstr(Inst))
+ break;
+ State = GetRegAndSubreg(Inst);
+  }
+
+ // Helper lambda to apply additional subregister substitutions to a known
+ // instruction/operand pair. Adds new (fake) substitutions so that we can
+ // record the subregister. FIXME: this isn't very space efficient if multiple
+ // values are tracked back through the same copies; cache something later.
+ auto ApplySubregisters =
+ [&](DebugInstrOperandPair P) -> DebugInstrOperandPair {
+ for (unsigned Subreg : reverse(SubregsSeen)) {
+ // Fetch a new instruction number, not attached to an actual instruction.
+ unsigned NewInstrNumber = getNewDebugInstrNum();
+ // Add a substitution from the "new" number to the known one, with a
+ // qualifying subreg.
+ makeDebugValueSubstitution({NewInstrNumber, 0}, P, Subreg);
+ // Return the new number; to find the underlying value, consumers need to
+ // deal with the qualifying subreg.
+ P = {NewInstrNumber, 0};
+ }
+ return P;
+ };
+
+ // If we managed to find the defining instruction after COPYs, return an
+ // instruction / operand pair after adding subregister qualifiers.
+ if (State.first.isVirtual()) {
+ // Virtual register def -- we can just look up where this happens.
+ MachineInstr *Inst = MRI.def_begin(State.first)->getParent();
+ for (auto &MO : Inst->operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != State.first)
+ continue;
+ return ApplySubregisters(
+ {Inst->getDebugInstrNum(), Inst->getOperandNo(&MO)});
+ }
+
+ llvm_unreachable("Vreg def with no corresponding operand?");
+ }
+
+ // Our search ended in a copy from a physreg: walk back up the function
+ // looking for whatever defines the physreg.
+ assert(CurInst->isCopyLike() || TII.isCopyInstr(*CurInst));
+ State = GetRegAndSubreg(*CurInst);
+ Register RegToSeek = State.first;
+
+ auto RMII = CurInst->getReverseIterator();
+ auto PrevInstrs = make_range(RMII, CurInst->getParent()->instr_rend());
+ for (auto &ToExamine : PrevInstrs) {
+ for (auto &MO : ToExamine.operands()) {
+ // Test for operand that defines something aliasing RegToSeek.
+ if (!MO.isReg() || !MO.isDef() ||
+ !TRI.regsOverlap(RegToSeek, MO.getReg()))
+ continue;
+
+ return ApplySubregisters(
+ {ToExamine.getDebugInstrNum(), ToExamine.getOperandNo(&MO)});
+ }
+ }
+
+ MachineBasicBlock &InsertBB = *CurInst->getParent();
+
+ // We reached the start of the block before finding a defining instruction.
+ // It could be from a constant register, otherwise it must be an argument.
+ if (TRI.isConstantPhysReg(State.first)) {
+ // We can produce a DBG_PHI that identifies the constant physreg. Doesn't
+ // matter where we put it, as it's constant valued.
+ assert(CurInst->isCopy());
+ } else if (State.first == TRI.getFrameRegister(*this)) {
+ // LLVM IR is allowed to read the framepointer by calling a
+ // llvm.frameaddress.* intrinsic. We can support this by emitting a
+ // DBG_PHI $fp. This isn't ideal, because it extends the behaviours /
+ // position that DBG_PHIs appear at, limiting what can be done later.
+ // TODO: see if there's a better way of expressing these variable
+ // locations.
+ ;
+ } else {
+ // Assert that this is the entry block. If it isn't, then there is some
+ // code construct we don't recognise that deals with physregs across
+ // blocks.
+ assert(!State.first.isVirtual());
+ assert(&*InsertBB.getParent()->begin() == &InsertBB);
+ }
+
+ // Create DBG_PHI for specified physreg.
+ auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(),
+ TII.get(TargetOpcode::DBG_PHI));
+ Builder.addReg(State.first, RegState::Debug);
+ unsigned NewNum = getNewDebugInstrNum();
+ Builder.addImm(NewNum);
+ return ApplySubregisters({NewNum, 0u});
+}
+
+void MachineFunction::finalizeDebugInstrRefs() {
+ auto *TII = getSubtarget().getInstrInfo();
+
+ auto MakeDbgValue = [&](MachineInstr &MI) {
+ const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);
+ MI.setDesc(RefII);
+ MI.getOperand(1).ChangeToRegister(0, false);
+ MI.getOperand(0).setIsDebug();
+ };
+
+ if (!getTarget().Options.ValueTrackingVariableLocations)
+ return;
+
+ for (auto &MBB : *this) {
+ for (auto &MI : MBB) {
+ if (!MI.isDebugRef() || !MI.getOperand(0).isReg())
+ continue;
+
+ Register Reg = MI.getOperand(0).getReg();
+
+ // Some vregs can be deleted as redundant in the meantime. Mark those
+ // as DBG_VALUE $noreg.
+ if (Reg == 0) {
+ MakeDbgValue(MI);
+ continue;
+ }
+
+ assert(Reg.isVirtual());
+ MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg);
+ assert(RegInfo->hasOneDef(Reg));
+
+ // If we've found a copy-like instruction, follow it back to the
+ // instruction that defines the source value, see salvageCopySSA docs
+ // for why this is important.
+ if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) {
+ auto Result = salvageCopySSA(DefMI);
+ MI.getOperand(0).ChangeToImmediate(Result.first);
+ MI.getOperand(1).setImm(Result.second);
+ } else {
+ // Otherwise, identify the operand number that the VReg refers to.
+ unsigned OperandIdx = 0;
+ for (const auto &MO : DefMI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
+ break;
+ ++OperandIdx;
+ }
+ assert(OperandIdx < DefMI.getNumOperands());
+
+ // Morph this instr ref to point at the given instruction and operand.
+ unsigned ID = DefMI.getDebugInstrNum();
+ MI.getOperand(0).ChangeToImmediate(ID);
+ MI.getOperand(1).setImm(OperandIdx);
+ }
+ }
+ }
+}
+
/// \}
//===----------------------------------------------------------------------===//
@@ -1120,7 +1355,7 @@ unsigned MachineConstantPoolEntry::getSizeInBytes(const DataLayout &DL) const {
bool MachineConstantPoolEntry::needsRelocation() const {
if (isMachineConstantPoolEntry())
return true;
- return Val.ConstVal->needsRelocation();
+ return Val.ConstVal->needsDynamicRelocation();
}
SectionKind
@@ -1150,11 +1385,9 @@ MachineConstantPool::~MachineConstantPool() {
Deleted.insert(Constants[i].Val.MachineCPVal);
delete Constants[i].Val.MachineCPVal;
}
- for (DenseSet<MachineConstantPoolValue*>::iterator I =
- MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
- I != E; ++I) {
- if (Deleted.count(*I) == 0)
- delete *I;
+ for (MachineConstantPoolValue *CPV : MachineCPVsSharingEntries) {
+ if (Deleted.count(CPV) == 0)
+ delete CPV;
}
}
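The new getMachineMemOperand overloads above take an LLT memory type instead of a byte size. A sketch of building such an operand, assuming MF is the current MachineFunction and PtrV is a pointer-typed IR Value; this is roughly what the MIR parser constructs for an operand like '(load (s64) from %ir.p)':

    LLT S64 = LLT::scalar(64);
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo(PtrV), MachineMemOperand::MOLoad, S64,
        /*base_alignment=*/Align(8), AAMDNodes(), /*Ranges=*/nullptr,
        SyncScope::System, AtomicOrdering::NotAtomic,
        AtomicOrdering::NotAtomic);
    (void)MMO;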
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 483809a8ed96..0e0eb8b8e00f 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -23,6 +23,7 @@
// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
@@ -77,7 +78,7 @@ public:
};
} // end anonymous namespace
-static bool isColdBlock(MachineBasicBlock &MBB,
+static bool isColdBlock(const MachineBasicBlock &MBB,
const MachineBlockFrequencyInfo *MBFI,
ProfileSummaryInfo *PSI) {
Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
@@ -100,7 +101,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// since the split part may not be placed in a contiguous region. It may also
// be more beneficial to augment the linker to ensure contiguous layout of
// split functions within the same section as specified by the attribute.
- if (!MF.getFunction().getSection().empty())
+ if (MF.getFunction().hasSection() ||
+ MF.getFunction().hasFnAttribute("implicit-section-name"))
return false;
// We don't want to proceed further for cold functions
@@ -121,16 +123,28 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ SmallVector<MachineBasicBlock *, 2> LandingPads;
for (auto &MBB : MF) {
- // FIXME: We retain the entry block and conservatively keep all landing pad
- // blocks as part of the original function. Once D73739 is submitted, we can
- // improve the handling of ehpads.
- if ((MBB.pred_empty() || MBB.isEHPad()))
+ if (MBB.isEntryBlock())
continue;
- if (isColdBlock(MBB, MBFI, PSI))
+
+ if (MBB.isEHPad())
+ LandingPads.push_back(&MBB);
+ else if (isColdBlock(MBB, MBFI, PSI))
MBB.setSectionID(MBBSectionID::ColdSectionID);
}
+ // We only split out eh pads if all of them are cold.
+ bool HasHotLandingPads = false;
+ for (const MachineBasicBlock *LP : LandingPads) {
+ if (!isColdBlock(*LP, MBFI, PSI))
+ HasHotLandingPads = true;
+ }
+ if (!HasHotLandingPads) {
+ for (MachineBasicBlock *LP : LandingPads)
+ LP->setSectionID(MBBSectionID::ColdSectionID);
+ }
+
auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
return X.getSectionID().Type < Y.getSectionID().Type;
};
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 59d98054e3a2..0707945e7fb7 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -841,28 +841,35 @@ const DILabel *MachineInstr::getDebugLabel() const {
}
const MachineOperand &MachineInstr::getDebugVariableOp() const {
- assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
- return getOperand(2);
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*");
+ unsigned VariableOp = isDebugValueList() ? 0 : 2;
+ return getOperand(VariableOp);
}
MachineOperand &MachineInstr::getDebugVariableOp() {
- assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
- return getOperand(2);
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*");
+ unsigned VariableOp = isDebugValueList() ? 0 : 2;
+ return getOperand(VariableOp);
}
const DILocalVariable *MachineInstr::getDebugVariable() const {
- assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
- return cast<DILocalVariable>(getOperand(2).getMetadata());
+ return cast<DILocalVariable>(getDebugVariableOp().getMetadata());
+}
+
+const MachineOperand &MachineInstr::getDebugExpressionOp() const {
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*");
+ unsigned ExpressionOp = isDebugValueList() ? 1 : 3;
+ return getOperand(ExpressionOp);
}
MachineOperand &MachineInstr::getDebugExpressionOp() {
- assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
- return getOperand(3);
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*");
+ unsigned ExpressionOp = isDebugValueList() ? 1 : 3;
+ return getOperand(ExpressionOp);
}
const DIExpression *MachineInstr::getDebugExpression() const {
- assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
- return cast<DIExpression>(getOperand(3).getMetadata());
+ return cast<DIExpression>(getDebugExpressionOp().getMetadata());
}
bool MachineInstr::isDebugEntryValue() const {
@@ -1312,12 +1319,10 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
int64_t OverlapB =
KnownWidthB ? WidthB + OffsetB - MinOffset : MemoryLocation::UnknownSize;
- AliasResult AAResult = AA->alias(
+ return !AA->isNoAlias(
MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
MemoryLocation(ValB, OverlapB,
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
-
- return (AAResult != NoAlias);
}
bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
@@ -1462,7 +1467,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
}
bool MachineInstr::isLoadFoldBarrier() const {
- return mayStore() || isCall() || hasUnmodeledSideEffects();
+ return mayStore() || isCall() ||
+ (hasUnmodeledSideEffects() && !isPseudoProbe());
}
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
@@ -1711,7 +1717,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << " ";
if (isDebugValue() && MO.isMetadata()) {
- // Pretty print DBG_VALUE instructions.
+ // Pretty print DBG_VALUE* instructions.
auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata());
if (DIV && !DIV->getName().empty())
OS << "!\"" << DIV->getName() << '\"';
@@ -2056,9 +2062,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs,
// This is a call with a register mask operand.
// Mask clobbers are always dead, so add defs for the non-dead defines.
if (HasRegMask)
- for (ArrayRef<Register>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
- I != E; ++I)
- addRegisterDefined(*I, &TRI);
+ for (const Register &UsedReg : UsedRegs)
+ addRegisterDefined(UsedReg, &TRI);
}
unsigned
@@ -2078,7 +2083,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
void MachineInstr::emitError(StringRef Msg) const {
// Find the source location cookie.
- unsigned LocCookie = 0;
+ uint64_t LocCookie = 0;
const MDNode *LocMD = nullptr;
for (unsigned i = getNumOperands(); i != 0; --i) {
if (getOperand(i-1).isMetadata() &&
@@ -2116,8 +2121,8 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
const MCInstrDesc &MCID, bool IsIndirect,
- MachineOperand &MO, const MDNode *Variable,
- const MDNode *Expr) {
+ const MachineOperand &MO,
+ const MDNode *Variable, const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
@@ -2131,7 +2136,28 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
else
MIB.addReg(0U, RegState::Debug);
return MIB.addMetadata(Variable).addMetadata(Expr);
- }
+}
+
+MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
+ const MCInstrDesc &MCID, bool IsIndirect,
+ ArrayRef<MachineOperand> MOs,
+ const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ if (MCID.Opcode == TargetOpcode::DBG_VALUE)
+ return BuildMI(MF, DL, MCID, IsIndirect, MOs[0], Variable, Expr);
+
+ auto MIB = BuildMI(MF, DL, MCID);
+ MIB.addMetadata(Variable).addMetadata(Expr);
+ for (const MachineOperand &MO : MOs)
+ if (MO.isReg())
+ MIB.addReg(MO.getReg(), RegState::Debug);
+ else
+ MIB.add(MO);
+ return MIB;
+}
MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
@@ -2155,10 +2181,22 @@ MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
return MachineInstrBuilder(MF, *MI);
}
+MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, const MCInstrDesc &MCID,
+ bool IsIndirect, ArrayRef<MachineOperand> MOs,
+ const MDNode *Variable, const MDNode *Expr) {
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, MOs, Variable, Expr);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MF, *MI);
+}
+
/// Compute the new DIExpression to use with a DBG_VALUE for a spill slot.
/// This prepends DW_OP_deref when spilling an indirect DBG_VALUE.
-static const DIExpression *computeExprForSpill(const MachineInstr &MI) {
- assert(MI.getOperand(0).isReg() && "can't spill non-register");
+static const DIExpression *
+computeExprForSpill(const MachineInstr &MI,
+ SmallVectorImpl<const MachineOperand *> &SpilledOperands) {
assert(MI.getDebugVariable()->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
"Expected inlined-at fields to agree");
@@ -2167,26 +2205,76 @@ static const DIExpression *computeExprForSpill(const MachineInstr &MI) {
assert(MI.getDebugOffset().getImm() == 0 &&
"DBG_VALUE with nonzero offset");
Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ } else if (MI.isDebugValueList()) {
+ // We will replace the spilled register with a frame index, so
+ // immediately deref all references to the spilled register.
+ std::array<uint64_t, 1> Ops{{dwarf::DW_OP_deref}};
+ for (const MachineOperand *Op : SpilledOperands) {
+ unsigned OpIdx = MI.getDebugOperandIndex(Op);
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, OpIdx);
+ }
}
return Expr;
}
+static const DIExpression *computeExprForSpill(const MachineInstr &MI,
+ Register SpillReg) {
+ assert(MI.hasDebugOperandForReg(SpillReg) && "Spill Reg is not used in MI.");
+ SmallVector<const MachineOperand *> SpillOperands;
+ for (const MachineOperand &Op : MI.getDebugOperandsForReg(SpillReg))
+ SpillOperands.push_back(&Op);
+ return computeExprForSpill(MI, SpillOperands);
+}
MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const MachineInstr &Orig,
- int FrameIndex) {
- const DIExpression *Expr = computeExprForSpill(Orig);
- return BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc())
- .addFrameIndex(FrameIndex)
- .addImm(0U)
- .addMetadata(Orig.getDebugVariable())
- .addMetadata(Expr);
-}
-
-void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) {
- const DIExpression *Expr = computeExprForSpill(Orig);
- Orig.getDebugOperand(0).ChangeToFrameIndex(FrameIndex);
- Orig.getDebugOffset().ChangeToImmediate(0U);
+ int FrameIndex, Register SpillReg) {
+ const DIExpression *Expr = computeExprForSpill(Orig, SpillReg);
+ MachineInstrBuilder NewMI =
+ BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc());
+ // Non-Variadic Operands: Location, Offset, Variable, Expression
+ // Variadic Operands: Variable, Expression, Locations...
+ if (Orig.isNonListDebugValue())
+ NewMI.addFrameIndex(FrameIndex).addImm(0U);
+ NewMI.addMetadata(Orig.getDebugVariable()).addMetadata(Expr);
+ if (Orig.isDebugValueList()) {
+ for (const MachineOperand &Op : Orig.debug_operands())
+ if (Op.isReg() && Op.getReg() == SpillReg)
+ NewMI.addFrameIndex(FrameIndex);
+ else
+ NewMI.add(MachineOperand(Op));
+ }
+ return NewMI;
+}
+MachineInstr *llvm::buildDbgValueForSpill(
+ MachineBasicBlock &BB, MachineBasicBlock::iterator I,
+ const MachineInstr &Orig, int FrameIndex,
+ SmallVectorImpl<const MachineOperand *> &SpilledOperands) {
+ const DIExpression *Expr = computeExprForSpill(Orig, SpilledOperands);
+ MachineInstrBuilder NewMI =
+ BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc());
+ // Non-Variadic Operands: Location, Offset, Variable, Expression
+ // Variadic Operands: Variable, Expression, Locations...
+ if (Orig.isNonListDebugValue())
+ NewMI.addFrameIndex(FrameIndex).addImm(0U);
+ NewMI.addMetadata(Orig.getDebugVariable()).addMetadata(Expr);
+ if (Orig.isDebugValueList()) {
+ for (const MachineOperand &Op : Orig.debug_operands())
+ if (is_contained(SpilledOperands, &Op))
+ NewMI.addFrameIndex(FrameIndex);
+ else
+ NewMI.add(MachineOperand(Op));
+ }
+ return NewMI;
+}
+
+void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex,
+ Register Reg) {
+ const DIExpression *Expr = computeExprForSpill(Orig, Reg);
+ if (Orig.isNonListDebugValue())
+ Orig.getDebugOffset().ChangeToImmediate(0U);
+ for (MachineOperand &Op : Orig.getDebugOperandsForReg(Reg))
+ Op.ChangeToFrameIndex(FrameIndex);
Orig.getDebugExpressionOp().setMetadata(Expr);
}
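
// Sketch, not part of the patch above: how a spiller-style caller might use
// the updated helpers now that the spilled register is passed explicitly, so
// only the matching debug operands are rewritten to the frame index. The
// declarations are assumed to live in llvm/CodeGen/MachineInstrBuilder.h.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

static void salvageDebugUsersOfSpill(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertPt,
                                     ArrayRef<MachineInstr *> DbgUsers,
                                     int FI, Register SpillReg) {
  for (MachineInstr *DbgMI : DbgUsers) {
    // Emit a frame-index based copy of each DBG_VALUE / DBG_VALUE_LIST...
    buildDbgValueForSpill(MBB, InsertPt, *DbgMI, FI, SpillReg);
    // ...or rewrite the original in place instead:
    // updateDbgValueForSpill(*DbgMI, FI, SpillReg);
  }
}
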
@@ -2201,7 +2289,7 @@ void MachineInstr::collectDebugValues(
DI != DE; ++DI) {
if (!DI->isDebugValue())
return;
- if (DI->getDebugOperandForReg(MI.getOperand(0).getReg()))
+ if (DI->hasDebugOperandForReg(MI.getOperand(0).getReg()))
DbgValues.push_back(&*DI);
}
}
@@ -2219,14 +2307,15 @@ void MachineInstr::changeDebugValuesDefReg(Register Reg) {
auto *DI = MO.getParent();
if (!DI->isDebugValue())
continue;
- if (DI->getDebugOperandForReg(DefReg)) {
+ if (DI->hasDebugOperandForReg(DefReg)) {
DbgValues.push_back(DI);
}
}
// Propagate Reg to debug value instructions.
for (auto *DBI : DbgValues)
- DBI->getDebugOperandForReg(DefReg)->setReg(Reg);
+ for (MachineOperand &Op : DBI->getDebugOperandsForReg(DefReg))
+ Op.setReg(Reg);
}
using MMOList = SmallVector<const MachineMemOperand *, 2>;
@@ -2285,3 +2374,9 @@ unsigned MachineInstr::getDebugInstrNum() {
DebugInstrNum = getParent()->getParent()->getNewDebugInstrNum();
return DebugInstrNum;
}
+
+unsigned MachineInstr::getDebugInstrNum(MachineFunction &MF) {
+ if (DebugInstrNum == 0)
+ DebugInstrNum = MF.getNewDebugInstrNum();
+ return DebugInstrNum;
+}
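
// Sketch, not part of the patch above, of the intended use of the MF-taking
// overload: number a defining instruction once and point a DBG_INSTR_REF at
// it. The DBG_INSTR_REF operand layout assumed here is (instr-number,
// operand-index, variable, expression).
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"

using namespace llvm;

static void addInstrRef(MachineFunction &MF, MachineInstr &DefMI,
                        MachineBasicBlock::iterator InsertPt,
                        const TargetInstrInfo &TII, const DILocalVariable *Var,
                        const DIExpression *Expr, const DebugLoc &DL) {
  unsigned InstrNum = DefMI.getDebugInstrNum(MF); // stable per-function id
  BuildMI(*DefMI.getParent(), InsertPt, DL,
          TII.get(TargetOpcode::DBG_INSTR_REF))
      .addImm(InstrNum)
      .addImm(0) // index of the referenced operand within DefMI
      .addMetadata(Var)
      .addMetadata(Expr);
}
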
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 50456e489ea1..6ca97031b92a 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -47,11 +47,9 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
return false;
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
-
- for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
- MIE = MBB->instr_end(); MII != MIE; ) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
+ MIE = MBB.instr_end(); MII != MIE; ) {
MachineInstr *MI = &*MII;
// Remove BUNDLE instruction and the InsideBundle flags from bundled
@@ -256,8 +254,7 @@ llvm::finalizeBundle(MachineBasicBlock &MBB,
/// MachineFunction. Return true if any bundles are finalized.
bool llvm::finalizeBundles(MachineFunction &MF) {
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock &MBB = *I;
+ for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
if (MII == MIE)
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index c06bc39b4940..883299c452b7 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -69,11 +69,6 @@ HoistCheapInsts("hoist-cheap-insts",
cl::init(false), cl::Hidden);
static cl::opt<bool>
-SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
- cl::desc("MachineLICM should sink instructions into "
- "loops to avoid register spills"),
- cl::init(false), cl::Hidden);
-static cl::opt<bool>
HoistConstStores("hoist-const-stores",
cl::desc("Hoist invariant stores"),
cl::init(true), cl::Hidden);
@@ -246,8 +241,6 @@ namespace {
void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
- void SinkIntoLoop();
-
void InitRegPressure(MachineBasicBlock *BB);
DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
@@ -395,9 +388,6 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
FirstInLoop = true;
HoistOutOfLoop(N);
CSEMap.clear();
-
- if (SinkInstsToAvoidSpills)
- SinkIntoLoop();
}
}
@@ -787,88 +777,6 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
}
}
-/// Sink instructions into loops if profitable. This especially tries to prevent
-/// register spills caused by register pressure if there is little to no
-/// overhead moving instructions into loops.
-void MachineLICMBase::SinkIntoLoop() {
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader)
- return;
-
- SmallVector<MachineInstr *, 8> Candidates;
- for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
- I != Preheader->instr_end(); ++I) {
- // We need to ensure that we can safely move this instruction into the loop.
- // As such, it must not have side-effects, e.g. such as a call has.
- LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I);
- if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) {
- LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n");
- Candidates.push_back(&*I);
- continue;
- }
- LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n");
- }
-
- for (MachineInstr *I : Candidates) {
- const MachineOperand &MO = I->getOperand(0);
- if (!MO.isDef() || !MO.isReg() || !MO.getReg())
- continue;
- if (!MRI->hasOneDef(MO.getReg()))
- continue;
- bool CanSink = true;
- MachineBasicBlock *SinkBlock = nullptr;
- LLVM_DEBUG(dbgs() << "LICM: Try sinking: " << *I);
-
- for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
- LLVM_DEBUG(dbgs() << "LICM: Analysing use: "; MI.dump());
- // FIXME: Come up with a proper cost model that estimates whether sinking
- // the instruction (and thus possibly executing it on every loop
- // iteration) is more expensive than a register.
- // For now assumes that copies are cheap and thus almost always worth it.
- if (!MI.isCopy()) {
- CanSink = false;
- break;
- }
- if (!SinkBlock) {
- SinkBlock = MI.getParent();
- LLVM_DEBUG(dbgs() << "LICM: Setting sink block to: "
- << printMBBReference(*SinkBlock) << "\n");
- continue;
- }
- SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
- if (!SinkBlock) {
- LLVM_DEBUG(dbgs() << "LICM: Can't find nearest dominator\n");
- CanSink = false;
- break;
- }
- LLVM_DEBUG(dbgs() << "LICM: Setting nearest common dom block: " <<
- printMBBReference(*SinkBlock) << "\n");
- }
- if (!CanSink) {
- LLVM_DEBUG(dbgs() << "LICM: Can't sink instruction.\n");
- continue;
- }
- if (!SinkBlock) {
- LLVM_DEBUG(dbgs() << "LICM: Not sinking, can't find sink block.\n");
- continue;
- }
- if (SinkBlock == Preheader) {
- LLVM_DEBUG(dbgs() << "LICM: Not sinking, sink block is the preheader\n");
- continue;
- }
-
- LLVM_DEBUG(dbgs() << "LICM: Sinking to " << printMBBReference(*SinkBlock)
- << " from " << printMBBReference(*I->getParent())
- << ": " << *I);
- SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
-
- // The instruction is moved from its basic block, so do not retain the
- // debug information.
- assert(!I->isDebugInstr() && "Should not sink debug inst");
- I->setDebugLoc(DebugLoc());
- }
-}
-
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}
@@ -1056,11 +964,11 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
return false;
}
- // If it is load then check if it is guaranteed to execute by making sure that
- // it dominates all exiting blocks. If it doesn't, then there is a path out of
- // the loop which does not execute this load, so we can't hoist it. Loads
- // from constant memory are not safe to speculate all the time, for example
- // indexed load from a jump table.
+ // If it is a load then check if it is guaranteed to execute by making sure
+ // that it dominates all exiting blocks. If it doesn't, then there is a path
+ // out of the loop which does not execute this load, so we can't hoist it.
+ // Loads from constant memory are safe to speculate, for example indexed load
+ // from a jump table.
// Stores and side effects are already checked by isSafeToMove.
if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
!IsGuaranteedToExecute(I.getParent())) {
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 78480d0e1488..8f91a5b698d0 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -115,8 +115,8 @@ DebugLoc MachineLoop::getStartLoc() const {
}
MachineBasicBlock *
-MachineLoopInfo::findLoopPreheader(MachineLoop *L,
- bool SpeculativePreheader) const {
+MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
+ bool FindMultiLoopPreheader) const {
if (MachineBasicBlock *PB = L->getLoopPreheader())
return PB;
@@ -139,12 +139,14 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L,
// Check if the preheader candidate is a successor of any other loop
// headers. We want to avoid having two loop setups in the same block.
- for (MachineBasicBlock *S : Preheader->successors()) {
- if (S == HB)
- continue;
- MachineLoop *T = getLoopFor(S);
- if (T && T->getHeader() == S)
- return nullptr;
+ if (!FindMultiLoopPreheader) {
+ for (MachineBasicBlock *S : Preheader->successors()) {
+ if (S == HB)
+ continue;
+ MachineLoop *T = getLoopFor(S);
+ if (T && T->getHeader() == S)
+ return nullptr;
+ }
}
return Preheader;
}
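
// Sketch, not part of the patch above: a caller opting into the widened query
// (parameter names taken from the hunk); both flags are simply forwarded.
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

static bool hasUsablePreheader(MachineLoopInfo &MLI, MachineLoop *L) {
  MachineBasicBlock *Preheader =
      MLI.findLoopPreheader(L, /*SpeculativePreheader=*/true,
                            /*FindMultiLoopPreheader=*/true);
  return Preheader != nullptr;
}
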
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 5565b9cededa..50cbb14e926e 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -16,7 +16,9 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
@@ -76,11 +78,25 @@ class MMIAddrLabelMap {
/// we get notified if a block is deleted or RAUWd.
std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
+ /// This is a per-function list of symbols whose corresponding BasicBlock got
+ /// deleted. These symbols need to be emitted at some point in the file, so
+ /// AsmPrinter emits them after the function body.
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>
+ DeletedAddrLabelsNeedingEmission;
+
public:
MMIAddrLabelMap(MCContext &context) : Context(context) {}
+ ~MMIAddrLabelMap() {
+ assert(DeletedAddrLabelsNeedingEmission.empty() &&
+ "Some labels for deleted blocks never got emitted");
+ }
+
ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB);
+ void takeDeletedSymbolsForFunction(Function *F,
+ std::vector<MCSymbol*> &Result);
+
void UpdateForDeletedBlock(BasicBlock *BB);
void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
};
@@ -110,6 +126,20 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
return Entry.Symbols;
}
+/// If we have any deleted symbols for F, return them.
+void MMIAddrLabelMap::
+takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I =
+ DeletedAddrLabelsNeedingEmission.find(F);
+
+ // If there are no entries for the function, just return.
+ if (I == DeletedAddrLabelsNeedingEmission.end()) return;
+
+ // Otherwise, take the list.
+ std::swap(Result, I->second);
+ DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
// If the block got deleted, there is no need for the symbol. If the symbol
// was already emitted, we can just forget about it, otherwise we need to
@@ -122,8 +152,16 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
"Block/parent mismatch");
- assert(llvm::all_of(Entry.Symbols, [](MCSymbol *Sym) {
- return Sym->isDefined(); }));
+ for (MCSymbol *Sym : Entry.Symbols) {
+ if (Sym->isDefined())
+ return;
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, so we have to get the function from 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ }
}
void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
@@ -158,6 +196,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
void MachineModuleInfo::initialize() {
ObjFileMMI = nullptr;
CurCallSite = 0;
+ NextFnNum = 0;
UsesMSVCFloatingPoint = UsesMorestackAddr = false;
HasSplitStack = HasNosplitStack = false;
AddrLabelSymbols = nullptr;
@@ -178,9 +217,11 @@ void MachineModuleInfo::finalize() {
MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
: TM(std::move(MMI.TM)),
- Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(),
- MMI.TM.getObjFileLowering(), nullptr, nullptr, false),
+ Context(MMI.TM.getTargetTriple(), MMI.TM.getMCAsmInfo(),
+ MMI.TM.getMCRegisterInfo(), MMI.TM.getMCSubtargetInfo(), nullptr,
+ nullptr, false),
MachineFunctions(std::move(MMI.MachineFunctions)) {
+ Context.setObjectFileInfo(MMI.TM.getObjFileLowering());
ObjFileMMI = MMI.ObjFileMMI;
CurCallSite = MMI.CurCallSite;
UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint;
@@ -193,16 +234,20 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
}
MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
- : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
- TM->getObjFileLowering(), nullptr, nullptr, false) {
+ : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(),
+ TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(),
+ nullptr, nullptr, false) {
+ Context.setObjectFileInfo(TM->getObjFileLowering());
initialize();
}
MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM,
MCContext *ExtContext)
- : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
- TM->getObjFileLowering(), nullptr, nullptr, false),
+ : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(),
+ TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(),
+ nullptr, nullptr, false),
ExternalContext(ExtContext) {
+ Context.setObjectFileInfo(TM->getObjFileLowering());
initialize();
}
@@ -218,14 +263,21 @@ MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
}
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+ std::vector<MCSymbol*> &Result) {
+ // If no blocks have had their addresses taken, we're done.
+ if (!AddrLabelSymbols) return;
+ return AddrLabelSymbols->
+ takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
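
// Sketch, not part of the patch above, of the consumer side: after printing a
// function body, an AsmPrinter-like client drains the labels whose blocks were
// deleted so they are still defined in the output. The streamer handling is an
// assumption of this sketch.
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCStreamer.h"
#include <vector>

using namespace llvm;

static void emitDeletedAddrLabels(MachineModuleInfo &MMI,
                                  MCStreamer &OutStreamer, const Function &F) {
  std::vector<MCSymbol *> DeadLabels;
  MMI.takeDeletedSymbolsForFunction(&F, DeadLabels);
  for (MCSymbol *Sym : DeadLabels)
    OutStreamer.emitLabel(Sym);
}
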
/// \name Exception Handling
/// \{
void MachineModuleInfo::addPersonality(const Function *Personality) {
- for (unsigned i = 0; i < Personalities.size(); ++i)
- if (Personalities[i] == Personality)
- return;
- Personalities.push_back(Personality);
+ if (!llvm::is_contained(Personalities, Personality))
+ Personalities.push_back(Personality);
}
/// \}
@@ -317,9 +369,44 @@ INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo",
"Machine Module Information", false, false)
char MachineModuleInfoWrapperPass::ID = 0;
+static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr,
+ std::vector<const MDNode *> &LocInfos) {
+ // Look up a LocInfo for the buffer this diagnostic is coming from.
+ unsigned BufNum = SrcMgr.FindBufferContainingLoc(SMD.getLoc());
+ const MDNode *LocInfo = nullptr;
+ if (BufNum > 0 && BufNum <= LocInfos.size())
+ LocInfo = LocInfos[BufNum - 1];
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (LocInfo) {
+ unsigned ErrorLine = SMD.getLineNo() - 1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ return LocCookie;
+}
+
bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
MMI.initialize();
MMI.TheModule = &M;
+ // FIXME: Do this for new pass manager.
+ LLVMContext &Ctx = M.getContext();
+ MMI.getContext().setDiagnosticHandler(
+ [&Ctx](const SMDiagnostic &SMD, bool IsInlineAsm, const SourceMgr &SrcMgr,
+ std::vector<const MDNode *> &LocInfos) {
+ unsigned LocCookie = 0;
+ if (IsInlineAsm)
+ LocCookie = getLocCookie(SMD, SrcMgr, LocInfos);
+ Ctx.diagnose(DiagnosticInfoSrcMgr(SMD, IsInlineAsm, LocCookie));
+ });
MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
return false;
}
diff --git a/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
index 16d24880ebe4..9c3b31935f6d 100644
--- a/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
void MachineModuleInfoCOFF::anchor() {}
+void MachineModuleInfoWasm::anchor() {}
using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>;
static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) {
diff --git a/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
new file mode 100644
index 000000000000..e4da179efcc4
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
@@ -0,0 +1,81 @@
+//===-- llvm/CodeGen/MachineModuleSlotTracker.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleSlotTracker.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+
+void MachineModuleSlotTracker::processMachineFunctionMetadata(
+ AbstractSlotTrackerStorage *AST, const MachineFunction &MF) {
+ // Create metadata created within the backend.
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB.instrs())
+ for (const MachineMemOperand *MMO : MI.memoperands()) {
+ AAMDNodes AAInfo = MMO->getAAInfo();
+ if (AAInfo.TBAA)
+ AST->createMetadataSlot(AAInfo.TBAA);
+ if (AAInfo.TBAAStruct)
+ AST->createMetadataSlot(AAInfo.TBAAStruct);
+ if (AAInfo.Scope)
+ AST->createMetadataSlot(AAInfo.Scope);
+ if (AAInfo.NoAlias)
+ AST->createMetadataSlot(AAInfo.NoAlias);
+ }
+}
+
+void MachineModuleSlotTracker::processMachineModule(
+ AbstractSlotTrackerStorage *AST, const Module *M,
+ bool ShouldInitializeAllMetadata) {
+ if (ShouldInitializeAllMetadata) {
+ for (const Function &F : *M) {
+ if (&F != &TheFunction)
+ continue;
+ MDNStartSlot = AST->getNextMetadataSlot();
+ if (auto *MF = TheMMI.getMachineFunction(F))
+ processMachineFunctionMetadata(AST, *MF);
+ MDNEndSlot = AST->getNextMetadataSlot();
+ break;
+ }
+ }
+}
+
+void MachineModuleSlotTracker::processMachineFunction(
+ AbstractSlotTrackerStorage *AST, const Function *F,
+ bool ShouldInitializeAllMetadata) {
+ if (!ShouldInitializeAllMetadata && F == &TheFunction) {
+ MDNStartSlot = AST->getNextMetadataSlot();
+ if (auto *MF = TheMMI.getMachineFunction(*F))
+ processMachineFunctionMetadata(AST, *MF);
+ MDNEndSlot = AST->getNextMetadataSlot();
+ }
+}
+
+void MachineModuleSlotTracker::collectMachineMDNodes(
+ MachineMDNodeListType &L) const {
+ collectMDNodes(L, MDNStartSlot, MDNEndSlot);
+}
+
+MachineModuleSlotTracker::MachineModuleSlotTracker(
+ const MachineFunction *MF, bool ShouldInitializeAllMetadata)
+ : ModuleSlotTracker(MF->getFunction().getParent(),
+ ShouldInitializeAllMetadata),
+ TheFunction(MF->getFunction()), TheMMI(MF->getMMI()), MDNStartSlot(0),
+ MDNEndSlot(0) {
+ setProcessHook([this](AbstractSlotTrackerStorage *AST, const Module *M,
+ bool ShouldInitializeAllMetadata) {
+ this->processMachineModule(AST, M, ShouldInitializeAllMetadata);
+ });
+ setProcessHook([this](AbstractSlotTrackerStorage *AST, const Function *F,
+ bool ShouldInitializeAllMetadata) {
+ this->processMachineFunction(AST, F, ShouldInitializeAllMetadata);
+ });
+}
+
+MachineModuleSlotTracker::~MachineModuleSlotTracker() = default;
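
// Sketch, not part of the patch above: one possible use of the new tracker,
// sharing a single slot numbering while printing every block of a function.
// The print call itself is an assumption, not something this patch adds.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleSlotTracker.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void printWithSharedSlots(const MachineFunction &MF, raw_ostream &OS) {
  MachineModuleSlotTracker MST(&MF, /*ShouldInitializeAllMetadata=*/true);
  for (const MachineBasicBlock &MBB : MF)
    MBB.print(OS, MST, /*Indexes=*/nullptr);
}
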
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 9b09f5273298..b8ba0453d24c 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -653,6 +653,14 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
printCFIRegister(CFI.getRegister(), OS, TRI);
OS << ", " << CFI.getOffset();
break;
+ case MCCFIInstruction::OpLLVMDefAspaceCfa:
+ OS << "llvm_def_aspace_cfa ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ OS << ", " << CFI.getAddressSpace();
+ break;
case MCCFIInstruction::OpRelOffset:
OS << "rel_offset ";
if (MCSymbol *Label = CFI.getLabel())
@@ -927,7 +935,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
case MachineOperand::MO_IntrinsicID: {
Intrinsic::ID ID = getIntrinsicID();
if (ID < Intrinsic::num_intrinsics)
- OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')';
+ OS << "intrinsic(@" << Intrinsic::getBaseName(ID) << ')';
else if (IntrinsicInfo)
OS << "intrinsic(@" << IntrinsicInfo->getName(ID) << ')';
else
@@ -1015,13 +1023,12 @@ MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) {
}
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
- uint64_t s, Align a,
- const AAMDNodes &AAInfo,
+ LLT type, Align a, const AAMDNodes &AAInfo,
const MDNode *Ranges, SyncScope::ID SSID,
AtomicOrdering Ordering,
AtomicOrdering FailureOrdering)
- : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlign(a), AAInfo(AAInfo),
- Ranges(Ranges) {
+ : PtrInfo(ptrinfo), MemoryType(type), FlagVals(f), BaseAlign(a),
+ AAInfo(AAInfo), Ranges(Ranges) {
assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() ||
isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) &&
"invalid pointer value");
@@ -1030,16 +1037,26 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
AtomicInfo.SSID = static_cast<unsigned>(SSID);
assert(getSyncScopeID() == SSID && "Value truncated");
AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
- assert(getOrdering() == Ordering && "Value truncated");
+ assert(getSuccessOrdering() == Ordering && "Value truncated");
AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
assert(getFailureOrdering() == FailureOrdering && "Value truncated");
}
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
+ uint64_t s, Align a,
+ const AAMDNodes &AAInfo,
+ const MDNode *Ranges, SyncScope::ID SSID,
+ AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering)
+ : MachineMemOperand(ptrinfo, f,
+ s == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * s), a,
+ AAInfo, Ranges, SSID, Ordering, FailureOrdering) {}
+
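
// Sketch, not part of the patch above: creating a type-carrying operand via
// MachineFunction (the getMachineMemOperand overload taking an LLT is assumed
// to exist alongside this constructor); an invalid LLT() still models an
// unknown-size access.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

static MachineMemOperand *makeStackLoadMMO(MachineFunction &MF, int FI) {
  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                                 MachineMemOperand::MOLoad, LLT::scalar(32),
                                 Align(4));
}
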
/// Profile - Gather unique data for the object.
///
void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
ID.AddInteger(getOffset());
- ID.AddInteger(Size);
+ ID.AddInteger(getMemoryType().getUniqueRAWLLTData());
ID.AddPointer(getOpaqueValue());
ID.AddInteger(getFlags());
ID.AddInteger(getBaseAlign().value());
@@ -1060,10 +1077,6 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
}
}
-/// getAlignment - Return the minimum known alignment in bytes of the
-/// actual memory reference.
-uint64_t MachineMemOperand::getAlignment() const { return getAlign().value(); }
-
/// getAlign - Return the minimum known alignment in bytes of the
/// actual memory reference.
Align MachineMemOperand::getAlign() const {
@@ -1103,15 +1116,15 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
printSyncScope(OS, Context, getSyncScopeID(), SSNs);
- if (getOrdering() != AtomicOrdering::NotAtomic)
- OS << toIRString(getOrdering()) << ' ';
+ if (getSuccessOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(getSuccessOrdering()) << ' ';
if (getFailureOrdering() != AtomicOrdering::NotAtomic)
OS << toIRString(getFailureOrdering()) << ' ';
- if (getSize() == MemoryLocation::UnknownSize)
- OS << "unknown-size";
+ if (getMemoryType().isValid())
+ OS << '(' << getMemoryType() << ')';
else
- OS << getSize();
+ OS << "unknown-size";
if (const Value *Val = getValue()) {
OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
@@ -1160,9 +1173,14 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
break;
}
}
+ } else if (getOpaqueValue() == nullptr && getOffset() != 0) {
+ OS << ((isLoad() && isStore()) ? " on "
+ : isLoad() ? " from "
+ : " into ")
+ << "unknown-address";
}
MachineOperand::printOperandOffset(OS, getOffset());
- if (getAlign() != getSize())
+ if (getSize() > 0 && getAlign() != getSize())
OS << ", align " << getAlign().value();
if (getAlign() != getBaseAlign())
OS << ", basealign " << getBaseAlign().value();
diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index dcb8e4073ea3..59fc23983d3d 100644
--- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -93,7 +93,7 @@ static const char ore_name[] = "Machine Optimization Remark Emitter";
#define ORE_NAME "machine-opt-remark-emitter"
INITIALIZE_PASS_BEGIN(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
- false, true)
+ true, true)
INITIALIZE_PASS_DEPENDENCY(LazyMachineBlockFrequencyInfoPass)
INITIALIZE_PASS_END(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
- false, true)
+ true, true)
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 02998d41d831..1d55bd00e033 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -518,9 +518,8 @@ void MachineOutliner::findCandidates(
// First, find all of the repeated substrings in the tree of minimum length
// 2.
std::vector<Candidate> CandidatesForRepeatedSeq;
- for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
+ for (const SuffixTree::RepeatedSubstring &RS : ST) {
CandidatesForRepeatedSeq.clear();
- SuffixTree::RepeatedSubstring RS = *It;
unsigned StringLen = RS.Length;
for (const unsigned &StartIdx : RS.StartIndices) {
unsigned EndIdx = StartIdx + StringLen - 1;
@@ -807,7 +806,7 @@ bool MachineOutliner::outline(Module &M,
if (MOP.isDef()) {
// Introduce DefRegs set to skip the redundant register.
DefRegs.insert(MOP.getReg());
- if (UseRegs.count(MOP.getReg()))
+ if (!MOP.isDead() && UseRegs.count(MOP.getReg()))
// Since the register is modeled as defined,
// it is not necessary to put it in the use register set.
UseRegs.erase(MOP.getReg());
diff --git a/llvm/lib/CodeGen/MachinePassManager.cpp b/llvm/lib/CodeGen/MachinePassManager.cpp
index e81575c88935..476dc059d2b5 100644
--- a/llvm/lib/CodeGen/MachinePassManager.cpp
+++ b/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -49,11 +49,6 @@ Error MachineFunctionPassManager::run(Module &M,
});
}
- if (DebugLogging) {
- dbgs() << "Starting " << getTypeName<MachineFunction>()
- << " pass manager run.\n";
- }
-
for (auto &F : InitializationFuncs) {
if (auto Err = F(M, MFAM))
return Err;
@@ -64,9 +59,6 @@ Error MachineFunctionPassManager::run(Module &M,
do {
// Run machine module passes
for (; MachineModulePasses.count(Idx) && Idx != Size; ++Idx) {
- if (DebugLogging)
- dbgs() << "Running pass: " << Passes[Idx]->name() << " on "
- << M.getName() << '\n';
if (auto Err = MachineModulePasses.at(Idx)(M, MFAM))
return Err;
}
@@ -110,11 +102,6 @@ Error MachineFunctionPassManager::run(Module &M,
return Err;
}
- if (DebugLogging) {
- dbgs() << "Finished " << getTypeName<MachineFunction>()
- << " pass manager run.\n";
- }
-
return Error::success();
}
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index d0fe29f65ede..caa3f8049aeb 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -34,6 +34,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -812,11 +813,10 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
SU.addPred(Dep);
continue;
}
- AliasResult AAResult = AA->alias(
- MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
- MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo()));
-
- if (AAResult != NoAlias) {
+ if (!AA->isNoAlias(
+ MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
+ MemoryLocation::getAfter(MMO2->getValue(),
+ MMO2->getAAInfo()))) {
SDep Dep(Load, SDep::Barrier);
Dep.setLatency(1);
SU.addPred(Dep);
@@ -949,10 +949,9 @@ void SwingSchedulerDAG::changeDependences() {
// Remove the dependence. The value now depends on a prior iteration.
SmallVector<SDep, 4> Deps;
- for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
- ++P)
- if (P->getSUnit() == DefSU)
- Deps.push_back(*P);
+ for (const SDep &P : I.Preds)
+ if (P.getSUnit() == DefSU)
+ Deps.push_back(P);
for (int i = 0, e = Deps.size(); i != e; i++) {
Topo.RemovePred(&I, Deps[i].getSUnit());
I.removePred(Deps[i]);
@@ -1203,12 +1202,10 @@ static void swapAntiDependences(std::vector<SUnit> &SUnits) {
DepsAdded.push_back(std::make_pair(SU, *IP));
}
}
- for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
- E = DepsAdded.end();
- I != E; ++I) {
+ for (std::pair<SUnit *, SDep> &P : DepsAdded) {
// Remove this anti dependency and add one in the reverse direction.
- SUnit *SU = I->first;
- SDep &D = I->second;
+ SUnit *SU = P.first;
+ SDep &D = P.second;
SUnit *TargetSU = D.getSUnit();
unsigned Reg = D.getReg();
unsigned Lat = D.getLatency();
@@ -1447,22 +1444,18 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
ScheduleInfo.resize(SUnits.size());
LLVM_DEBUG({
- for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
- E = Topo.end();
- I != E; ++I) {
- const SUnit &SU = SUnits[*I];
+ for (int I : Topo) {
+ const SUnit &SU = SUnits[I];
dumpNode(SU);
}
});
int maxASAP = 0;
// Compute ASAP and ZeroLatencyDepth.
- for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
- E = Topo.end();
- I != E; ++I) {
+ for (int I : Topo) {
int asap = 0;
int zeroLatencyDepth = 0;
- SUnit *SU = &SUnits[*I];
+ SUnit *SU = &SUnits[I];
for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
EP = SU->Preds.end();
IP != EP; ++IP) {
@@ -1476,8 +1469,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
getDistance(pred, SU, *IP) * MII));
}
maxASAP = std::max(maxASAP, asap);
- ScheduleInfo[*I].ASAP = asap;
- ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
+ ScheduleInfo[I].ASAP = asap;
+ ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
}
// Compute ALAP, ZeroLatencyHeight, and MOV.
@@ -1531,25 +1524,22 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
Preds.clear();
for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
I != E; ++I) {
- for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
- PI != PE; ++PI) {
- if (S && S->count(PI->getSUnit()) == 0)
+ for (const SDep &Pred : (*I)->Preds) {
+ if (S && S->count(Pred.getSUnit()) == 0)
continue;
- if (ignoreDependence(*PI, true))
+ if (ignoreDependence(Pred, true))
continue;
- if (NodeOrder.count(PI->getSUnit()) == 0)
- Preds.insert(PI->getSUnit());
+ if (NodeOrder.count(Pred.getSUnit()) == 0)
+ Preds.insert(Pred.getSUnit());
}
// Back-edges are predecessors with an anti-dependence.
- for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
- ES = (*I)->Succs.end();
- IS != ES; ++IS) {
- if (IS->getKind() != SDep::Anti)
+ for (const SDep &Succ : (*I)->Succs) {
+ if (Succ.getKind() != SDep::Anti)
continue;
- if (S && S->count(IS->getSUnit()) == 0)
+ if (S && S->count(Succ.getSUnit()) == 0)
continue;
- if (NodeOrder.count(IS->getSUnit()) == 0)
- Preds.insert(IS->getSUnit());
+ if (NodeOrder.count(Succ.getSUnit()) == 0)
+ Preds.insert(Succ.getSUnit());
}
}
return !Preds.empty();
@@ -1564,24 +1554,21 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder,
Succs.clear();
for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
I != E; ++I) {
- for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
- SI != SE; ++SI) {
- if (S && S->count(SI->getSUnit()) == 0)
+ for (SDep &Succ : (*I)->Succs) {
+ if (S && S->count(Succ.getSUnit()) == 0)
continue;
- if (ignoreDependence(*SI, false))
+ if (ignoreDependence(Succ, false))
continue;
- if (NodeOrder.count(SI->getSUnit()) == 0)
- Succs.insert(SI->getSUnit());
+ if (NodeOrder.count(Succ.getSUnit()) == 0)
+ Succs.insert(Succ.getSUnit());
}
- for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
- PE = (*I)->Preds.end();
- PI != PE; ++PI) {
- if (PI->getKind() != SDep::Anti)
+ for (SDep &Pred : (*I)->Preds) {
+ if (Pred.getKind() != SDep::Anti)
continue;
- if (S && S->count(PI->getSUnit()) == 0)
+ if (S && S->count(Pred.getSUnit()) == 0)
continue;
- if (NodeOrder.count(PI->getSUnit()) == 0)
- Succs.insert(PI->getSUnit());
+ if (NodeOrder.count(Pred.getSUnit()) == 0)
+ Succs.insert(Pred.getSUnit());
}
}
return !Succs.empty();
@@ -1613,14 +1600,6 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
return FoundPath;
}
-/// Return true if Set1 is a subset of Set2.
-template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
- for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
- if (Set2.count(*I) == 0)
- return false;
- return true;
-}
-
/// Compute the live-out registers for the instructions in a node-set.
/// The live-out registers are those that are defined in the node-set,
/// but not used. Except for use operands of Phis.
@@ -1724,7 +1703,7 @@ void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
SmallSetVector<SUnit *, 8> S2;
if (N2.empty() || !succ_L(N2, S2))
continue;
- if (isSubset(S1, S2) && S1.size() == S2.size()) {
+ if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) {
N1.setColocate(++Colocate);
N2.setColocate(Colocate);
break;
@@ -1807,11 +1786,10 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
// Create new node sets with the connected nodes of any remaining node that
// has no predecessor.
- for (unsigned i = 0; i < SUnits.size(); ++i) {
- SUnit *SU = &SUnits[i];
- if (NodesAdded.count(SU) == 0) {
+ for (SUnit &SU : SUnits) {
+ if (NodesAdded.count(&SU) == 0) {
NewSet.clear();
- addConnectedNodes(SU, NewSet, NodesAdded);
+ addConnectedNodes(&SU, NewSet, NodesAdded);
if (!NewSet.empty())
NodeSets.push_back(NewSet);
}
@@ -1858,9 +1836,8 @@ void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
if (NJ.compareRecMII(NI) > 0)
NI.setRecMII(NJ.getRecMII());
- for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
- ++NII)
- I->insert(*NII);
+ for (SUnit *SU : *J)
+ I->insert(SU);
NodeSets.erase(J);
E = NodeSets.end();
} else {
@@ -1898,11 +1875,11 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
OrderKind Order;
SmallSetVector<SUnit *, 8> N;
- if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
+ if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
R.insert(N.begin(), N.end());
Order = BottomUp;
LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
- } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
+ } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
R.insert(N.begin(), N.end());
Order = TopDown;
LLVM_DEBUG(dbgs() << " Top down (succs) ");
@@ -2049,9 +2026,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
}
bool scheduleFound = false;
- unsigned II = 0;
// Keep increasing II until a valid schedule is found.
- for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
+ for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
Schedule.reset();
Schedule.setInitiationInterval(II);
LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
@@ -2124,7 +2100,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
scheduleFound = Schedule.isValidSchedule(this);
}
- LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
+ LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
+ << " (II=" << Schedule.getInitiationInterval()
<< ")\n");
if (scheduleFound) {
@@ -2132,7 +2109,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
Pass.ORE->emit([&]() {
return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
- << "Schedule found with Initiation Interval: " << ore::NV("II", II)
+ << "Schedule found with Initiation Interval: "
+ << ore::NV("II", Schedule.getInitiationInterval())
<< ", MaxStageCount: "
<< ore::NV("MaxStageCount", Schedule.getMaxStageCount());
});
@@ -2404,14 +2382,12 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
checkCycle <= LastCycle; checkCycle += II) {
std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
- for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
- E = cycleInstrs.end();
- I != E; ++I) {
- if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
+ for (SUnit *CI : cycleInstrs) {
+ if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode()))
continue;
- assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
+ assert(ProcItinResources.canReserveResources(*CI->getInstr()) &&
"These instructions have already been scheduled.");
- ProcItinResources.reserveResources(*(*I)->getInstr());
+ ProcItinResources.reserveResources(*CI->getInstr());
}
}
if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
@@ -2742,8 +2718,7 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
// different stage than the definition. The pipeliner does not handle
// physical register values that may cross a basic block boundary.
bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
- for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
- SUnit &SU = SSD->SUnits[i];
+ for (SUnit &SU : SSD->SUnits) {
if (!SU.hasPhysRegDefs)
continue;
int StageDef = stageScheduled(&SU);
@@ -2939,14 +2914,12 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
std::deque<SUnit *> newOrderPhi;
- for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
- SUnit *SU = cycleInstrs[i];
+ for (SUnit *SU : cycleInstrs) {
if (SU->getInstr()->isPHI())
newOrderPhi.push_back(SU);
}
std::deque<SUnit *> newOrderI;
- for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
- SUnit *SU = cycleInstrs[i];
+ for (SUnit *SU : cycleInstrs) {
if (!SU->getInstr()->isPHI())
orderDependence(SSD, SU, newOrderI);
}
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 5325eda9d478..3f6b11e072b4 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -434,8 +434,8 @@ void MachineRegisterInfo::clearKillFlags(Register Reg) const {
}
bool MachineRegisterInfo::isLiveIn(Register Reg) const {
- for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
- if ((Register)I->first == Reg || I->second == Reg)
+ for (const std::pair<MCRegister, Register> &LI : liveins())
+ if ((Register)LI.first == Reg || LI.second == Reg)
return true;
return false;
}
@@ -443,18 +443,18 @@ bool MachineRegisterInfo::isLiveIn(Register Reg) const {
/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
/// corresponding live-in physical register.
MCRegister MachineRegisterInfo::getLiveInPhysReg(Register VReg) const {
- for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
- if (I->second == VReg)
- return I->first;
+ for (const std::pair<MCRegister, Register> &LI : liveins())
+ if (LI.second == VReg)
+ return LI.first;
return MCRegister();
}
/// getLiveInVirtReg - If PReg is a live-in physical register, return the
/// corresponding live-in virtual register.
Register MachineRegisterInfo::getLiveInVirtReg(MCRegister PReg) const {
- for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
- if (I->first == PReg)
- return I->second;
+ for (const std::pair<MCRegister, Register> &LI : liveins())
+ if (LI.first == PReg)
+ return LI.second;
return Register();
}
@@ -530,14 +530,11 @@ bool MachineRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
/// specified register as undefined which causes the DBG_VALUE to be
/// deleted during LiveDebugVariables analysis.
void MachineRegisterInfo::markUsesInDebugValueAsUndef(Register Reg) const {
- // Mark any DBG_VALUE that uses Reg as undef (but don't delete it.)
- MachineRegisterInfo::use_instr_iterator nextI;
- for (use_instr_iterator I = use_instr_begin(Reg), E = use_instr_end();
- I != E; I = nextI) {
- nextI = std::next(I); // I is invalidated by the setReg
- MachineInstr *UseMI = &*I;
- if (UseMI->isDebugValue())
- UseMI->getDebugOperandForReg(Reg)->setReg(0U);
+ // Mark any DBG_VALUE* that uses Reg as undef (but don't delete it.)
+ // We use make_early_inc_range because setReg invalidates the iterator.
+ for (MachineInstr &UseMI : llvm::make_early_inc_range(use_instructions(Reg))) {
+ if (UseMI.isDebugValue() && UseMI.hasDebugOperandForReg(Reg))
+ UseMI.setDebugValueUndef();
}
}
@@ -585,8 +582,9 @@ bool MachineRegisterInfo::isPhysRegModified(MCRegister PhysReg,
return false;
}
-bool MachineRegisterInfo::isPhysRegUsed(MCRegister PhysReg) const {
- if (UsedPhysRegMask.test(PhysReg))
+bool MachineRegisterInfo::isPhysRegUsed(MCRegister PhysReg,
+ bool SkipRegMaskTest) const {
+ if (!SkipRegMaskTest && UsedPhysRegMask.test(PhysReg))
return true;
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid();
diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 462082df5d05..930677e4fd7d 100644
--- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -164,9 +164,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
Register SingularValue;
bool isFirstPred = true;
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
- E = BB->pred_end(); PI != E; ++PI) {
- MachineBasicBlock *PredBB = *PI;
+ for (MachineBasicBlock *PredBB : BB->predecessors()) {
Register PredVal = GetValueAtEndOfBlockInternal(PredBB);
PredValues.push_back(std::make_pair(PredBB, PredVal));
@@ -236,10 +234,10 @@ void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
U.setReg(NewVR);
}
-/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
-/// template, specialized for MachineSSAUpdater.
namespace llvm {
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
template<>
class SSAUpdaterTraits<MachineSSAUpdater> {
public:
@@ -284,9 +282,7 @@ public:
/// vector.
static void FindPredecessorBlocks(MachineBasicBlock *BB,
SmallVectorImpl<MachineBasicBlock*> *Preds){
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
- E = BB->pred_end(); PI != E; ++PI)
- Preds->push_back(*PI);
+ append_range(*Preds, BB->predecessors());
}
/// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 8d51bb26103a..4f42a2c8aeff 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -297,7 +297,7 @@ priorNonDebug(MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator Beg) {
assert(I != Beg && "reached the top of the region, cannot decrement");
while (--I != Beg) {
- if (!I->isDebugInstr())
+ if (!I->isDebugOrPseudoInstr())
break;
}
return I;
@@ -317,7 +317,7 @@ static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator End) {
for(; I != End; ++I) {
- if (!I->isDebugInstr())
+ if (!I->isDebugOrPseudoInstr())
break;
}
return I;
@@ -508,7 +508,7 @@ getSchedRegions(MachineBasicBlock *MBB,
MachineInstr &MI = *std::prev(I);
if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
- if (!MI.isDebugInstr()) {
+ if (!MI.isDebugOrPseudoInstr()) {
// MBB::size() uses instr_iterator to count. Here we need a bundle to
// count as a single instruction.
++NumRegionInstrs;
@@ -927,8 +927,8 @@ void ScheduleDAGMI::placeDebugValues() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
- for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
- if (SUnit *SU = getSUnit(&(*MI)))
+ for (MachineInstr &MI : *this) {
+ if (SUnit *SU = getSUnit(&MI))
dumpNode(*SU);
else
dbgs() << "Missing SUnit\n";
@@ -1927,17 +1927,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
}
LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
// Add the weak edges.
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
- LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
+ for (SUnit *LU : LocalUses) {
+ LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
<< GlobalSU->NodeNum << ")\n");
- DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
+ DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
}
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
- LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
+ for (SUnit *GU : GlobalUses) {
+ LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
<< FirstLocalSU->NodeNum << ")\n");
- DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
+ DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
}
}
@@ -2006,6 +2004,7 @@ void SchedBoundary::reset() {
IsResourceLimited = false;
ReservedCycles.clear();
ReservedCyclesIndex.clear();
+ ResourceGroupSubUnitMasks.clear();
#ifndef NDEBUG
// Track the maximum number of stall cycles that could arise either from the
// latency of a DAG edge or the number of cycles that a processor resource is
@@ -2047,11 +2046,18 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
ReservedCyclesIndex.resize(ResourceCount);
ExecutedResCounts.resize(ResourceCount);
+ ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
unsigned NumUnits = 0;
for (unsigned i = 0; i < ResourceCount; ++i) {
ReservedCyclesIndex[i] = NumUnits;
NumUnits += SchedModel->getProcResource(i)->NumUnits;
+ if (isUnbufferedGroup(i)) {
+ auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
+ for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
+ U != UE; ++U)
+ ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
+ }
}
ReservedCycles.resize(NumUnits, InvalidCycle);
@@ -2093,7 +2099,9 @@ unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
/// scheduled. Returns the next cycle and the index of the processor resource
/// instance in the reserved cycles vector.
std::pair<unsigned, unsigned>
-SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
+ unsigned Cycles) {
+
unsigned MinNextUnreserved = InvalidCycle;
unsigned InstanceIdx = 0;
unsigned StartIndex = ReservedCyclesIndex[PIdx];
@@ -2101,6 +2109,35 @@ SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
assert(NumberOfInstances > 0 &&
"Cannot have zero instances of a ProcResource");
+ if (isUnbufferedGroup(PIdx)) {
+ // If any subunits are used by the instruction, report that the resource
+ // group is available at 0, effectively removing the group record from
+ // hazarding and basing the hazarding decisions on the subunit records.
+ // Otherwise, choose the first available instance from among the subunits.
+ // Specifications which assign cycles to both the subunits and the group or
+ // which use an unbuffered group with buffered subunits will appear to
+ // schedule strangely. In the first case, the additional cycles for the
+ // group will be ignored. In the second, the group will be ignored
+ // entirely.
+ for (const MCWriteProcResEntry &PE :
+ make_range(SchedModel->getWriteProcResBegin(SC),
+ SchedModel->getWriteProcResEnd(SC)))
+ if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
+ return std::make_pair(0u, StartIndex);
+
+ auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
+ for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
+ unsigned NextUnreserved, NextInstanceIdx;
+ std::tie(NextUnreserved, NextInstanceIdx) =
+ getNextResourceCycle(SC, SubUnits[I], Cycles);
+ if (MinNextUnreserved > NextUnreserved) {
+ InstanceIdx = NextInstanceIdx;
+ MinNextUnreserved = NextUnreserved;
+ }
+ }
+ return std::make_pair(MinNextUnreserved, InstanceIdx);
+ }
+
for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
++I) {
unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
@@ -2154,7 +2191,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
unsigned ResIdx = PE.ProcResourceIdx;
unsigned Cycles = PE.Cycles;
unsigned NRCycle, InstanceIdx;
- std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
+ std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
if (NRCycle > CurrCycle) {
#ifndef NDEBUG
MaxObservedStall = std::max(Cycles, MaxObservedStall);
@@ -2304,8 +2341,8 @@ void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
///
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
-unsigned SchedBoundary::
-countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
+unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
+ unsigned Cycles, unsigned NextCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
unsigned Count = Factor * Cycles;
LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
@@ -2327,7 +2364,7 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
}
// For reserved resources, record the highest cycle using the resource.
unsigned NextAvailable, InstanceIdx;
- std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
+ std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
if (NextAvailable > CurrCycle) {
LLVM_DEBUG(dbgs() << " Resource conflict: "
<< SchedModel->getResourceName(PIdx)
@@ -2407,7 +2444,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned RCycle =
- countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
+ countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
if (RCycle > NextCycle)
NextCycle = RCycle;
}
@@ -2422,7 +2459,8 @@ void SchedBoundary::bumpNode(SUnit *SU) {
unsigned PIdx = PI->ProcResourceIdx;
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
unsigned ReservedUntil, InstanceIdx;
- std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
+ std::tie(ReservedUntil, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, 0);
if (isTop()) {
ReservedCycles[InstanceIdx] =
std::max(ReservedUntil, NextCycle + PI->Cycles);
@@ -2780,6 +2818,8 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
namespace llvm {
/// Return true if this heuristic determines order.
+/// TODO: Consider refactoring these functions to return an integer or enum,
+/// as we may need to differentiate whether TryCand is better than Cand.
bool tryLess(int TryVal, int CandVal,
GenericSchedulerBase::SchedCandidate &TryCand,
GenericSchedulerBase::SchedCandidate &Cand,
@@ -3138,34 +3178,35 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending, or nullptr
-// if Cand is from a different zone than TryCand.
-void GenericScheduler::tryCandidate(SchedCandidate &Cand,
+/// if Cand is from a different zone than TryCand.
+/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
+bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary *Zone) const {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
- return;
+ return true;
}
// Bias PhysReg Defs and copies to their uses and defined respectively.
if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
- return;
+ return TryCand.Reason != NoCand;
// Avoid exceeding the target's limit.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
Cand.RPDelta.Excess,
TryCand, Cand, RegExcess, TRI,
DAG->MF))
- return;
+ return TryCand.Reason != NoCand;
// Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
Cand.RPDelta.CriticalMax,
TryCand, Cand, RegCritical, TRI,
DAG->MF))
- return;
+ return TryCand.Reason != NoCand;
// We only compare a subset of features when comparing nodes between
// Top and Bottom boundary. Some properties are simply incomparable, in many
@@ -3179,12 +3220,12 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// heuristics to take precedence.
if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
tryLatency(TryCand, Cand, *Zone))
- return;
+ return TryCand.Reason != NoCand;
// Prioritize instructions that read unbuffered resources by stall cycles.
if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
- return;
+ return TryCand.Reason != NoCand;
}
// Keep clustered nodes together to encourage downstream peephole
@@ -3200,14 +3241,14 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
if (tryGreater(TryCand.SU == TryCandNextClusterSU,
Cand.SU == CandNextClusterSU,
TryCand, Cand, Cluster))
- return;
+ return TryCand.Reason != NoCand;
if (SameBoundary) {
// Weak edges are for clustering and other constraints.
if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
getWeakLeft(Cand.SU, Cand.AtTop),
TryCand, Cand, Weak))
- return;
+ return TryCand.Reason != NoCand;
}
// Avoid increasing the max pressure of the entire region.
@@ -3215,31 +3256,34 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
Cand.RPDelta.CurrentMax,
TryCand, Cand, RegMax, TRI,
DAG->MF))
- return;
+ return TryCand.Reason != NoCand;
if (SameBoundary) {
// Avoid critical resource consumption and balance the schedule.
TryCand.initResourceDelta(DAG, SchedModel);
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
- return;
+ return TryCand.Reason != NoCand;
if (tryGreater(TryCand.ResDelta.DemandedResources,
Cand.ResDelta.DemandedResources,
TryCand, Cand, ResourceDemand))
- return;
+ return TryCand.Reason != NoCand;
// Avoid serializing long latency dependence chains.
// For acyclic path limited loops, latency was already checked above.
if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
- return;
+ return TryCand.Reason != NoCand;
// Fall through to original instruction order.
if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
|| (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
TryCand.Reason = NodeOrder;
+ return true;
}
}
+
+ return false;
}
/// Pick the best candidate from the queue.
@@ -3261,8 +3305,7 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
- tryCandidate(Cand, TryCand, ZoneArg);
- if (TryCand.Reason != NoCand) {
+ if (tryCandidate(Cand, TryCand, ZoneArg)) {
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(DAG, SchedModel);
@@ -3340,8 +3383,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
assert(TopCand.isValid());
SchedCandidate Cand = BotCand;
TopCand.Reason = NoCand;
- tryCandidate(Cand, TopCand, nullptr);
- if (TopCand.Reason != NoCand) {
+ if (tryCandidate(Cand, TopCand, nullptr)) {
Cand.setBest(TopCand);
LLVM_DEBUG(traceCandidate(Cand));
}
@@ -3505,42 +3547,47 @@ void PostGenericScheduler::registerRoots() {
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
-void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
+/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
+bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand) {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
- return;
+ return true;
}
// Prioritize instructions that read unbuffered resources by stall cycles.
if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
- return;
+ return TryCand.Reason != NoCand;
// Keep clustered nodes together.
if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
Cand.SU == DAG->getNextClusterSucc(),
TryCand, Cand, Cluster))
- return;
+ return TryCand.Reason != NoCand;
// Avoid critical resource consumption and balance the schedule.
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
- return;
+ return TryCand.Reason != NoCand;
if (tryGreater(TryCand.ResDelta.DemandedResources,
Cand.ResDelta.DemandedResources,
TryCand, Cand, ResourceDemand))
- return;
+ return TryCand.Reason != NoCand;
// Avoid serializing long latency dependence chains.
if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
- return;
+ return TryCand.Reason != NoCand;
}
// Fall through to original instruction order.
- if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+ return false;
}
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
@@ -3550,8 +3597,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
TryCand.SU = SU;
TryCand.AtTop = true;
TryCand.initResourceDelta(DAG, SchedModel);
- tryCandidate(Cand, TryCand);
- if (TryCand.Reason != NoCand) {
+ if (tryCandidate(Cand, TryCand)) {
Cand.setBest(TryCand);
LLVM_DEBUG(traceCandidate(Cand));
}
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 378df1b75e25..ec98394dca79 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -16,6 +16,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
@@ -91,7 +92,19 @@ static cl::opt<unsigned> SinkLoadBlocksThreshold(
"the straight line is higher than this threshold."),
cl::init(20), cl::Hidden);
+static cl::opt<bool>
+SinkInstsIntoLoop("sink-insts-to-avoid-spills",
+ cl::desc("Sink instructions into loops to avoid "
+ "register spills"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> SinkIntoLoopLimit(
+ "machine-sink-loop-limit",
+ cl::desc("The maximum number of instructions considered for loop sinking."),
+ cl::init(50), cl::Hidden);
+
STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
@@ -216,6 +229,11 @@ namespace {
bool &LocalUse) const;
MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
+
+ void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
+ SmallVectorImpl<MachineInstr *> &Candidates);
+ bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
+
bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *SuccToSinkTo,
@@ -340,6 +358,60 @@ bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
return true;
}
+/// Return true if this machine instruction loads from the global offset table
+/// or the constant pool.
+static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
+ assert(MI.mayLoad() && "Expected MI that loads!");
+
+ // If we lost memory operands, conservatively assume that the instruction
+  // reads from everything.
+ if (MI.memoperands_empty())
+ return true;
+
+ for (MachineMemOperand *MemOp : MI.memoperands())
+ if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
+ if (PSV->isGOT() || PSV->isConstantPool())
+ return true;
+
+ return false;
+}
+
+void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
+ SmallVectorImpl<MachineInstr *> &Candidates) {
+ for (auto &MI : *BB) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI);
+ if (!TII->shouldSink(MI)) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this "
+ "target\n");
+ continue;
+ }
+ if (!L->isLoopInvariant(MI)) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
+ continue;
+ }
+ bool DontMoveAcrossStore = true;
+ if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
+ continue;
+ }
+ if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
+ continue;
+ }
+ if (MI.isConvergent())
+ continue;
+
+ const MachineOperand &MO = MI.getOperand(0);
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ continue;
+ if (!MRI->hasOneDef(MO.getReg()))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
+ Candidates.push_back(&MI);
+ }
+}
+
bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -389,6 +461,37 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
EverMadeChange = true;
}
+ if (SinkInstsIntoLoop) {
+ SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end());
+ for (auto *L : Loops) {
+ MachineBasicBlock *Preheader = LI->findLoopPreheader(L);
+ if (!Preheader) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n");
+ continue;
+ }
+ SmallVector<MachineInstr *, 8> Candidates;
+ FindLoopSinkCandidates(L, Preheader, Candidates);
+
+ // Walk the candidates in reverse order so that we start with the use
+ // of a def-use chain, if there is any.
+ // TODO: Sort the candidates using a cost-model.
+ unsigned i = 0;
+ for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
+ if (i++ == SinkIntoLoopLimit) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to "
+ "be analysed.");
+ break;
+ }
+
+ MachineInstr *I = *It;
+ if (!SinkIntoLoop(L, *I))
+ break;
+ EverMadeChange = true;
+ ++NumLoopSunk;
+ }
+ }
+ }
+
HasStoreCache.clear();
StoreInstrCache.clear();
@@ -427,7 +530,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (!ProcessedBegin)
--I;
- if (MI.isDebugInstr()) {
+ if (MI.isDebugOrPseudoInstr()) {
if (MI.isDebugValue())
ProcessDbgInst(MI);
continue;
@@ -464,9 +567,10 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
MI.getDebugLoc()->getInlinedAt());
bool SeenBefore = SeenDbgVars.contains(Var);
- MachineOperand &MO = MI.getDebugOperand(0);
- if (MO.isReg() && MO.getReg().isVirtual())
- SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
+ for (MachineOperand &MO : MI.debug_operands()) {
+ if (MO.isReg() && MO.getReg().isVirtual())
+ SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
+ }
// Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
SeenDbgVars.insert(Var);
@@ -614,7 +718,7 @@ MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
MIE = MBB.instr_begin();
MII != MIE; --MII) {
MachineInstr &MI = *std::prev(MII);
- if (MI.isDebugValue() || MI.isDebugLabel())
+ if (MI.isDebugInstr() || MI.isPseudoProbe())
continue;
RegisterOperands RegOpers;
RegOpers.collect(MI, *TRI, *MRI, false, false);
@@ -926,14 +1030,14 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
/// there's any subregister weirdness involved. Returns true if copy
/// propagation occurred.
-static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) {
+static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
+ Register Reg) {
const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();
// Copy DBG_VALUE operand and set the original to undef. We then check to
// see whether this is something that can be copy-forwarded. If it isn't,
// continue around the loop.
- MachineOperand &DbgMO = DbgMI.getDebugOperand(0);
const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
auto CopyOperands = TII.isCopyInstr(SinkInst);
@@ -946,36 +1050,41 @@ static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) {
bool PostRA = MRI.getNumVirtRegs() == 0;
// Trying to forward between physical and virtual registers is too hard.
- if (DbgMO.getReg().isVirtual() != SrcMO->getReg().isVirtual())
+ if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
return false;
// Only try virtual register copy-forwarding before regalloc, and physical
// register copy-forwarding after regalloc.
- bool arePhysRegs = !DbgMO.getReg().isVirtual();
+ bool arePhysRegs = !Reg.isVirtual();
if (arePhysRegs != PostRA)
return false;
// Pre-regalloc, only forward if all subregisters agree (or there are no
// subregs at all). More analysis might recover some forwardable copies.
- if (!PostRA && (DbgMO.getSubReg() != SrcMO->getSubReg() ||
- DbgMO.getSubReg() != DstMO->getSubReg()))
- return false;
+ if (!PostRA)
+ for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
+ if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
+ DbgMO.getSubReg() != DstMO->getSubReg())
+ return false;
// Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
// of this copy. Only forward the copy if the DBG_VALUE operand exactly
// matches the copy destination.
- if (PostRA && DbgMO.getReg() != DstMO->getReg())
+ if (PostRA && Reg != DstMO->getReg())
return false;
- DbgMO.setReg(SrcMO->getReg());
- DbgMO.setSubReg(SrcMO->getSubReg());
+ for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
+ DbgMO.setReg(SrcMO->getReg());
+ DbgMO.setSubReg(SrcMO->getSubReg());
+ }
return true;
}
+using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;
/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
MachineBasicBlock::iterator InsertPos,
- SmallVectorImpl<MachineInstr *> &DbgValuesToSink) {
+ SmallVectorImpl<MIRegs> &DbgValuesToSink) {
// If we cannot find a location to use (merge with), then we erase the debug
// location to prevent debug-info driven tools from potentially reporting
@@ -995,14 +1104,21 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
// DBG_VALUE location as 'undef', indicating that any earlier variable
// location should be terminated as we've optimised away the value at this
// point.
- for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
- DBE = DbgValuesToSink.end();
- DBI != DBE; ++DBI) {
- MachineInstr *DbgMI = *DBI;
- MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI);
+ for (auto DbgValueToSink : DbgValuesToSink) {
+ MachineInstr *DbgMI = DbgValueToSink.first;
+ MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
SuccToSinkTo.insert(InsertPos, NewDbgMI);
- if (!attemptDebugCopyProp(MI, *DbgMI))
+ bool PropagatedAllSunkOps = true;
+ for (unsigned Reg : DbgValueToSink.second) {
+ if (DbgMI->hasDebugOperandForReg(Reg)) {
+ if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
+ PropagatedAllSunkOps = false;
+ break;
+ }
+ }
+ }
+ if (!PropagatedAllSunkOps)
DbgMI->setDebugValueUndef();
}
}
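
Because a variadic DBG_VALUE_LIST can name several registers, the sinking code now carries a register list alongside each debug instruction and rewrites operands through getDebugOperandsForReg() rather than touching operand 0 only. A minimal sketch of that per-register update pattern (redirectDebugUses is a hypothetical helper, not part of this change):

    // Point every debug operand of DbgMI that currently reads 'From' at 'To'.
    static void redirectDebugUses(MachineInstr &DbgMI, Register From,
                                  Register To) {
      for (MachineOperand &MO : DbgMI.getDebugOperandsForReg(From)) {
        MO.setReg(To);
        MO.setSubReg(0); // Subregister handling is ignored in this sketch.
      }
    }
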
@@ -1098,6 +1214,77 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
return HasAliasedStore;
}
+/// Sink instructions into loops if profitable. This especially tries to prevent
+/// register spills caused by register pressure if there is little to no
+/// overhead in moving instructions into the loop.
+bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I);
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ assert(Preheader && "Loop sink needs a preheader block");
+ MachineBasicBlock *SinkBlock = nullptr;
+ bool CanSink = true;
+ const MachineOperand &MO = I.getOperand(0);
+
+ for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI);
+ if (!L->contains(&MI)) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n");
+ CanSink = false;
+ break;
+ }
+
+    // FIXME: Come up with a proper cost model that estimates whether sinking
+    //        the instruction (and thus possibly executing it on every loop
+    //        iteration) is more expensive than keeping the value live in a
+    //        register across the loop.
+    // For now, assume that copies are cheap and thus almost always worth it.
+ if (!MI.isCopy()) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n");
+ CanSink = false;
+ break;
+ }
+ if (!SinkBlock) {
+ SinkBlock = MI.getParent();
+ LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: "
+ << printMBBReference(*SinkBlock) << "\n");
+ continue;
+ }
+ SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
+ if (!SinkBlock) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n");
+ CanSink = false;
+ break;
+ }
+ LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " <<
+ printMBBReference(*SinkBlock) << "\n");
+ }
+
+ if (!CanSink) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n");
+ return false;
+ }
+ if (!SinkBlock) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n");
+ return false;
+ }
+ if (SinkBlock == Preheader) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
+ return false;
+ }
+ if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
+ SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
+
+ // The instruction is moved from its basic block, so do not retain the
+ // debug information.
+ assert(!I.isDebugInstr() && "Should not sink debug inst");
+ I.setDebugLoc(DebugLoc());
+ return true;
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -1214,7 +1401,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
++InsertPos;
// Collect debug users of any vreg that this inst defines.
- SmallVector<MachineInstr *, 4> DbgUsersToSink;
+ SmallVector<MIRegs, 4> DbgUsersToSink;
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
continue;
@@ -1228,10 +1415,11 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
if (User.getInt()) {
// This DBG_VALUE would re-order assignments. If we can't copy-propagate
// it, it can't be recovered. Set it undef.
- if (!attemptDebugCopyProp(MI, *DbgMI))
+ if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg()))
DbgMI->setDebugValueUndef();
} else {
- DbgUsersToSink.push_back(DbgMI);
+ DbgUsersToSink.push_back(
+ {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())});
}
}
}
@@ -1266,10 +1454,12 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
// be sunk. For the rest, if they are not dominated by the block we will sink
// MI into, propagate the copy source to them.
SmallVector<MachineInstr *, 4> DbgDefUsers;
+ SmallVector<Register, 4> DbgUseRegs;
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
continue;
+ DbgUseRegs.push_back(MO.getReg());
for (auto &User : MRI.use_instructions(MO.getReg())) {
if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
continue;
@@ -1278,8 +1468,8 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
if (User.getParent() == MI.getParent())
continue;
- assert(User.getDebugOperand(0).isReg() &&
- "DBG_VALUE user of vreg, but non reg operand?");
+ assert(User.hasDebugOperandForReg(MO.getReg()) &&
+ "DBG_VALUE user of vreg, but has no operand for it?");
DbgDefUsers.push_back(&User);
}
}
@@ -1287,8 +1477,12 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
// Point the users of this copy that are no longer dominated, at the source
// of the copy.
for (auto *User : DbgDefUsers) {
- User->getDebugOperand(0).setReg(MI.getOperand(1).getReg());
- User->getDebugOperand(0).setSubReg(MI.getOperand(1).getSubReg());
+ for (auto &Reg : DbgUseRegs) {
+ for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
+ DbgOp.setReg(MI.getOperand(1).getReg());
+ DbgOp.setSubReg(MI.getOperand(1).getSubReg());
+ }
+ }
}
}
@@ -1351,8 +1545,10 @@ private:
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
/// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
- /// entry in this map for each unit it touches.
- DenseMap<unsigned, TinyPtrVector<MachineInstr *>> SeenDbgInstrs;
+ /// entry in this map for each unit it touches. The DBG_VALUE's entry
+ /// consists of a pointer to the instruction itself, and a vector of registers
+ /// referred to by the instruction that overlap the key register unit.
+ DenseMap<unsigned, SmallVector<MIRegs, 2>> SeenDbgInstrs;
/// Sink Copy instructions unused in the same block close to their uses in
/// successors.
@@ -1534,23 +1730,32 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// We must sink this DBG_VALUE if its operand is sunk. To avoid searching
// for DBG_VALUEs later, record them when they're encountered.
if (MI->isDebugValue()) {
- auto &MO = MI->getDebugOperand(0);
- if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
- // Bail if we can already tell the sink would be rejected, rather
- // than needlessly accumulating lots of DBG_VALUEs.
- if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
- ModifiedRegUnits, UsedRegUnits))
- continue;
-
- // Record debug use of each reg unit.
- SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI);
- for (MCRegister Reg : Units)
- SeenDbgInstrs[Reg].push_back(MI);
+ SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
+ bool IsValid = true;
+ for (MachineOperand &MO : MI->debug_operands()) {
+ if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
+ // Bail if we can already tell the sink would be rejected, rather
+ // than needlessly accumulating lots of DBG_VALUEs.
+ if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ ModifiedRegUnits, UsedRegUnits)) {
+ IsValid = false;
+ break;
+ }
+
+ // Record debug use of each reg unit.
+ SmallSet<MCRegister, 4> RegUnits = getRegUnits(MO.getReg(), TRI);
+ for (MCRegister Reg : RegUnits)
+ MIUnits[Reg].push_back(MO.getReg());
+ }
+ }
+ if (IsValid) {
+ for (auto RegOps : MIUnits)
+ SeenDbgInstrs[RegOps.first].push_back({MI, RegOps.second});
}
continue;
}
- if (MI->isDebugInstr())
+ if (MI->isDebugOrPseudoInstr())
continue;
// Do not move any instruction across function call.
@@ -1587,18 +1792,22 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// Collect DBG_VALUEs that must sink with this copy. We've previously
// recorded which reg units that DBG_VALUEs read, if this instruction
// writes any of those units then the corresponding DBG_VALUEs must sink.
- SetVector<MachineInstr *> DbgValsToSinkSet;
+ MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
for (auto &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI);
- for (MCRegister Reg : Units)
- for (auto *MI : SeenDbgInstrs.lookup(Reg))
- DbgValsToSinkSet.insert(MI);
+ for (MCRegister Reg : Units) {
+ for (auto MIRegs : SeenDbgInstrs.lookup(Reg)) {
+ auto &Regs = DbgValsToSinkMap[MIRegs.first];
+ for (unsigned Reg : MIRegs.second)
+ Regs.push_back(Reg);
+ }
+ }
}
- SmallVector<MachineInstr *, 4> DbgValsToSink(DbgValsToSinkSet.begin(),
- DbgValsToSinkSet.end());
+ SmallVector<MIRegs, 4> DbgValsToSink(DbgValsToSinkMap.begin(),
+ DbgValsToSinkMap.end());
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
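
The loop-sink candidate collection added in this file consults TargetInstrInfo::shouldSink(), so a backend can veto sinking for instructions it prefers to keep in the preheader. A hedged sketch of such an override; MyTargetInstrInfo and MYTGT::LOAD_IMM_PSEUDO are illustrative names, not real LLVM entities:

    bool MyTargetInstrInfo::shouldSink(const MachineInstr &MI) const {
      // Keep cheap immediate-materialization pseudos where they are; anything
      // else falls back to the default, which permits sinking.
      if (MI.getOpcode() == MYTGT::LOAD_IMM_PSEUDO)
        return false;
      return TargetInstrInfo::shouldSink(MI);
    }
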
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index fb14f0a33209..0803c2b8b85a 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -182,7 +182,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
HashComponents.push_back(static_cast<unsigned>(Op->getSize()));
HashComponents.push_back(static_cast<unsigned>(Op->getFlags()));
HashComponents.push_back(static_cast<unsigned>(Op->getOffset()));
- HashComponents.push_back(static_cast<unsigned>(Op->getOrdering()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getSuccessOrdering()));
HashComponents.push_back(static_cast<unsigned>(Op->getAddrSpace()));
HashComponents.push_back(static_cast<unsigned>(Op->getSyncScopeID()));
HashComponents.push_back(static_cast<unsigned>(Op->getBaseAlign().value()));
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 0f6d9b888f47..7e3198af02cd 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -223,6 +223,7 @@ namespace {
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum,
LLT MOVRegType = LLT{});
+ void report(const Twine &Msg, const MachineInstr *MI);
void report_context(const LiveInterval &LI) const;
void report_context(const LiveRange &LR, Register VRegUnit,
@@ -500,6 +501,10 @@ void MachineVerifier::report(const char *msg, const MachineOperand *MO,
errs() << "\n";
}
+void MachineVerifier::report(const Twine &Msg, const MachineInstr *MI) {
+ report(Msg.str().c_str(), MI);
+}
+
void MachineVerifier::report_context(SlotIndex Pos) const {
errs() << "- at: " << Pos << '\n';
}
@@ -940,7 +945,46 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report(ErrorInfo.data(), MI);
// Verify properties of various specific instruction types
- switch (MI->getOpcode()) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::G_ASSERT_SEXT:
+ case TargetOpcode::G_ASSERT_ZEXT: {
+ std::string OpcName =
+ Opc == TargetOpcode::G_ASSERT_ZEXT ? "G_ASSERT_ZEXT" : "G_ASSERT_SEXT";
+ if (!MI->getOperand(2).isImm()) {
+ report(Twine(OpcName, " expects an immediate operand #2"), MI);
+ break;
+ }
+
+ Register Dst = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
+ LLT SrcTy = MRI->getType(Src);
+ int64_t Imm = MI->getOperand(2).getImm();
+ if (Imm <= 0) {
+ report(Twine(OpcName, " size must be >= 1"), MI);
+ break;
+ }
+
+ if (Imm >= SrcTy.getScalarSizeInBits()) {
+ report(Twine(OpcName, " size must be less than source bit width"), MI);
+ break;
+ }
+
+ if (MRI->getRegBankOrNull(Src) != MRI->getRegBankOrNull(Dst)) {
+ report(
+ Twine(OpcName, " source and destination register banks must match"),
+ MI);
+ break;
+ }
+
+ if (MRI->getRegClassOrNull(Src) != MRI->getRegClassOrNull(Dst))
+ report(
+ Twine(OpcName, " source and destination register classes must match"),
+ MI);
+
+ break;
+ }
+
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
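
For context, the checks above expect the asserted width to be an immediate that is at least 1 and strictly smaller than the source scalar width, with matching register banks and classes. A hedged sketch of building a conforming instruction, assuming MBB, InsertPt, DL, TII, and 32-bit generic registers Dst and Src already exist in the surrounding code:

    // %dst:_(s32) = G_ASSERT_ZEXT %src(s32), 8 ; only the low 8 bits are set
    BuildMI(MBB, InsertPt, DL, TII.get(TargetOpcode::G_ASSERT_ZEXT), Dst)
        .addReg(Src)
        .addImm(8);
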
@@ -1241,6 +1285,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (!DstTy.isVector() || !SrcTy.isVector())
report("G_CONCAT_VECTOR requires vector source and destination operands",
MI);
+
+ if (MI->getNumOperands() < 3)
+ report("G_CONCAT_VECTOR requires at least 2 source operands", MI);
+
for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
if (MRI->getType(MI->getOperand(1).getReg()) !=
MRI->getType(MI->getOperand(i).getReg()))
@@ -1363,10 +1411,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
- verifyVectorElementMatch(DstTy, SrcTy, MI);
-
int64_t Imm = MI->getOperand(2).getImm();
if (Imm <= 0)
report("G_SEXT_INREG size must be >= 1", MI);
@@ -1432,6 +1477,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_MEMCPY_INLINE:
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE: {
ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
@@ -1462,28 +1508,38 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
report("inconsistent load address space", MI);
+ if (Opc != TargetOpcode::G_MEMCPY_INLINE)
+ if (!MI->getOperand(3).isImm() || (MI->getOperand(3).getImm() & ~1LL))
+ report("'tail' flag (operand 3) must be an immediate 0 or 1", MI);
+
break;
}
+ case TargetOpcode::G_BZERO:
case TargetOpcode::G_MEMSET: {
ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
+ std::string Name = Opc == TargetOpcode::G_MEMSET ? "memset" : "bzero";
if (MMOs.size() != 1) {
- report("memset must have 1 memory operand", MI);
+ report(Twine(Name, " must have 1 memory operand"), MI);
break;
}
if ((!MMOs[0]->isStore() || MMOs[0]->isLoad())) {
- report("memset memory operand must be a store", MI);
+ report(Twine(Name, " memory operand must be a store"), MI);
break;
}
LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg());
if (!DstPtrTy.isPointer()) {
- report("memset operand must be a pointer", MI);
+ report(Twine(Name, " operand must be a pointer"), MI);
break;
}
if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
- report("inconsistent memset address space", MI);
+ report("inconsistent " + Twine(Name, " address space"), MI);
+
+ if (!MI->getOperand(MI->getNumOperands() - 1).isImm() ||
+ (MI->getOperand(MI->getNumOperands() - 1).getImm() & ~1LL))
+ report("'tail' flag (last operand) must be an immediate 0 or 1", MI);
break;
}
@@ -1521,6 +1577,28 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report("Vector reduction requires vector source=", MI);
break;
}
+
+ case TargetOpcode::G_SBFX:
+ case TargetOpcode::G_UBFX: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (DstTy.isVector()) {
+ report("Bitfield extraction is not supported on vectors", MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_ROTR:
+ case TargetOpcode::G_ROTL: {
+ LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
+ LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
+ if (Src1Ty.isVector() != Src2Ty.isVector()) {
+ report("Rotate requires operands to be either all scalars or all vectors",
+ MI);
+ break;
+ }
+ break;
+ }
+
default:
break;
}
@@ -1582,7 +1660,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Other instructions must have one, unless they are inside a bundle.
if (LiveInts) {
bool mapped = !LiveInts->isNotInMIMap(*MI);
- if (MI->isDebugInstr()) {
+ if (MI->isDebugOrPseudoInstr()) {
if (mapped)
report("Debug instruction has a slot index", MI);
} else if (MI->isInsideBundle()) {
@@ -1594,7 +1672,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
- if (isPreISelGenericOpcode(MCID.getOpcode())) {
+ unsigned Opc = MCID.getOpcode();
+ if (isPreISelGenericOpcode(Opc) || isPreISelGenericOptimizationHint(Opc)) {
verifyPreISelGenericInstruction(MI);
return;
}
@@ -1606,32 +1685,56 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Verify properties of various specific instruction types
switch (MI->getOpcode()) {
case TargetOpcode::COPY: {
- if (foundErrors)
- break;
const MachineOperand &DstOp = MI->getOperand(0);
const MachineOperand &SrcOp = MI->getOperand(1);
- LLT DstTy = MRI->getType(DstOp.getReg());
- LLT SrcTy = MRI->getType(SrcOp.getReg());
+ const Register SrcReg = SrcOp.getReg();
+ const Register DstReg = DstOp.getReg();
+
+ LLT DstTy = MRI->getType(DstReg);
+ LLT SrcTy = MRI->getType(SrcReg);
if (SrcTy.isValid() && DstTy.isValid()) {
// If both types are valid, check that the types are the same.
if (SrcTy != DstTy) {
report("Copy Instruction is illegal with mismatching types", MI);
errs() << "Def = " << DstTy << ", Src = " << SrcTy << "\n";
}
+
+ break;
}
- if (SrcTy.isValid() || DstTy.isValid()) {
- // If one of them have valid types, let's just check they have the same
- // size.
- unsigned SrcSize = TRI->getRegSizeInBits(SrcOp.getReg(), *MRI);
- unsigned DstSize = TRI->getRegSizeInBits(DstOp.getReg(), *MRI);
- assert(SrcSize && "Expecting size here");
- assert(DstSize && "Expecting size here");
- if (SrcSize != DstSize)
- if (!DstOp.getSubReg() && !SrcOp.getSubReg()) {
- report("Copy Instruction is illegal with mismatching sizes", MI);
- errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize
- << "\n";
- }
+
+ if (!SrcTy.isValid() && !DstTy.isValid())
+ break;
+
+ // If we have only one valid type, this is likely a copy between a virtual
+ // and physical register.
+ unsigned SrcSize = 0;
+ unsigned DstSize = 0;
+ if (SrcReg.isPhysical() && DstTy.isValid()) {
+ const TargetRegisterClass *SrcRC =
+ TRI->getMinimalPhysRegClassLLT(SrcReg, DstTy);
+ if (SrcRC)
+ SrcSize = TRI->getRegSizeInBits(*SrcRC);
+ }
+
+ if (SrcSize == 0)
+ SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI);
+
+ if (DstReg.isPhysical() && SrcTy.isValid()) {
+ const TargetRegisterClass *DstRC =
+ TRI->getMinimalPhysRegClassLLT(DstReg, SrcTy);
+ if (DstRC)
+ DstSize = TRI->getRegSizeInBits(*DstRC);
+ }
+
+ if (DstSize == 0)
+ DstSize = TRI->getRegSizeInBits(DstReg, *MRI);
+
+ if (SrcSize != 0 && DstSize != 0 && SrcSize != DstSize) {
+ if (!DstOp.getSubReg() && !SrcOp.getSubReg()) {
+ report("Copy Instruction is illegal with mismatching sizes", MI);
+ errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize
+ << "\n";
+ }
}
break;
}
@@ -1679,6 +1782,19 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// TODO: verify we have properly encoded deopt arguments
} break;
+ case TargetOpcode::INSERT_SUBREG: {
+ unsigned InsertedSize;
+ if (unsigned SubIdx = MI->getOperand(2).getSubReg())
+ InsertedSize = TRI->getSubRegIdxSize(SubIdx);
+ else
+ InsertedSize = TRI->getRegSizeInBits(MI->getOperand(2).getReg(), *MRI);
+ unsigned SubRegSize = TRI->getSubRegIdxSize(MI->getOperand(3).getImm());
+ if (SubRegSize < InsertedSize) {
+ report("INSERT_SUBREG expected inserted value to have equal or lesser "
+ "size than the subreg it was inserted into", MI);
+ break;
+ }
+ } break;
}
}
@@ -1716,9 +1832,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MCOI.OperandType == MCOI::OPERAND_REGISTER &&
!MO->isReg() && !MO->isFI())
report("Expected a register operand.", MO, MONum);
- if ((MCOI.OperandType == MCOI::OPERAND_IMMEDIATE ||
- MCOI.OperandType == MCOI::OPERAND_PCREL) && MO->isReg())
- report("Expected a non-register operand.", MO, MONum);
+ if (MO->isReg()) {
+ if (MCOI.OperandType == MCOI::OPERAND_IMMEDIATE ||
+ (MCOI.OperandType == MCOI::OPERAND_PCREL &&
+ !TII->isPCRelRegisterOperandLegal(*MO)))
+ report("Expected a non-register operand.", MO, MONum);
+ }
}
int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO);
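
The operand check now defers to the target before rejecting a register in a PC-relative slot. A hedged sketch of the hook a backend would override; MyTargetInstrInfo and MYTGT::PC are illustrative names only:

    bool MyTargetInstrInfo::isPCRelRegisterOperandLegal(
        const MachineOperand &MO) const {
      // On this hypothetical target only the architectural program counter may
      // appear where a PC-relative immediate is expected.
      return MO.isReg() && MO.getReg() == MYTGT::PC;
    }
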
@@ -2150,12 +2269,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (!Register::isPhysicalRegister(MOP.getReg()))
continue;
- for (const MCPhysReg &SubReg : TRI->subregs(MOP.getReg())) {
- if (SubReg == Reg) {
- Bad = false;
- break;
- }
- }
+ if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg))
+ Bad = false;
}
}
if (Bad)
@@ -2903,6 +3018,15 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Check that VNI is live-out of all predecessors.
for (const MachineBasicBlock *Pred : MFI->predecessors()) {
SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred);
+ // Predecessor of landing pad live-out on last call.
+ if (MFI->isEHPad()) {
+ for (auto I = Pred->rbegin(), E = Pred->rend(); I != E; ++I) {
+ if (I->isCall()) {
+ PEnd = Indexes->getInstructionIndex(*I).getBoundaryIndex();
+ break;
+ }
+ }
+ }
const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
// All predecessors must have a live-out value. However for a phi
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 095da09ea82b..b5517c40a28a 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -1275,15 +1275,15 @@ class KernelRewriter {
Register undef(const TargetRegisterClass *RC);
public:
- KernelRewriter(MachineLoop &L, ModuloSchedule &S,
+ KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
LiveIntervals *LIS = nullptr);
void rewrite();
};
} // namespace
KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
- LiveIntervals *LIS)
- : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()),
+ MachineBasicBlock *LoopBB, LiveIntervals *LIS)
+ : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
PreheaderBB = *BB->pred_begin();
@@ -1981,7 +1981,7 @@ void PeelingModuloScheduleExpander::fixupBranches() {
}
void PeelingModuloScheduleExpander::rewriteKernel() {
- KernelRewriter KR(*Schedule.getLoop(), Schedule);
+ KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
KR.rewrite();
}
@@ -2024,7 +2024,7 @@ void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
Preheader->addSuccessor(BB);
// Now run the new expansion algorithm.
- KernelRewriter KR(*Schedule.getLoop(), Schedule);
+ KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
KR.rewrite();
peelPrologAndEpilogs();
diff --git a/llvm/lib/CodeGen/OptimizePHIs.cpp b/llvm/lib/CodeGen/OptimizePHIs.cpp
index 02a70ab801e9..8a6cf47c0d89 100644
--- a/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -83,8 +83,8 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
// introduce new opportunities, e.g., when i64 values are split up for
// 32-bit targets.
bool Changed = false;
- for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
- Changed |= OptimizeBB(*I);
+ for (MachineBasicBlock &MBB : Fn)
+ Changed |= OptimizeBB(MBB);
return Changed;
}
@@ -195,9 +195,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
// Check for dead PHI cycles.
PHIsInCycle.clear();
if (IsDeadPHICycle(MI, PHIsInCycle)) {
- for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
- PI != PE; ++PI) {
- MachineInstr *PhiMI = *PI;
+ for (MachineInstr *PhiMI : PHIsInCycle) {
if (MII == PhiMI)
++MII;
PhiMI->eraseFromParent();
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
index 8148b64d8443..54805584dbc1 100644
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -316,6 +316,16 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
IncomingReg, DestReg);
}
+ if (MPhi->peekDebugInstrNum()) {
+ // If referred to by debug-info, store where this PHI was.
+ MachineFunction *MF = MBB.getParent();
+ unsigned ID = MPhi->peekDebugInstrNum();
+ auto P = MachineFunction::DebugPHIRegallocPos(&MBB, IncomingReg, 0);
+ auto Res = MF->DebugPHIPositions.insert({ID, P});
+ assert(Res.second);
+ (void)Res;
+ }
+
// Update live variable information if there is any.
if (LV) {
if (IncomingReg) {
@@ -475,9 +485,10 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (DefMI->isImplicitDef())
ImpDefs.insert(DefMI);
} else {
- NewSrcInstr =
- TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(),
- SrcReg, SrcSubReg, IncomingReg);
+ // Delete the debug location, since the copy is inserted into a
+ // different basic block.
+ NewSrcInstr = TII->createPHISourceCopy(opBlock, InsertPos, nullptr,
+ SrcReg, SrcSubReg, IncomingReg);
}
}
@@ -550,9 +561,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
LiveInterval &SrcLI = LIS->getInterval(SrcReg);
bool isLiveOut = false;
- for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
- SE = opBlock.succ_end(); SI != SE; ++SI) {
- SlotIndex startIdx = LIS->getMBBStartIdx(*SI);
+ for (MachineBasicBlock *Succ : opBlock.successors()) {
+ SlotIndex startIdx = LIS->getMBBStartIdx(Succ);
VNInfo *VNI = SrcLI.getVNInfoAt(startIdx);
// Definitions by other PHIs are not truly live-in for our purposes.
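
The DebugPHIPositions side table recorded above is keyed by the PHI's debug instruction number and is what instruction-referencing debug info can consult after register allocation. A hedged consumer-side sketch, assuming InstrNum is the number that was attached to the PHI:

    // Did PHI elimination record where this PHI's value ended up?
    auto It = MF.DebugPHIPositions.find(InstrNum);
    if (It != MF.DebugPHIPositions.end()) {
      const MachineFunction::DebugPHIRegallocPos &Pos = It->second;
      (void)Pos; // e.g. a debug-info pass resolves Pos to a location later.
    }
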
diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp
index 849b667254bd..3e32afaafa6e 100644
--- a/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/llvm/lib/CodeGen/ParallelCG.cpp
@@ -36,8 +36,8 @@ static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
CodeGenPasses.run(*M);
}
-std::unique_ptr<Module> llvm::splitCodeGen(
- std::unique_ptr<Module> M, ArrayRef<llvm::raw_pwrite_stream *> OSs,
+void llvm::splitCodeGen(
+ Module &M, ArrayRef<llvm::raw_pwrite_stream *> OSs,
ArrayRef<llvm::raw_pwrite_stream *> BCOSs,
const std::function<std::unique_ptr<TargetMachine>()> &TMFactory,
CodeGenFileType FileType, bool PreserveLocals) {
@@ -45,9 +45,9 @@ std::unique_ptr<Module> llvm::splitCodeGen(
if (OSs.size() == 1) {
if (!BCOSs.empty())
- WriteBitcodeToFile(*M, *BCOSs[0]);
- codegen(M.get(), *OSs[0], TMFactory, FileType);
- return M;
+ WriteBitcodeToFile(M, *BCOSs[0]);
+ codegen(&M, *OSs[0], TMFactory, FileType);
+ return;
}
// Create ThreadPool in nested scope so that threads will be joined
@@ -57,7 +57,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
int ThreadCount = 0;
SplitModule(
- std::move(M), OSs.size(),
+ M, OSs.size(),
[&](std::unique_ptr<Module> MPart) {
// We want to clone the module in a new context to multi-thread the
// codegen. We do it by serializing partition modules to bitcode
@@ -95,6 +95,4 @@ std::unique_ptr<Module> llvm::splitCodeGen(
},
PreserveLocals);
}
-
- return {};
}
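
With the new signature the caller keeps ownership of the module and nothing is returned. A hedged sketch of a call site under that contract; createMyTargetMachine() is an assumed factory, not a real API:

    void emitObjectsInParallel(llvm::Module &M,
                               llvm::ArrayRef<llvm::raw_pwrite_stream *> OSs) {
      llvm::splitCodeGen(
          M, OSs, /*BCOSs=*/{},
          [] {
            return std::unique_ptr<llvm::TargetMachine>(createMyTargetMachine());
          },
          llvm::CGFT_ObjectFile);
    }
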
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 34ac396c0471..49bdba518322 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -585,15 +585,30 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
MRI->constrainRegClass(DstReg, DstRC);
}
+  // Subregister defs are illegal during the machine-SSA phase, so we should
+  // not generate them here.
+ //
+ // For example, for the instructions:
+ //
+ // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
+ // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc
+ //
+ // We should generate:
+ //
+ // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
+ // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0
+ // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0
+ //
+ if (UseSrcSubIdx)
+ RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
+
Register NewVR = MRI->createVirtualRegister(RC);
- MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVR)
+ BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
.addReg(DstReg, 0, SubIdx);
- // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
- if (UseSrcSubIdx) {
- Copy->getOperand(0).setSubReg(SubIdx);
- Copy->getOperand(0).setIsUndef();
- }
+ if (UseSrcSubIdx)
+ UseMO->setSubReg(0);
+
UseMO->setReg(NewVR);
++NumReuse;
Changed = true;
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index ed19f7448151..d232ca3a69c3 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -143,18 +143,16 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
assert(WorkList.empty() && "Inconsistent worklist state");
- for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
- MFI != MFE; ++MFI) {
+ for (MachineBasicBlock &MBB : MF) {
// Scan the basic block for implicit defs.
- for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(),
- MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI)
- if (MBBI->isImplicitDef())
- WorkList.insert(&*MBBI);
+ for (MachineInstr &MI : MBB)
+ if (MI.isImplicitDef())
+ WorkList.insert(&MI);
if (WorkList.empty())
continue;
- LLVM_DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size()
+ LLVM_DEBUG(dbgs() << printMBBReference(MBB) << " has " << WorkList.size()
<< " implicit defs.\n");
Changed = true;
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 378aaba2a65f..2f65a450fb02 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -138,11 +138,6 @@ char PEI::ID = 0;
char &llvm::PrologEpilogCodeInserterID = PEI::ID;
-static cl::opt<unsigned>
-WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
- cl::desc("Warn for stack size bigger than the given"
- " number"));
-
INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
@@ -185,7 +180,8 @@ static void stashEntryDbgValues(MachineBasicBlock &MBB,
break;
if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter())
continue;
- if (MI.getDebugOperand(0).isFI()) {
+ if (any_of(MI.debug_operands(),
+ [](const MachineOperand &MO) { return MO.isFI(); })) {
// We can only emit valid locations for frame indices after the frame
      // setup, so do not stash them away.
FrameIndexValues.push_back(&MI);
@@ -277,8 +273,19 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   // Warn on stack size when it exceeds the given limit.
MachineFrameInfo &MFI = MF.getFrameInfo();
uint64_t StackSize = MFI.getStackSize();
- if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
- DiagnosticInfoStackSize DiagStackSize(F, StackSize);
+
+ unsigned Threshold = UINT_MAX;
+ if (MF.getFunction().hasFnAttribute("warn-stack-size")) {
+ bool Failed = MF.getFunction()
+ .getFnAttribute("warn-stack-size")
+ .getValueAsString()
+ .getAsInteger(10, Threshold);
+ // Verifier should have caught this.
+ assert(!Failed && "Invalid warn-stack-size fn attr value");
+ (void)Failed;
+ }
+ if (StackSize > Threshold) {
+ DiagnosticInfoStackSize DiagStackSize(F, StackSize, DS_Warning, Threshold);
F.getContext().diagnose(DiagStackSize);
}
ORE->emit([&]() {
@@ -317,8 +324,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) {
return;
std::vector<MachineBasicBlock::iterator> FrameSDOps;
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ for (MachineBasicBlock &BB : MF)
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I)
if (TII.isFrameInstr(*I)) {
unsigned Size = TII.getFrameSize(*I);
if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
@@ -337,10 +344,7 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) {
MFI.setAdjustsStack(AdjustsStack);
MFI.setMaxCallFrameSize(MaxCallFrameSize);
- for (std::vector<MachineBasicBlock::iterator>::iterator
- i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
- MachineBasicBlock::iterator I = *i;
-
+ for (MachineBasicBlock::iterator I : FrameSDOps) {
// If call frames are not being included as part of the stack frame, and
// the target doesn't indicate otherwise, remove the call frame pseudos
// here. The sub/add sp instruction pairs are still inserted, but we don't
@@ -401,7 +405,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
MachineFrameInfo &MFI = F.getFrameInfo();
- if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+ if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI, MinCSFrameIndex,
+ MaxCSFrameIndex)) {
// If target doesn't implement this, use generic code.
if (CSI.empty())
@@ -438,7 +443,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
unsigned Size = RegInfo->getSpillSize(*RC);
if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
// Nope, just spill it anywhere convenient.
- Align Alignment(RegInfo->getSpillAlignment(*RC));
+ Align Alignment = RegInfo->getSpillAlign(*RC);
// We may not be able to satisfy the desired alignment specification of
// the TargetRegisterClass if the stack alignment is smaller. Use the
// min.
@@ -679,10 +684,12 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
// StackSlot scavenging is only implemented for the default stack.
if (MFI.getStackID(i) == TargetStackID::Default)
AllocatedFrameSlots.push_back(i);
- // Add callee-save objects.
- for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
- if (MFI.getStackID(i) == TargetStackID::Default)
- AllocatedFrameSlots.push_back(i);
+ // Add callee-save objects if there are any.
+ if (MinCSFrameIndex <= MaxCSFrameIndex) {
+ for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
+ if (MFI.getStackID(i) == TargetStackID::Default)
+ AllocatedFrameSlots.push_back(i);
+ }
for (int i : AllocatedFrameSlots) {
// These are converted from int64_t, but they should always fit in int
@@ -772,9 +779,7 @@ static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
int64_t &Offset, Align &MaxAlign,
unsigned Skew) {
- for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
- E = UnassignedObjs.end(); I != E; ++I) {
- int i = *I;
+ for (int i : UnassignedObjs) {
AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
ProtectedObjs.insert(i);
}
@@ -837,7 +842,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// First assign frame offsets to stack objects that are used to spill
// callee saved registers.
- if (StackGrowsDown) {
+ if (StackGrowsDown && MaxCSFrameIndex >= MinCSFrameIndex) {
for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
if (MFI.getStackID(i) !=
TargetStackID::Default) // Only allocate objects on the default stack.
@@ -881,16 +886,14 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// incoming stack pointer if a frame pointer is required and is closer
// to the incoming rather than the final stack pointer.
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- bool EarlyScavengingSlots = (TFI.hasFP(MF) &&
- TFI.isFPCloseToIncomingSP() &&
+ bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() &&
RegInfo->useFPForScavengingIndex(MF) &&
- !RegInfo->needsStackRealignment(MF));
+ !RegInfo->hasStackRealignment(MF));
if (RS && EarlyScavengingSlots) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
- for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
- IE = SFIs.end(); I != IE; ++I)
- AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+ for (int SFI : SFIs)
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign, Skew);
}
// FIXME: Once this is working, then enable flag will change to a target
@@ -1050,9 +1053,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
if (RS && !EarlyScavengingSlots) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
- for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
- IE = SFIs.end(); I != IE; ++I)
- AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+ for (int SFI : SFIs)
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign, Skew);
}
if (!TFI.targetHandlesStackFrameRounding()) {
@@ -1069,7 +1071,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// value.
Align StackAlign;
if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI.getObjectIndexEnd() != 0))
+ (RegInfo->hasStackRealignment(MF) && MFI.getObjectIndexEnd() != 0))
StackAlign = TFI.getStackAlign();
else
StackAlign = TFI.getTransientStackAlign();
@@ -1083,18 +1085,19 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
   // If we have increased the offset to fulfill the alignment constraints,
// then the scavenging spill slots may become harder to reach from the
// stack pointer, float them so they stay close.
- if (OffsetBeforeAlignment != Offset && RS && !EarlyScavengingSlots) {
+ if (StackGrowsDown && OffsetBeforeAlignment != Offset && RS &&
+ !EarlyScavengingSlots) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs()
<< "Adjusting emergency spill slots!\n";);
int64_t Delta = Offset - OffsetBeforeAlignment;
- for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end();
- I != IE; ++I) {
- LLVM_DEBUG(llvm::dbgs() << "Adjusting offset of emergency spill slot #"
- << *I << " from " << MFI.getObjectOffset(*I););
- MFI.setObjectOffset(*I, MFI.getObjectOffset(*I) - Delta);
- LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(*I) << "\n";);
+ for (int SFI : SFIs) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "Adjusting offset of emergency spill slot #" << SFI
+ << " from " << MFI.getObjectOffset(SFI););
+ MFI.setObjectOffset(SFI, MFI.getObjectOffset(SFI) - Delta);
+ LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(SFI) << "\n";);
}
}
}
@@ -1222,16 +1225,19 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
// way with simply the frame index and offset rather than any
// target-specific addressing mode.
if (MI.isDebugValue()) {
- assert(i == 0 && "Frame indices can only appear as the first "
- "operand of a DBG_VALUE machine instruction");
+ MachineOperand &Op = MI.getOperand(i);
+ assert(
+ MI.isDebugOperand(&Op) &&
+ "Frame indices can only appear as a debug operand in a DBG_VALUE*"
+ " machine instruction");
Register Reg;
- unsigned FrameIdx = MI.getOperand(0).getIndex();
+ unsigned FrameIdx = Op.getIndex();
unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
StackOffset Offset =
TFI->getFrameIndexReference(MF, FrameIdx, Reg);
- MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
- MI.getOperand(0).setIsDebug();
+ Op.ChangeToRegister(Reg, false /*isDef*/);
+ Op.setIsDebug();
const DIExpression *DIExpr = MI.getDebugExpression();
@@ -1240,25 +1246,38 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
// complex location that is interpreted as being a memory address.
// This changes a pointer-valued variable to dereference that pointer,
// which is incorrect. Fix by adding DW_OP_stack_value.
- unsigned PrependFlags = DIExpression::ApplyOffset;
- if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
- PrependFlags |= DIExpression::StackValue;
-
- // If we have DBG_VALUE that is indirect and has a Implicit location
- // expression need to insert a deref before prepending a Memory
- // location expression. Also after doing this we change the DBG_VALUE
- // to be direct.
- if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
- SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
- bool WithStackValue = true;
- DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
- // Make the DBG_VALUE direct.
- MI.getDebugOffset().ChangeToRegister(0, false);
- }
- DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset);
+ if (MI.isNonListDebugValue()) {
+ unsigned PrependFlags = DIExpression::ApplyOffset;
+ if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
+ PrependFlags |= DIExpression::StackValue;
+
+        // If we have a DBG_VALUE that is indirect and has an Implicit location
+        // expression, we need to insert a deref before prepending a Memory
+        // location expression. After doing this we also change the DBG_VALUE
+        // to be direct.
+ if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
+ SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
+ bool WithStackValue = true;
+ DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+ // Make the DBG_VALUE direct.
+ MI.getDebugOffset().ChangeToRegister(0, false);
+ }
+ DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset);
+ } else {
+          // The debug operand at DebugOpIndex was a frame index at offset
+          // `Offset`; now that the operand has been replaced with the frame
+          // register, the expression must compute `register x, plus Offset`
+          // for that operand.
+ unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
+ SmallVector<uint64_t, 3> Ops;
+ TRI.getOffsetOpcodes(Offset, Ops);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
+ }
MI.getDebugExpressionOp().setMetadata(DIExpr);
continue;
+ } else if (MI.isDebugPHI()) {
+ // Allow stack ref to continue onwards.
+ continue;
}
// TODO: This code should be commoned with the code for
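
Earlier in this file's diff, the stack-size warning threshold moved from the removed -warn-stack-size option to a per-function "warn-stack-size" string attribute, so a frontend can request the diagnostic per function. A minimal sketch; the 8192-byte limit is an arbitrary example value:

    // Ask prologue/epilogue insertion to warn if this function's frame grows
    // beyond 8192 bytes.
    F.addFnAttr("warn-stack-size", "8192");
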
diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index 9c716a5a37ea..a9fb577d5735 100644
--- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -20,8 +20,9 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Target/TargetMachine.h"
-#include <unordered_map>
+#include <unordered_set>
#define DEBUG_TYPE "pseudo-probe-inserter"
@@ -47,7 +48,10 @@ public:
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
+ MachineInstr *FirstInstr = nullptr;
for (MachineInstr &MI : MBB) {
+ if (!MI.isPseudo())
+ FirstInstr = &MI;
if (MI.isCall()) {
if (DILocation *DL = MI.getDebugLoc()) {
auto Value = DL->getDiscriminator();
@@ -65,6 +69,53 @@ public:
}
}
}
+
+ // Walk the block backwards, move PSEUDO_PROBE before the first real
+      // instruction to fix out-of-order probes. Probes that end up as the
+      // block terminator are a problem: during offline counts processing, the
+      // samples collected on the first physical instruction following a probe
+      // are counted towards that probe, which logically treats that
+      // instruction as if it were in the same block as the probe. This is
+      // accurate most of the time, unless the instruction can be reached from
+      // multiple flows, which means it actually starts a new block. Samples
+      // collected on such probes may cause imprecision in the counts
+      // inference algorithm. Fortunately, if there are still other native
+      // instructions preceding the probe, we can use them as a placeholder to
+      // collect samples for the probe.
+ if (FirstInstr) {
+ auto MII = MBB.rbegin();
+ while (MII != MBB.rend()) {
+ // Skip all pseudo probes followed by a real instruction since they
+ // are not dangling.
+ if (!MII->isPseudo())
+ break;
+ auto Cur = MII++;
+ if (Cur->getOpcode() != TargetOpcode::PSEUDO_PROBE)
+ continue;
+ // Move the dangling probe before FirstInstr.
+ auto *ProbeInstr = &*Cur;
+ MBB.remove(ProbeInstr);
+ MBB.insert(FirstInstr, ProbeInstr);
+ Changed = true;
+ }
+ } else {
+        // Probes not surrounded by any real instructions in the same block
+        // are called dangling probes. Since there is no good way to pick a
+        // sample collection point for a dangling probe at compile time, such
+        // probes are removed so that the profile correlation tool does not
+        // report any samples collected for them; it is up to the counts
+        // inference tool to assign them a reasonable count.
+ SmallVector<MachineInstr *, 4> ToBeRemoved;
+ for (MachineInstr &MI : MBB) {
+ if (MI.isPseudoProbe())
+ ToBeRemoved.push_back(&MI);
+ }
+
+ for (auto *MI : ToBeRemoved)
+ MI->eraseFromParent();
+
+ Changed |= !ToBeRemoved.empty();
+ }
}
return Changed;
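A minimal standalone model of the policy implemented above, assuming simplified stand-in types rather than MachineBasicBlock/MachineInstr: trailing probes are hoisted before the last real instruction so that a real instruction follows every probe, and blocks with no real instructions drop their probes entirely.

// Illustrative sketch only; the types and the single "probe" flag are
// simplifications of PSEUDO_PROBE handling, not LLVM classes.
#include <algorithm>
#include <iostream>
#include <list>
#include <string>

struct Inst {
  std::string Name;
  bool IsProbe;
};

void fixDanglingProbes(std::list<Inst> &Block) {
  auto LastReal = std::find_if(Block.rbegin(), Block.rend(),
                               [](const Inst &I) { return !I.IsProbe; });
  if (LastReal == Block.rend()) {
    // No real instructions: every probe is dangling, remove them all.
    Block.clear();
    return;
  }
  // Move trailing probes (those after the last real instruction) to just
  // before that instruction, so a real instruction follows each probe.
  auto RealIt = std::prev(LastReal.base()); // forward iterator to LastReal
  auto TrailBegin = LastReal.base();        // first trailing probe
  Block.splice(RealIt, Block, TrailBegin, Block.end());
}

int main() {
  std::list<Inst> B = {{"mov", false}, {"probe1", true},
                       {"add", false}, {"probe2", true}};
  fixDanglingProbes(B);
  for (const Inst &I : B)
    std::cout << I.Name << ' ';
  std::cout << '\n'; // mov probe1 probe2 add
}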
diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp
index cebb902f0a4a..f605068e076d 100644
--- a/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/llvm/lib/CodeGen/RDFGraph.cpp
@@ -994,8 +994,8 @@ RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
// For each stack in the map DefM, push the delimiter for block B on it.
void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) {
// Push block delimiters.
- for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I)
- I->second.start_block(B);
+ for (auto &P : DefM)
+ P.second.start_block(B);
}
// Remove all definitions coming from block B from each stack in DefM.
@@ -1003,8 +1003,8 @@ void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) {
// Pop all defs from this block from the definition stack. Defs that were
// added to the map during the traversal of instructions will not have a
// delimiter, but for those, the whole stack will be emptied.
- for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I)
- I->second.clear_block(B);
+ for (auto &P : DefM)
+ P.second.clear_block(B);
// Finally, remove empty stacks from the map.
for (auto I = DefM.begin(), E = DefM.end(), NextI = I; I != E; I = NextI) {
diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index 76bf0c280970..d92c6a997f31 100644
--- a/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -238,8 +238,8 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
[this](auto A, auto B) { return MDT.properlyDominates(A, B); });
std::vector<NodeId> TmpInst;
- for (auto I = TmpBB.rbegin(), E = TmpBB.rend(); I != E; ++I) {
- auto &Bucket = Blocks[*I];
+ for (MachineBasicBlock *MBB : llvm::reverse(TmpBB)) {
+ auto &Bucket = Blocks[MBB];
TmpInst.insert(TmpInst.end(), Bucket.rbegin(), Bucket.rend());
}
@@ -866,8 +866,8 @@ void Liveness::computeLiveIns() {
// Dump the liveness map
for (MachineBasicBlock &B : MF) {
std::vector<RegisterRef> LV;
- for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
- LV.push_back(RegisterRef(I->PhysReg, I->LaneMask));
+ for (const MachineBasicBlock::RegisterMaskPair &LI : B.liveins())
+ LV.push_back(RegisterRef(LI.PhysReg, LI.LaneMask));
llvm::sort(LV);
dbgs() << printMBBReference(B) << "\t rec = {";
for (auto I : LV)
@@ -893,16 +893,14 @@ void Liveness::resetLiveIns() {
for (auto &B : DFG.getMF()) {
// Remove all live-ins.
std::vector<unsigned> T;
- for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
- T.push_back(I->PhysReg);
+ for (const MachineBasicBlock::RegisterMaskPair &LI : B.liveins())
+ T.push_back(LI.PhysReg);
for (auto I : T)
B.removeLiveIn(I);
// Add the newly computed live-ins.
const RegisterAggr &LiveIns = LiveMap[&B];
- for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) {
- RegisterRef R = *I;
+ for (const RegisterRef R : make_range(LiveIns.rr_begin(), LiveIns.rr_end()))
B.addLiveIn({MCPhysReg(R.Reg), R.Mask});
- }
}
}
@@ -933,13 +931,12 @@ void Liveness::resetKills(MachineBasicBlock *B) {
for (auto SI : B->successors())
CopyLiveIns(SI, Live);
- for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) {
- MachineInstr *MI = &*I;
- if (MI->isDebugInstr())
+ for (MachineInstr &MI : llvm::reverse(*B)) {
+ if (MI.isDebugInstr())
continue;
- MI->clearKillInfo();
- for (auto &Op : MI->operands()) {
+ MI.clearKillInfo();
+ for (auto &Op : MI.operands()) {
// An implicit def of a super-register may not necessarily start a
// live range of it, since an implicit use could be used to keep parts
// of it live. Instead of analyzing the implicit operands, ignore
@@ -952,7 +949,7 @@ void Liveness::resetKills(MachineBasicBlock *B) {
for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
Live.reset(*SR);
}
- for (auto &Op : MI->operands()) {
+ for (auto &Op : MI.operands()) {
if (!Op.isReg() || !Op.isUse() || Op.isUndef())
continue;
Register R = Op.getReg();
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index d16e90a7e0b4..c850571da2ed 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -124,7 +125,7 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
for (MCRegUnitIterator Unit(MO.getReg().asMCReg(), TRI); Unit.isValid();
++Unit) {
// This instruction explicitly defines the current reg unit.
- LLVM_DEBUG(dbgs() << printReg(*Unit, TRI) << ":\t" << CurInstr
+ LLVM_DEBUG(dbgs() << printRegUnit(*Unit, TRI) << ":\t" << CurInstr
<< '\t' << *MI);
// How many instructions since this reg unit was last written?
@@ -660,10 +661,7 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
SmallPtrSet<MachineInstr*, 4> Uses;
getGlobalUses(Def, PhysReg, Uses);
- for (auto *Use : Uses)
- if (!Dead.count(Use))
- return false;
- return true;
+ return llvm::set_is_subset(Uses, Dead);
};
for (auto &MO : MI->operands()) {
@@ -688,9 +686,8 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) {
SmallPtrSet<MachineInstr*, 2> Uses;
getGlobalUses(Def, PhysReg, Uses);
- for (auto *Use : Uses)
- if (!Ignore.count(Use))
- return false;
+ if (!llvm::set_is_subset(Uses, Ignore))
+ return false;
} else
return false;
}
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index aa749ca43e74..d891d4c2ffbb 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
-STATISTIC(NumNewQueued , "Number of new live ranges queued");
+STATISTIC(NumNewQueued, "Number of new live ranges queued");
// Temporary verification option until we can put verification inside
// MachineVerifier.
@@ -54,8 +54,7 @@ bool RegAllocBase::VerifyEnabled = false;
// Pin the vtable to this file.
void RegAllocBase::anchor() {}
-void RegAllocBase::init(VirtRegMap &vrm,
- LiveIntervals &lis,
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
LiveRegMatrix &mat) {
TRI = &vrm.getTargetRegInfo();
MRI = &vrm.getRegInfo();
@@ -124,7 +123,12 @@ void RegAllocBase::allocatePhysRegs() {
if (MI->isInlineAsm())
break;
}
- if (MI && MI->isInlineAsm()) {
+
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg->reg());
+ ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);
+ if (AllocOrder.empty())
+ report_fatal_error("no registers from class available to allocate");
+ else if (MI && MI->isInlineAsm()) {
MI->emitError("inline assembly requires more registers than available");
} else if (MI) {
LLVMContext &Context =
@@ -133,10 +137,9 @@ void RegAllocBase::allocatePhysRegs() {
} else {
report_fatal_error("ran out of registers during register allocation");
}
+
// Keep going after reporting the error.
- VRM->assignVirt2Phys(
- VirtReg->reg(),
- RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg())).front());
+ VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front());
continue;
}
@@ -172,3 +175,21 @@ void RegAllocBase::postOptimization() {
}
DeadRemats.clear();
}
+
+void RegAllocBase::enqueue(LiveInterval *LI) {
+ const Register Reg = LI->reg();
+
+ assert(Reg.isVirtual() && "Can only enqueue virtual registers");
+
+ if (VRM->hasPhys(Reg))
+ return;
+
+ const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
+ if (ShouldAllocateClass(*TRI, RC)) {
+ LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n');
+ enqueueImpl(LI);
+ } else {
+ LLVM_DEBUG(dbgs() << "Not enqueueing " << printReg(Reg, TRI)
+ << " in skipped register class\n");
+ }
+}
diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h
index 3144605345e9..1fb56dbaebb7 100644
--- a/llvm/lib/CodeGen/RegAllocBase.h
+++ b/llvm/lib/CodeGen/RegAllocBase.h
@@ -37,6 +37,7 @@
#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/RegAllocCommon.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
namespace llvm {
@@ -67,6 +68,7 @@ protected:
LiveIntervals *LIS = nullptr;
LiveRegMatrix *Matrix = nullptr;
RegisterClassInfo RegClassInfo;
+ const RegClassFilterFunc ShouldAllocateClass;
/// Inst which is a def of an original reg and whose defs are already all
/// dead after remat is saved in DeadRemats. The deletion of such inst is
@@ -74,7 +76,9 @@ protected:
/// always available for the remat of all the siblings of the original reg.
SmallPtrSet<MachineInstr *, 32> DeadRemats;
- RegAllocBase() = default;
+ RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses) :
+ ShouldAllocateClass(F) {}
+
virtual ~RegAllocBase() = default;
// A RegAlloc pass should call this before allocatePhysRegs.
@@ -92,7 +96,10 @@ protected:
virtual Spiller &spiller() = 0;
/// enqueue - Add VirtReg to the priority queue of unassigned registers.
- virtual void enqueue(LiveInterval *LI) = 0;
+ virtual void enqueueImpl(LiveInterval *LI) = 0;
+
+ /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+ void enqueue(LiveInterval *LI);
/// dequeue - Return the next unassigned register, or NULL.
virtual LiveInterval *dequeue() = 0;
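The new split between enqueue and enqueueImpl puts the ShouldAllocateClass filter in the base class, so every allocator built on RegAllocBase skips register classes it was not asked to handle. A hedged sketch of that pattern with plain std types (not the LLVM signatures):

#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <utility>

struct RegClass { std::string Name; };
struct LiveInterval { int VReg; RegClass RC; };

using RegClassFilter = std::function<bool(const RegClass &)>;

class AllocatorBase {
public:
  explicit AllocatorBase(RegClassFilter F) : ShouldAllocate(std::move(F)) {}
  virtual ~AllocatorBase() = default;

  // Common gate: only intervals whose register class passes the filter reach
  // the allocator-specific queue (mirrors enqueue vs. enqueueImpl).
  void enqueue(const LiveInterval &LI) {
    if (ShouldAllocate(LI.RC))
      enqueueImpl(LI);
    else
      std::cout << "skipping vreg " << LI.VReg << " (" << LI.RC.Name << ")\n";
  }

protected:
  virtual void enqueueImpl(const LiveInterval &LI) = 0;

private:
  RegClassFilter ShouldAllocate;
};

class BasicAllocator : public AllocatorBase {
public:
  using AllocatorBase::AllocatorBase;

private:
  void enqueueImpl(const LiveInterval &LI) override { Queue.push(LI.VReg); }
  std::queue<int> Queue;
};

int main() {
  // Example filter: this run only allocates the "GPR" class; other classes
  // are left untouched for a later allocator run.
  BasicAllocator RA([](const RegClass &RC) { return RC.Name == "GPR"; });
  RA.enqueue({1, {"GPR"}});
  RA.enqueue({2, {"VEC"}});
}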
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index 8f2cb48c5d69..b65d58077958 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -76,7 +76,7 @@ class RABasic : public MachineFunctionPass,
void LRE_WillShrinkVirtReg(Register) override;
public:
- RABasic();
+ RABasic(const RegClassFilterFunc F = allocateAllRegClasses);
/// Return the pass name.
StringRef getPassName() const override { return "Basic Register Allocator"; }
@@ -88,7 +88,7 @@ public:
Spiller &spiller() override { return *SpillerInstance; }
- void enqueue(LiveInterval *LI) override {
+ void enqueueImpl(LiveInterval *LI) override {
Queue.push(LI);
}
@@ -171,7 +171,9 @@ void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) {
enqueue(&LI);
}
-RABasic::RABasic(): MachineFunctionPass(ID) {
+RABasic::RABasic(RegClassFilterFunc F):
+ MachineFunctionPass(ID),
+ RegAllocBase(F) {
}
void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -286,16 +288,14 @@ MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg,
}
// Try to spill another interfering reg with less spill weight.
- for (auto PhysRegI = PhysRegSpillCands.begin(),
- PhysRegE = PhysRegSpillCands.end();
- PhysRegI != PhysRegE; ++PhysRegI) {
- if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
+ for (MCRegister &PhysReg : PhysRegSpillCands) {
+ if (!spillInterferences(VirtReg, PhysReg, SplitVRegs))
continue;
- assert(!Matrix->checkInterference(VirtReg, *PhysRegI) &&
+ assert(!Matrix->checkInterference(VirtReg, PhysReg) &&
"Interference after spill.");
// Tell the caller to allocate to this newly freed physical register.
- return *PhysRegI;
+ return PhysReg;
}
// No other spill candidates were found, so spill the current VirtReg.
@@ -322,7 +322,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
getAnalysis<MachineBlockFrequencyInfo>());
VRAI.calculateSpillWeightsAndHints();
- SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI));
allocatePhysRegs();
postOptimization();
@@ -334,7 +334,10 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
return true;
}
-FunctionPass* llvm::createBasicRegisterAllocator()
-{
+FunctionPass* llvm::createBasicRegisterAllocator() {
return new RABasic();
}
+
+FunctionPass* llvm::createBasicRegisterAllocator(RegClassFilterFunc F) {
+ return new RABasic(F);
+}
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 6e548d4a93c8..707161d5a8b0 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocCommon.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -69,7 +70,13 @@ namespace {
public:
static char ID;
- RegAllocFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}
+ RegAllocFast(const RegClassFilterFunc F = allocateAllRegClasses,
+ bool ClearVirtRegs_ = true) :
+ MachineFunctionPass(ID),
+ ShouldAllocateClass(F),
+ StackSlotForVirtReg(-1),
+ ClearVirtRegs(ClearVirtRegs_) {
+ }
private:
MachineFrameInfo *MFI;
@@ -77,6 +84,7 @@ namespace {
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
RegisterClassInfo RegClassInfo;
+ const RegClassFilterFunc ShouldAllocateClass;
/// Basic block currently being allocated.
MachineBasicBlock *MBB;
@@ -84,6 +92,8 @@ namespace {
/// Maps virtual regs to the frame index where these values are spilled.
IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+ bool ClearVirtRegs;
+
/// Everything we know about a live virtual register.
struct LiveReg {
MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
@@ -108,7 +118,7 @@ namespace {
/// Stores assigned virtual registers present in the bundle MI.
DenseMap<Register, MCPhysReg> BundleVirtRegsMap;
- DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
+ DenseMap<unsigned, SmallVector<MachineOperand *, 2>> LiveDbgValueMap;
/// List of DBG_VALUE that we encountered without the vreg being assigned
/// because they were placed after the last use of the vreg.
DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues;
@@ -147,6 +157,8 @@ namespace {
RegUnitSet UsedInInstr;
RegUnitSet PhysRegUses;
SmallVector<uint16_t, 8> DefOperandIndexes;
+ // Register masks attached to the current instruction.
+ SmallVector<const uint32_t *> RegMasks;
void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
bool isPhysRegFree(MCPhysReg PhysReg) const;
@@ -157,8 +169,17 @@ namespace {
UsedInInstr.insert(*Units);
}
+ // Check if physreg is clobbered by instruction's regmask(s).
+ bool isClobberedByRegMasks(MCPhysReg PhysReg) const {
+ return llvm::any_of(RegMasks, [PhysReg](const uint32_t *Mask) {
+ return MachineOperand::clobbersPhysReg(Mask, PhysReg);
+ });
+ }
+
/// Check if a physreg or any of its aliases are used in this instruction.
bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
+ if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
+ return true;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
if (UsedInInstr.count(*Units))
return true;
@@ -202,8 +223,12 @@ namespace {
}
MachineFunctionProperties getSetProperties() const override {
- return MachineFunctionProperties().set(
+ if (ClearVirtRegs) {
+ return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ return MachineFunctionProperties();
}
MachineFunctionProperties getClearedProperties() const override {
@@ -406,9 +431,15 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
// When we spill a virtual register, we will have spill instructions behind
// every definition of it, meaning we can switch all the DBG_VALUEs over
// to just reference the stack slot.
- SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg];
- for (MachineInstr *DBG : LRIDbgValues) {
- MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI);
+ SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg];
+ SmallDenseMap<MachineInstr *, SmallVector<const MachineOperand *>>
+ SpilledOperandsMap;
+ for (MachineOperand *MO : LRIDbgOperands)
+ SpilledOperandsMap[MO->getParent()].push_back(MO);
+ for (auto MISpilledOperands : SpilledOperandsMap) {
+ MachineInstr &DBG = *MISpilledOperands.first;
+ MachineInstr *NewDV = buildDbgValueForSpill(
+ *MBB, Before, *MISpilledOperands.first, FI, MISpilledOperands.second);
assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV);
@@ -424,14 +455,19 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
}
// Rewrite unassigned dbg_values to use the stack slot.
- MachineOperand &MO = DBG->getOperand(0);
- if (MO.isReg() && MO.getReg() == 0)
- updateDbgValueForSpill(*DBG, FI);
+ // TODO We can potentially do this for list debug values as well if we know
+ // how the dbg_values are getting unassigned.
+ if (DBG.isNonListDebugValue()) {
+ MachineOperand &MO = DBG.getDebugOperand(0);
+ if (MO.isReg() && MO.getReg() == 0) {
+ updateDbgValueForSpill(DBG, FI, 0);
+ }
+ }
}
  // Now that this register is spilled, there should not be any DBG_VALUE
  // pointing to this register because they all point to the spilled value
  // now.
- LRIDbgValues.clear();
+ LRIDbgOperands.clear();
}
/// Insert reload instruction for \p PhysReg before \p Before.
@@ -623,8 +659,7 @@ void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
SmallVectorImpl<MachineInstr*> &Dangling = UDBGValIter->second;
for (MachineInstr *DbgValue : Dangling) {
assert(DbgValue->isDebugValue());
- MachineOperand &MO = DbgValue->getOperand(0);
- if (!MO.isReg())
+ if (!DbgValue->hasDebugOperandForReg(VirtReg))
continue;
// Test whether the physreg survives from the definition to the DBG_VALUE.
@@ -639,9 +674,11 @@ void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
break;
}
}
- MO.setReg(SetToReg);
- if (SetToReg != 0)
- MO.setIsRenamable();
+ for (MachineOperand &MO : DbgValue->getDebugOperandsForReg(VirtReg)) {
+ MO.setReg(SetToReg);
+ if (SetToReg != 0)
+ MO.setIsRenamable();
+ }
}
Dangling.clear();
}
@@ -1076,6 +1113,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// operands and early-clobbers.
UsedInInstr.clear();
+ RegMasks.clear();
BundleVirtRegsMap.clear();
// Scan for special cases; Apply pre-assigned register defs to state.
@@ -1115,6 +1153,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
} else if (MO.isRegMask()) {
HasRegMask = true;
+ RegMasks.push_back(MO.getRegMask());
}
}
@@ -1230,6 +1269,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
continue;
}
+ assert((!MO.isTied() || !isClobberedByRegMasks(MO.getReg())) &&
+ "tied def assigned to clobbered register");
+
// Do not free tied operands and early clobbers.
if (MO.isTied() || MO.isEarlyClobber())
continue;
@@ -1246,20 +1288,16 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Displace clobbered registers.
if (HasRegMask) {
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isRegMask()) {
- // MRI bookkeeping.
- MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
-
- // Displace clobbered registers.
- const uint32_t *Mask = MO.getRegMask();
- for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(),
- LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) {
- MCPhysReg PhysReg = LRI->PhysReg;
- if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg))
- displacePhysReg(MI, PhysReg);
- }
- }
+ assert(!RegMasks.empty() && "expected RegMask");
+ // MRI bookkeeping.
+ for (const auto *RM : RegMasks)
+ MRI->addPhysRegsUsedFromRegMask(RM);
+
+ // Displace clobbered registers.
+ for (const LiveReg &LR : LiveVirtRegs) {
+ MCPhysReg PhysReg = LR.PhysReg;
+ if (PhysReg != 0 && isClobberedByRegMasks(PhysReg))
+ displacePhysReg(MI, PhysReg);
}
}
@@ -1361,37 +1399,40 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
void RegAllocFast::handleDebugValue(MachineInstr &MI) {
- MachineOperand &MO = MI.getDebugOperand(0);
-
// Ignore DBG_VALUEs that aren't based on virtual registers. These are
// mostly constants and frame indices.
- if (!MO.isReg())
- return;
- Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
- return;
+ for (Register Reg : MI.getUsedDebugRegs()) {
+ if (!Register::isVirtualRegister(Reg))
+ continue;
- // Already spilled to a stackslot?
- int SS = StackSlotForVirtReg[Reg];
- if (SS != -1) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- updateDbgValueForSpill(MI, SS);
- LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI);
- return;
- }
+ // Already spilled to a stackslot?
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS != -1) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(MI, SS, Reg);
+ LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI);
+ continue;
+ }
- // See if this virtual register has already been allocated to a physical
- // register or spilled to a stack slot.
- LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
- if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
- setPhysReg(MI, MO, LRI->PhysReg);
- } else {
- DanglingDbgValues[Reg].push_back(&MI);
- }
+ // See if this virtual register has already been allocated to a physical
+ // register or spilled to a stack slot.
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+ SmallVector<MachineOperand *> DbgOps;
+ for (MachineOperand &Op : MI.getDebugOperandsForReg(Reg))
+ DbgOps.push_back(&Op);
+
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
+ // Update every use of Reg within MI.
+ for (auto &RegMO : DbgOps)
+ setPhysReg(MI, *RegMO, LRI->PhysReg);
+ } else {
+ DanglingDbgValues[Reg].push_back(&MI);
+ }
- // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
- // that future spills of Reg will have DBG_VALUEs.
- LiveDbgValueMap[Reg].push_back(&MI);
+ // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
+ // that future spills of Reg will have DBG_VALUEs.
+ LiveDbgValueMap[Reg].append(DbgOps.begin(), DbgOps.end());
+ }
}
void RegAllocFast::handleBundle(MachineInstr &MI) {
@@ -1425,10 +1466,8 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
- for (MachineBasicBlock *Succ : MBB.successors()) {
- for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins())
- setPhysRegState(LI.PhysReg, regPreAssigned);
- }
+ for (auto &LiveReg : MBB.liveouts())
+ setPhysRegState(LiveReg.PhysReg, regPreAssigned);
Coalesced.clear();
@@ -1473,13 +1512,12 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
for (auto &UDBGPair : DanglingDbgValues) {
for (MachineInstr *DbgValue : UDBGPair.second) {
assert(DbgValue->isDebugValue() && "expected DBG_VALUE");
- MachineOperand &MO = DbgValue->getOperand(0);
// Nothing to do if the vreg was spilled in the meantime.
- if (!MO.isReg())
+ if (!DbgValue->hasDebugOperandForReg(UDBGPair.first))
continue;
LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
<< '\n');
- MO.setReg(0);
+ DbgValue->setDebugValueUndef();
}
}
DanglingDbgValues.clear();
@@ -1515,9 +1553,11 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
allocateBasicBlock(MBB);
- // All machine operands and other references to virtual registers have been
- // replaced. Remove the virtual registers.
- MRI->clearVirtRegs();
+ if (ClearVirtRegs) {
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ MRI->clearVirtRegs();
+ }
StackSlotForVirtReg.clear();
LiveDbgValueMap.clear();
@@ -1527,3 +1567,9 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
FunctionPass *llvm::createFastRegisterAllocator() {
return new RegAllocFast();
}
+
+FunctionPass *llvm::createFastRegisterAllocator(
+ std::function<bool(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass &RC)> Ftor, bool ClearVirtRegs) {
+ return new RegAllocFast(Ftor, ClearVirtRegs);
+}
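The regmask handling above collects every regmask operand of the instruction once and then asks, per candidate physreg, whether any of those masks clobbers it. The sketch below mirrors that any_of-over-masks check; the bit convention here (a set bit means clobbered) is a toy simplification, which is why the real code goes through MachineOperand::clobbersPhysReg rather than testing bits directly.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Toy regmask: one bit per physical register; in this sketch a set bit means
// "clobbered" (illustrative convention only).
using RegMask = std::vector<uint32_t>;

bool maskClobbers(const RegMask &Mask, unsigned PhysReg) {
  return (Mask[PhysReg / 32] >> (PhysReg % 32)) & 1u;
}

// Mirrors isClobberedByRegMasks: any regmask attached to the current
// instruction that clobbers PhysReg makes it unusable, e.g. for a tied def.
bool isClobberedByRegMasks(const std::vector<RegMask> &RegMasks,
                           unsigned PhysReg) {
  return std::any_of(RegMasks.begin(), RegMasks.end(),
                     [PhysReg](const RegMask &M) {
                       return maskClobbers(M, PhysReg);
                     });
}

int main() {
  RegMask CallClobbers(4, 0);
  CallClobbers[0] = 0b1010; // clobbers physregs 1 and 3
  std::vector<RegMask> Masks = {CallClobbers};
  std::cout << isClobberedByRegMasks(Masks, 1) << ' '
            << isClobberedByRegMasks(Masks, 2) << '\n'; // 1 0
}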
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 166414e4ffa1..4eb12aa30ee9 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -69,6 +69,7 @@
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -406,8 +407,12 @@ class RAGreedy : public MachineFunctionPass,
/// Set of broken hints that may be reconciled later because of eviction.
SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
+  /// The register cost values. This list will be recreated for each
+  /// MachineFunction.
+ ArrayRef<uint8_t> RegCosts;
+
public:
- RAGreedy();
+ RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
/// Return the pass name.
StringRef getPassName() const override { return "Greedy Register Allocator"; }
@@ -416,7 +421,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
void releaseMemory() override;
Spiller &spiller() override { return *SpillerInstance; }
- void enqueue(LiveInterval *LI) override;
+ void enqueueImpl(LiveInterval *LI) override;
LiveInterval *dequeue() override;
MCRegister selectOrSplit(LiveInterval &,
SmallVectorImpl<Register> &) override;
@@ -463,28 +468,29 @@ private:
bool calcCompactRegion(GlobalSplitCandidate&);
void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
- Register canReassign(LiveInterval &VirtReg, Register PrevReg);
- bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+ Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
- const SmallVirtRegSet &);
- bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg,
- SlotIndex Start, SlotIndex End,
- EvictionCost &MaxCost);
+ const SmallVirtRegSet &) const;
+ bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
+ MCRegister PhysReg, SlotIndex Start,
+ SlotIndex End, EvictionCost &MaxCost) const;
MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
- LiveInterval &VirtReg, SlotIndex Start,
- SlotIndex End, float *BestEvictWeight);
+ const LiveInterval &VirtReg,
+ SlotIndex Start, SlotIndex End,
+ float *BestEvictWeight) const;
void evictInterference(LiveInterval &, MCRegister,
SmallVectorImpl<Register> &);
bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
SmallLISet &RecoloringCandidates,
const SmallVirtRegSet &FixedRegisters);
- Register tryAssign(LiveInterval&, AllocationOrder&,
+ MCRegister tryAssign(LiveInterval&, AllocationOrder&,
SmallVectorImpl<Register>&,
const SmallVirtRegSet&);
- unsigned tryEvict(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<Register>&, unsigned,
- const SmallVirtRegSet&);
+ MCRegister tryEvict(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &, uint8_t,
+ const SmallVirtRegSet &);
MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
/// Calculate cost of region splitting.
@@ -501,7 +507,7 @@ private:
/// time.
MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg,
AllocationOrder &Order, MCRegister PhysReg,
- unsigned &CostPerUseLimit,
+ uint8_t &CostPerUseLimit,
SmallVectorImpl<Register> &NewVRegs);
void initializeCSRCost();
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
@@ -541,19 +547,50 @@ private:
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
- /// Compute and report the number of spills and reloads for a loop.
- void reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
- unsigned &FoldedReloads, unsigned &Spills,
- unsigned &FoldedSpills);
-
- /// Report the number of spills and reloads for each loop.
- void reportNumberOfSplillsReloads() {
- for (MachineLoop *L : *Loops) {
- unsigned Reloads, FoldedReloads, Spills, FoldedSpills;
- reportNumberOfSplillsReloads(L, Reloads, FoldedReloads, Spills,
- FoldedSpills);
+  /// Greedy RA statistics, reported via optimization remarks.
+ struct RAGreedyStats {
+ unsigned Reloads = 0;
+ unsigned FoldedReloads = 0;
+ unsigned ZeroCostFoldedReloads = 0;
+ unsigned Spills = 0;
+ unsigned FoldedSpills = 0;
+ unsigned Copies = 0;
+ float ReloadsCost = 0.0f;
+ float FoldedReloadsCost = 0.0f;
+ float SpillsCost = 0.0f;
+ float FoldedSpillsCost = 0.0f;
+ float CopiesCost = 0.0f;
+
+ bool isEmpty() {
+ return !(Reloads || FoldedReloads || Spills || FoldedSpills ||
+ ZeroCostFoldedReloads || Copies);
}
- }
+
+ void add(RAGreedyStats other) {
+ Reloads += other.Reloads;
+ FoldedReloads += other.FoldedReloads;
+ ZeroCostFoldedReloads += other.ZeroCostFoldedReloads;
+ Spills += other.Spills;
+ FoldedSpills += other.FoldedSpills;
+ Copies += other.Copies;
+ ReloadsCost += other.ReloadsCost;
+ FoldedReloadsCost += other.FoldedReloadsCost;
+ SpillsCost += other.SpillsCost;
+ FoldedSpillsCost += other.FoldedSpillsCost;
+ CopiesCost += other.CopiesCost;
+ }
+
+ void report(MachineOptimizationRemarkMissed &R);
+ };
+
+ /// Compute statistic for a basic block.
+ RAGreedyStats computeStats(MachineBasicBlock &MBB);
+
+ /// Compute and report statistic through a remark.
+ RAGreedyStats reportStats(MachineLoop *L);
+
+ /// Report the statistic for each loop.
+ void reportStats();
};
} // end anonymous namespace
@@ -599,7 +636,22 @@ FunctionPass* llvm::createGreedyRegisterAllocator() {
return new RAGreedy();
}
-RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+namespace llvm {
+FunctionPass* createGreedyRegisterAllocator(
+ std::function<bool(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass &RC)> Ftor);
+
+}
+
+FunctionPass* llvm::createGreedyRegisterAllocator(
+ std::function<bool(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass &RC)> Ftor) {
+ return new RAGreedy(Ftor);
+}
+
+RAGreedy::RAGreedy(RegClassFilterFunc F):
+ MachineFunctionPass(ID),
+ RegAllocBase(F) {
}
void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -656,7 +708,7 @@ void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
// Register is assigned, put it back on the queue for reassignment.
LiveInterval &LI = LIS->getInterval(VirtReg);
Matrix->unassign(LI);
- enqueue(&LI);
+ RegAllocBase::enqueue(&LI);
}
void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
@@ -679,7 +731,7 @@ void RAGreedy::releaseMemory() {
GlobalCand.clear();
}
-void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }
+void RAGreedy::enqueueImpl(LiveInterval *LI) { enqueue(Queue, LI); }
void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Prioritize live ranges by size, assigning larger ranges first.
@@ -708,6 +760,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
+ bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges();
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
bool ForceGlobal = !ReverseLocal &&
(Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
@@ -731,6 +784,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// don't fit should be spilled (or split) ASAP so they don't create
// interference. Mark a bit to prioritize global above local ranges.
Prio = (1u << 29) + Size;
+
+ if (AddPriorityToGlobal)
+ Prio |= RC.AllocationPriority << 24;
}
// Mark a higher bit to prioritize global and local above RS_Split.
Prio |= (1u << 31);
@@ -759,11 +815,11 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
//===----------------------------------------------------------------------===//
/// tryAssign - Try to assign VirtReg to an available register.
-Register RAGreedy::tryAssign(LiveInterval &VirtReg,
+MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
const SmallVirtRegSet &FixedRegisters) {
- Register PhysReg;
+ MCRegister PhysReg;
for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
assert(*I);
if (!Matrix->checkInterference(VirtReg, *I)) {
@@ -797,7 +853,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg,
}
// Try to evict interference from a cheaper alternative.
- unsigned Cost = TRI->getCostPerUse(PhysReg);
+ uint8_t Cost = RegCosts[PhysReg];
// Most registers have 0 additional cost.
if (!Cost)
@@ -805,7 +861,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
<< Cost << '\n');
- Register CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
+ MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
return CheapReg ? CheapReg : PhysReg;
}
@@ -813,7 +869,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg,
// Interference eviction
//===----------------------------------------------------------------------===//
-Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) {
+Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const {
auto Order =
AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
MCRegister PhysReg;
@@ -853,7 +909,7 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) {
/// @param B The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
- LiveInterval &B, bool BreaksHint) {
+ LiveInterval &B, bool BreaksHint) const {
bool CanSplit = getStage(B) < RS_Spill;
// Be fairly aggressive about following hints as long as the evictee can be
@@ -877,9 +933,9 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
- bool IsHint, EvictionCost &MaxCost,
- const SmallVirtRegSet &FixedRegisters) {
+bool RAGreedy::canEvictInterference(
+ LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
// It is only possible to evict virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
return false;
@@ -975,14 +1031,15 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
/// \param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// \return True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
+bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
MCRegister PhysReg, SlotIndex Start,
SlotIndex End,
- EvictionCost &MaxCost) {
+ EvictionCost &MaxCost) const {
EvictionCost Cost;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ Q.collectInterferingVRegs();
// Check if any interfering live range is heavier than MaxWeight.
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
@@ -1027,9 +1084,9 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
/// \return The PhysReg which is the best candidate for eviction and the
/// eviction cost in BestEvictweight
MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
- LiveInterval &VirtReg,
+ const LiveInterval &VirtReg,
SlotIndex Start, SlotIndex End,
- float *BestEvictweight) {
+ float *BestEvictweight) const {
EvictionCost BestEvictCost;
BestEvictCost.setMax();
BestEvictCost.MaxWeight = VirtReg.weight();
@@ -1109,10 +1166,9 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
/// @return Physreg to assign VirtReg, or 0.
-unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
- AllocationOrder &Order,
+MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
- unsigned CostPerUseLimit,
+ uint8_t CostPerUseLimit,
const SmallVirtRegSet &FixedRegisters) {
NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
TimePassesIsEnabled);
@@ -1125,13 +1181,13 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
// When we are just looking for a reduced cost per use, don't break any
// hints, and only evict smaller spill weights.
- if (CostPerUseLimit < ~0u) {
+ if (CostPerUseLimit < uint8_t(~0u)) {
BestCost.BrokenHints = 0;
BestCost.MaxWeight = VirtReg.weight();
    // Check if any registers in RC are below CostPerUseLimit.
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
- unsigned MinCost = RegClassInfo.getMinCost(RC);
+ uint8_t MinCost = RegClassInfo.getMinCost(RC);
if (MinCost >= CostPerUseLimit) {
LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
<< MinCost << ", no cheaper registers to be found.\n");
@@ -1140,7 +1196,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
// It is normal for register classes to have a long tail of registers with
// the same cost. We don't need to look at them if they're too expensive.
- if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
+ if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
OrderLimit = RegClassInfo.getLastCostChange(RC);
LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
<< " regs.\n");
@@ -1151,7 +1207,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
++I) {
MCRegister PhysReg = *I;
assert(PhysReg);
- if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
+ if (RegCosts[PhysReg] >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
// Don't start using a CSR when the CostPerUseLimit is low.
@@ -1175,10 +1231,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
break;
}
- if (!BestPhys)
- return 0;
-
- evictInterference(VirtReg, BestPhys, NewVRegs);
+ if (BestPhys.isValid())
+ evictInterference(VirtReg, BestPhys, NewVRegs);
return BestPhys;
}
@@ -1289,8 +1343,9 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
    // Abort if the spill cannot be inserted at the start of the MBB.
MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
- if (!MBB->empty() &&
- SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()),
+ auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
+ if (FirstNonDebugInstr != MBB->end() &&
+ SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
SA->getFirstSplitPoint(Number)))
return false;
// Interference for the live-in value.
@@ -1331,9 +1386,7 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
for (unsigned Bundle : NewBundles) {
// Look at all blocks connected to Bundle in the full graph.
ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
- for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
- I != E; ++I) {
- unsigned Block = *I;
+ for (unsigned Block : Blocks) {
if (!Todo.test(Block))
continue;
Todo.reset(Block);
@@ -1557,25 +1610,9 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
return false;
}
- // Check if the local interval will evict a cheaper interval.
- float CheapestEvictWeight = 0;
- MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight(
- Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(),
- Cand.Intf.last(), &CheapestEvictWeight);
-
- // Have we found an interval that can be evicted?
- if (FutureEvictedPhysReg) {
- float splitArtifactWeight =
- VRAI->futureWeight(LIS->getInterval(VirtRegToSplit),
- Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
- // Will the weight of the local interval be higher than the cheapest evictee
- // weight? If so it will evict it and will not cause a spill.
- if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight)
- return false;
- }
-
- // The local interval is not able to find non interferencing assignment and
- // not able to evict a less worthy interval, therfore, it can cause a spill.
+  // The local interval is not able to find a non-interfering assignment and
+  // is not able to evict a less worthy interval; therefore, it can cause a
+  // spill.
return true;
}
@@ -2650,18 +2687,16 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// with VirtReg on PhysReg (or one of its aliases).
// Enqueue them for recoloring and perform the actual recoloring.
PQueue RecoloringQueue;
- for (SmallLISet::iterator It = RecoloringCandidates.begin(),
- EndIt = RecoloringCandidates.end();
- It != EndIt; ++It) {
- Register ItVirtReg = (*It)->reg();
- enqueue(RecoloringQueue, *It);
+ for (LiveInterval *RC : RecoloringCandidates) {
+ Register ItVirtReg = RC->reg();
+ enqueue(RecoloringQueue, RC);
assert(VRM->hasPhys(ItVirtReg) &&
"Interferences are supposed to be with allocated variables");
// Record the current allocation.
VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
// unset the related struct.
- Matrix->unassign(**It);
+ Matrix->unassign(*RC);
}
// Do as if VirtReg was assigned to PhysReg so that the underlying
@@ -2695,22 +2730,18 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// don't add it to NewVRegs because its physical register will be restored
// below. Other vregs in CurrentNewVRegs are created by calling
// selectOrSplit and should be added into NewVRegs.
- for (SmallVectorImpl<Register>::iterator Next = CurrentNewVRegs.begin(),
- End = CurrentNewVRegs.end();
- Next != End; ++Next) {
- if (RecoloringCandidates.count(&LIS->getInterval(*Next)))
+ for (Register &R : CurrentNewVRegs) {
+ if (RecoloringCandidates.count(&LIS->getInterval(R)))
continue;
- NewVRegs.push_back(*Next);
+ NewVRegs.push_back(R);
}
- for (SmallLISet::iterator It = RecoloringCandidates.begin(),
- EndIt = RecoloringCandidates.end();
- It != EndIt; ++It) {
- Register ItVirtReg = (*It)->reg();
+ for (LiveInterval *RC : RecoloringCandidates) {
+ Register ItVirtReg = RC->reg();
if (VRM->hasPhys(ItVirtReg))
- Matrix->unassign(**It);
+ Matrix->unassign(*RC);
MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg];
- Matrix->assign(**It, ItPhysReg);
+ Matrix->assign(*RC, ItPhysReg);
}
}
@@ -2793,7 +2824,7 @@ MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg,
/// to use the CSR; otherwise return 0.
MCRegister
RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
- MCRegister PhysReg, unsigned &CostPerUseLimit,
+ MCRegister PhysReg, uint8_t &CostPerUseLimit,
SmallVectorImpl<Register> &NewVRegs) {
if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
// We choose spill over using the CSR for the first time if the spill cost
@@ -2924,7 +2955,12 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
if (Register::isPhysicalRegister(Reg))
continue;
- assert(VRM->hasPhys(Reg) && "We have unallocated variable!!");
+ // This may be a skipped class
+ if (!VRM->hasPhys(Reg)) {
+ assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
+ "We have an unallocated variable which should have been handled");
+ continue;
+ }
// Get the live interval mapped with this virtual register to be able
// to check for the interference with the new color.
@@ -3024,13 +3060,13 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
unsigned Depth) {
- unsigned CostPerUseLimit = ~0u;
+ uint8_t CostPerUseLimit = uint8_t(~0u);
// First try assigning a free register.
auto Order =
AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
if (MCRegister PhysReg =
tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
- // If VirtReg got an assignment, the eviction info is no longre relevant.
+ // If VirtReg got an assignment, the eviction info is no longer relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg());
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
@@ -3067,7 +3103,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
if (Hint && Hint != PhysReg)
SetOfBrokenHints.insert(&VirtReg);
    // If VirtReg evicted someone, the eviction info for it as an evictee is
- // no longre relevant.
+ // no longer relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg());
return PhysReg;
}
@@ -3133,75 +3169,162 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return 0;
}
-void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
- unsigned &FoldedReloads,
- unsigned &Spills,
- unsigned &FoldedSpills) {
- Reloads = 0;
- FoldedReloads = 0;
- Spills = 0;
- FoldedSpills = 0;
-
- // Sum up the spill and reloads in subloops.
- for (MachineLoop *SubLoop : *L) {
- unsigned SubReloads;
- unsigned SubFoldedReloads;
- unsigned SubSpills;
- unsigned SubFoldedSpills;
-
- reportNumberOfSplillsReloads(SubLoop, SubReloads, SubFoldedReloads,
- SubSpills, SubFoldedSpills);
- Reloads += SubReloads;
- FoldedReloads += SubFoldedReloads;
- Spills += SubSpills;
- FoldedSpills += SubFoldedSpills;
+void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
+ using namespace ore;
+ if (Spills) {
+ R << NV("NumSpills", Spills) << " spills ";
+ R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
+ }
+ if (FoldedSpills) {
+ R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
+ R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
+ << " total folded spills cost ";
+ }
+ if (Reloads) {
+ R << NV("NumReloads", Reloads) << " reloads ";
+ R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
+ }
+ if (FoldedReloads) {
+ R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
+ R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
+ << " total folded reloads cost ";
+ }
+ if (ZeroCostFoldedReloads)
+ R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
+ << " zero cost folded reloads ";
+ if (Copies) {
+    R << NV("NumVRCopies", Copies) << " virtual register copies ";
+ R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
}
+}
+RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
+ RAGreedyStats Stats;
const MachineFrameInfo &MFI = MF->getFrameInfo();
int FI;
+ auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
+ return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
+ A->getPseudoValue())->getFrameIndex());
+ };
+ auto isPatchpointInstr = [](const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT;
+ };
+ for (MachineInstr &MI : MBB) {
+ if (MI.isCopy()) {
+ MachineOperand &Dest = MI.getOperand(0);
+ MachineOperand &Src = MI.getOperand(1);
+ if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() &&
+ Src.getReg().isVirtual())
+ ++Stats.Copies;
+ continue;
+ }
+
+ SmallVector<const MachineMemOperand *, 2> Accesses;
+ if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
+ ++Stats.Reloads;
+ continue;
+ }
+ if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
+ ++Stats.Spills;
+ continue;
+ }
+ if (TII->hasLoadFromStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess)) {
+ if (!isPatchpointInstr(MI)) {
+ Stats.FoldedReloads += Accesses.size();
+ continue;
+ }
+      // For statepoints there may be both folded and zero-cost folded stack
+      // reloads.
+ std::pair<unsigned, unsigned> NonZeroCostRange =
+ TII->getPatchpointUnfoldableRange(MI);
+ SmallSet<unsigned, 16> FoldedReloads;
+ SmallSet<unsigned, 16> ZeroCostFoldedReloads;
+ for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+ if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
+ continue;
+ if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
+ FoldedReloads.insert(MO.getIndex());
+ else
+ ZeroCostFoldedReloads.insert(MO.getIndex());
+ }
+      // If a stack slot is used in a folded reload, it is not zero cost.
+ for (unsigned Slot : FoldedReloads)
+ ZeroCostFoldedReloads.erase(Slot);
+ Stats.FoldedReloads += FoldedReloads.size();
+ Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
+ continue;
+ }
+ Accesses.clear();
+ if (TII->hasStoreToStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess)) {
+ Stats.FoldedSpills += Accesses.size();
+ }
+ }
+  // Weight the collected statistics by the relative frequency of this basic
+  // block.
+ float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
+ Stats.ReloadsCost = RelFreq * Stats.Reloads;
+ Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
+ Stats.SpillsCost = RelFreq * Stats.Spills;
+ Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
+ Stats.CopiesCost = RelFreq * Stats.Copies;
+ return Stats;
+}
+
+RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
+ RAGreedyStats Stats;
+
+ // Sum up the spill and reloads in subloops.
+ for (MachineLoop *SubLoop : *L)
+ Stats.add(reportStats(SubLoop));
+
for (MachineBasicBlock *MBB : L->getBlocks())
// Handle blocks that were not included in subloops.
if (Loops->getLoopFor(MBB) == L)
- for (MachineInstr &MI : *MBB) {
- SmallVector<const MachineMemOperand *, 2> Accesses;
- auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
- return MFI.isSpillSlotObjectIndex(
- cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
- ->getFrameIndex());
- };
-
- if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
- ++Reloads;
- else if (TII->hasLoadFromStackSlot(MI, Accesses) &&
- llvm::any_of(Accesses, isSpillSlotAccess))
- ++FoldedReloads;
- else if (TII->isStoreToStackSlot(MI, FI) &&
- MFI.isSpillSlotObjectIndex(FI))
- ++Spills;
- else if (TII->hasStoreToStackSlot(MI, Accesses) &&
- llvm::any_of(Accesses, isSpillSlotAccess))
- ++FoldedSpills;
- }
+ Stats.add(computeStats(*MBB));
- if (Reloads || FoldedReloads || Spills || FoldedSpills) {
+ if (!Stats.isEmpty()) {
using namespace ore;
ORE->emit([&]() {
- MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload",
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
L->getStartLoc(), L->getHeader());
- if (Spills)
- R << NV("NumSpills", Spills) << " spills ";
- if (FoldedSpills)
- R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
- if (Reloads)
- R << NV("NumReloads", Reloads) << " reloads ";
- if (FoldedReloads)
- R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
+ Stats.report(R);
R << "generated in loop";
return R;
});
}
+ return Stats;
+}
+
+void RAGreedy::reportStats() {
+ if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
+ return;
+ RAGreedyStats Stats;
+ for (MachineLoop *L : *Loops)
+ Stats.add(reportStats(L));
+ // Process non-loop blocks.
+ for (MachineBasicBlock &MBB : *MF)
+ if (!Loops->getLoopFor(&MBB))
+ Stats.add(computeStats(MBB));
+ if (!Stats.isEmpty()) {
+ using namespace ore;
+
+ ORE->emit([&]() {
+ DebugLoc Loc;
+ if (auto *SP = MF->getFunction().getSubprogram())
+ Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
+ &MF->front());
+ Stats.report(R);
+ R << "generated in function";
+ return R;
+ });
+ }
}
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
@@ -3232,7 +3355,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
DomTree = &getAnalysis<MachineDominatorTree>();
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
- SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
Loops = &getAnalysis<MachineLoopInfo>();
Bundles = &getAnalysis<EdgeBundles>();
SpillPlacer = &getAnalysis<SpillPlacement>();
@@ -3241,14 +3363,17 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
initializeCSRCost();
+ RegCosts = TRI->getRegisterCosts(*MF);
+
VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
VRAI->calculateSpillWeightsAndHints();
LLVM_DEBUG(LIS->dump());
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
- SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI));
+ SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
ExtraRegInfo.clear();
ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
@@ -3259,8 +3384,11 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
allocatePhysRegs();
tryHintsRecoloring();
+
+ if (VerifyEnabled)
+ MF->verify(this, "Before post optimization");
postOptimization();
- reportNumberOfSplillsReloads();
+ reportStats();
releaseMemory();
return true;
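A simplified standalone sketch of the statistics plumbing introduced in this file: per-block counts are scaled by the block's frequency relative to the entry block, summed up the loop tree, and reported only when non-empty. The types below are illustrative stand-ins, not the RAGreedy members.

#include <iostream>
#include <vector>

struct RAStats {
  unsigned Reloads = 0, Spills = 0, Copies = 0;
  float ReloadsCost = 0, SpillsCost = 0, CopiesCost = 0;

  bool isEmpty() const { return !(Reloads || Spills || Copies); }

  void add(const RAStats &O) {
    Reloads += O.Reloads;
    Spills += O.Spills;
    Copies += O.Copies;
    ReloadsCost += O.ReloadsCost;
    SpillsCost += O.SpillsCost;
    CopiesCost += O.CopiesCost;
  }
};

struct Block {
  unsigned Reloads, Spills, Copies;
  float RelFreq; // block frequency relative to the function entry
};

// Mirrors computeStats: raw counts come from scanning the block; the cost
// figures weight those counts by how often the block runs.
RAStats computeStats(const Block &B) {
  RAStats S;
  S.Reloads = B.Reloads;
  S.Spills = B.Spills;
  S.Copies = B.Copies;
  S.ReloadsCost = B.RelFreq * B.Reloads;
  S.SpillsCost = B.RelFreq * B.Spills;
  S.CopiesCost = B.RelFreq * B.Copies;
  return S;
}

int main() {
  std::vector<Block> Loop = {{2, 1, 3, 10.0f}, {0, 1, 1, 4.0f}};
  RAStats Total;
  for (const Block &B : Loop)
    Total.add(computeStats(B));
  if (!Total.isEmpty())
    std::cout << Total.Spills << " spills, cost " << Total.SpillsCost << '\n';
}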
diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 7c5af1a0c56e..b22eb080791e 100644
--- a/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -703,9 +703,8 @@ void RegAllocPBQP::spillVReg(Register VReg,
// Copy any newly inserted live intervals into the list of regs to
// allocate.
- for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
- I != E; ++I) {
- const LiveInterval &LI = LIS.getInterval(*I);
+ for (const Register &R : LRE) {
+ const LiveInterval &LI = LIS.getInterval(R);
assert(!LI.empty() && "Empty spill range.");
LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " ");
VRegsToAlloc.insert(LI.reg());
@@ -759,10 +758,8 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
MachineRegisterInfo &MRI = MF.getRegInfo();
// First allocate registers for the empty intervals.
- for (RegSet::const_iterator
- I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end();
- I != E; ++I) {
- LiveInterval &LI = LIS.getInterval(*I);
+ for (const Register &R : EmptyIntervalVRegs) {
+ LiveInterval &LI = LIS.getInterval(R);
Register PReg = MRI.getSimpleHint(LI.reg());
@@ -803,7 +800,14 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), MBFI);
VRAI.calculateSpillWeightsAndHints();
- std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
+  // FIXME: we create DefaultVRAI here to match the behavior from before the
+  // VRAI was passed through the spiller to the live range editor. However, it
+  // probably makes more sense to pass the PBQP VRAI. The old behavior had
+  // LiveRangeEdit make its own VirtRegAuxInfo object.
+ VirtRegAuxInfo DefaultVRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(),
+ MBFI);
+ std::unique_ptr<Spiller> VRegSpiller(
+ createInlineSpiller(*this, MF, VRM, DefaultVRAI));
MF.getRegInfo().freezeReservedRegs(MF);
@@ -855,7 +859,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
".pbqpgraph";
std::error_code EC;
- raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_Text);
+ raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_TextWithCRLF);
LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
<< GraphFileName << "\"\n");
G.dump(OS);
diff --git a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
index 0c3e8a89c920..800d952469a5 100644
--- a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -30,8 +30,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include <map>
-#include <string>
using namespace llvm;
diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 0488db3d09cb..797899fb5b86 100644
--- a/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -68,6 +68,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
CalleeSavedRegs = CSR;
+ RegCosts = TRI->getRegisterCosts(*MF);
+
// Different reserved registers?
const BitVector &RR = MF->getRegInfo().getReservedRegs();
if (Reserved.size() != RR.size() || RR != Reserved) {
@@ -100,8 +102,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned N = 0;
SmallVector<MCPhysReg, 16> CSRAlias;
- unsigned MinCost = 0xff;
- unsigned LastCost = ~0u;
+ uint8_t MinCost = uint8_t(~0u);
+ uint8_t LastCost = uint8_t(~0u);
unsigned LastCostChange = 0;
// FIXME: Once targets reserve registers instead of removing them from the
@@ -112,7 +114,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// Remove reserved registers from the allocation order.
if (Reserved.test(PhysReg))
continue;
- unsigned Cost = TRI->getCostPerUse(PhysReg);
+ uint8_t Cost = RegCosts[PhysReg];
MinCost = std::min(MinCost, Cost);
if (CalleeSavedAliases[PhysReg] &&
@@ -132,7 +134,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// CSR aliases go after the volatile registers, preserve the target's order.
for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
unsigned PhysReg = CSRAlias[i];
- unsigned Cost = TRI->getCostPerUse(PhysReg);
+ uint8_t Cost = RegCosts[PhysReg];
if (Cost != LastCost)
LastCostChange = N;
RCI.Order[N++] = PhysReg;
@@ -149,7 +151,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
RCI.ProperSubClass = true;
- RCI.MinCost = uint8_t(MinCost);
+ RCI.MinCost = MinCost;
RCI.LastCostChange = LastCostChange;
LLVM_DEBUG({
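
The RegisterClassInfo change caches per-register costs once per function (RegCosts) and indexes that table in compute(), instead of querying the target's getCostPerUse() hook for every register each time an allocation order is built. A rough standalone sketch of that caching pattern, with a stand-in cost query in place of the target hook:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

static uint8_t expensiveCostQuery(unsigned PhysReg) {
  // Stand-in for a target hook such as getCostPerUse().
  return static_cast<uint8_t>(PhysReg % 3);
}

int main() {
  const unsigned NumRegs = 16;

  // Populate the cost table once per function.
  std::vector<uint8_t> RegCosts(NumRegs);
  for (unsigned R = 0; R != NumRegs; ++R)
    RegCosts[R] = expensiveCostQuery(R);

  // Hot path: cheap table lookups, same result as repeated queries.
  uint8_t MinCost = UINT8_MAX;
  for (unsigned R = 0; R != NumRegs; ++R)
    MinCost = std::min(MinCost, RegCosts[R]);

  std::cout << "min cost = " << unsigned(MinCost) << "\n";
}
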
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 7fdc85a6e444..751f79e66b73 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -133,6 +133,20 @@ namespace {
AliasAnalysis *AA = nullptr;
RegisterClassInfo RegClassInfo;
+ /// Position and VReg of a PHI instruction during coalescing.
+ struct PHIValPos {
+ SlotIndex SI; ///< Slot where this PHI occurs.
+ Register Reg; ///< VReg the PHI occurs in.
+ unsigned SubReg; ///< Qualifying subregister for Reg.
+ };
+
+ /// Map from debug instruction number to PHI position during coalescing.
+ DenseMap<unsigned, PHIValPos> PHIValToPos;
+  /// For each VReg, an index of the debug instruction numbers and
+  /// corresponding PHIs that are sensitive to coalescing. Each VReg may
+  /// have multiple PHI defs, at different positions.
+ DenseMap<Register, SmallVector<unsigned, 2>> RegToPHIIdx;
+
/// Debug variable location tracking -- for each VReg, maintain an
/// ordered-by-slot-index set of DBG_VALUEs, to help quick
/// identification of whether coalescing may change location validity.
@@ -187,6 +201,11 @@ namespace {
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx);
+
/// LiveRangeEdit callback for eliminateDeadDefs().
void LRE_WillEraseInstruction(MachineInstr *MI) override;
@@ -590,6 +609,14 @@ void RegisterCoalescer::eliminateDeadDefs() {
nullptr, this).eliminateDeadDefs(DeadDefs);
}
+bool RegisterCoalescer::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx) {
+ SmallVector<Register, 8> NewRegs;
+ return LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this)
+ .allUsesAvailableAt(OrigMI, OrigIdx, UseIdx);
+}
+
void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
// MI may be in WorkList. Make sure we don't visit it.
ErasedInstrs.insert(MI);
@@ -914,7 +941,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
if (UseMO.isUndef())
continue;
MachineInstr *UseMI = UseMO.getParent();
- if (UseMI->isDebugValue()) {
+ if (UseMI->isDebugInstr()) {
// FIXME These don't have an instruction index. Not clear we have enough
// info to decide whether to do this replacement or not. For now do it.
UseMO.setReg(NewReg);
@@ -1329,6 +1356,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
}
}
+ if (!allUsesAvailableAt(DefMI, ValNo->def, CopyIdx))
+ return false;
+
DebugLoc DL = CopyMI->getDebugLoc();
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
@@ -1543,9 +1573,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
// to describe DstReg instead.
if (MRI->use_nodbg_empty(SrcReg)) {
- for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
+ UI != MRI->use_end();) {
+ MachineOperand &UseMO = *UI++;
MachineInstr *UseMI = UseMO.getParent();
- if (UseMI->isDebugValue()) {
+ if (UseMI->isDebugInstr()) {
if (Register::isPhysicalRegister(DstReg))
UseMO.substPhysReg(DstReg, *TRI);
else
@@ -1726,7 +1758,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (SubReg == 0 || MO.isUndef())
continue;
MachineInstr &MI = *MO.getParent();
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
addUndefFlag(*DstInt, UseIdx, MO, SubReg);
@@ -1753,7 +1785,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// If SrcReg wasn't read, it may still be the case that DstReg is live-in
// because SrcReg is a sub-register.
- if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue())
+ if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
// Replace SrcReg with DstReg in all UseMI operands.
@@ -1768,24 +1800,27 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
- if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
- if (!DstInt->hasSubRanges()) {
- BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
- LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
- LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
- DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
- // The unused lanes are just empty live-ranges at this point.
- // It is the caller responsibility to set the proper
- // dead segments if there is an actual dead def of the
- // unused lanes. This may happen with rematerialization.
- DstInt->createSubRange(Allocator, UnusedLanes);
+ if (MO.isUse() && !DstIsPhys) {
+ unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
+ if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ if (!DstInt->hasSubRanges()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
+ DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
+ // The unused lanes are just empty live-ranges at this point.
+ // It is the caller responsibility to set the proper
+ // dead segments if there is an actual dead def of the
+ // unused lanes. This may happen with rematerialization.
+ DstInt->createSubRange(Allocator, UnusedLanes);
+ }
+ SlotIndex MIIdx = UseMI->isDebugInstr()
+ ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
+ : LIS->getInstructionIndex(*UseMI);
+ SlotIndex UseIdx = MIIdx.getRegSlot(true);
+ addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
}
- SlotIndex MIIdx = UseMI->isDebugValue()
- ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
- : LIS->getInstructionIndex(*UseMI);
- SlotIndex UseIdx = MIIdx.getRegSlot(true);
- addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
}
if (DstIsPhys)
@@ -1796,7 +1831,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
LLVM_DEBUG({
dbgs() << "\t\tupdated: ";
- if (!UseMI->isDebugValue())
+ if (!UseMI->isDebugInstr())
dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
dbgs() << *UseMI;
});
@@ -2837,9 +2872,39 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
return CR_Impossible;
- // We need to verify that no instructions are reading the clobbered lanes. To
- // save compile time, we'll only check that locally. Don't allow the tainted
- // value to escape the basic block.
+ if (TrackSubRegLiveness) {
+ auto &OtherLI = LIS->getInterval(Other.Reg);
+ // If OtherVNI does not have subranges, it means all the lanes of OtherVNI
+ // share the same live range, so we just need to check whether they have
+ // any conflict bit in their LaneMask.
+ if (!OtherLI.hasSubRanges()) {
+ LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(Other.SubIdx);
+ return (OtherMask & V.WriteLanes).none() ? CR_Replace : CR_Impossible;
+ }
+
+ // If we are clobbering some active lanes of OtherVNI at VNI->def, it is
+ // impossible to resolve the conflict. Otherwise, we can just replace
+ // OtherVNI because of no real conflict.
+ for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) {
+ LaneBitmask OtherMask =
+ TRI->composeSubRegIndexLaneMask(Other.SubIdx, OtherSR.LaneMask);
+ if ((OtherMask & V.WriteLanes).none())
+ continue;
+
+ auto OtherSRQ = OtherSR.Query(VNI->def);
+ if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) {
+ // VNI is clobbering some lanes of OtherVNI, they have real conflict.
+ return CR_Impossible;
+ }
+ }
+
+ // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI.
+ return CR_Replace;
+ }
+
+ // We need to verify that no instructions are reading the clobbered lanes.
+ // To save compile time, we'll only check that locally. Don't allow the
+ // tainted value to escape the basic block.
MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
return CR_Impossible;
@@ -2959,7 +3024,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
LaneBitmask Lanes) const {
- if (MI.isDebugInstr())
+ if (MI.isDebugOrPseudoInstr())
return false;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
@@ -3006,8 +3071,10 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
MachineBasicBlock::iterator MI = MBB->begin();
if (!VNI->isPHIDef()) {
MI = Indexes->getInstructionFromIndex(VNI->def);
- // No need to check the instruction defining VNI for reads.
- ++MI;
+ if (!VNI->def.isEarlyClobber()) {
+ // No need to check the instruction defining VNI for reads.
+ ++MI;
+ }
}
assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
"Interference ends on VNI->def. Should have been handled earlier");
@@ -3114,6 +3181,13 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}
+// Check if the segment consists of a copied live-through value (i.e. the copy
+// in the block only extended the liveness of an undef value, which we may
+// need to handle).
+static bool isLiveThrough(const LiveQueryResult Q) {
+ return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
+}
+
/// Consider the following situation when coalescing the copy between
/// %31 and %45 at 800. (The vertical lines represent live range segments.)
///
@@ -3196,11 +3270,21 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// with V.OtherVNI.
LIS->extendToIndices(S, EndPoints);
}
+
+ // We may need to eliminate the subrange if the copy introduced a live
+ // out undef value.
+ if (ValueOut->isPHIDef())
+ ShrinkMask |= S.LaneMask;
continue;
}
+
// If a subrange ends at the copy, then a value was copied but only
// partially used later. Shrink the subregister range appropriately.
- if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
+ //
+ // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
+ // conservatively correct.
+ if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
+ (V.Resolution == CR_Erase && isLiveThrough(Q))) {
LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
<< PrintLaneMask(S.LaneMask) << " at " << Def
<< "\n");
@@ -3526,6 +3610,64 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Scan and mark undef any DBG_VALUEs that would refer to a different value.
checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
+ // If the RHS covers any PHI locations that were tracked for debug-info, we
+ // must update tracking information to reflect the join.
+ auto RegIt = RegToPHIIdx.find(CP.getSrcReg());
+ if (RegIt != RegToPHIIdx.end()) {
+ // Iterate over all the debug instruction numbers assigned this register.
+ for (unsigned InstID : RegIt->second) {
+ auto PHIIt = PHIValToPos.find(InstID);
+ assert(PHIIt != PHIValToPos.end());
+ const SlotIndex &SI = PHIIt->second.SI;
+
+ // Does the RHS cover the position of this PHI?
+ auto LII = RHS.find(SI);
+ if (LII == RHS.end() || LII->start > SI)
+ continue;
+
+ // Accept two kinds of subregister movement:
+ // * When we merge from one register class into a larger register:
+ // %1:gr16 = some-inst
+ // ->
+ // %2:gr32.sub_16bit = some-inst
+ // * When the PHI is already in a subregister, and the larger class
+ // is coalesced:
+ // %2:gr32.sub_16bit = some-inst
+ // %3:gr32 = COPY %2
+ // ->
+ // %3:gr32.sub_16bit = some-inst
+ // Test for subregister move:
+ if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0)
+ // If we're moving between different subregisters, ignore this join.
+ // The PHI will not get a location, dropping variable locations.
+ if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx())
+ continue;
+
+ // Update our tracking of where the PHI is.
+ PHIIt->second.Reg = CP.getDstReg();
+
+ // If we merge into a sub-register of a larger class (test above),
+ // update SubReg.
+ if (CP.getSrcIdx() != 0)
+ PHIIt->second.SubReg = CP.getSrcIdx();
+ }
+
+ // Rebuild the register index in RegToPHIIdx to account for PHIs tracking
+ // different VRegs now. Copy old collection of debug instruction numbers and
+ // erase the old one:
+ auto InstrNums = RegIt->second;
+ RegToPHIIdx.erase(RegIt);
+
+ // There might already be PHIs being tracked in the destination VReg. Insert
+ // into an existing tracking collection, or insert a new one.
+ RegIt = RegToPHIIdx.find(CP.getDstReg());
+ if (RegIt != RegToPHIIdx.end())
+ RegIt->second.insert(RegIt->second.end(), InstrNums.begin(),
+ InstrNums.end());
+ else
+ RegToPHIIdx.insert({CP.getDstReg(), InstrNums});
+ }
+
// Join RHS into LHS.
LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
@@ -3565,8 +3707,12 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
// After collecting a block of DBG_VALUEs into ToInsert, enter them into the
// vreg => DbgValueLoc map.
auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
- for (auto *X : ToInsert)
- DbgVRegToValues[X->getDebugOperand(0).getReg()].push_back({Slot, X});
+ for (auto *X : ToInsert) {
+ for (auto Op : X->debug_operands()) {
+ if (Op.isReg() && Op.getReg().isVirtual())
+ DbgVRegToValues[Op.getReg()].push_back({Slot, X});
+ }
+ }
ToInsert.clear();
};
@@ -3578,10 +3724,12 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
for (auto &MI : MBB) {
- if (MI.isDebugValue() && MI.getDebugOperand(0).isReg() &&
- MI.getDebugOperand(0).getReg().isVirtual()) {
- ToInsert.push_back(&MI);
- } else if (!MI.isDebugInstr()) {
+ if (MI.isDebugValue()) {
+ if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isVirtual();
+ }))
+ ToInsert.push_back(&MI);
+ } else if (!MI.isDebugOrPseudoInstr()) {
CurrentSlot = Slots.getInstructionIndex(MI);
CloseNewDVRange(CurrentSlot);
}
@@ -3677,12 +3825,14 @@ void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
if (DbgValueSetIt->first < SegmentIt->end) {
// "Other" is live and there is a DBG_VALUE of Reg: test if we should
// set it undef.
- if (DbgValueSetIt->first >= SegmentIt->start &&
- DbgValueSetIt->second->getDebugOperand(0).getReg() != 0 &&
- ShouldUndef(DbgValueSetIt->first)) {
- // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
- DbgValueSetIt->second->setDebugValueUndef();
- continue;
+ if (DbgValueSetIt->first >= SegmentIt->start) {
+ bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
+ bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
+ if (HasReg && ShouldUndefReg) {
+ // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
+ DbgValueSetIt->second->setDebugValueUndef();
+ continue;
+ }
}
++DbgValueSetIt;
} else {
@@ -3857,21 +4007,20 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
// are not inherently easier to resolve, but slightly preferable until we
// have local live range splitting. In particular this is required by
// cmp+jmp macro fusion.
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E; ++MII) {
- if (!MII->isCopyLike())
+ for (MachineInstr &MI : *MBB) {
+ if (!MI.isCopyLike())
continue;
- bool ApplyTerminalRule = applyTerminalRule(*MII);
- if (isLocalCopy(&(*MII), LIS)) {
+ bool ApplyTerminalRule = applyTerminalRule(MI);
+ if (isLocalCopy(&MI, LIS)) {
if (ApplyTerminalRule)
- LocalTerminals.push_back(&(*MII));
+ LocalTerminals.push_back(&MI);
else
- LocalWorkList.push_back(&(*MII));
+ LocalWorkList.push_back(&MI);
} else {
if (ApplyTerminalRule)
- GlobalTerminals.push_back(&(*MII));
+ GlobalTerminals.push_back(&MI);
else
- WorkList.push_back(&(*MII));
+ WorkList.push_back(&MI);
}
}
// Append the copies evicted by the terminal rule at the end of the list.
@@ -3915,10 +4064,9 @@ void RegisterCoalescer::joinAllIntervals() {
std::vector<MBBPriorityInfo> MBBs;
MBBs.reserve(MF->size());
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
- MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
- JoinSplitEdges && isSplitEdge(MBB)));
+ for (MachineBasicBlock &MBB : *MF) {
+ MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB),
+ JoinSplitEdges && isSplitEdge(&MBB)));
}
array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
@@ -3981,6 +4129,19 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+ // If there are PHIs tracked by debug-info, they will need updating during
+ // coalescing. Build an index of those PHIs to ease updating.
+ SlotIndexes *Slots = LIS->getSlotIndexes();
+ for (const auto &DebugPHI : MF->DebugPHIPositions) {
+ MachineBasicBlock *MBB = DebugPHI.second.MBB;
+ Register Reg = DebugPHI.second.Reg;
+ unsigned SubReg = DebugPHI.second.SubReg;
+ SlotIndex SI = Slots->getMBBStartIdx(MBB);
+ PHIValPos P = {SI, Reg, SubReg};
+ PHIValToPos.insert(std::make_pair(DebugPHI.first, P));
+ RegToPHIIdx[Reg].push_back(DebugPHI.first);
+ }
+
// The MachineScheduler does not currently require JoinSplitEdges. This will
// either be enabled unconditionally or replaced by a more general live range
// splitting optimization.
@@ -4036,6 +4197,18 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
}
}
+ // After coalescing, update any PHIs that are being tracked by debug-info
+ // with their new VReg locations.
+ for (auto &p : MF->DebugPHIPositions) {
+ auto it = PHIValToPos.find(p.first);
+ assert(it != PHIValToPos.end());
+ p.second.Reg = it->second.Reg;
+ p.second.SubReg = it->second.SubReg;
+ }
+
+ PHIValToPos.clear();
+ RegToPHIIdx.clear();
+
LLVM_DEBUG(dump());
if (VerifyCoalescing)
MF->verify(this, "After register coalescing");
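
The coalescer now keeps two side tables for debug-info PHIs: PHIValToPos (debug instruction number to PHI position) and RegToPHIIdx (vreg to instruction numbers). When a source vreg is merged into a destination vreg, tracked PHIs are re-pointed and the reverse index is rebuilt. A simplified standalone model of that bookkeeping, using std::unordered_map instead of DenseMap and made-up register numbers:

#include <iostream>
#include <unordered_map>
#include <vector>

struct PHIValPos {
  unsigned SlotIdx; // where the PHI occurs
  unsigned Reg;     // vreg the PHI lives in
  unsigned SubReg;  // qualifying subregister, 0 if none
};

int main() {
  std::unordered_map<unsigned, PHIValPos> PHIValToPos = {
      {7, {100, /*Reg=*/5, /*SubReg=*/0}}};          // debug instr 7 -> PHI
  std::unordered_map<unsigned, std::vector<unsigned>> RegToPHIIdx = {
      {5, {7}}};                                      // vreg 5 -> {instr 7}

  unsigned SrcReg = 5, DstReg = 9;                    // coalesce %5 into %9

  auto It = RegToPHIIdx.find(SrcReg);
  if (It != RegToPHIIdx.end()) {
    for (unsigned InstID : It->second)
      PHIValToPos[InstID].Reg = DstReg;               // update PHI tracking
    auto InstrNums = It->second;                      // move index entries
    RegToPHIIdx.erase(It);
    auto &Dst = RegToPHIIdx[DstReg];
    Dst.insert(Dst.end(), InstrNums.begin(), InstrNums.end());
  }

  std::cout << "instr 7 now tracks vreg " << PHIValToPos[7].Reg << "\n";
}
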
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index 8f1fc103e869..62a459fca611 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -764,7 +764,7 @@ void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
/// instruction independent of liveness.
void RegPressureTracker::recede(const RegisterOperands &RegOpers,
SmallVectorImpl<RegisterMaskPair> *LiveUses) {
- assert(!CurrPos->isDebugInstr());
+ assert(!CurrPos->isDebugOrPseudoInstr());
// Boost pressure for all dead defs together.
bumpDeadDefs(RegOpers.DeadDefs);
@@ -863,7 +863,7 @@ void RegPressureTracker::recedeSkipDebugValues() {
CurrPos = prev_nodbg(CurrPos, MBB->begin());
SlotIndex SlotIdx;
- if (RequireIntervals && !CurrPos->isDebugInstr())
+ if (RequireIntervals && !CurrPos->isDebugOrPseudoInstr())
SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
// Open the top of the region using slot indexes.
@@ -873,9 +873,9 @@ void RegPressureTracker::recedeSkipDebugValues() {
void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) {
recedeSkipDebugValues();
- if (CurrPos->isDebugValue()) {
- // It's possible to only have debug_value instructions and hit the start of
- // the block.
+ if (CurrPos->isDebugInstr() || CurrPos->isPseudoProbe()) {
+ // It's possible to only have debug_value and pseudo probe instructions and
+ // hit the start of the block.
assert(CurrPos == MBB->begin());
return;
}
@@ -1041,7 +1041,7 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
/// This is intended for speculative queries. It leaves pressure inconsistent
/// with the current position, so must be restored by the caller.
void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
- assert(!MI->isDebugInstr() && "Expect a nondebug instruction.");
+ assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
SlotIndex SlotIdx;
if (RequireIntervals)
@@ -1282,7 +1282,7 @@ LaneBitmask RegPressureTracker::getLiveThroughAt(Register RegUnit,
/// This is intended for speculative queries. It leaves pressure inconsistent
/// with the current position, so must be restored by the caller.
void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
- assert(!MI->isDebugInstr() && "Expect a nondebug instruction.");
+ assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
SlotIndex SlotIdx;
if (RequireIntervals)
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index a833895c115d..e35cf7aa6958 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -119,7 +119,7 @@ void RegScavenger::determineKillsAndDefs() {
DefRegUnits.reset();
for (const MachineOperand &MO : MI.operands()) {
if (MO.isRegMask()) {
- TmpRegUnits.clear();
+ TmpRegUnits.reset();
for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) {
for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
if (MO.clobbersPhysReg(*RURI)) {
@@ -167,16 +167,15 @@ void RegScavenger::forward() {
MachineInstr &MI = *MBBI;
- for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
- IE = Scavenged.end(); I != IE; ++I) {
- if (I->Restore != &MI)
+ for (ScavengedInfo &I : Scavenged) {
+ if (I.Restore != &MI)
continue;
- I->Reg = 0;
- I->Restore = nullptr;
+ I.Reg = 0;
+ I.Restore = nullptr;
}
- if (MI.isDebugInstr())
+ if (MI.isDebugOrPseudoInstr())
return;
determineKillsAndDefs();
@@ -299,7 +298,7 @@ Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
bool inVirtLiveRange = false;
for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
- if (MI->isDebugInstr()) {
+ if (MI->isDebugOrPseudoInstr()) {
++InstrLimit; // Don't count debug instructions
continue;
}
@@ -370,6 +369,10 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
LiveRegUnits Used(TRI);
+ assert(From->getParent() == To->getParent() &&
+ "Target instruction is in other than current basic block, use "
+ "enterBasicBlockEnd first");
+
for (MachineBasicBlock::iterator I = From;; --I) {
const MachineInstr &MI = *I;
@@ -424,6 +427,8 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI,
if (I == MBB.begin())
break;
}
+ assert(I != MBB.begin() && "Did not find target instruction while "
+ "iterating backwards");
}
return std::make_pair(Survivor, Pos);
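
A small illustration of why the scavenger hunk switches TmpRegUnits.clear() to TmpRegUnits.reset(): for llvm::BitVector, clear() drops the size to zero, while reset() keeps the size and only zeroes the bits, which the following TmpRegUnits.set(RU) calls rely on. This sketch builds against LLVM headers and is not part of the patch itself:

#include "llvm/ADT/BitVector.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::BitVector Units(8);
  Units.set(3);

  Units.reset(); // size preserved, all bits zero
  llvm::outs() << "after reset(): size = " << Units.size() << "\n"; // 8

  Units.clear(); // size drops to zero
  llvm::outs() << "after clear(): size = " << Units.size() << "\n"; // 0
  return 0;
}
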
diff --git a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
new file mode 100644
index 000000000000..de6129a912d3
--- /dev/null
+++ b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
@@ -0,0 +1,231 @@
+//===- RemoveRedundantDebugValues.cpp - Remove Redundant Debug Value MIs --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+/// \file RemoveRedundantDebugValues.cpp
+///
+/// The RemoveRedundantDebugValues pass removes redundant DBG_VALUEs that
+/// appear in MIR after the register allocator.
+
+#define DEBUG_TYPE "removeredundantdebugvalues"
+
+using namespace llvm;
+
+STATISTIC(NumRemovedBackward, "Number of DBG_VALUEs removed (backward scan)");
+STATISTIC(NumRemovedForward, "Number of DBG_VALUEs removed (forward scan)");
+
+namespace {
+
+class RemoveRedundantDebugValues : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RemoveRedundantDebugValues();
+
+ bool reduceDbgValues(MachineFunction &MF);
+
+ /// Remove redundant debug value MIs for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char RemoveRedundantDebugValues::ID = 0;
+
+char &llvm::RemoveRedundantDebugValuesID = RemoveRedundantDebugValues::ID;
+
+INITIALIZE_PASS(RemoveRedundantDebugValues, DEBUG_TYPE,
+ "Remove Redundant DEBUG_VALUE analysis", false, false)
+
+/// Default construct and initialize the pass.
+RemoveRedundantDebugValues::RemoveRedundantDebugValues()
+ : MachineFunctionPass(ID) {
+ initializeRemoveRedundantDebugValuesPass(*PassRegistry::getPassRegistry());
+}
+
+// This analysis aims to remove redundant DBG_VALUEs by going forward
+// in the basic block, treating the first DBG_VALUE for a variable as valid
+// until its first (location) operand is clobbered or modified.
+// For example:
+// (1) DBG_VALUE $edi, !"var1", ...
+// (2) <block of code that does not affect $edi>
+// (3) DBG_VALUE $edi, !"var1", ...
+// ...
+// in this case, we can remove (3).
+// TODO: Support DBG_VALUE_LIST and other debug instructions.
+static bool reduceDbgValsForwardScan(MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "\n == Forward Scan == \n");
+
+ SmallVector<MachineInstr *, 8> DbgValsToBeRemoved;
+ DenseMap<DebugVariable, std::pair<MachineOperand *, const DIExpression *>>
+ VariableMap;
+ const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+
+ for (auto &MI : MBB) {
+ if (MI.isDebugValue()) {
+ DebugVariable Var(MI.getDebugVariable(), NoneType(),
+ MI.getDebugLoc()->getInlinedAt());
+ auto VMI = VariableMap.find(Var);
+ // Just stop tracking this variable, until we cover DBG_VALUE_LIST.
+ // 1 DBG_VALUE $rax, "x", DIExpression()
+ // ...
+ // 2 DBG_VALUE_LIST "x", DIExpression(...), $rax, $rbx
+ // ...
+ // 3 DBG_VALUE $rax, "x", DIExpression()
+ if (MI.isDebugValueList() && VMI != VariableMap.end()) {
+ VariableMap.erase(VMI);
+ continue;
+ }
+
+ MachineOperand &Loc = MI.getDebugOperand(0);
+ if (!Loc.isReg()) {
+        // If it's not a register, just stop tracking this variable.
+ if (VMI != VariableMap.end())
+ VariableMap.erase(VMI);
+ continue;
+ }
+
+ // We have found a new value for a variable.
+ if (VMI == VariableMap.end() ||
+ VMI->second.first->getReg() != Loc.getReg() ||
+ VMI->second.second != MI.getDebugExpression()) {
+ VariableMap[Var] = {&Loc, MI.getDebugExpression()};
+ continue;
+ }
+
+ // Found an identical DBG_VALUE, so it can be considered
+ // for later removal.
+ DbgValsToBeRemoved.push_back(&MI);
+ }
+
+ if (MI.isMetaInstruction())
+ continue;
+
+ // Stop tracking any location that is clobbered by this instruction.
+ for (auto &Var : VariableMap) {
+ auto &LocOp = Var.second.first;
+ if (MI.modifiesRegister(LocOp->getReg(), TRI))
+ VariableMap.erase(Var.first);
+ }
+ }
+
+ for (auto &Instr : DbgValsToBeRemoved) {
+ LLVM_DEBUG(dbgs() << "removing "; Instr->dump());
+ Instr->eraseFromParent();
+ ++NumRemovedForward;
+ }
+
+ return !DbgValsToBeRemoved.empty();
+}
+
+// This analysis aims to remove redundant DBG_VALUEs by going backward
+// in the basic block and removing all but the last DBG_VALUE for any
+// given variable in a set of consecutive DBG_VALUE instructions.
+// For example:
+// (1) DBG_VALUE $edi, !"var1", ...
+// (2) DBG_VALUE $esi, !"var2", ...
+// (3) DBG_VALUE $edi, !"var1", ...
+// ...
+// in this case, we can remove (1).
+static bool reduceDbgValsBackwardScan(MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "\n == Backward Scan == \n");
+ SmallVector<MachineInstr *, 8> DbgValsToBeRemoved;
+ SmallDenseSet<DebugVariable> VariableSet;
+
+ for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+ I != E; ++I) {
+ MachineInstr *MI = &*I;
+
+ if (MI->isDebugValue()) {
+ DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
+ MI->getDebugLoc()->getInlinedAt());
+ auto R = VariableSet.insert(Var);
+ // If it is a DBG_VALUE describing a constant as:
+ // DBG_VALUE 0, ...
+ // we just don't consider such instructions as candidates
+ // for redundant removal.
+ if (MI->isNonListDebugValue()) {
+ MachineOperand &Loc = MI->getDebugOperand(0);
+ if (!Loc.isReg()) {
+ // If we have already encountered this variable, just stop
+ // tracking it.
+ if (!R.second)
+ VariableSet.erase(Var);
+ continue;
+ }
+ }
+
+ // We have already encountered the value for this variable,
+ // so this one can be deleted.
+ if (!R.second)
+ DbgValsToBeRemoved.push_back(MI);
+ continue;
+ }
+
+ // If we encountered a non-DBG_VALUE, try to find the next
+ // sequence with consecutive DBG_VALUE instructions.
+ VariableSet.clear();
+ }
+
+ for (auto &Instr : DbgValsToBeRemoved) {
+ LLVM_DEBUG(dbgs() << "removing "; Instr->dump());
+ Instr->eraseFromParent();
+ ++NumRemovedBackward;
+ }
+
+ return !DbgValsToBeRemoved.empty();
+}
+
+bool RemoveRedundantDebugValues::reduceDbgValues(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "\nDebug Value Reduction\n");
+
+ bool Changed = false;
+
+ for (auto &MBB : MF) {
+ Changed |= reduceDbgValsBackwardScan(MBB);
+ Changed |= reduceDbgValsForwardScan(MBB);
+ }
+
+ return Changed;
+}
+
+bool RemoveRedundantDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ // Skip functions without debugging information.
+ if (!MF.getFunction().getSubprogram())
+ return false;
+
+ // Skip functions from NoDebug compilation units.
+ if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return false;
+
+ bool Changed = reduceDbgValues(MF);
+ return Changed;
+}
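
A standalone model of the forward scan in this new pass, using plain strings in place of DBG_VALUE machine instructions: a DBG_VALUE that repeats a variable's current, unclobbered location is dropped, and a real instruction that clobbers a location stops tracking for variables using it. Everything below is illustrative, not the pass's real data structures:

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Inst {
  bool IsDbgValue;
  std::string Var; // variable name (only meaningful for DBG_VALUEs)
  std::string Loc; // register described, or register clobbered otherwise
};

int main() {
  std::vector<Inst> Block = {
      {true, "var1", "edi"},  // (1)
      {false, "", "esi"},     // real instruction, clobbers esi only
      {true, "var1", "edi"},  // (3) redundant: edi unchanged since (1)
  };

  // Forward scan: remember each variable's live location, drop repeats.
  std::map<std::string, std::string> VariableMap;
  std::vector<size_t> ToRemove;
  for (size_t I = 0; I != Block.size(); ++I) {
    const Inst &MI = Block[I];
    if (MI.IsDbgValue) {
      auto It = VariableMap.find(MI.Var);
      if (It != VariableMap.end() && It->second == MI.Loc)
        ToRemove.push_back(I);        // identical DBG_VALUE
      else
        VariableMap[MI.Var] = MI.Loc; // new value for the variable
      continue;
    }
    // A real instruction clobbers MI.Loc: stop tracking locations using it.
    for (auto It = VariableMap.begin(); It != VariableMap.end();) {
      if (It->second == MI.Loc)
        It = VariableMap.erase(It);
      else
        ++It;
    }
  }

  for (size_t I : ToRemove)
    std::cout << "forward scan would remove DBG_VALUE at index " << I << "\n";
  return 0;
}
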
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
new file mode 100644
index 000000000000..1619381967c4
--- /dev/null
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -0,0 +1,254 @@
+//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls ====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
+// with vector operands) with matching calls to functions from a vector
+// library (e.g., libmvec, SVML) according to TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ReplaceWithVeclib.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "replace-with-veclib"
+
+STATISTIC(NumCallsReplaced,
+ "Number of calls to intrinsics that have been replaced.");
+
+STATISTIC(NumTLIFuncDeclAdded,
+ "Number of vector library function declarations added.");
+
+STATISTIC(NumFuncUsedAdded,
+ "Number of functions added to `llvm.compiler.used`");
+
+static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
+ Module *M = CI.getModule();
+
+ Function *OldFunc = CI.getCalledFunction();
+
+ // Check if the vector library function is already declared in this module,
+ // otherwise insert it.
+ Function *TLIFunc = M->getFunction(TLIName);
+ if (!TLIFunc) {
+ TLIFunc = Function::Create(OldFunc->getFunctionType(),
+ Function::ExternalLinkage, TLIName, *M);
+ TLIFunc->copyAttributesFrom(OldFunc);
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
+ << TLIName << "` of type `" << *(TLIFunc->getType())
+ << "` to module.\n");
+
+ ++NumTLIFuncDeclAdded;
+
+ // Add the freshly created function to llvm.compiler.used,
+    // similar to how it is done in InjectTLIMappings.
+ appendToCompilerUsed(*M, {TLIFunc});
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
+ << "` to `@llvm.compiler.used`.\n");
+ ++NumFuncUsedAdded;
+ }
+
+ // Replace the call to the vector intrinsic with a call
+ // to the corresponding function from the vector library.
+ IRBuilder<> IRBuilder(&CI);
+ SmallVector<Value *> Args(CI.arg_operands());
+ // Preserve the operand bundles.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CI.getOperandBundlesAsDefs(OpBundles);
+ CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
+ assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
+ "Expecting function types to be identical");
+ CI.replaceAllUsesWith(Replacement);
+ if (isa<FPMathOperator>(Replacement)) {
+ // Preserve fast math flags for FP math.
+ Replacement->copyFastMathFlags(&CI);
+ }
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
+ << OldFunc->getName() << "` with call to `" << TLIName
+ << "`.\n");
+ ++NumCallsReplaced;
+ return true;
+}
+
+static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
+ CallInst &CI) {
+ if (!CI.getCalledFunction()) {
+ return false;
+ }
+
+ auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
+ if (IntrinsicID == Intrinsic::not_intrinsic) {
+ // Replacement is only performed for intrinsic functions
+ return false;
+ }
+
+ // Convert vector arguments to scalar type and check that
+ // all vector operands have identical vector width.
+ ElementCount VF = ElementCount::getFixed(0);
+ SmallVector<Type *> ScalarTypes;
+ for (auto Arg : enumerate(CI.arg_operands())) {
+ auto *ArgType = Arg.value()->getType();
+ // Vector calls to intrinsics can still have
+ // scalar operands for specific arguments.
+ if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) {
+ ScalarTypes.push_back(ArgType);
+ } else {
+ // The argument in this place should be a vector if
+ // this is a call to a vector intrinsic.
+ auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
+ if (!VectorArgTy) {
+ // The argument is not a vector, do not perform
+ // the replacement.
+ return false;
+ }
+ ElementCount NumElements = VectorArgTy->getElementCount();
+ if (NumElements.isScalable()) {
+ // The current implementation does not support
+ // scalable vectors.
+ return false;
+ }
+ if (VF.isNonZero() && VF != NumElements) {
+        // The arguments differ in vector size.
+ return false;
+ } else {
+ VF = NumElements;
+ }
+ ScalarTypes.push_back(VectorArgTy->getElementType());
+ }
+ }
+
+ // Try to reconstruct the name for the scalar version of this
+ // intrinsic using the intrinsic ID and the argument types
+ // converted to scalar above.
+ std::string ScalarName;
+ if (Intrinsic::isOverloaded(IntrinsicID)) {
+ ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule());
+ } else {
+ ScalarName = Intrinsic::getName(IntrinsicID).str();
+ }
+
+ if (!TLI.isFunctionVectorizable(ScalarName)) {
+ // The TargetLibraryInfo does not contain a vectorized version of
+ // the scalar function.
+ return false;
+ }
+
+ // Try to find the mapping for the scalar version of this intrinsic
+ // and the exact vector width of the call operands in the
+ // TargetLibraryInfo.
+ const std::string TLIName =
+ std::string(TLI.getVectorizedFunction(ScalarName, VF));
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
+ << ScalarName << "` and vector width " << VF << ".\n");
+
+ if (!TLIName.empty()) {
+ // Found the correct mapping in the TargetLibraryInfo,
+ // replace the call to the intrinsic with a call to
+ // the vector library function.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
+ << "`.\n");
+ return replaceWithTLIFunction(CI, TLIName);
+ }
+
+ return false;
+}
+
+static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
+ bool Changed = false;
+ SmallVector<CallInst *> ReplacedCalls;
+ for (auto &I : instructions(F)) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ if (replaceWithCallToVeclib(TLI, *CI)) {
+ ReplacedCalls.push_back(CI);
+ Changed = true;
+ }
+ }
+ }
+ // Erase the calls to the intrinsics that have been replaced
+ // with calls to the vector library.
+ for (auto *CI : ReplacedCalls) {
+ CI->eraseFromParent();
+ }
+ return Changed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// New pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+PreservedAnalyses ReplaceWithVeclib::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto Changed = runImpl(TLI, F);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<TargetLibraryAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<LoopAccessAnalysis>();
+ PA.preserve<DemandedBitsAnalysis>();
+ PA.preserve<OptimizationRemarkEmitterAnalysis>();
+ return PA;
+ } else {
+ // The pass did not replace any calls, hence it preserves all analyses.
+ return PreservedAnalyses::all();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy PM Implementation.
+////////////////////////////////////////////////////////////////////////////////
+bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) {
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ return runImpl(TLI, F);
+}
+
+void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<LoopAccessLegacyAnalysis>();
+ AU.addPreserved<DemandedBitsWrapperPass>();
+ AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy Pass manager initialization
+////////////////////////////////////////////////////////////////////////////////
+char ReplaceWithVeclibLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+ "Replace intrinsics with calls to vector library", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+ "Replace intrinsics with calls to vector library", false,
+ false)
+
+FunctionPass *llvm::createReplaceWithVeclibLegacyPass() {
+ return new ReplaceWithVeclibLegacy();
+}
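
Reduced to plain C++, the lookup this pass performs is: take the reconstructed scalar intrinsic name and the fixed vector width of the call, consult a table of vector-library mappings (via TLI.isFunctionVectorizable/getVectorizedFunction in the real code), and return the library function name or nothing. The table contents below are hypothetical examples, not a statement about any particular vector library:

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <utility>

using Key = std::pair<std::string, unsigned>; // (scalar name, vector width)

static std::optional<std::string>
getVectorizedFunction(const std::map<Key, std::string> &Table,
                      const std::string &ScalarName, unsigned VF) {
  auto It = Table.find({ScalarName, VF});
  if (It == Table.end())
    return std::nullopt;
  return It->second;
}

int main() {
  // Hypothetical mappings, in the spirit of a TLI veclib table.
  std::map<Key, std::string> Table = {
      {{"llvm.sin.f64", 2}, "_ZGVbN2v_sin"},
      {{"llvm.sin.f64", 4}, "_ZGVdN4v_sin"},
  };

  if (auto Name = getVectorizedFunction(Table, "llvm.sin.f64", 4))
    std::cout << "replace call with " << *Name << "\n";
  else
    std::cout << "no veclib mapping; leave the intrinsic alone\n";
  return 0;
}
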
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index 31797631c97b..94add920f284 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -130,6 +131,7 @@ class SafeStack {
Function &F;
const TargetLoweringBase &TL;
const DataLayout &DL;
+ DomTreeUpdater *DTU;
ScalarEvolution &SE;
Type *StackPtrTy;
@@ -207,8 +209,8 @@ class SafeStack {
public:
SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL,
- ScalarEvolution &SE)
- : F(F), TL(TL), DL(DL), SE(SE),
+ DomTreeUpdater *DTU, ScalarEvolution &SE)
+ : F(F), TL(TL), DL(DL), DTU(DTU), SE(SE),
StackPtrTy(Type::getInt8PtrTy(F.getContext())),
IntPtrTy(DL.getIntPtrType(F.getContext())),
Int32Ty(Type::getInt32Ty(F.getContext())),
@@ -371,9 +373,13 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
Value *StackGuardVar = TL.getIRStackGuard(IRB);
- if (!StackGuardVar)
- StackGuardVar =
- F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
+ Module *M = F.getParent();
+
+ if (!StackGuardVar) {
+ TL.insertSSPDeclarations(*M);
+ return IRB.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackguard));
+ }
+
return IRB.CreateLoad(StackPtrTy, StackGuardVar, "StackGuard");
}
@@ -419,8 +425,7 @@ void SafeStack::findInsts(Function &F,
for (Argument &Arg : F.args()) {
if (!Arg.hasByValAttr())
continue;
- uint64_t Size =
- DL.getTypeStoreSize(Arg.getType()->getPointerElementType());
+ uint64_t Size = DL.getTypeStoreSize(Arg.getParamByValType());
if (IsSafeStackAlloca(&Arg, Size))
continue;
@@ -477,8 +482,7 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI,
.createBranchWeights(SuccessProb.getNumerator(),
FailureProb.getNumerator());
Instruction *CheckTerm =
- SplitBlockAndInsertIfThen(Cmp, &RI,
- /* Unreachable */ true, Weights);
+ SplitBlockAndInsertIfThen(Cmp, &RI, /* Unreachable */ true, Weights, DTU);
IRBuilder<> IRBFail(CheckTerm);
// FIXME: respect -fsanitize-trap / -ftrap-function here?
FunctionCallee StackChkFail =
@@ -522,7 +526,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
}
for (Argument *Arg : ByValArguments) {
- Type *Ty = Arg->getType()->getPointerElementType();
+ Type *Ty = Arg->getParamByValType();
uint64_t Size = DL.getTypeStoreSize(Ty);
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
@@ -579,7 +583,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
for (Argument *Arg : ByValArguments) {
unsigned Offset = SSL.getObjectOffset(Arg);
MaybeAlign Align(SSL.getObjectAlignment(Arg));
- Type *Ty = Arg->getType()->getPointerElementType();
+ Type *Ty = Arg->getParamByValType();
uint64_t Size = DL.getTypeStoreSize(Ty);
if (Size == 0)
@@ -864,6 +868,7 @@ public:
AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
bool runOnFunction(Function &F) override {
@@ -893,15 +898,34 @@ public:
// Compute DT and LI only for functions that have the attribute.
// This is only useful because the legacy pass manager doesn't let us
// compute analyzes lazily.
- // In the backend pipeline, nothing preserves DT before SafeStack, so we
- // would otherwise always compute it wastefully, even if there is no
- // function with the safestack attribute.
- DominatorTree DT(F);
- LoopInfo LI(DT);
- ScalarEvolution SE(F, TLI, ACT, DT, LI);
+ DominatorTree *DT;
+ bool ShouldPreserveDominatorTree;
+ Optional<DominatorTree> LazilyComputedDomTree;
+
+    // Do we already have a DominatorTree available from the previous pass?
+ // Note that we should *NOT* require it, to avoid the case where we end up
+ // not needing it, but the legacy PM would have computed it for us anyways.
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DT = &DTWP->getDomTree();
+ ShouldPreserveDominatorTree = true;
+ } else {
+ // Otherwise, we need to compute it.
+ LazilyComputedDomTree.emplace(F);
+ DT = LazilyComputedDomTree.getPointer();
+ ShouldPreserveDominatorTree = false;
+ }
+
+ // Likewise, lazily compute loop info.
+ LoopInfo LI(*DT);
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ ScalarEvolution SE(F, TLI, ACT, *DT, LI);
- return SafeStack(F, *TL, *DL, SE).run();
+ return SafeStack(F, *TL, *DL, ShouldPreserveDominatorTree ? &DTU : nullptr,
+ SE)
+ .run();
}
};
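
The legacy SafeStack pass above reuses a DominatorTree from a previous pass when one is available and only computes its own otherwise, remembering whether the cached tree must be kept up to date. A generic sketch of that reuse-or-lazily-compute pattern; DomTree and getCachedDomTree are placeholders, not LLVM API:

#include <iostream>
#include <optional>

struct DomTree {                 // stand-in for an expensive analysis result
  DomTree() { std::cout << "computing dominator tree\n"; }
};

static DomTree *getCachedDomTree() { return nullptr; } // none available here

int main() {
  DomTree *DT = nullptr;
  bool ShouldPreserve = false;
  std::optional<DomTree> Lazy;   // storage for the lazily built copy

  if (DomTree *Cached = getCachedDomTree()) {
    DT = Cached;                 // reuse and keep it up to date
    ShouldPreserve = true;
  } else {
    Lazy.emplace();              // compute only because we actually need it
    DT = &*Lazy;
  }

  std::cout << "preserve existing tree: " << std::boolalpha << ShouldPreserve
            << " (DT=" << static_cast<const void *>(DT) << ")\n";
  return 0;
}
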
@@ -912,6 +936,7 @@ char SafeStackLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE,
"Safe Stack instrumentation pass", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SafeStackLegacyPass, DEBUG_TYPE,
"Safe Stack instrumentation pass", false, false)
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 5899da777fe9..daff3af3bc3c 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -514,7 +514,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
- assert(!MI->isDebugInstr());
+ assert(!MI->isDebugOrPseudoInstr());
const MachineOperand &MO = MI->getOperand(OperIdx);
Register Reg = MO.getReg();
@@ -572,7 +572,7 @@ void ScheduleDAGInstrs::initSUnits() {
SUnits.reserve(NumRegionInstrs);
for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
- if (MI.isDebugInstr())
+ if (MI.isDebugOrPseudoInstr())
continue;
SUnit *SU = newSUnit(&MI);
@@ -807,11 +807,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
DbgMI = nullptr;
}
- if (MI.isDebugValue() || MI.isDebugRef()) {
+ if (MI.isDebugValue() || MI.isDebugPHI()) {
DbgMI = &MI;
continue;
}
- if (MI.isDebugLabel())
+
+ if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
continue;
SUnit *SU = MISUnitMap[&MI];
@@ -1117,7 +1118,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
// Examine block from end to start...
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
- if (MI.isDebugInstr())
+ if (MI.isDebugOrPseudoInstr())
continue;
// Update liveness. Registers that are defed but not used in this
@@ -1152,7 +1153,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
while (I->isBundledWithSucc())
++I;
do {
- if (!I->isDebugInstr())
+ if (!I->isDebugOrPseudoInstr())
toggleKills(MRI, LiveRegs, *I, true);
--I;
} while (I != Bundle);
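
On the isDebugInstr() to isDebugOrPseudoInstr() changes in this file and in RegisterPressure.cpp: pseudo probes, like debug values, lower to no machine code, so the scheduler and pressure tracker skip them the same way and never create scheduling units for them. A toy model of widening such a skip predicate, with an illustrative instruction-kind enum:

#include <iostream>
#include <vector>

enum class Kind { Normal, DebugValue, PseudoProbe };

static bool isDebugOrPseudo(Kind K) {
  return K == Kind::DebugValue || K == Kind::PseudoProbe;
}

int main() {
  std::vector<Kind> Block = {Kind::Normal, Kind::DebugValue, Kind::PseudoProbe,
                             Kind::Normal};

  unsigned ScheduledUnits = 0;
  for (Kind K : Block) {
    if (isDebugOrPseudo(K))
      continue;               // neither kind becomes a scheduling unit
    ++ScheduledUnits;
  }

  std::cout << "scheduling units created: " << ScheduledUnits << "\n"; // 2
  return 0;
}
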
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 615bea2a4905..b104e995019f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -461,8 +461,7 @@ namespace {
SDValue visitAssertExt(SDNode *N);
SDValue visitAssertAlign(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
- SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
- SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
+ SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitFREEZE(SDNode *N);
@@ -547,8 +546,11 @@ namespace {
SDValue foldSignChangeInBitcast(SDNode *N);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
+ SDValue foldSelectOfBinops(SDNode *N);
+ SDValue foldSextSetcc(SDNode *N);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
+ SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
SDValue unfoldMaskedMerge(SDNode *N);
SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
@@ -1673,8 +1675,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::AssertZext: return visitAssertExt(N);
case ISD::AssertAlign: return visitAssertAlign(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
- case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
- case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
@@ -2259,9 +2261,9 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return FoldedVOp;
// fold (add x, 0) -> x, vector edition
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
}
@@ -2337,6 +2339,23 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
return RADD;
+
+ // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
+ // equivalent to (add x, c).
+ auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
+ if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
+ DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
+ return DAG.getNode(ISD::ADD, DL, VT,
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+ N0.getOperand(1));
+ }
+ return SDValue();
+ };
+ if (SDValue Add = ReassociateAddOr(N0, N1))
+ return Add;
+ if (SDValue Add = ReassociateAddOr(N1, N0))
+ return Add;
}
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
@@ -2502,6 +2521,26 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
}
+  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
+ if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ N1.getOpcode() == ISD::STEP_VECTOR) {
+ const APInt &C0 = N0->getConstantOperandAPInt(0);
+ const APInt &C1 = N1->getConstantOperandAPInt(0);
+ APInt NewStep = C0 + C1;
+ return DAG.getStepVector(DL, VT, NewStep);
+ }
+
+ // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
+ if ((N0.getOpcode() == ISD::ADD) &&
+ (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
+ (N1.getOpcode() == ISD::STEP_VECTOR)) {
+ const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+ const APInt &SV1 = N1->getConstantOperandAPInt(0);
+ APInt NewStep = SV0 + SV1;
+ SDValue SV = DAG.getStepVector(DL, VT, NewStep);
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
+ }
+
return SDValue();
}
@@ -2517,9 +2556,9 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
// TODO SimplifyVBinOp
// fold (add_sat x, 0) -> x, vector edition
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
}
@@ -3125,6 +3164,82 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
return SDValue();
}
+// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
+// clamp/truncation if necessary.
+static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ const SDLoc &DL) {
+ assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
+ "Illegal truncation");
+
+ if (DstVT == SrcVT)
+ return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
+
+ // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
+ // clamping RHS.
+ APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+ DstVT.getScalarSizeInBits());
+ if (!DAG.MaskedValueIsZero(LHS, UpperBits))
+ return SDValue();
+
+ SDValue SatLimit =
+ DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
+ DstVT.getScalarSizeInBits()),
+ DL, SrcVT);
+ RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
+ RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
+ LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
+ return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
+}
+
+// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
+// usubsat(a,b), optionally as a truncated type.
+SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
+ if (N->getOpcode() != ISD::SUB ||
+ !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
+ return SDValue();
+
+ EVT SubVT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+  // Try to find umax(a,b) - b or a - umin(a,b) patterns;
+ // they may be converted to usubsat(a,b).
+ if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
+ SDValue MaxLHS = Op0.getOperand(0);
+ SDValue MaxRHS = Op0.getOperand(1);
+ if (MaxLHS == Op1)
+ return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
+ if (MaxRHS == Op1)
+ return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
+ }
+
+ if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
+ SDValue MinLHS = Op1.getOperand(0);
+ SDValue MinRHS = Op1.getOperand(1);
+ if (MinLHS == Op0)
+ return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
+ if (MinRHS == Op0)
+ return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
+ }
+
+ // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
+ if (Op1.getOpcode() == ISD::TRUNCATE &&
+ Op1.getOperand(0).getOpcode() == ISD::UMIN &&
+ Op1.getOperand(0).hasOneUse()) {
+ SDValue MinLHS = Op1.getOperand(0).getOperand(0);
+ SDValue MinRHS = Op1.getOperand(0).getOperand(1);
+ if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
+ return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
+ DAG, SDLoc(N));
+ if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
+ return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
+ DAG, SDLoc(N));
+ }
+
+ return SDValue();
+}
+
// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
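
A quick standalone check of the identities the foldSubToUSubSat combine above relies on, for 8-bit unsigned values: umax(a,b) - b == usubsat(a,b) and a - umin(a,b) == usubsat(a,b). Exhaustive over all 8-bit pairs; plain C++, not DAG code:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>

static uint8_t usubsat(uint8_t A, uint8_t B) {
  return A > B ? static_cast<uint8_t>(A - B) : 0; // saturate at zero
}

int main() {
  for (unsigned A = 0; A != 256; ++A) {
    for (unsigned B = 0; B != 256; ++B) {
      uint8_t a = static_cast<uint8_t>(A), b = static_cast<uint8_t>(B);
      assert(static_cast<uint8_t>(std::max(a, b) - b) == usubsat(a, b));
      assert(static_cast<uint8_t>(a - std::min(a, b)) == usubsat(a, b));
    }
  }
  std::cout << "umax/umin forms match usubsat for all 8-bit pairs\n";
  return 0;
}
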
@@ -3148,7 +3263,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
}
@@ -3207,6 +3322,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
!TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
TLI.expandABS(N1.getNode(), Result, DAG, true))
return Result;
+
+  // Fold neg(splat(neg(x))) -> splat(x)
+ if (VT.isVector()) {
+ SDValue N1S = DAG.getSplatValue(N1, true);
+ if (N1S && N1S.getOpcode() == ISD::SUB &&
+ isNullConstant(N1S.getOperand(0))) {
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
+ return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
+ }
+ }
}
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
@@ -3343,6 +3469,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
return V;
+ if (SDValue V = foldSubToUSubSat(VT, N))
+ return V;
+
// (x - y) - 1 -> add (xor y, -1), x
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
@@ -3434,12 +3563,19 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
- // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
+ // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
if (N1.getOpcode() == ISD::VSCALE) {
const APInt &IntVal = N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
}
+ // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
+ if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
+ APInt NewStep = -N1.getConstantOperandAPInt(0);
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getStepVector(DL, VT, NewStep));
+ }
+
// Prefer an add for more folding potential and possibly better codegen:
// sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
@@ -3478,7 +3614,7 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
// TODO SimplifyVBinOp
// fold (sub_sat x, 0) -> x, vector edition
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
}
@@ -3814,6 +3950,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return DAG.getVScale(SDLoc(N), VT, C0 * C1);
}
+ // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
+ APInt MulVal;
+ if (N0.getOpcode() == ISD::STEP_VECTOR)
+ if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ APInt NewStep = C0 * MulVal;
+ return DAG.getStepVector(SDLoc(N), VT, NewStep);
+ }
+
// Fold ((mul x, 0/undef) -> 0,
// (mul x, 1) -> x) -> x)
// -> and(x, mask)
@@ -4323,11 +4468,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (VT.isVector()) {
// fold (mulhs x, 0) -> 0
// do not return N0/N1, because undef node may exist.
- if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
- ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
+ ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, DL, VT);
}
+ // fold (mulhs c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
+ return C;
+
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4371,11 +4520,15 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (VT.isVector()) {
// fold (mulhu x, 0) -> 0
// do not return N0/N1, because undef node may exist.
- if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
- ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
+ ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, DL, VT);
}
+ // fold (mulhu c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
+ return C;
+
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4551,6 +4704,21 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // fold operation with constant operands.
+ // TODO: Move this to FoldConstantArithmetic when it supports nodes with
+ // multiple results.
+ if (N0C && N1C) {
+ bool Overflow;
+ APInt Result =
+ IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
+ : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
+ return CombineTo(N, DAG.getConstant(Result, DL, VT),
+ DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
+ }
+
// canonicalize constant to RHS.
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -4562,10 +4730,37 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
// (mulo x, 2) -> (addo x, x)
- if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
- if (C2->getAPIntValue() == 2)
- return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
- N->getVTList(), N0, N0);
+ if (N1C && N1C->getAPIntValue() == 2)
+ return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
+ N->getVTList(), N0, N0);
+
+ if (IsSigned) {
+ // A 1 bit SMULO overflows if both inputs are 1.
+ if (VT.getScalarSizeInBits() == 1) {
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
+ return CombineTo(N, And,
+ DAG.getSetCC(DL, CarryVT, And,
+ DAG.getConstant(0, DL, VT), ISD::SETNE));
+ }
+
+ // Multiplying n * m significant bits yields a result of n + m significant
+ // bits. If the total number of significant bits does not exceed the
+ // result bit width (minus 1), there is no overflow.
+ unsigned SignBits = DAG.ComputeNumSignBits(N0);
+ if (SignBits > 1)
+ SignBits += DAG.ComputeNumSignBits(N1);
+ if (SignBits > VT.getScalarSizeInBits() + 1)
+ return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+ } else {
+ KnownBits N1Known = DAG.computeKnownBits(N1);
+ KnownBits N0Known = DAG.computeKnownBits(N0);
+ bool Overflow;
+ (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
+ if (!Overflow)
+ return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+ }
return SDValue();
}
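The new unsigned check reasons purely from known bits: if even the operands'
maximum possible values cannot overflow, the real product cannot either, so
the carry is known zero. A scalar model of the MULO semantics themselves,
assuming a GCC/Clang-style overflow builtin (illustrative, not from the
patch):

    #include <cstdint>

    // Returns true iff a * b overflows 32 bits; when it does not, the plain
    // product is the result, matching the "multiply + zero carry" rewrite.
    static bool umulo32(uint32_t a, uint32_t b, uint32_t &r) {
      return __builtin_mul_overflow(a, b, &r);
    }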
@@ -4883,20 +5078,20 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
ConstantSDNode *C0 = isConstOrConstSplat(LR);
ConstantSDNode *C1 = isConstOrConstSplat(RR);
if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
- // Canonicalize larger constant as C0.
- if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
- std::swap(C0, C1);
-
+ const APInt &CMax =
+ APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
+ const APInt &CMin =
+ APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
// The difference of the constants must be a single bit.
- const APInt &C0Val = C0->getAPIntValue();
- const APInt &C1Val = C1->getAPIntValue();
- if ((C0Val - C1Val).isPowerOf2()) {
- // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
- // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
- SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
- SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
- SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
+ if ((CMax - CMin).isPowerOf2()) {
+ // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
+ // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
+ SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
+ SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
+ SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
+ SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
SDValue Zero = DAG.getConstant(0, DL, OpVT);
return DAG.getSetCC(DL, VT, And, Zero, CC0);
}
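One concrete instance of the rewritten fold, with CMin = 8 and CMax = 12 so
that CMax - CMin = 4 is a power of two (constants chosen purely for
illustration):

    #include <cstdint>

    // (x != 8 && x != 12)  ==  (((x - 8) & ~4u) != 0)
    static bool neitherDirect(uint32_t x) { return x != 8 && x != 12; }
    static bool neitherMasked(uint32_t x) { return ((x - 8u) & ~4u) != 0; }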
@@ -5428,19 +5623,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
SDLoc(N), N0.getValueType());
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
- if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
return N1;
- if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
return N0;
// fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
@@ -6194,16 +6389,16 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
// fold (or x, -1) -> -1, vector edition
- if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
- if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
@@ -6517,8 +6712,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
+//
+// The IsRotate flag should be set when the LHS of both shifts is the same.
+// Otherwise if matching a general funnel shift, it should be clear.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG, bool IsRotate) {
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -6550,8 +6748,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// always invokes undefined behavior for 32-bit X.
//
// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+ //
+ // NOTE: We can only strip the AND mask like this when matching a rotate,
+ // not a general funnel shift.
unsigned MaskLoBits = 0;
- if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+ if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
unsigned Bits = Log2_64(EltSize);
@@ -6641,7 +6842,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
+ /*IsRotate*/ true)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg);
@@ -6670,7 +6872,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// fold (or (shl x0, (*ext (sub 32, y))),
// (srl x1, (*ext y))) ->
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
- if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+ if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
HasPos ? Pos : Neg);
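For reference, the modular identity matchRotateSub builds on, written out for
32-bit scalars (a sketch, not part of the patch); the stricter IsRotate
handling exists because the shortcut only collapses cleanly when both shift
inputs are the same value:

    #include <cstdint>

    // Rotate by a variable amount using the (EltSize - y) & (EltSize - 1)
    // form discussed in matchRotateSub; well defined for every y, including 0.
    static uint32_t rotl32(uint32_t x, unsigned y) {
      return (x << (y & 31)) | (x >> ((32u - y) & 31));
    }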
@@ -7098,14 +7300,22 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
if (LegalOperations)
return SDValue();
- // Collect all the stores in the chain.
- SDValue Chain;
- SmallVector<StoreSDNode *, 8> Stores;
- for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
- // TODO: Allow unordered atomics when wider type is legal (see D66309)
- EVT MemVT = Store->getMemoryVT();
- if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
- !Store->isSimple() || Store->isIndexed())
+ // We only handle merging simple stores of 1-4 bytes.
+ // TODO: Allow unordered atomics when wider type is legal (see D66309)
+ EVT MemVT = N->getMemoryVT();
+ if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
+ !N->isSimple() || N->isIndexed())
+ return SDValue();
+
+ // Collect all of the stores in the chain.
+ SDValue Chain = N->getChain();
+ SmallVector<StoreSDNode *, 8> Stores = {N};
+ while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
+ // All stores must be the same size to ensure that we are writing all of the
+ // bytes in the wide value.
+ // TODO: We could allow multiple sizes by tracking each stored byte.
+ if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
+ Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
@@ -7548,9 +7758,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
}
@@ -8253,6 +8463,17 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getVScale(SDLoc(N), VT, C0 << C1);
}
+ // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
+ APInt ShlVal;
+ if (N0.getOpcode() == ISD::STEP_VECTOR)
+ if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ if (ShlVal.ult(C0.getBitWidth())) {
+ APInt NewStep = C0 << ShlVal;
+ return DAG.getStepVector(SDLoc(N), VT, NewStep);
+ }
+ }
+
return SDValue();
}
@@ -8361,13 +8582,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
if (VT.isVector())
- ExtVT = EVT::getVectorVT(*DAG.getContext(),
- ExtVT, VT.getVectorNumElements());
+ ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
+ VT.getVectorElementCount());
if (!LegalOperations ||
TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
TargetLowering::Legal)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
N0.getOperand(0), DAG.getValueType(ExtVT));
+ // Even if we can't convert to sext_inreg, we might be able to remove
+ // this shift pair if the input is already sign extended.
+ if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
+ return N0.getOperand(0);
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
@@ -8390,9 +8615,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
SDValue ShiftValue;
- if (VT.isVector())
+ if (N1.getOpcode() == ISD::BUILD_VECTOR)
ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
- else
+ else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(ShiftValues.size() == 1 &&
+ "Expected matchBinaryPredicate to return one element for "
+ "SPLAT_VECTORs");
+ ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
+ } else
ShiftValue = ShiftValues[0];
return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
}
@@ -8412,7 +8642,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
if (VT.isVector())
- TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
// Determine the residual right-shift amount.
int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
@@ -8452,7 +8682,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
unsigned ShiftAmt = N1C->getZExtValue();
EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
if (VT.isVector())
- TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
// TODO: The simple type check probably belongs in the default hook
// implementation and/or target-specific overrides (because
@@ -8865,6 +9095,40 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
return SDValue();
}
+// Given a ABS node, detect the following pattern:
+// (ABS (SUB (EXTEND a), (EXTEND b))).
+// Generates UABD/SABD instruction.
+static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue AbsOp1 = N->getOperand(0);
+ SDValue Op0, Op1;
+
+ if (AbsOp1.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ Op0 = AbsOp1.getOperand(0);
+ Op1 = AbsOp1.getOperand(1);
+
+ unsigned Opc0 = Op0.getOpcode();
+ // Check if the operands of the sub are (zero|sign)-extended.
+ if (Opc0 != Op1.getOpcode() ||
+ (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
+ return SDValue();
+
+ EVT VT1 = Op0.getOperand(0).getValueType();
+ EVT VT2 = Op1.getOperand(0).getValueType();
+ // Check if the operands are of same type and valid size.
+ unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
+ if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
+ return SDValue();
+
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.getOperand(0);
+ SDValue ABD =
+ DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
+}
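A scalar model of the matched pattern (uabd8 is a made-up name): the absolute
difference of two zero-extended bytes always fits back into a byte, which is
why the result can be computed as ABDU and then zero-extended:

    #include <cstdint>

    static uint8_t uabd8(uint8_t a, uint8_t b) {
      int wide = int(a) - int(b);              // sub of the zero-extended operands
      return uint8_t(wide < 0 ? -wide : wide); // abs, guaranteed to fit in 8 bits
    }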
+
SDValue DAGCombiner::visitABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -8878,6 +9142,10 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
// fold (abs x) -> x iff not-negative
if (DAG.SignBitIsZero(N0))
return N0;
+
+ if (SDValue ABD = combineABSToABD(N, DAG, TLI))
+ return ABD;
+
return SDValue();
}
@@ -9038,8 +9306,8 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue C1 = N->getOperand(1);
SDValue C2 = N->getOperand(2);
- assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
- "Expected select-of-constants");
+ if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
+ return SDValue();
EVT VT = N->getValueType(0);
if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
@@ -9177,6 +9445,40 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
return SDValue();
}
+static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
+ "Expected a (v)select");
+ SDValue Cond = N->getOperand(0);
+ SDValue T = N->getOperand(1), F = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
+ return SDValue();
+
+ // select Cond, Cond, F --> or Cond, F
+ // select Cond, 1, F --> or Cond, F
+ if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
+
+ // select Cond, T, Cond --> and Cond, T
+ // select Cond, T, 0 --> and Cond, T
+ if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
+
+ // select Cond, T, 1 --> or (not Cond), T
+ if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
+ SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
+ }
+
+ // select Cond, 0, F --> and (not Cond), F
+ if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
+ SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
+ }
+
+ return SDValue();
+}
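The 1-bit identities used above, spelled out for scalars (the DAG form applies
lane-wise to 1-bit vectors as well); sel is an illustrative helper only:

    // select c, c, f == c | f        select c, 1, f == c | f
    // select c, t, c == c & t        select c, t, 0 == c & t
    // select c, t, 1 == !c | t       select c, 0, f == !c & f
    static bool sel(bool c, bool t, bool f) { return c ? t : f; }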
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -9189,30 +9491,11 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
- // fold (select X, X, Y) -> (or X, Y)
- // fold (select X, 1, Y) -> (or C, Y)
- if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
- return DAG.getNode(ISD::OR, DL, VT, N0, N2);
-
if (SDValue V = foldSelectOfConstants(N))
return V;
- // fold (select C, 0, X) -> (and (not C), X)
- if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
- SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
- AddToWorklist(NOTNode.getNode());
- return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
- }
- // fold (select C, X, 1) -> (or (not C), X)
- if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
- SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
- AddToWorklist(NOTNode.getNode());
- return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
- }
- // fold (select X, Y, X) -> (and X, Y)
- // fold (select X, Y, 0) -> (and X, Y)
- if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
- return DAG.getNode(ISD::AND, DL, VT, N0, N1);
+ if (SDValue V = foldBoolSelectToLogic(N, DAG))
+ return V;
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N1, N2))
@@ -9358,9 +9641,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SelectNode;
}
- return SimplifySelect(DL, N0, N1, N2);
+ if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
+ return NewSel;
}
+ if (!VT.isVector())
+ if (SDValue BinOp = foldSelectOfBinops(N))
+ return BinOp;
+
return SDValue();
}
@@ -9471,20 +9759,20 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
SDLoc DL(N);
// Zap scatters with a zero mask.
- if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
if (refineUniformBase(BasePtr, Index, DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(
- DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
}
if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(
- DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
}
@@ -9498,12 +9786,12 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
SDLoc DL(N);
// Zap masked stores with a zero mask.
- if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
// If this is a masked store with an all-ones mask, we can use an unmasked store.
// FIXME: Can we do this for indexed, compressing, or truncating stores?
- if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
MST->isUnindexed() && !MST->isCompressingStore() &&
!MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
@@ -9527,13 +9815,13 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
SDLoc DL(N);
// Zap gathers with a zero mask.
- if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, PassThru, MGT->getChain());
if (refineUniformBase(BasePtr, Index, DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
- PassThru.getValueType(), DL, Ops,
+ MGT->getMemoryVT(), DL, Ops,
MGT->getMemOperand(), MGT->getIndexType(),
MGT->getExtensionType());
}
@@ -9541,7 +9829,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
- PassThru.getValueType(), DL, Ops,
+ MGT->getMemoryVT(), DL, Ops,
MGT->getMemOperand(), MGT->getIndexType(),
MGT->getExtensionType());
}
@@ -9555,12 +9843,12 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
SDLoc DL(N);
// Zap masked loads with a zero mask.
- if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
// If this is a masked load with an all-ones mask, we can use an unmasked load.
// FIXME: Can we do this for indexed, expanding, or extending loads?
- if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
MLD->isUnindexed() && !MLD->isExpandingLoad() &&
MLD->getExtensionType() == ISD::NON_EXTLOAD) {
SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
@@ -9650,6 +9938,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
+ if (SDValue V = foldBoolSelectToLogic(N, DAG))
+ return V;
+
// vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
return DAG.getSelect(DL, VT, F, N2, N1);
@@ -9734,10 +10025,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// If it's on the left side invert the predicate to simplify logic below.
SDValue Other;
ISD::CondCode SatCC = CC;
- if (ISD::isBuildVectorAllOnes(N1.getNode())) {
+ if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
Other = N2;
SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
- } else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
+ } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
Other = N1;
}
@@ -9758,7 +10049,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
(OpLHS == CondLHS || OpRHS == CondLHS))
return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
- if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
+ if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
+ (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
+ OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
CondLHS == OpLHS) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
@@ -9779,54 +10072,71 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// the left side invert the predicate to simplify logic below.
SDValue Other;
ISD::CondCode SatCC = CC;
- if (ISD::isBuildVectorAllZeros(N1.getNode())) {
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
Other = N2;
SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
- } else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
+ } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
Other = N1;
}
- if (Other && Other.getNumOperands() == 2 && Other.getOperand(0) == LHS) {
+ if (Other && Other.getNumOperands() == 2) {
SDValue CondRHS = RHS;
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
- // Look for a general sub with unsigned saturation first.
- // x >= y ? x-y : 0 --> usubsat x, y
- // x > y ? x-y : 0 --> usubsat x, y
- if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
- Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
- return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
-
- if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
- if (isa<BuildVectorSDNode>(CondRHS)) {
- // If the RHS is a constant we have to reverse the const
- // canonicalization.
- // x > C-1 ? x+-C : 0 --> usubsat x, C
- auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
- return (!Op && !Cond) ||
- (Op && Cond &&
- Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
- };
- if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
- /*AllowUndefs*/ true)) {
- OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- OpRHS);
- return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
- }
+ if (Other.getOpcode() == ISD::SUB &&
+ LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
+ OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
+ // Look for a general sub with unsigned saturation first.
+ // zext(x) >= y ? x - trunc(y) : 0
+ // --> usubsat(x,trunc(umin(y,SatLimit)))
+ // zext(x) > y ? x - trunc(y) : 0
+ // --> usubsat(x,trunc(umin(y,SatLimit)))
+ if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
+ return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
+ DL);
+ }
+
+ if (OpLHS == LHS) {
+ // Look for a general sub with unsigned saturation first.
+ // x >= y ? x-y : 0 --> usubsat x, y
+ // x > y ? x-y : 0 --> usubsat x, y
+ if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
+ Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+
+ if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
+ OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
+ if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
+ CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x > C-1 ? x+-C : 0 --> usubsat x, C
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return (!Op && !Cond) ||
+ (Op && Cond &&
+ Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
+ };
+ if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
+ /*AllowUndefs*/ true)) {
+ OpRHS = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), OpRHS);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+ }
- // Another special case: If C was a sign bit, the sub has been
- // canonicalized into a xor.
- // FIXME: Would it be better to use computeKnownBits to determine
- // whether it's safe to decanonicalize the xor?
- // x s< 0 ? x^C : 0 --> usubsat x, C
- if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use computeKnownBits to determine
+ // whether it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> usubsat x, C
+ APInt SplatValue;
if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
- OpRHSConst->getAPIntValue().isSignMask()) {
- // Note that we have to rebuild the RHS constant here to ensure
- // we don't rely on particular values of undef lanes.
- OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
+ ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
+ ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
+ SplatValue.isSignMask()) {
+ // Note that we have to rebuild the RHS constant here to
+ // ensure we don't rely on particular values of undef lanes.
+ OpRHS = DAG.getConstant(SplatValue, DL, VT);
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
}
@@ -9839,11 +10149,11 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
- // Fold (vselect (build_vector all_ones), N1, N2) -> N1
- if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ // Fold (vselect all_ones, N1, N2) -> N1
+ if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
return N1;
- // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ // Fold (vselect all_zeros, N1, N2) -> N2
+ if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N2;
// The ConvertSelectToConcatVector function is assuming both the above
@@ -9913,9 +10223,62 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
bool PreferSetCC =
N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
- SDValue Combined = SimplifySetCC(
- N->getValueType(0), N->getOperand(0), N->getOperand(1),
- cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+
+ // SETCC(FREEZE(X), CONST, Cond)
+ // =>
+ // FREEZE(SETCC(X, CONST, Cond))
+ // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
+ // isn't equivalent to true or false.
+ // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
+ // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
+ //
+ // This transformation is beneficial because visitBRCOND can fold
+ // BRCOND(FREEZE(X)) to BRCOND(X).
+
+ // Conservatively optimize integer comparisons only.
+ if (PreferSetCC) {
+ // Do this only when SETCC is going to be used by BRCOND.
+
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ bool Updated = false;
+
+ // Is 'X Cond C' always true or false?
+ auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
+ bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
+ (Cond == ISD::SETLT && C->isMinSignedValue()) ||
+ (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
+ (Cond == ISD::SETGT && C->isMaxSignedValue());
+ bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
+ (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
+ (Cond == ISD::SETUGE && C->isNullValue()) ||
+ (Cond == ISD::SETGE && C->isMinSignedValue());
+ return True || False;
+ };
+
+ if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
+ if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
+ N0 = N0->getOperand(0);
+ Updated = true;
+ }
+ }
+ if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
+ if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
+ N0C)) {
+ N1 = N1->getOperand(0);
+ Updated = true;
+ }
+ }
+
+ if (Updated)
+ return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
+ }
+
+ SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
+ SDLoc(N), !PreferSetCC);
if (!Combined)
return SDValue();
@@ -9949,6 +10312,77 @@ SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
return SDValue();
}
+/// Check if N satisfies:
+///   N is used once.
+///   N is a load.
+///   The load is compatible with ExtOpcode: if the load has an explicit
+///   zero/sign extension, ExtOpcode must have the same extension; otherwise
+///   any extension is acceptable.
+static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
+ if (!N.hasOneUse())
+ return false;
+
+ if (!isa<LoadSDNode>(N))
+ return false;
+
+ LoadSDNode *Load = cast<LoadSDNode>(N);
+ ISD::LoadExtType LoadExt = Load->getExtensionType();
+ if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
+ return true;
+
+ // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
+ // extension.
+ if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
+ (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
+ return false;
+
+ return true;
+}
+
+/// Fold
+/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
+/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
+/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+ Opcode == ISD::ANY_EXTEND) &&
+ "Expected EXTEND dag node in input!");
+
+ if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
+ !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Op1 = N0->getOperand(1);
+ SDValue Op2 = N0->getOperand(2);
+ if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
+ return SDValue();
+
+ auto ExtLoadOpcode = ISD::EXTLOAD;
+ if (Opcode == ISD::SIGN_EXTEND)
+ ExtLoadOpcode = ISD::SEXTLOAD;
+ else if (Opcode == ISD::ZERO_EXTEND)
+ ExtLoadOpcode = ISD::ZEXTLOAD;
+
+ LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
+ LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
+ if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
+ !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
+ return SDValue();
+
+ SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
+ SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
+ return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
+}
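The source-level shape of the pattern, as a hedged sketch (widen is a made-up
function): the extend of a select of two loads becomes a select of two
extending loads, provided both extloads are legal for the target:

    static int widen(bool c, const short *x, const short *y) {
      short v = c ? *x : *y; // select(c, load x, load y)
      return v;              // sext(select) -> select(c, sextload x, sextload y)
    }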
+
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
@@ -10481,6 +10915,128 @@ static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+ EVT N00VT = N00.getValueType();
+ SDLoc DL(N);
+
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // the same size as the compared operands. Try to optimize sext(setcc())
+ // if this is the case.
+ if (VT.isVector() && !LegalOperations &&
+ TLI.getBooleanContents(N00VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ EVT SVT = getSetCCResultType(N00VT);
+
+ // If we already have the desired type, don't change it.
+ if (SVT != N0.getValueType()) {
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
+
+ // If the desired elements are smaller or larger than the source
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
+ }
+ }
+
+ // Try to eliminate the sext of a setcc by zexting the compare operands.
+ if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
+ !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
+ bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
+ unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ // We have an unsupported narrow vector compare op that would be legal
+ // if extended to the destination type. See if the compare operands
+ // can be freely extended to the destination type.
+ auto IsFreeToExtend = [&](SDValue V) {
+ if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
+ return true;
+ // Match a simple, non-extended load that can be converted to a
+ // legal {z/s}ext-load.
+ // TODO: Allow widening of an existing {z/s}ext-load?
+ if (!(ISD::isNON_EXTLoad(V.getNode()) &&
+ ISD::isUNINDEXEDLoad(V.getNode()) &&
+ cast<LoadSDNode>(V)->isSimple() &&
+ TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
+ return false;
+
+ // Non-chain users of this value must either be the setcc in this
+ // sequence or extends that can be folded into the new {z/s}ext-load.
+ for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ // Skip uses of the chain and the setcc.
+ SDNode *User = *UI;
+ if (UI.getUse().getResNo() != 0 || User == N0.getNode())
+ continue;
+ // Extra users must have exactly the same cast we are about to create.
+ // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
+ // is enhanced similarly.
+ if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
+ return false;
+ }
+ return true;
+ };
+
+ if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
+ SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
+ SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
+ return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
+ }
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
+ // Here, T can be 1 or -1, depending on the type of the setcc and
+ // getBooleanContents().
+ unsigned SetCCWidth = N0.getScalarValueSizeInBits();
+
+ // To determine the "true" side of the select, we need to know the high bit
+ // of the value returned by the setcc if it evaluates to true.
+ // If the type of the setcc is i1, then the true case of the select is just
+ // sext(i1 1), that is, -1.
+ // If the type of the setcc is larger (say, i8) then the value of the high
+ // bit depends on getBooleanContents(), so ask TLI for a real "true" value
+ // of the appropriate width.
+ SDValue ExtTrueVal = (SetCCWidth == 1)
+ ? DAG.getAllOnesConstant(DL, VT)
+ : DAG.getBoolConstant(true, DL, VT, N00VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
+ return SCC;
+
+ if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
+ EVT SetCCVT = getSetCCResultType(N00VT);
+ // Don't do this transform for i1 because there's a select transform
+ // that would reverse it.
+ // TODO: We should not do this transform at all without a target hook
+ // because a sext is likely cheaper than a select?
+ if (SetCCVT.getScalarSizeInBits() != 1 &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
+ return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
+ }
+ }
+
+ return SDValue();
+}
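For the scalar tail of this combine: a sign-extended i1 comparison is either 0
or -1, which is exactly the select built above with ExtTrueVal = -1 and
Zero = 0 (sketch only):

    #include <cstdint>

    static int32_t sextIsLess(int32_t x, int32_t y) {
      return (x < y) ? -1 : 0; // == sext(i1 (x < y)) in a 32-bit register
    }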
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -10612,76 +11168,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
return V;
- if (N0.getOpcode() == ISD::SETCC) {
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- EVT N00VT = N00.getValueType();
-
- // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
- // Only do this before legalize for now.
- if (VT.isVector() && !LegalOperations &&
- TLI.getBooleanContents(N00VT) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent) {
- // On some architectures (such as SSE/NEON/etc) the SETCC result type is
- // of the same size as the compared operands. Only optimize sext(setcc())
- // if this is the case.
- EVT SVT = getSetCCResultType(N00VT);
-
- // If we already have the desired type, don't change it.
- if (SVT != N0.getValueType()) {
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == SVT.getSizeInBits())
- return DAG.getSetCC(DL, VT, N00, N01, CC);
-
- // If the desired elements are smaller or larger than the source
- // elements, we can use a matching integer vector type and then
- // truncate/sign extend.
- EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
- if (SVT == MatchingVecType) {
- SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
- return DAG.getSExtOrTrunc(VsetCC, DL, VT);
- }
- }
- }
-
- // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
- // Here, T can be 1 or -1, depending on the type of the setcc and
- // getBooleanContents().
- unsigned SetCCWidth = N0.getScalarValueSizeInBits();
-
- // To determine the "true" side of the select, we need to know the high bit
- // of the value returned by the setcc if it evaluates to true.
- // If the type of the setcc is i1, then the true case of the select is just
- // sext(i1 1), that is, -1.
- // If the type of the setcc is larger (say, i8) then the value of the high
- // bit depends on getBooleanContents(), so ask TLI for a real "true" value
- // of the appropriate width.
- SDValue ExtTrueVal = (SetCCWidth == 1)
- ? DAG.getAllOnesConstant(DL, VT)
- : DAG.getBoolConstant(true, DL, VT, N00VT);
- SDValue Zero = DAG.getConstant(0, DL, VT);
- if (SDValue SCC =
- SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
- return SCC;
-
- if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
- EVT SetCCVT = getSetCCResultType(N00VT);
- // Don't do this transform for i1 because there's a select transform
- // that would reverse it.
- // TODO: We should not do this transform at all without a target hook
- // because a sext is likely cheaper than a select?
- if (SetCCVT.getScalarSizeInBits() != 1 &&
- (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
- SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
- return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
- }
- }
- }
+ if (SDValue V = foldSextSetcc(N))
+ return V;
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -10733,6 +11221,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
+ return Res;
+
return SDValue();
}
@@ -11045,6 +11536,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
+ return Res;
+
return SDValue();
}
@@ -11197,6 +11691,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
+ return Res;
+
return SDValue();
}
@@ -11542,14 +12039,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
}
// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
- if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
- N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
- N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
- N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
- return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
- N0.getOperand(0));
+ // if x is small enough or if we know that x has more than 1 sign bit and the
+ // sign_extend_inreg is extending from one of them.
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ SDValue N00 = N0.getOperand(0);
+ unsigned N00Bits = N00.getScalarValueSizeInBits();
+ unsigned DstElts = N0.getValueType().getVectorMinNumElements();
+ unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
+ bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
+ APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
+ if ((N00Bits == ExtVTBits ||
+ (!IsZext && (N00Bits < ExtVTBits ||
+ (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
+ ExtVTBits))) &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
}
// fold (sext_in_reg (zext x)) -> (sext x)
@@ -11610,6 +12117,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
AddToWorklist(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
@@ -11671,28 +12179,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
-
- // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
- if (N0.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
-
- if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
- return Res;
-
- if (SimplifyDemandedVectorElts(SDValue(N, 0)))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
-SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
+SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // zext_vector_inreg(undef) = 0 because the top bits will be zero.
+ // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
@@ -11812,6 +12303,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
+ return V;
+
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
@@ -12013,6 +12507,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
}
}
+ break;
+ case ISD::USUBSAT:
+ // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
+ // enough to know that the upper bits are zero, we must also ensure that we
+ // don't introduce an extra truncate.
+ if (!LegalOperations && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
+ VT.getScalarSizeInBits() &&
+ hasOperation(N0.getOpcode(), VT)) {
+ return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
+ DAG, SDLoc(N));
+ }
+ break;
}
return SDValue();
@@ -12141,7 +12649,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
VT.getVectorElementType());
// If the input is a constant, let getNode fold it.
- if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ if (isIntOrFPConstant(N0)) {
// If we can't allow illegal operations, we need to check that this is just
// an fp -> int or int -> fp conversion and that the resulting operation will
// be legal.
@@ -12374,12 +12882,7 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
SDValue DAGCombiner::visitFREEZE(SDNode *N) {
SDValue N0 = N->getOperand(0);
- // (freeze (freeze x)) -> (freeze x)
- if (N0.getOpcode() == ISD::FREEZE)
- return N0;
-
- // If the input is a constant, return it.
- if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
+ if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
return SDValue();
@@ -12500,11 +13003,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return DAG.getBuildVector(VT, DL, Ops);
}
-static bool isContractable(SDNode *N) {
- SDNodeFlags F = N->getFlags();
- return F.hasAllowContract() || F.hasAllowReassociation();
-}
-
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
@@ -12526,16 +13024,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
- bool CanFuse = Options.UnsafeFPMath || isContractable(N);
bool CanReassociate =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- CanFuse || HasFMAD);
+ Options.UnsafeFPMath || HasFMAD);
// If the addition is not contractable, do not combine.
- if (!AllowFusionGlobally && !isContractable(N))
+ if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
return SDValue();
- if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -12547,7 +13044,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
if (N.getOpcode() != ISD::FMUL)
return false;
- return AllowFusionGlobally || isContractable(N.getNode());
+ return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
@@ -12736,15 +13233,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return SDValue();
const SDNodeFlags Flags = N->getFlags();
- bool CanFuse = Options.UnsafeFPMath || isContractable(N);
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- CanFuse || HasFMAD);
+ Options.UnsafeFPMath || HasFMAD);
// If the subtraction is not contractable, do not combine.
- if (!AllowFusionGlobally && !isContractable(N))
+ if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
return SDValue();
- if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -12757,7 +13253,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
if (N.getOpcode() != ISD::FMUL)
return false;
- return AllowFusionGlobally || isContractable(N.getNode());
+ return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
@@ -12887,13 +13383,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
}
+ auto isReassociable = [Options](SDNode *N) {
+ return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+ };
+
+ auto isContractableAndReassociableFMUL = [isContractableFMUL,
+ isReassociable](SDValue N) {
+ return isContractableFMUL(N) && isReassociable(N.getNode());
+ };
+
// More folding opportunities when target permits.
- if (Aggressive) {
+ if (Aggressive && isReassociable(N)) {
+ bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
- isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
- N0.getOperand(2)->hasOneUse()) {
+ isContractableAndReassociableFMUL(N0.getOperand(2)) &&
+ N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -12905,7 +13411,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
- isContractableFMUL(N1.getOperand(2)) &&
+ isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
@@ -12916,7 +13422,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
-
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode &&
@@ -12924,7 +13429,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (isContractableFMUL(N020) &&
+ if (isContractableAndReassociableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
return DAG.getNode(
@@ -12948,7 +13453,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (isContractableFMUL(N002) &&
+ if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(
@@ -12970,7 +13475,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
- if (isContractableFMUL(N120) &&
+ if (isContractableAndReassociableFMUL(N120) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
@@ -12997,7 +13502,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
SDValue N102 = CvtSrc.getOperand(2);
- if (isContractableFMUL(N102) &&
+ if (isContractableAndReassociableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
@@ -13933,13 +14438,25 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue N1 = N->getOperand(1);
if ((N1.getOpcode() == ISD::FP_EXTEND ||
N1.getOpcode() == ISD::FP_ROUND)) {
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0).getValueType();
+
+ // Always fold no-op FP casts.
+ if (N1VT == N1Op0VT)
+ return true;
+
// Do not optimize out type conversion of f128 type yet.
// For some targets like x86_64, configuration is changed to keep one f128
// value in one SSE register, but instruction selection cannot handle
// FCOPYSIGN on SSE registers yet.
- EVT N1VT = N1->getValueType(0);
- EVT N1Op0VT = N1->getOperand(0).getValueType();
- return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+ if (N1Op0VT == MVT::f128)
+ return false;
+
+ // Avoid mismatched vector operand types, for better instruction selection.
+ if (N1Op0VT.isVector())
+ return false;
+
+ return true;
}
return false;
}
@@ -15971,12 +16488,9 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
// Prepare the argument for the new token factor for all the slices.
SmallVector<SDValue, 8> ArgChains;
- for (SmallVectorImpl<LoadedSlice>::const_iterator
- LSIt = LoadedSlices.begin(),
- LSItEnd = LoadedSlices.end();
- LSIt != LSItEnd; ++LSIt) {
- SDValue SliceInst = LSIt->loadSlice();
- CombineTo(LSIt->Inst, SliceInst, true);
+ for (const LoadedSlice &LS : LoadedSlices) {
+ SDValue SliceInst = LS.loadSlice();
+ CombineTo(LS.Inst, SliceInst, true);
if (SliceInst.getOpcode() != ISD::LOAD)
SliceInst = SliceInst.getOperand(0);
assert(SliceInst->getOpcode() == ISD::LOAD &&
@@ -16408,6 +16922,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
if (NumStores < 2)
return false;
+ assert((!UseTrunc || !UseVector) &&
+ "This optimization cannot emit a vector truncating store");
+
// The latest Node in the DAG.
SDLoc DL(StoreNodes[0].MemNode);
@@ -16631,7 +17148,7 @@ void DAGCombiner::getStoreMergeCandidates(
case StoreSource::Constant:
if (NoTypeMatch)
return false;
- if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
+ if (!isIntOrFPConstant(OtherBC))
return false;
break;
case StoreSource::Extract:
@@ -16903,6 +17420,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+ bool UseTrunc = LastIntegerTrunc && !UseVector;
// Check if we found a legal integer type that creates a meaningful
// merge.
@@ -16933,8 +17451,9 @@ bool DAGCombiner::tryStoreMergeOfConstants(
continue;
}
- MadeChange |= mergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
+ MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ /*IsConstantSrc*/ true,
+ UseVector, UseTrunc);
// Remove merged stores for next iteration.
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
@@ -17003,7 +17522,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
}
MadeChange |= mergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumStoresToMerge, false, true, false);
+ StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
+ /*UseVector*/ true, /*UseTrunc*/ false);
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
NumConsecutiveStores -= NumStoresToMerge;
@@ -17022,8 +17542,6 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
bool MadeChange = false;
- int64_t StartAddress = StoreNodes[0].OffsetFromBase;
-
// Look for load nodes which are used by the stored values.
SmallVector<MemOpLink, 8> LoadNodes;
@@ -17091,7 +17609,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned LastLegalIntegerType = 1;
bool isDereferenceable = true;
bool DoIntegerTruncate = false;
- StartAddress = LoadNodes[0].OffsetFromBase;
+ int64_t StartAddress = LoadNodes[0].OffsetFromBase;
SDValue LoadChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
// All loads must share the same chain.
@@ -17582,6 +18100,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
ST->isUnindexed() && ST->isSimple() &&
+ Ld->getAddressSpace() == ST->getAddressSpace() &&
// There can't be any side effects between the load and store, such as
// a call or store.
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
@@ -17595,7 +18114,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (ST->isUnindexed() && ST->isSimple() &&
ST1->isUnindexed() && ST1->isSimple()) {
if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
- ST->getMemoryVT() == ST1->getMemoryVT()) {
+ ST->getMemoryVT() == ST1->getMemoryVT() &&
+ ST->getAddressSpace() == ST1->getAddressSpace()) {
// If this is a store followed by a store with the same value to the
// same location, then the store is dead/noop.
return Chain;
@@ -17606,7 +18126,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// BaseIndexOffset and the code below requires knowing the size
// of a vector, so bail out if MemoryVT is scalable.
!ST->getMemoryVT().isScalableVector() &&
- !ST1->getMemoryVT().isScalableVector()) {
+ !ST1->getMemoryVT().isScalableVector() &&
+ ST->getAddressSpace() == ST1->getAddressSpace()) {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
@@ -17625,10 +18146,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.
- if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
- && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
- TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
- ST->getMemoryVT())) {
+ if ((Value.getOpcode() == ISD::FP_ROUND ||
+ Value.getOpcode() == ISD::TRUNCATE) &&
+ Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT(), LegalOperations)) {
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
Ptr, ST->getMemoryVT(), ST->getMemOperand());
}
@@ -18086,26 +18608,19 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
Alignment = NewAlign;
- SDValue NewPtr = OriginalLoad->getBasePtr();
- SDValue Offset;
- EVT PtrType = NewPtr.getValueType();
MachinePointerInfo MPI;
SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
- Offset = DAG.getConstant(PtrOff, DL, PtrType);
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
} else {
- Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
- Offset = DAG.getNode(
- ISD::MUL, DL, PtrType, Offset,
- DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
}
- NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
+ SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
+ InVecVT, EltNo);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
@@ -18710,6 +19225,9 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
uint64_t InVT1Size = InVT1.getFixedSizeInBits();
uint64_t InVT2Size = InVT2.getFixedSizeInBits();
+ assert(InVT2Size <= InVT1Size &&
+ "Inputs must be sorted to be in non-increasing vector size order.");
+
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
if (InVT1 != VT || InVT2 != VT) {
@@ -18736,7 +19254,10 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// Since we now have shorter input vectors, adjust the offset of the
// second vector's start.
Vec2Offset = NumElems;
- } else if (InVT2Size <= InVT1Size) {
+ } else {
+ assert(InVT2Size <= InVT1Size &&
+ "Second input is not going to be larger than the first one.");
+
// VecIn1 is wider than the output, and we have another, possibly
// smaller input. Pad the smaller input with undefs, shuffle at the
// input vector width, and extract the output.
@@ -18755,11 +19276,6 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
}
ShuffleNumElems = NumElems * 2;
- } else {
- // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
- // than VecIn1. We can't handle this for now - this case will disappear
- // when we start sorting the vectors by type.
- return SDValue();
}
} else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
@@ -18884,6 +19400,15 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
return DAG.getBitcast(VT, Shuf);
}
+// FIXME: promote to STLExtras.
+template <typename R, typename T>
+static auto getFirstIndexOf(R &&Range, const T &Val) {
+ auto I = find(Range, Val);
+ if (I == Range.end())
+ return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
+ return std::distance(Range.begin(), I);
+}
+
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
@@ -18952,9 +19477,11 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// Have we seen this input vector before?
// The vectors are expected to be tiny (usually 1 or 2 elements), so using
// a map back from SDValues to numbers isn't worth it.
- unsigned Idx = std::distance(VecIn.begin(), find(VecIn, ExtractedFromVec));
- if (Idx == VecIn.size())
+ int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
+ if (Idx == -1) { // A new source vector?
+ Idx = VecIn.size();
VecIn.push_back(ExtractedFromVec);
+ }
VectorMask[i] = Idx;
}
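The getFirstIndexOf helper added above folds the usual find/end() idiom into an index-or-minus-one result, which the loop just shown uses to detect new source vectors. A minimal standalone sketch of the same idea, assuming std::find in place of llvm::find (the main() driver is for demonstration only):

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <vector>

    template <typename R, typename T>
    static auto getFirstIndexOf(R &&Range, const T &Val) {
      auto I = std::find(std::begin(Range), std::end(Range), Val);
      if (I == std::end(Range))
        return static_cast<decltype(std::distance(std::begin(Range), I))>(-1);
      return std::distance(std::begin(Range), I);
    }

    int main() {
      std::vector<int> V = {7, 9, 7};
      assert(getFirstIndexOf(V, 9) == 1);  // index of the first match
      assert(getFirstIndexOf(V, 7) == 0);  // earliest occurrence wins
      assert(getFirstIndexOf(V, 4) == -1); // absent value -> -1 sentinel
      return 0;
    }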
@@ -18989,7 +19516,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
unsigned SplitSize = NearestPow2 / 2;
EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(), SplitSize);
- if (TLI.isTypeLegal(SplitVT)) {
+ if (TLI.isTypeLegal(SplitVT) &&
+ SplitSize + SplitVT.getVectorNumElements() <=
+ InVT.getVectorNumElements()) {
SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
DAG.getVectorIdxConstant(SplitSize, DL));
SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
@@ -19008,9 +19537,28 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
}
}
- // TODO: We want to sort the vectors by descending length, so that adjacent
- // pairs have similar length, and the longer vector is always first in the
- // pair.
+ // Sort input vectors by decreasing vector element count,
+ // while preserving the relative order of equally-sized vectors.
+ // Note that we keep the first "implicit" zero vector as-is.
+ SmallVector<SDValue, 8> SortedVecIn(VecIn);
+ llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
+ [](const SDValue &a, const SDValue &b) {
+ return a.getValueType().getVectorNumElements() >
+ b.getValueType().getVectorNumElements();
+ });
+
+ // We now also need to rebuild the VectorMask, because it referenced element
+ // order in VecIn, and we just sorted them.
+ for (int &SourceVectorIndex : VectorMask) {
+ if (SourceVectorIndex <= 0)
+ continue;
+ unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
+ assert(Idx > 0 && Idx < SortedVecIn.size() &&
+ VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
+ SourceVectorIndex = Idx;
+ }
+
+ VecIn = std::move(SortedVecIn);
// TODO: Should this fire if some of the input vectors have an illegal type (like
// it does now), or should we let legalization run its course first?
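The hunk above sorts the source vectors by decreasing element count and then rewrites VectorMask to index the sorted order. A toy model of that remapping, using distinct element counts as stand-ins for the (unique) source SDValues; names mirror the patch but the program itself is illustrative only:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      // Element counts of the inputs; slot 0 is the implicit "zero" source.
      std::vector<int> VecIn = {0, 2, 8, 4};
      std::vector<int> VectorMask = {1, 2, 3, 0}; // per-element source index

      // Sort by decreasing element count, keeping slot 0 in place.
      std::vector<int> SortedVecIn(VecIn);
      std::stable_sort(SortedVecIn.begin() + 1, SortedVecIn.end(),
                       [](int a, int b) { return a > b; }); // {0, 8, 4, 2}

      // Rebuild the mask so it indexes into the sorted order.
      for (int &SourceVectorIndex : VectorMask) {
        if (SourceVectorIndex <= 0)
          continue; // references to the implicit source stay put
        auto I = std::find(SortedVecIn.begin(), SortedVecIn.end(),
                           VecIn[SourceVectorIndex]);
        SourceVectorIndex = int(I - SortedVecIn.begin());
      }
      assert((VectorMask == std::vector<int>{3, 1, 2, 0}));
      return 0;
    }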
@@ -19183,13 +19731,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
}
- // A splat of a single element is a SPLAT_VECTOR if supported on the target.
- if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
- if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
- assert(!V.isUndef() && "Splat of undef should have been handled earlier");
- return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
- }
-
// Check if we can express BUILD VECTOR via subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {
SDValue Op0 = N->getOperand(0);
@@ -19231,6 +19772,14 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
+ // A splat of a single element is a SPLAT_VECTOR if supported on the target.
+ // Do this late as some of the above may replace the splat.
+ if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
+ if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ assert(!V.isUndef() && "Splat of undef should have been handled earlier");
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
+ }
+
return SDValue();
}
@@ -19879,7 +20428,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
if (V.getOpcode() == ISD::BITCAST &&
- V.getOperand(0).getValueType().isVector()) {
+ V.getOperand(0).getValueType().isVector() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
SDValue SrcOp = V.getOperand(0);
EVT SrcVT = SrcOp.getValueType();
unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
@@ -20052,6 +20602,9 @@ static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] == -1)
continue;
+ // If we reference the upper (undef) subvector then the element is undef.
+ if ((Mask[i] % NumElts) >= HalfNumElts)
+ continue;
int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
if (i < HalfNumElts)
Mask0[i] = M;
@@ -20213,7 +20766,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
// generating a splat; semantically, this is fine, but it's likely to
// generate low-quality code if the target can't reconstruct an appropriate
// shuffle.
- if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
+ if (!Op.isUndef() && !isIntOrFPConstant(Op))
if (!IsSplat && !DuplicateOps.insert(Op).second)
return SDValue();
@@ -20798,44 +21351,15 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
- if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
- // Canonicalize shuffles according to rules:
- // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
- // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
- // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
- if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
- N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(N1->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
-
- SDValue SV0 = N1->getOperand(0);
- SDValue SV1 = N1->getOperand(1);
- bool HasSameOp0 = N0 == SV0;
- bool IsSV1Undef = SV1.isUndef();
- if (HasSameOp0 || IsSV1Undef || N0 == SV1)
- // Commute the operands of this shuffle so merging below will trigger.
- return DAG.getCommutedVectorShuffle(*SVN);
- }
-
- // Canonicalize splat shuffles to the RHS to improve merging below.
- // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
- N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
- cast<ShuffleVectorSDNode>(N0)->isSplat() &&
- !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
- return DAG.getCommutedVectorShuffle(*SVN);
- }
- }
-
// Compute the combined shuffle mask for a shuffle with SV0 as the first
// operand, and SV1 as the second operand.
- // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask).
- auto MergeInnerShuffle = [NumElts](ShuffleVectorSDNode *SVN,
- ShuffleVectorSDNode *OtherSVN, SDValue N1,
- SDValue &SV0, SDValue &SV1,
- SmallVectorImpl<int> &Mask) -> bool {
+ // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
+ // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
+ auto MergeInnerShuffle =
+ [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
+ ShuffleVectorSDNode *OtherSVN, SDValue N1,
+ const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
+ SmallVectorImpl<int> &Mask) -> bool {
// Don't try to fold splats; they're likely to simplify somehow, or they
// might be free.
if (OtherSVN->isSplat())
@@ -20852,6 +21376,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
continue;
}
+ if (Commute)
+ Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
+
SDValue CurrentVec;
if (Idx < (int)NumElts) {
// This shuffle index refers to the inner shuffle N0. Lookup the inner
@@ -20922,44 +21449,161 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Bail out if we cannot convert the shuffle pair into a single shuffle.
return false;
}
- return true;
+
+ if (llvm::all_of(Mask, [](int M) { return M < 0; }))
+ return true;
+
+ // Avoid introducing shuffles with illegal mask.
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ if (TLI.isShuffleMaskLegal(Mask, VT))
+ return true;
+
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ return TLI.isShuffleMaskLegal(Mask, VT);
};
- // Try to fold according to rules:
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- // Don't try to fold shuffles with illegal type.
- // Only fold if this shuffle is the only user of the other shuffle.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
- Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
- ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
-
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(OtherSV->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
-
- SDValue SV0, SV1;
- SmallVector<int, 4> Mask;
- if (MergeInnerShuffle(SVN, OtherSV, N1, SV0, SV1, Mask)) {
- // Check if all indices in Mask are Undef. In case, propagate Undef.
- if (llvm::all_of(Mask, [](int M) { return M < 0; }))
- return DAG.getUNDEF(VT);
-
- if (!SV0.getNode())
- SV0 = DAG.getUNDEF(VT);
- if (!SV1.getNode())
- SV1 = DAG.getUNDEF(VT);
-
- // Avoid introducing shuffles with illegal mask.
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
- return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ // Canonicalize shuffles according to rules:
+ // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+ // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+ // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = N1->getOperand(0);
+ SDValue SV1 = N1->getOperand(1);
+ bool HasSameOp0 = N0 == SV0;
+ bool IsSV1Undef = SV1.isUndef();
+ if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+ // Commute the operands of this shuffle so merging below will trigger.
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Canonicalize splat shuffles to the RHS to improve merging below.
+ // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(N0)->isSplat() &&
+ !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Try to fold according to rules:
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // Don't try to fold shuffles with illegal type.
+ // Only fold if this shuffle is the only user of the other shuffle.
+ // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
+ for (int i = 0; i != 2; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N->isOnlyUserOf(N->getOperand(i).getNode())) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0, SV1;
+ SmallVector<int, 4> Mask;
+ if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
+ SV0, SV1, Mask)) {
+ // Check if all indices in Mask are Undef. If so, propagate Undef.
+ if (llvm::all_of(Mask, [](int M) { return M < 0; }))
+ return DAG.getUNDEF(VT);
+
+ return DAG.getVectorShuffle(VT, SDLoc(N),
+ SV0 ? SV0 : DAG.getUNDEF(VT),
+ SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
+ }
+ }
+ }
+
+ // Merge shuffles through binops if we are able to merge the outer shuffle
+ // with at least one of the inner shuffles.
+ // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
+ // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
+ unsigned SrcOpcode = N0.getOpcode();
+ if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
+ (N1.isUndef() ||
+ (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
+ // Get binop source ops, or just pass on the undef.
+ SDValue Op00 = N0.getOperand(0);
+ SDValue Op01 = N0.getOperand(1);
+ SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
+ SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
+ // TODO: We might be able to relax the VT check but we don't currently
+ // have any isBinOp() that has different result/ops VTs so play safe until
+ // we have test coverage.
+ if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
+ Op01.getValueType() == VT && Op11.getValueType() == VT &&
+ (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
+ auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
+ SmallVectorImpl<int> &Mask, bool LeftOp,
+ bool Commute) {
+ SDValue InnerN = Commute ? N1 : N0;
+ SDValue Op0 = LeftOp ? Op00 : Op01;
+ SDValue Op1 = LeftOp ? Op10 : Op11;
+ if (Commute)
+ std::swap(Op0, Op1);
+ // Only accept the merged shuffle if we don't introduce undef elements,
+ // or the inner shuffle already contained undef elements.
+ auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
+ return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
+ MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
+ Mask) &&
+ (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
+ llvm::none_of(Mask, [](int M) { return M < 0; }));
+ };
+
+ // Ensure we don't increase the number of shuffles - we must merge a
+ // shuffle from at least one of the LHS and RHS ops.
+ bool MergedLeft = false;
+ SDValue LeftSV0, LeftSV1;
+ SmallVector<int, 4> LeftMask;
+ if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
+ CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
+ MergedLeft = true;
+ } else {
+ LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
+ LeftSV0 = Op00, LeftSV1 = Op10;
+ }
+
+ bool MergedRight = false;
+ SDValue RightSV0, RightSV1;
+ SmallVector<int, 4> RightMask;
+ if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
+ CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
+ MergedRight = true;
+ } else {
+ RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
+ RightSV0 = Op01, RightSV1 = Op11;
+ }
+
+ if (MergedLeft || MergedRight) {
+ SDLoc DL(N);
+ SDValue LHS = DAG.getVectorShuffle(
+ VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
+ LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
+ SDValue RHS = DAG.getVectorShuffle(
+ VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
+ RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
+ return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
+ }
+ }
}
}
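MergeInnerShuffle above folds two shuffles into one by composing their masks. A toy sketch of that composition for shuffle(shuffle(A, B, M0), C, M1), ignoring the undef handling and the at-most-two-sources legality checks the real combine performs:

    #include <cassert>
    #include <vector>

    int main() {
      const int NumElts = 4;
      std::vector<int> M0 = {4, 1, 6, 3}; // inner: A = 0..3, B = 4..7
      std::vector<int> M1 = {0, 5, 2, 7}; // outer: inner result = 0..3, C = 4..7
      std::vector<int> M2(NumElts);
      for (int i = 0; i != NumElts; ++i) {
        int Idx = M1[i];
        // Indices below NumElts select from the inner shuffle; chase them
        // through M0. Indices at or above NumElts select from C, which is
        // renumbered to 8..11 in this three-source index space.
        M2[i] = Idx < NumElts ? M0[Idx] : Idx + NumElts;
      }
      assert((M2 == std::vector<int>{4, 9, 6, 11})); // draws on A, B and C
      return 0;
    }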
@@ -21174,7 +21818,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
// fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
- if (N0->getOpcode() == ISD::AND) {
+ if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
if (AndConst && AndConst->getAPIntValue() == 0xffff) {
return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
@@ -21775,6 +22419,50 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
+// Fold select(cc, binop(x, y), binop(z, y)) -> binop(select(cc, x, z), y), etc.
+SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ unsigned BinOpc = N1.getOpcode();
+ if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
+ return SDValue();
+
+ if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
+ return SDValue();
+
+ // Fold select(cond, binop(x, y), binop(z, y))
+ // --> binop(select(cond, x, z), y)
+ if (N1.getOperand(1) == N2.getOperand(1)) {
+ SDValue NewSel =
+ DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
+ NewBinOp->setFlags(N1->getFlags());
+ NewBinOp->intersectFlagsWith(N2->getFlags());
+ return NewBinOp;
+ }
+
+ // Fold select(cond, binop(x, y), binop(x, z))
+ // --> binop(x, select(cond, y, z))
+ // Second op VT might be different (e.g. shift amount type)
+ if (N1.getOperand(0) == N2.getOperand(0) &&
+ VT == N1.getOperand(1).getValueType() &&
+ VT == N2.getOperand(1).getValueType()) {
+ SDValue NewSel =
+ DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
+ NewBinOp->setFlags(N1->getFlags());
+ NewBinOp->intersectFlagsWith(N2->getFlags());
+ return NewBinOp;
+ }
+
+ // TODO: Handle isCommutativeBinOp patterns as well?
+ return SDValue();
+}
+
// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
SDValue N0 = N->getOperand(0);
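The foldSelectOfBinops addition above rests on a simple algebraic identity: a select of two binops that share an operand equals one binop of a select. A scalar sanity check, illustrative only, with plain ints standing in for SDValues:

    #include <cassert>

    static int sel(bool C, int A, int B) { return C ? A : B; }

    int main() {
      for (bool C : {false, true})
        for (int X : {1, -2})
          for (int Y : {3, 7})
            for (int Z : {0, 5}) {
              // select(c, x+y, z+y) == select(c, x, z) + y  (shared second op)
              assert(sel(C, X + Y, Z + Y) == sel(C, X, Z) + Y);
              // select(c, x+y, x+z) == x + select(c, y, z)  (shared first op)
              assert(sel(C, X + Y, X + Z) == X + sel(C, Y, Z));
            }
      return 0;
    }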
@@ -22426,12 +23114,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
- AliasResult AAResult = AA->alias(
- MemoryLocation(MUC0.MMO->getValue(), Overlap0,
- UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
- MemoryLocation(MUC1.MMO->getValue(), Overlap1,
- UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
- if (AAResult == NoAlias)
+ if (AA->isNoAlias(
+ MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
return false;
}
@@ -22614,6 +23301,10 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
if (BasePtr.getBase().isUndef())
return false;
+ // Do not handle stores to opaque types
+ if (St->getMemoryVT().isZeroSized())
+ return false;
+
// BaseIndexOffset assumes that offsets are fixed-size, which
// is not valid for scalable vectors where the offsets are
// scaled by `vscale`, so bail out early.
@@ -22624,6 +23315,9 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
+ if (Chain->getMemoryVT().isScalableVector())
+ return false;
+
// If the chain has more than one use, then we can't reorder the mem ops.
if (!SDValue(Chain, 0)->hasOneUse())
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 62f7f3d98ba6..4ca731cfdf62 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -238,38 +238,6 @@ void FastISel::flushLocalValueMap() {
SavedInsertPt = FuncInfo.InsertPt;
}
-bool FastISel::hasTrivialKill(const Value *V) {
- // Don't consider constants or arguments to have trivial kills.
- const Instruction *I = dyn_cast<Instruction>(V);
- if (!I)
- return false;
-
- // No-op casts are trivially coalesced by fast-isel.
- if (const auto *Cast = dyn_cast<CastInst>(I))
- if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0)))
- return false;
-
- // Even the value might have only one use in the LLVM IR, it is possible that
- // FastISel might fold the use into another instruction and now there is more
- // than one use at the Machine Instruction level.
- Register Reg = lookUpRegForValue(V);
- if (Reg && !MRI.use_empty(Reg))
- return false;
-
- // GEPs with all zero indices are trivially coalesced by fast-isel.
- if (const auto *GEP = dyn_cast<GetElementPtrInst>(I))
- if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
- return false;
-
- // Only instructions with a single use in the same basic block are considered
- // to have trivial kills.
- return I->hasOneUse() &&
- !(I->getOpcode() == Instruction::BitCast ||
- I->getOpcode() == Instruction::PtrToInt ||
- I->getOpcode() == Instruction::IntToPtr) &&
- cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
-}
-
Register FastISel::getRegForValue(const Value *V) {
EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
// Don't handle non-simple values in FastISel.
@@ -342,8 +310,8 @@ Register FastISel::materializeConstant(const Value *V, MVT VT) {
Register IntegerReg =
getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
if (IntegerReg)
- Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
- /*Op0IsKill=*/false);
+ Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+ IntegerReg);
}
}
} else if (const auto *Op = dyn_cast<Operator>(V)) {
@@ -415,27 +383,22 @@ void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) {
}
}
-std::pair<Register, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+Register FastISel::getRegForGEPIndex(const Value *Idx) {
Register IdxN = getRegForValue(Idx);
if (!IdxN)
// Unhandled operand. Halt "fast" selection and bail.
- return std::pair<Register, bool>(Register(), false);
-
- bool IdxNIsKill = hasTrivialKill(Idx);
+ return Register();
// If the index is smaller or larger than intptr_t, truncate or extend it.
MVT PtrVT = TLI.getPointerTy(DL);
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
if (IdxVT.bitsLT(PtrVT)) {
- IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN,
- IdxNIsKill);
- IdxNIsKill = true;
+ IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
} else if (IdxVT.bitsGT(PtrVT)) {
IdxN =
- fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill);
- IdxNIsKill = true;
+ fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN);
}
- return std::pair<Register, bool>(IdxN, IdxNIsKill);
+ return IdxN;
}
void FastISel::recomputeInsertPt() {
@@ -513,11 +476,10 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
Register Op1 = getRegForValue(I->getOperand(1));
if (!Op1)
return false;
- bool Op1IsKill = hasTrivialKill(I->getOperand(1));
Register ResultReg =
- fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill,
- CI->getZExtValue(), VT.getSimpleVT());
+ fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, CI->getZExtValue(),
+ VT.getSimpleVT());
if (!ResultReg)
return false;
@@ -529,7 +491,6 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
Register Op0 = getRegForValue(I->getOperand(0));
if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
- bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// Check if the second operand is a constant and handle it appropriately.
if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -549,8 +510,8 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::AND;
}
- Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
- Op0IsKill, Imm, VT.getSimpleVT());
+ Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Imm,
+ VT.getSimpleVT());
if (!ResultReg)
return false;
@@ -562,11 +523,10 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
Register Op1 = getRegForValue(I->getOperand(1));
if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
return false;
- bool Op1IsKill = hasTrivialKill(I->getOperand(1));
// Now we have both operands in registers. Emit the instruction.
Register ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill);
+ ISDOpcode, Op0, Op1);
if (!ResultReg)
// Target-specific code wasn't able to find a machine opcode for
// the given ISD opcode and type. Halt "fast" selection and bail.
@@ -587,8 +547,6 @@ bool FastISel::selectGetElementPtr(const User *I) {
if (isa<VectorType>(I->getType()))
return false;
- bool NIsKill = hasTrivialKill(I->getOperand(0));
-
// Keep a running tab of the total offset to coalesce multiple N = N + Offset
// into a single N = N + TotalOffset.
uint64_t TotalOffs = 0;
@@ -604,10 +562,9 @@ bool FastISel::selectGetElementPtr(const User *I) {
// N = N + Offset
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
if (TotalOffs >= MaxOffs) {
- N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
- NIsKill = true;
TotalOffs = 0;
}
}
@@ -622,43 +579,38 @@ bool FastISel::selectGetElementPtr(const User *I) {
uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
if (TotalOffs >= MaxOffs) {
- N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
- NIsKill = true;
TotalOffs = 0;
}
continue;
}
if (TotalOffs) {
- N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
- NIsKill = true;
TotalOffs = 0;
}
// N = N + Idx * ElementSize;
uint64_t ElementSize = DL.getTypeAllocSize(Ty);
- std::pair<Register, bool> Pair = getRegForGEPIndex(Idx);
- Register IdxN = Pair.first;
- bool IdxNIsKill = Pair.second;
+ Register IdxN = getRegForGEPIndex(Idx);
if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
if (ElementSize != 1) {
- IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT);
if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
- IdxNIsKill = true;
}
- N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
}
}
if (TotalOffs) {
- N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
}
@@ -1081,9 +1033,9 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
for (auto &Arg : CLI.getArgs()) {
Type *FinalType = Arg.Ty;
if (Arg.IsByVal)
- FinalType = cast<PointerType>(Arg.Ty)->getElementType();
+ FinalType = Arg.IndirectType;
bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
- FinalType, CLI.CallConv, CLI.IsVarArg);
+ FinalType, CLI.CallConv, CLI.IsVarArg, DL);
ISD::ArgFlagsTy Flags;
if (Arg.IsZExt)
@@ -1096,6 +1048,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
Flags.setSRet();
if (Arg.IsSwiftSelf)
Flags.setSwiftSelf();
+ if (Arg.IsSwiftAsync)
+ Flags.setSwiftAsync();
if (Arg.IsSwiftError)
Flags.setSwiftError();
if (Arg.IsCFGuardTarget)
@@ -1120,26 +1074,24 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
+ MaybeAlign MemAlign = Arg.Alignment;
if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
- PointerType *Ty = cast<PointerType>(Arg.Ty);
- Type *ElementTy = Ty->getElementType();
- unsigned FrameSize =
- DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
+ unsigned FrameSize = DL.getTypeAllocSize(Arg.IndirectType);
// For ByVal, alignment should come from FE. BE will guess if this info
// is not there, but there are cases it cannot get right.
- MaybeAlign FrameAlign = Arg.Alignment;
- if (!FrameAlign)
- FrameAlign = Align(TLI.getByValTypeAlignment(ElementTy, DL));
+ if (!MemAlign)
+ MemAlign = Align(TLI.getByValTypeAlignment(Arg.IndirectType, DL));
Flags.setByValSize(FrameSize);
- Flags.setByValAlign(*FrameAlign);
+ } else if (!MemAlign) {
+ MemAlign = DL.getABITypeAlign(Arg.Ty);
}
+ Flags.setMemAlign(*MemAlign);
if (Arg.IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
-
CLI.OutVals.push_back(Arg.Val);
CLI.OutFlags.push_back(Flags);
}
@@ -1192,7 +1144,7 @@ bool FastISel::lowerCall(const CallInst *CI) {
IsTailCall = false;
if (IsTailCall && MF->getFunction()
.getFnAttribute("disable-tail-calls")
- .getValueAsString() == "true")
+ .getValueAsBool())
IsTailCall = false;
CallLoweringInfo CLI;
@@ -1304,9 +1256,21 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
"Expected inlined-at fields to agree");
// A dbg.declare describes the address of a source variable, so lower it
// into an indirect DBG_VALUE.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
- *Op, DI->getVariable(), DI->getExpression());
+ auto Builder =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op,
+ DI->getVariable(), DI->getExpression());
+
+ // If using instruction referencing, mutate this into a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
+ // the expression, as we don't have an "indirect" flag in DBG_INSTR_REF.
+ if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) {
+ Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
+ Builder->getOperand(1).ChangeToImmediate(0);
+ auto *NewExpr =
+ DIExpression::prepend(DI->getExpression(), DIExpression::DerefBefore);
+ Builder->getOperand(3).setMetadata(NewExpr);
+ }
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
@@ -1322,9 +1286,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const Value *V = DI->getValue();
assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
"Expected inlined-at fields to agree");
- if (!V || isa<UndefValue>(V)) {
- // Currently the optimizer can produce this; insert an undef to
- // help debugging.
+ if (!V || isa<UndefValue>(V) || DI->hasArgList()) {
+ // DI is either undef or cannot produce a valid DBG_VALUE, so produce an
+ // undef DBG_VALUE to terminate any prior location.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
@@ -1349,8 +1313,16 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
} else if (Register Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
bool IsIndirect = false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
- DI->getVariable(), DI->getExpression());
+ auto Builder =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
+ DI->getVariable(), DI->getExpression());
+
+ // If using instruction referencing, mutate this into a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs.
+ if (TM.Options.ValueTrackingVariableLocations) {
+ Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
+ Builder->getOperand(1).ChangeToImmediate(0);
+ }
} else {
// We don't know how to handle other cases, so we drop.
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
@@ -1421,10 +1393,8 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
- bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
-
Register ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
- Opcode, InputReg, InputRegIsKill);
+ Opcode, InputReg);
if (!ResultReg)
return false;
@@ -1455,7 +1425,6 @@ bool FastISel::selectBitCast(const User *I) {
Register Op0 = getRegForValue(I->getOperand(0));
if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
- bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// First, try to perform the bitcast by inserting a reg-reg copy.
Register ResultReg;
@@ -1472,7 +1441,7 @@ bool FastISel::selectBitCast(const User *I) {
// If the reg-reg copy failed, select a BITCAST opcode.
if (!ResultReg)
- ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
+ ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0);
if (!ResultReg)
return false;
@@ -1648,12 +1617,11 @@ bool FastISel::selectFNeg(const User *I, const Value *In) {
Register OpReg = getRegForValue(In);
if (!OpReg)
return false;
- bool OpRegIsKill = hasTrivialKill(In);
// If the target has ISD::FNEG, use it.
EVT VT = TLI.getValueType(DL, I->getType());
Register ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
- OpReg, OpRegIsKill);
+ OpReg);
if (ResultReg) {
updateValueMap(I, ResultReg);
return true;
@@ -1668,18 +1636,18 @@ bool FastISel::selectFNeg(const User *I, const Value *In) {
return false;
Register IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
- ISD::BITCAST, OpReg, OpRegIsKill);
+ ISD::BITCAST, OpReg);
if (!IntReg)
return false;
Register IntResultReg = fastEmit_ri_(
- IntVT.getSimpleVT(), ISD::XOR, IntReg, /*Op0IsKill=*/true,
+ IntVT.getSimpleVT(), ISD::XOR, IntReg,
UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
if (!IntResultReg)
return false;
ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST,
- IntResultReg, /*Op0IsKill=*/true);
+ IntResultReg);
if (!ResultReg)
return false;
@@ -1879,14 +1847,12 @@ bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
-unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/,
- bool /*Op0IsKill*/) {
+unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/) {
return 0;
}
unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
- bool /*Op0IsKill*/, unsigned /*Op1*/,
- bool /*Op1IsKill*/) {
+ unsigned /*Op1*/) {
return 0;
}
@@ -1900,7 +1866,7 @@ unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
}
unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
- bool /*Op0IsKill*/, uint64_t /*Imm*/) {
+ uint64_t /*Imm*/) {
return 0;
}
@@ -1909,7 +1875,7 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
/// If that fails, it materializes the immediate into a register and tries
/// fastEmit_rr instead.
Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
- bool Op0IsKill, uint64_t Imm, MVT ImmType) {
+ uint64_t Imm, MVT ImmType) {
// If this is a multiply by a power of two, emit this as a shift left.
if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
Opcode = ISD::SHL;
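The fastEmit_ri_ path above rewrites a multiply by a power of two as a left shift. The underlying identity, checked with plain integers; the helper below merely models Log2_64 and is not the LLVM implementation:

    #include <cassert>
    #include <cstdint>

    static unsigned log2u64(uint64_t V) {
      unsigned Shift = 0;
      while ((uint64_t(1) << Shift) < V)
        ++Shift;
      return Shift;
    }

    int main() {
      const uint64_t Imm = 16; // isPowerOf2_64(Imm) holds
      const unsigned ShAmt = log2u64(Imm);
      for (uint64_t X : {uint64_t(0), uint64_t(3), uint64_t(1234567)})
        assert(X * Imm == (X << ShAmt)); // mul by 2^k == shift left by k
      return 0;
    }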
@@ -1927,11 +1893,10 @@ Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
return 0;
// First check if immediate type is legal. If not, we can't use the ri form.
- Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Imm);
if (ResultReg)
return ResultReg;
Register MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
- bool IsImmKill = true;
if (!MaterialReg) {
// This is a bit ugly/slow, but failing here means falling out of
// fast-isel, which would be very slow.
@@ -1940,15 +1905,8 @@ Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
if (!MaterialReg)
return 0;
- // FIXME: If the materialized register here has no uses yet then this
- // will be the first use and we should be able to mark it as killed.
- // However, the local value area for materialising constant expressions
- // grows down, not up, which means that any constant expressions we generate
- // later which also use 'Imm' could be after this instruction and therefore
- // after this kill.
- IsImmKill = false;
}
- return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill);
+ return fastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
}
Register FastISel::createResultReg(const TargetRegisterClass *RC) {
@@ -1982,8 +1940,7 @@ Register FastISel::fastEmitInst_(unsigned MachineInstOpcode,
}
Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill) {
+ const TargetRegisterClass *RC, unsigned Op0) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
Register ResultReg = createResultReg(RC);
@@ -1991,10 +1948,10 @@ Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill));
+ .addReg(Op0);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill));
+ .addReg(Op0);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
@@ -2004,8 +1961,7 @@ Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, unsigned Op1,
- bool Op1IsKill) {
+ unsigned Op1) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
Register ResultReg = createResultReg(RC);
@@ -2014,12 +1970,12 @@ Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill));
+ .addReg(Op0)
+ .addReg(Op1);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill));
+ .addReg(Op0)
+ .addReg(Op1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
@@ -2028,9 +1984,7 @@ Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, unsigned Op1,
- bool Op1IsKill, unsigned Op2,
- bool Op2IsKill) {
+ unsigned Op1, unsigned Op2) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
Register ResultReg = createResultReg(RC);
@@ -2040,14 +1994,14 @@ Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
- .addReg(Op2, getKillRegState(Op2IsKill));
+ .addReg(Op0)
+ .addReg(Op1)
+ .addReg(Op2);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
- .addReg(Op2, getKillRegState(Op2IsKill));
+ .addReg(Op0)
+ .addReg(Op1)
+ .addReg(Op2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
@@ -2056,7 +2010,7 @@ Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, uint64_t Imm) {
+ uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
Register ResultReg = createResultReg(RC);
@@ -2064,11 +2018,11 @@ Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op0)
.addImm(Imm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op0)
.addImm(Imm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
@@ -2078,8 +2032,7 @@ Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, uint64_t Imm1,
- uint64_t Imm2) {
+ uint64_t Imm1, uint64_t Imm2) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
Register ResultReg = createResultReg(RC);
@@ -2087,12 +2040,12 @@ Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op0)
.addImm(Imm1)
.addImm(Imm2);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op0)
.addImm(Imm1)
.addImm(Imm2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -2122,8 +2075,7 @@ Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, unsigned Op1,
- bool Op1IsKill, uint64_t Imm) {
+ unsigned Op1, uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
Register ResultReg = createResultReg(RC);
@@ -2132,13 +2084,13 @@ Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op0)
+ .addReg(Op1)
.addImm(Imm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op0)
+ .addReg(Op1)
.addImm(Imm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
@@ -2163,21 +2115,21 @@ Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
}
Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
- bool Op0IsKill, uint32_t Idx) {
+ uint32_t Idx) {
Register ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(Register::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ ResultReg).addReg(Op0, 0, Idx);
return ResultReg;
}
/// Emit MachineInstrs to compute the value of Op with all but the least
/// significant bit set to zero.
-Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
- return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0) {
+ return fastEmit_ri(VT, VT, ISD::AND, Op0, 1);
}
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 32a4f60df097..85c6eca5775e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -192,10 +192,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
MF->getFrameInfo().CreateVariableSizedObject(
Alignment <= StackAlign ? Align(1) : Alignment, AI);
}
- }
-
- // Look for inline asm that clobbers the SP register.
- if (auto *Call = dyn_cast<CallBase>(&I)) {
+ } else if (auto *Call = dyn_cast<CallBase>(&I)) {
+ // Look for inline asm that clobbers the SP register.
if (Call->isInlineAsm()) {
Register SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -214,21 +212,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
}
- }
-
- // Look for calls to the @llvm.va_start intrinsic. We can omit some
- // prologue boilerplate for variadic functions that don't examine their
- // arguments.
- if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
- if (II->getIntrinsicID() == Intrinsic::vastart)
- MF->getFrameInfo().setHasVAStart(true);
- }
+ // Look for calls to the @llvm.va_start intrinsic. We can omit some
+ // prologue boilerplate for variadic functions that don't examine their
+ // arguments.
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ MF->getFrameInfo().setHasVAStart(true);
+ }
- // If we have a musttail call in a variadic function, we need to ensure we
- // forward implicit register parameters.
- if (const auto *CI = dyn_cast<CallInst>(&I)) {
- if (CI->isMustTailCall() && Fn->isVarArg())
- MF->getFrameInfo().setHasMustTailInVarArgFunc(true);
+ // If we have a musttail call in a variadic function, we need to ensure
+ // we forward implicit register parameters.
+ if (const auto *CI = dyn_cast<CallInst>(&I)) {
+ if (CI->isMustTailCall() && Fn->isVarArg())
+ MF->getFrameInfo().setHasMustTailInVarArgFunc(true);
+ }
}
// Mark values used outside their block as exported, by allocating
@@ -333,14 +330,23 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
else if (Personality == EHPersonality::Wasm_CXX) {
WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
- // Map all BB references in the WinEH data to MBBs.
- DenseMap<BBOrMBB, BBOrMBB> NewMap;
- for (auto &KV : EHInfo.EHPadUnwindMap) {
+ // Map all BB references in the Wasm EH data to MBBs.
+ DenseMap<BBOrMBB, BBOrMBB> SrcToUnwindDest;
+ for (auto &KV : EHInfo.SrcToUnwindDest) {
const auto *Src = KV.first.get<const BasicBlock *>();
- const auto *Dst = KV.second.get<const BasicBlock *>();
- NewMap[MBBMap[Src]] = MBBMap[Dst];
+ const auto *Dest = KV.second.get<const BasicBlock *>();
+ SrcToUnwindDest[MBBMap[Src]] = MBBMap[Dest];
+ }
+ EHInfo.SrcToUnwindDest = std::move(SrcToUnwindDest);
+ DenseMap<BBOrMBB, SmallPtrSet<BBOrMBB, 4>> UnwindDestToSrcs;
+ for (auto &KV : EHInfo.UnwindDestToSrcs) {
+ const auto *Dest = KV.first.get<const BasicBlock *>();
+ UnwindDestToSrcs[MBBMap[Dest]] = SmallPtrSet<BBOrMBB, 4>();
+ for (const auto P : KV.second)
+ UnwindDestToSrcs[MBBMap[Dest]].insert(
+ MBBMap[P.get<const BasicBlock *>()]);
}
- EHInfo.EHPadUnwindMap = std::move(NewMap);
+ EHInfo.UnwindDestToSrcs = std::move(UnwindDestToSrcs);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a5978711b871..348fad6daf8f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -166,9 +166,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
assert(TRI->isTypeLegalForClass(*UseRC, VT) &&
"Incompatible phys register def and uses!");
DstRC = UseRC;
- } else {
- DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
- }
+ } else
+ DstRC = SrcRC;
// If all uses are reading from the src physical register and copying the
// register is either impossible or very expensive, then don't create a copy.
@@ -684,144 +683,213 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
SD->setIsEmitted();
- if (SD->isInvalidated()) {
- // An invalidated SDNode must generate an undef DBG_VALUE: although the
- // original value is no longer computed, earlier DBG_VALUEs live ranges
- // must not leak into later code.
- auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
- MIB.addReg(0U);
- MIB.addReg(0U, RegState::Debug);
+ ArrayRef<SDDbgOperand> LocationOps = SD->getLocationOps();
+ assert(!LocationOps.empty() && "dbg_value with no location operands?");
+
+ if (SD->isInvalidated())
+ return EmitDbgNoLocation(SD);
+
+ // Emit variadic dbg_value nodes as DBG_VALUE_LIST.
+ if (SD->isVariadic()) {
+ // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)*
+ const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST);
+ // Build the DBG_VALUE_LIST instruction base.
+ auto MIB = BuildMI(*MF, DL, DbgValDesc);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
+ AddDbgValueLocationOps(MIB, DbgValDesc, LocationOps, VRBaseMap);
return &*MIB;
}
// Attempt to produce a DBG_INSTR_REF if we've been asked to.
+ // We currently exclude the possibility of instruction references for
+ // variadic nodes; if at some point we enable them, this should be moved
+ // above the variadic block.
if (EmitDebugInstrRefs)
if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap))
return InstrRef;
- if (SD->getKind() == SDDbgValue::FRAMEIX) {
- // Stack address; this needs to be lowered in target-dependent fashion.
- // EmitTargetCodeForFrameDebugValue is responsible for allocation.
- auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(SD->getFrameIx());
- if (SD->isIndirect())
- // Push [fi + 0] onto the DIExpression stack.
- FrameMI.addImm(0);
- else
- // Push fi onto the DIExpression stack.
- FrameMI.addReg(0);
- return FrameMI.addMetadata(Var).addMetadata(Expr);
- }
- // Otherwise, we're going to create an instruction here.
- const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
- MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
- if (SD->getKind() == SDDbgValue::SDNODE) {
- SDNode *Node = SD->getSDNode();
- SDValue Op = SDValue(Node, SD->getResNo());
- // It's possible we replaced this SDNode with other(s) and therefore
- // didn't generate code for it. It's better to catch these cases where
- // they happen and transfer the debug info, but trying to guarantee that
- // in all cases would be very fragile; this is a safeguard for any
- // that were missed.
- DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
- if (I==VRBaseMap.end())
- MIB.addReg(0U); // undef
- else
- AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
- /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
- } else if (SD->getKind() == SDDbgValue::VREG) {
- MIB.addReg(SD->getVReg(), RegState::Debug);
- } else if (SD->getKind() == SDDbgValue::CONST) {
- const Value *V = SD->getConst();
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->getBitWidth() > 64)
- MIB.addCImm(CI);
+ return EmitDbgValueFromSingleOp(SD, VRBaseMap);
+}
+
+void InstrEmitter::AddDbgValueLocationOps(
+ MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc,
+ ArrayRef<SDDbgOperand> LocationOps,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ for (const SDDbgOperand &Op : LocationOps) {
+ switch (Op.getKind()) {
+ case SDDbgOperand::FRAMEIX:
+ MIB.addFrameIndex(Op.getFrameIx());
+ break;
+ case SDDbgOperand::VREG:
+ MIB.addReg(Op.getVReg(), RegState::Debug);
+ break;
+ case SDDbgOperand::SDNODE: {
+ SDValue V = SDValue(Op.getSDNode(), Op.getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ if (VRBaseMap.count(V) == 0)
+ MIB.addReg(0U); // undef
else
- MIB.addImm(CI->getSExtValue());
- } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- MIB.addFPImm(CF);
- } else if (isa<ConstantPointerNull>(V)) {
- // Note: This assumes that all nullptr constants are zero-valued.
- MIB.addImm(0);
- } else {
- // Could be an Undef. In any case insert an Undef so we can see what we
- // dropped.
- MIB.addReg(0U);
+ AddOperand(MIB, V, (*MIB).getNumOperands(), &DbgValDesc, VRBaseMap,
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } break;
+ case SDDbgOperand::CONST: {
+ const Value *V = Op.getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getSExtValue());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ MIB.addFPImm(CF);
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Note: This assumes that all nullptr constants are zero-valued.
+ MIB.addImm(0);
+ } else {
+ // Could be an Undef. In any case insert an Undef so we can see what we
+ // dropped.
+ MIB.addReg(0U);
+ }
+ } break;
}
- } else {
- // Insert an Undef so we can see what we dropped.
- MIB.addReg(0U);
}
-
- // Indirect addressing is indicated by an Imm as the second parameter.
- if (SD->isIndirect())
- MIB.addImm(0U);
- else
- MIB.addReg(0U, RegState::Debug);
-
- MIB.addMetadata(Var);
- MIB.addMetadata(Expr);
-
- return &*MIB;
}
MachineInstr *
InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap) {
- // Instruction referencing is still in a prototype state: for now we're only
- // going to support SDNodes within a block. Copies are not supported, they
- // don't actually define a value.
- if (SD->getKind() != SDDbgValue::SDNODE)
- return nullptr;
-
- SDNode *Node = SD->getSDNode();
- SDValue Op = SDValue(Node, SD->getResNo());
- DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
- if (I==VRBaseMap.end())
- return nullptr; // undef value: let EmitDbgValue produce a DBG_VALUE $noreg.
-
+ assert(!SD->isVariadic());
+ SDDbgOperand DbgOperand = SD->getLocationOps()[0];
MDNode *Var = SD->getVariable();
MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
+ const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
+
+ // Handle variable locations that don't actually depend on the instructions
+ // in the program: constants and stack locations.
+ if (DbgOperand.getKind() == SDDbgOperand::FRAMEIX ||
+ DbgOperand.getKind() == SDDbgOperand::CONST)
+ return EmitDbgValueFromSingleOp(SD, VRBaseMap);
+
+ // It may not be immediately possible to identify the MachineInstr that
+ // defines a VReg, it can depend for example on the order blocks are
+ // emitted in. When this happens, or when further analysis is needed later,
+ // produce an instruction like this:
+ //
+ // DBG_INSTR_REF %0:gr64, 0, !123, !456
+ //
+ // i.e., point the instruction at the vreg, and patch it up later in
+ // MachineFunction::finalizeDebugInstrRefs.
+ auto EmitHalfDoneInstrRef = [&](unsigned VReg) -> MachineInstr * {
+ auto MIB = BuildMI(*MF, DL, RefII);
+ MIB.addReg(VReg);
+ MIB.addImm(0);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ return MIB;
+ };
- // Try to pick out a defining instruction at this point.
- unsigned VReg = getVR(Op, VRBaseMap);
- MachineInstr *ResultInstr = nullptr;
+ // Try to find both the defined register and the instruction defining it.
+ MachineInstr *DefMI = nullptr;
+ unsigned VReg;
- // No definition corresponds to scenarios where a vreg is live-in to a block,
- // and doesn't have a defining instruction (yet). This can be patched up
- // later; at this early stage of implementation, fall back to using DBG_VALUE.
- if (!MRI->hasOneDef(VReg))
- return nullptr;
+ if (DbgOperand.getKind() == SDDbgOperand::VREG) {
+ VReg = DbgOperand.getVReg();
- MachineInstr &DefMI = *MRI->def_instr_begin(VReg);
- // Some target specific opcodes can become copies. As stated above, we're
- // ignoring those for now.
- if (DefMI.isCopy() || DefMI.getOpcode() == TargetOpcode::SUBREG_TO_REG)
- return nullptr;
+ // No definition means that block hasn't been emitted yet. Leave a vreg
+ // reference to be fixed later.
+ if (!MRI->hasOneDef(VReg))
+ return EmitHalfDoneInstrRef(VReg);
+
+ DefMI = &*MRI->def_instr_begin(VReg);
+ } else {
+ assert(DbgOperand.getKind() == SDDbgOperand::SDNODE);
+ // Look up the corresponding VReg for the given SDNode, if any.
+ SDNode *Node = DbgOperand.getSDNode();
+ SDValue Op = SDValue(Node, DbgOperand.getResNo());
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
+ // No VReg -> produce a DBG_VALUE $noreg instead.
+ if (I==VRBaseMap.end())
+ return EmitDbgNoLocation(SD);
+
+ // Try to pick out a defining instruction at this point.
+ VReg = getVR(Op, VRBaseMap);
+
+ // Again, if there's no instruction defining the VReg right now, fix it up
+ // later.
+ if (!MRI->hasOneDef(VReg))
+ return EmitHalfDoneInstrRef(VReg);
+
+ DefMI = &*MRI->def_instr_begin(VReg);
+ }
+
+ // Avoid copy-like instructions: they don't define values, only move them.
+ // Leave a virtual-register reference until it can be fixed up later, to find
+ // the underlying value definition.
+ if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI))
+ return EmitHalfDoneInstrRef(VReg);
- const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
auto MIB = BuildMI(*MF, DL, RefII);
- // Find the operand which defines the specified VReg.
+ // Find the operand number which defines the specified VReg.
unsigned OperandIdx = 0;
- for (const auto &MO : DefMI.operands()) {
+ for (const auto &MO : DefMI->operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg() == VReg)
break;
++OperandIdx;
}
- assert(OperandIdx < DefMI.getNumOperands());
+ assert(OperandIdx < DefMI->getNumOperands());
// Make the DBG_INSTR_REF refer to that instruction, and that operand.
- unsigned InstrNum = DefMI.getDebugInstrNum();
+ unsigned InstrNum = DefMI->getDebugInstrNum();
MIB.addImm(InstrNum);
MIB.addImm(OperandIdx);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
- ResultInstr = &*MIB;
- return ResultInstr;
+ return &*MIB;
+}
+
+MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {
+ // An invalidated SDNode must generate an undef DBG_VALUE: although the
+ // original value is no longer computed, earlier DBG_VALUEs live ranges
+ // must not leak into later code.
+ MDNode *Var = SD->getVariable();
+ MDNode *Expr = SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+ auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
+ MIB.addReg(0U);
+ MIB.addReg(0U, RegState::Debug);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ return &*MIB;
+}
+
+MachineInstr *
+InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ MDNode *Var = SD->getVariable();
+ MDNode *Expr = SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+
+ assert(SD->getLocationOps().size() == 1 &&
+ "Non variadic dbg_value should have only one location op");
+
+ // Emit non-variadic dbg_value nodes as DBG_VALUE.
+ // DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr
+ auto MIB = BuildMI(*MF, DL, II);
+ AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap);
+
+ if (SD->isIndirect())
+ MIB.addImm(0U);
+ else
+ MIB.addReg(0U, RegState::Debug);
+
+ return MIB.addMetadata(Var).addMetadata(Expr);
}
MachineInstr *
@@ -1116,10 +1184,10 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::LIFETIME_START:
case ISD::LIFETIME_END: {
- unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
- TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
-
- FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
+ unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START)
+ ? TargetOpcode::LIFETIME_START
+ : TargetOpcode::LIFETIME_END;
+ auto *FI = cast<FrameIndexSDNode>(Node->getOperand(1));
BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
.addFrameIndex(FI->getIndex());
break;
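Summarizing the InstrEmitter debug-value changes above, the operand layouts implied by the grammar comments look roughly like this. Register names and metadata ids are placeholders in the same style as the DBG_INSTR_REF example in the comment, not lines taken from a real MIR dump:

  DBG_VALUE %0:gr64, $noreg, !123, !456        location, isIndirect ($noreg = direct, 0 = indirect), variable, expression
  DBG_VALUE_LIST !123, !456, %0:gr64, %1:gr64  variable, expression, one location per remaining operand
  DBG_INSTR_REF 42, 0, !123, !456              defining instruction number, operand index, variable, expression
  DBG_INSTR_REF %0:gr64, 0, !123, !456         "half-done" form, patched up later in finalizeDebugInstrRefs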
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 09658b8143fe..ac8a70156522 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -25,6 +25,7 @@ class MachineInstrBuilder;
class MCInstrDesc;
class SDDbgLabel;
class SDDbgValue;
+class SDDbgOperand;
class TargetLowering;
class TargetMachine;
@@ -108,16 +109,29 @@ public:
/// (which do not go into the machine instrs.)
static unsigned CountResults(SDNode *Node);
+ void AddDbgValueLocationOps(MachineInstrBuilder &MIB,
+ const MCInstrDesc &DbgValDesc,
+ ArrayRef<SDDbgOperand> Locations,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
/// EmitDbgValue - Generate machine instruction for a dbg_value node.
///
MachineInstr *EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap);
- /// Attempt to emit a dbg_value as a DBG_INSTR_REF. May fail and return
- /// nullptr, in which case we fall back to plain EmitDbgValue.
+ /// Emit a dbg_value as a DBG_INSTR_REF. May produce DBG_VALUE $noreg instead
+ /// if there is no variable location; alternatively, a half-formed DBG_INSTR_REF
+ /// that refers to a virtual register and is corrected later in isel.
MachineInstr *EmitDbgInstrRef(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap);
+ /// Emit a DBG_VALUE $noreg, indicating a variable has no location.
+ MachineInstr *EmitDbgNoLocation(SDDbgValue *SD);
+
+ /// Emit a DBG_VALUE from the operands to SDDbgValue.
+ MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
/// Generate machine instruction for a dbg_label node.
MachineInstr *EmitDbgLabel(SDDbgLabel *SD);
@@ -148,7 +162,6 @@ private:
void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, Register> &VRBaseMap);
};
-
-}
+} // namespace llvm
#endif
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 62d7191036ca..d92b23f56e4d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -133,12 +133,10 @@ private:
SDValue N1, SDValue N2,
ArrayRef<int> Mask) const;
- bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
- bool &NeedInvert, const SDLoc &dl, SDValue &Chain,
- bool IsSignaling = false);
-
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall LC,
+ SmallVectorImpl<SDValue> &Results);
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_F128,
@@ -181,8 +179,6 @@ private:
SmallVectorImpl<SDValue> &Results);
SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl);
- SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
- SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
SDValue ExpandPARITY(SDValue Op, const SDLoc &dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
@@ -1261,6 +1257,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
if (Node->getNumValues() == 1) {
+ // Verify the new types match the original. Glue is waived because
+ // ISD::ADDC can be legalized by replacing Glue with an integer type.
+ assert((Res.getValueType() == Node->getValueType(0) ||
+ Node->getValueType(0) == MVT::Glue) &&
+ "Type mismatch for custom legalized operation");
LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
// We can just directly replace this node with the lowered value.
ReplaceNode(SDValue(Node, 0), Res);
@@ -1268,8 +1269,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
SmallVector<SDValue, 8> ResultVals;
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ // Verify the new types match the original. Glue is waived because
+ // ISD::ADDC can be legalized by replacing Glue with an integer type.
+ assert((Res->getValueType(i) == Node->getValueType(i) ||
+ Node->getValueType(i) == MVT::Glue) &&
+ "Type mismatch for custom legalized operation");
ResultVals.push_back(Res.getValue(i));
+ }
LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
ReplaceNode(Node, ResultVals.data());
return;
@@ -1363,17 +1370,19 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
MachinePointerInfo());
}
- StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
-
SDValue NewLoad;
- if (Op.getValueType().isVector())
+ if (Op.getValueType().isVector()) {
+ StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT,
+ Op.getValueType(), Idx);
NewLoad =
DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo());
- else
+ } else {
+ StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
VecVT.getVectorElementType());
+ }
// Replace the chain going out of the store, by the one out of the load.
DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
@@ -1398,6 +1407,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// Store the value to a temporary stack slot, then LOAD the returned part.
EVT VecVT = Vec.getValueType();
+ EVT SubVecVT = Part.getValueType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo PtrInfo =
@@ -1407,7 +1417,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
// Then store the inserted part.
- SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+ SDValue SubStackPtr =
+ TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
// Store the subvector.
Ch = DAG.getStore(
@@ -1676,152 +1687,6 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
Results.push_back(Tmp2);
}
-/// Legalize a SETCC with given LHS and RHS and condition code CC on the current
-/// target.
-///
-/// If the SETCC has been legalized using AND / OR, then the legalized node
-/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
-/// will be set to false.
-///
-/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
-/// then the values of LHS and RHS will be swapped, CC will be set to the
-/// new condition, and NeedInvert will be set to false.
-///
-/// If the SETCC has been legalized using the inverse condcode, then LHS and
-/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert
-/// will be set to true. The caller must invert the result of the SETCC with
-/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect
-/// of a true/false result.
-///
-/// \returns true if the SetCC has been legalized, false if it hasn't.
-bool SelectionDAGLegalize::LegalizeSetCCCondCode(
- EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert,
- const SDLoc &dl, SDValue &Chain, bool IsSignaling) {
- MVT OpVT = LHS.getSimpleValueType();
- ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
- NeedInvert = false;
- switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: llvm_unreachable("Unknown condition code action!");
- case TargetLowering::Legal:
- // Nothing to do.
- break;
- case TargetLowering::Expand: {
- ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
- if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
- std::swap(LHS, RHS);
- CC = DAG.getCondCode(InvCC);
- return true;
- }
- // Swapping operands didn't work. Try inverting the condition.
- bool NeedSwap = false;
- InvCC = getSetCCInverse(CCCode, OpVT);
- if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
- // If inverting the condition is not enough, try swapping operands
- // on top of it.
- InvCC = ISD::getSetCCSwappedOperands(InvCC);
- NeedSwap = true;
- }
- if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
- CC = DAG.getCondCode(InvCC);
- NeedInvert = true;
- if (NeedSwap)
- std::swap(LHS, RHS);
- return true;
- }
-
- ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
- unsigned Opc = 0;
- switch (CCCode) {
- default: llvm_unreachable("Don't know how to expand this condition!");
- case ISD::SETUO:
- if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
- CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR;
- break;
- }
- assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
- "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
- NeedInvert = true;
- LLVM_FALLTHROUGH;
- case ISD::SETO:
- assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
- && "If SETO is expanded, SETOEQ must be legal!");
- CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
- case ISD::SETONE:
- case ISD::SETUEQ:
- // If the SETUO or SETO CC isn't legal, we might be able to use
- // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
- // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
- // the operands.
- CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
- if (!TLI.isCondCodeLegal(CC2, OpVT) &&
- (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
- TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
- CC1 = ISD::SETOGT;
- CC2 = ISD::SETOLT;
- Opc = ISD::OR;
- NeedInvert = ((unsigned)CCCode & 0x8U);
- break;
- }
- LLVM_FALLTHROUGH;
- case ISD::SETOEQ:
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETUNE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULT:
- case ISD::SETULE:
- // If we are floating point, assign and break, otherwise fall through.
- if (!OpVT.isInteger()) {
- // We can use the 4th bit to tell if we are the unordered
- // or ordered version of the opcode.
- CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
- Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
- CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
- break;
- }
- // Fallthrough if we are unsigned integer.
- LLVM_FALLTHROUGH;
- case ISD::SETLE:
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETLT:
- case ISD::SETNE:
- case ISD::SETEQ:
- // If all combinations of inverting the condition and swapping operands
- // didn't work then we have no means to expand the condition.
- llvm_unreachable("Don't know how to expand this condition!");
- }
-
- SDValue SetCC1, SetCC2;
- if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
- // If we aren't the ordered or unorder operation,
- // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain,
- IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain,
- IsSignaling);
- } else {
- // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
- SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain,
- IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain,
- IsSignaling);
- }
- if (Chain)
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
- SetCC2.getValue(1));
- LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
- RHS = SDValue();
- CC = SDValue();
- return true;
- }
- }
- return false;
-}
-
/// Emit a store/load combination to the stack. This stores
/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
/// a load from the stack slot to DestVT, extending it if needed.
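Aside on the LegalizeSetCCCondCode block removed above (the logic now lives in TargetLowering; see the TLI.LegalizeSetCCCondCode calls further down): its documentation describes three outcomes, namely swapping the operands, inverting the condition code, or expanding into two comparisons joined by AND/OR. A scalar sketch of the first two, assuming a hypothetical target where SETULT is illegal but its swapped and inverted forms are legal:

  #include <cstdint>

  // Swapped-operands form: (lhs SETULT rhs) is rewritten as (rhs SETUGT lhs).
  bool setult_via_swap(uint32_t lhs, uint32_t rhs) { return rhs > lhs; }

  // Inverted-condcode form: compute SETUGE and tell the caller to negate the
  // result afterwards (this is what NeedInvert communicates in the DAG helper).
  bool setult_via_invert(uint32_t lhs, uint32_t rhs, bool &NeedInvert) {
    NeedInvert = true;
    return lhs >= rhs;
  }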
@@ -2176,21 +2041,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
}
void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
- RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128,
+ RTLIB::Libcall LC,
SmallVectorImpl<SDValue> &Results) {
- RTLIB::Libcall LC;
- switch (Node->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::f32: LC = Call_F32; break;
- case MVT::f64: LC = Call_F64; break;
- case MVT::f80: LC = Call_F80; break;
- case MVT::f128: LC = Call_F128; break;
- case MVT::ppcf128: LC = Call_PPCF128; break;
- }
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ llvm_unreachable("Can't create an unknown libcall!");
if (Node->isStrictFPOpcode()) {
EVT RetVT = Node->getValueType(0);
@@ -2209,6 +2063,20 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
}
}
+/// Expand the node to a libcall based on the result type.
+void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC = RTLIB::getFPLibCall(Node->getSimpleValueType(0),
+ Call_F32, Call_F64, Call_F80,
+ Call_F128, Call_PPCF128);
+ ExpandFPLibCall(Node, LC, Results);
+}
+
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
@@ -2237,32 +2105,10 @@ void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
RTLIB::Libcall Call_PPCF128,
SmallVectorImpl<SDValue> &Results) {
EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 1 : 0).getValueType();
-
- RTLIB::Libcall LC;
- switch (InVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::f32: LC = Call_F32; break;
- case MVT::f64: LC = Call_F64; break;
- case MVT::f80: LC = Call_F80; break;
- case MVT::f128: LC = Call_F128; break;
- case MVT::ppcf128: LC = Call_PPCF128; break;
- }
-
- if (Node->isStrictFPOpcode()) {
- EVT RetVT = Node->getValueType(0);
- SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
- TargetLowering::MakeLibCallOptions CallOptions;
- // FIXME: This doesn't support tail calls.
- std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
- Ops, CallOptions,
- SDLoc(Node),
- Node->getOperand(0));
- Results.push_back(Tmp.first);
- Results.push_back(Tmp.second);
- } else {
- SDValue Tmp = ExpandLibCall(LC, Node, false);
- Results.push_back(Tmp);
- }
+ RTLIB::Libcall LC = RTLIB::getFPLibCall(InVT.getSimpleVT(),
+ Call_F32, Call_F64, Call_F80,
+ Call_F128, Call_PPCF128);
+ ExpandFPLibCall(Node, LC, Results);
}
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
@@ -2782,122 +2628,6 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT_SAT(SDNode *Node,
return DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Result);
}
-/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts.
-SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
- EVT VT = Op.getValueType();
- EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- unsigned Sz = VT.getScalarSizeInBits();
-
- SDValue Tmp, Tmp2, Tmp3;
-
- // If we can, perform BSWAP first and then the mask+swap the i4, then i2
- // and finally the i1 pairs.
- // TODO: We can easily support i4/i2 legal types if any target ever does.
- if (Sz >= 8 && isPowerOf2_32(Sz)) {
- // Create the masks - repeating the pattern every byte.
- APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
- APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
- APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
- APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
- APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
- APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
-
- // BSWAP if the type is wider than a single byte.
- Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
-
- // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
- Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
-
- // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
- Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
-
- // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
- Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- return Tmp;
- }
-
- Tmp = DAG.getConstant(0, dl, VT);
- for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
- if (I < J)
- Tmp2 =
- DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
- else
- Tmp2 =
- DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
-
- APInt Shift(Sz, 1);
- Shift <<= J;
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
- Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
- }
-
- return Tmp;
-}
-
-/// Open code the operations for BSWAP of the specified operation.
-SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
- EVT VT = Op.getValueType();
- EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
- switch (VT.getSimpleVT().getScalarType().SimpleTy) {
- default: llvm_unreachable("Unhandled Expand type in BSWAP!");
- case MVT::i16:
- // Use a rotate by 8. This can be further expanded if necessary.
- return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- case MVT::i32:
- Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
- DAG.getConstant(0xFF0000, dl, VT));
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
- Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
- Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
- return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
- case MVT::i64:
- Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
- Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
- Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
- Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
- DAG.getConstant(255ULL<<48, dl, VT));
- Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
- DAG.getConstant(255ULL<<40, dl, VT));
- Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
- DAG.getConstant(255ULL<<32, dl, VT));
- Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
- DAG.getConstant(255ULL<<24, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
- DAG.getConstant(255ULL<<16, dl, VT));
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
- DAG.getConstant(255ULL<<8 , dl, VT));
- Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
- Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
- Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
- Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
- Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
- Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
- return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
- }
-}
-
/// Open code the operations for PARITY of the specified operation.
SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
EVT VT = Op.getValueType();
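On the ExpandBITREVERSE removal above (the ExpandNode case now calls TLI.expandBITREVERSE instead): the deleted code open-codes the classic mask-and-shift reversal. The same steps on a plain 8-bit scalar, as a quick sanity check of the masks in the deleted comments:

  #include <cstdint>

  uint8_t reverse8(uint8_t V) {
    V = (uint8_t)(((V & 0xF0) >> 4) | ((V & 0x0F) << 4)); // swap nibbles
    V = (uint8_t)(((V & 0xCC) >> 2) | ((V & 0x33) << 2)); // swap bit pairs
    V = (uint8_t)(((V & 0xAA) >> 1) | ((V & 0x55) << 1)); // swap adjacent bits
    return V; // reverse8(0b00000110) == 0b01100000
  }

Wider power-of-two types repeat the same masks per byte (APInt::getSplat in the deleted code) and perform a BSWAP first.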
@@ -2946,10 +2676,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::BITREVERSE:
- Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
+ if ((Tmp1 = TLI.expandBITREVERSE(Node, DAG)))
+ Results.push_back(Tmp1);
break;
case ISD::BSWAP:
- Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ if ((Tmp1 = TLI.expandBSWAP(Node, DAG)))
+ Results.push_back(Tmp1);
break;
case ISD::PARITY:
Results.push_back(ExpandPARITY(Node->getOperand(0), dl));
@@ -3324,6 +3056,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
+ case ISD::VECTOR_SPLICE: {
+ Results.push_back(TLI.expandVectorSplice(Node, DAG));
+ break;
+ }
case ISD::EXTRACT_ELEMENT: {
EVT OpTy = Node->getOperand(0).getValueType();
if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
@@ -3830,8 +3566,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp2 = Node->getOperand(1 + Offset);
Tmp3 = Node->getOperand(2 + Offset);
bool Legalized =
- LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3,
- NeedInvert, dl, Chain, IsSignaling);
+ TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3,
+ NeedInvert, dl, Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
@@ -3926,8 +3662,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
if (!Legalized) {
- Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
- Tmp1, Tmp2, CC, NeedInvert, dl, Chain);
+ Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC,
+ NeedInvert, dl, Chain);
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
@@ -3961,8 +3698,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp4 = Node->getOperand(1); // CC
bool Legalized =
- LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2,
- Tmp3, Tmp4, NeedInvert, dl, Chain);
+ TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain);
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
@@ -4145,7 +3882,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_CMP_SWAP: {
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- AtomicOrdering Order = cast<AtomicSDNode>(Node)->getOrdering();
+ AtomicOrdering Order = cast<AtomicSDNode>(Node)->getMergedOrdering();
RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
EVT RetVT = Node->getValueType(0);
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4299,15 +4036,8 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
case ISD::FPOWI:
case ISD::STRICT_FPOWI: {
- RTLIB::Libcall LC;
- switch (Node->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::f32: LC = RTLIB::POWI_F32; break;
- case MVT::f64: LC = RTLIB::POWI_F64; break;
- case MVT::f80: LC = RTLIB::POWI_F80; break;
- case MVT::f128: LC = RTLIB::POWI_F128; break;
- case MVT::ppcf128: LC = RTLIB::POWI_PPCF128; break;
- }
+ RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node),
@@ -4318,9 +4048,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Exponent));
break;
}
- ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128, Results);
+ unsigned Offset = Node->isStrictFPOpcode() ? 1 : 0;
+ bool ExponentHasSizeOfInt =
+ DAG.getLibInfo().getIntSize() ==
+ Node->getOperand(1 + Offset).getValueType().getSizeInBits();
+ if (!ExponentHasSizeOfInt) {
+ // If the exponent does not match with sizeof(int) a libcall to
+ // RTLIB::POWI would use the wrong type for the argument.
+ DAG.getContext()->emitError("POWI exponent does not match sizeof(int)");
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ break;
+ }
+ ExpandFPLibCall(Node, LC, Results);
break;
}
case ISD::FPOW:
@@ -4634,11 +4373,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::STRICT_FSETCC ||
Node->getOpcode() == ISD::STRICT_FSETCCS)
OVT = Node->getOperand(1).getSimpleValueType();
- if (Node->getOpcode() == ISD::BR_CC)
+ if (Node->getOpcode() == ISD::BR_CC ||
+ Node->getOpcode() == ISD::SELECT_CC)
OVT = Node->getOperand(2).getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
SDLoc dl(Node);
- SDValue Tmp1, Tmp2, Tmp3;
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
switch (Node->getOpcode()) {
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
@@ -4830,6 +4570,51 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
+ case ISD::VECTOR_SPLICE: {
+ Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ISD::VECTOR_SPLICE, dl, NVT, Tmp1, Tmp2,
+ Node->getOperand(2));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp3));
+ break;
+ }
+ case ISD::SELECT_CC: {
+ SDValue Cond = Node->getOperand(4);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Cond)->get();
+ // Type of the comparison operands.
+ MVT CVT = Node->getSimpleValueType(0);
+ assert(CVT == OVT && "not handled");
+
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+
+ // Promote the comparison operands, if needed.
+ if (TLI.isCondCodeLegal(CCCode, CVT)) {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ } else {
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ }
+ // Cast the true/false operands.
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ Tmp4 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3));
+
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, NVT, {Tmp1, Tmp2, Tmp3, Tmp4, Cond},
+ Node->getFlags());
+
+ // Cast the result back to the original type.
+ if (ExtOp != ISD::FP_EXTEND)
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1);
+ else
+ Tmp1 = DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp1,
+ DAG.getIntPtrConstant(0, dl));
+
+ Results.push_back(Tmp1);
+ break;
+ }
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
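On the new SELECT_CC promotion case above: it extends the true/false values, extends the compare operands as well when the condition code is not legal at the original type (sign- or zero-extend for integers depending on the condition code, FP_EXTEND for floats), performs the select in the promoted type, and narrows the result back with TRUNCATE or FP_ROUND. A scalar analogue for a signed i8 comparison promoted to i32:

  #include <cstdint>

  int8_t select_cc_slt(int8_t a, int8_t b, int8_t tval, int8_t fval) {
    int32_t pa = a, pb = b;             // SIGN_EXTEND the comparison operands
    int32_t pt = tval, pf = fval;       // extend the true/false operands too
    int32_t wide = (pa < pb) ? pt : pf; // SELECT_CC in the promoted type
    return (int8_t)wide;                // TRUNCATE back to the original type
  }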
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 966645e3256d..3553f9ec16c2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -26,6 +27,8 @@ using namespace llvm;
#define DEBUG_TYPE "legalize-types"
/// GetFPLibCall - Return the right libcall for the given floating point type.
+/// FIXME: This is a local version of RTLIB::getFPLibCall that should be
+/// refactored away (see RTLIB::getPOWI for an example).
static RTLIB::Libcall GetFPLibCall(EVT VT,
RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
@@ -570,14 +573,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
unsigned Offset = IsStrict ? 1 : 0;
- assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 &&
+ assert((N->getOperand(1 + Offset).getValueType() == MVT::i16 ||
+ N->getOperand(1 + Offset).getValueType() == MVT::i32) &&
"Unsupported power type!");
- RTLIB::Libcall LC = GetFPLibCall(N->getValueType(0),
- RTLIB::POWI_F32,
- RTLIB::POWI_F64,
- RTLIB::POWI_F80,
- RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128);
+ RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
// FIXME: Implement this if some target needs it.
@@ -585,6 +585,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
return DAG.getUNDEF(N->getValueType(0));
}
+ if (DAG.getLibInfo().getIntSize() !=
+ N->getOperand(1 + Offset).getValueType().getSizeInBits()) {
+ // If the exponent does not match with sizeof(int) a libcall to RTLIB::POWI
+ // would use the wrong type for the argument.
+ DAG.getContext()->emitError("POWI exponent does not match sizeof(int)");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
N->getOperand(1 + Offset) };
@@ -1515,10 +1523,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
- RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128), Lo, Hi);
+ ExpandFloatRes_Binary(N, RTLIB::getPOWI(N->getValueType(0)), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N,
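The checks added to SoftenFloatRes_FPOWI above (and the matching ones in LegalizeDAG and LegalizeIntegerTypes) guard the assumption baked into the powi libcall: the runtime helper takes its exponent as a plain C int. The declarations below sketch the shape of that interface as commonly provided by compiler-rt/libgcc; treat the exact symbol names as illustrative rather than authoritative:

  // The exponent parameter is 'int'. Passing an i16 or i64 exponent node
  // straight through the libcall would hand the callee an argument of the
  // wrong width for the target ABI, which is why the legalizer now emits an
  // error instead.
  extern "C" float  __powisf2(float a, int b);
  extern "C" double __powidf2(double a, int b);

  double powi_example(double x) { return __powidf2(x, 3); }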
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 4a686bc227de..b8a3dd014901 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -96,8 +97,14 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR:
Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR:
+ Res = PromoteIntRes_INSERT_SUBVECTOR(N); break;
+ case ISD::VECTOR_REVERSE:
+ Res = PromoteIntRes_VECTOR_REVERSE(N); break;
case ISD::VECTOR_SHUFFLE:
Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::VECTOR_SPLICE:
+ Res = PromoteIntRes_VECTOR_SPLICE(N); break;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
case ISD::BUILD_VECTOR:
@@ -106,6 +113,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
case ISD::SPLAT_VECTOR:
Res = PromoteIntRes_SPLAT_VECTOR(N); break;
+ case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
@@ -455,6 +463,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
EVT NVT = Op.getValueType();
SDLoc dl(N);
+ // If the larger BSWAP isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type. We only do this for scalars since we have a shuffle
+ // based lowering for vectors in LegalizeVectorOps.
+ if (!OVT.isVector() &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::BSWAP, NVT)) {
+ if (SDValue Res = TLI.expandBSWAP(N, DAG))
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res);
+ }
+
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
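The pre-existing promotion path shown in the context lines does the byte swap at the promoted width, which leaves the interesting bytes in the high half, hence the SRL by DiffBits; the newly added path expands the swap at the original width (and any-extends the result) when the wide BSWAP is not available, avoiding those extra shifts. A concrete scalar example of the shift correction for i16 promoted to i32, using the usual compiler builtin as a stand-in for ISD::BSWAP:

  #include <cstdint>

  uint16_t bswap16_via_i32(uint16_t x) {
    uint32_t wide = x;                          // promoted value, e.g. 0x00001234
    uint32_t swapped = __builtin_bswap32(wide); // 0x34120000
    return (uint16_t)(swapped >> 16);           // shift by DiffBits (32 - 16) -> 0x3412
  }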
@@ -467,6 +485,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
EVT NVT = Op.getValueType();
SDLoc dl(N);
+ // If the larger BITREVERSE isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type. We only do this for scalars since we have a shuffle
+ // based lowering for vectors in LegalizeVectorOps.
+ if (!OVT.isVector() && OVT.isSimple() &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::BITREVERSE, NVT)) {
+ if (SDValue Res = TLI.expandBITREVERSE(N, DAG))
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res);
+ }
+
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT,
@@ -763,6 +791,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
EVT PromotedType = Op1Promoted.getValueType();
unsigned NewBits = PromotedType.getScalarSizeInBits();
+ if (Opcode == ISD::UADDSAT) {
+ APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Add =
+ DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+ }
+
+ // USUBSAT can always be promoted as long as we have zero-extended the args.
+ if (Opcode == ISD::USUBSAT)
+ return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
+ Op2Promoted);
+
// Shift cannot use a min/max expansion, we can't detect overflow if all of
// the bits have been shifted out.
if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
@@ -773,8 +814,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
case ISD::SSHLSAT:
ShiftOp = ISD::SRA;
break;
- case ISD::UADDSAT:
- case ISD::USUBSAT:
case ISD::USHLSAT:
ShiftOp = ISD::SRL;
break;
@@ -795,32 +834,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
SDValue Result =
DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
- } else {
- if (Opcode == ISD::USUBSAT) {
- SDValue Max =
- DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted);
- return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted);
- }
-
- if (Opcode == ISD::UADDSAT) {
- APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
- SDValue Add =
- DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
- return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
- }
-
- unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
- APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
- APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
- SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
- SDValue Result =
- DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
- Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
- Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
- return Result;
}
+
+ unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
+ APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
+ APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Result =
+ DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
+ Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
+ Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
+ return Result;
}
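The UADDSAT/USUBSAT handling hoisted above the legality check relies on the operands having been zero-extended: an unsigned saturating add in the wider type can simply be clamped to the old maximum with UMIN, and an unsigned saturating subtract already produces a value that fits. A scalar sketch for i8 promoted to i32:

  #include <algorithm>
  #include <cstdint>

  uint8_t uaddsat8(uint8_t a, uint8_t b) {
    uint32_t sum = (uint32_t)a + (uint32_t)b;      // cannot wrap in the wide type
    return (uint8_t)std::min<uint32_t>(sum, 0xFF); // UMIN against the old maximum
  }

  uint8_t usubsat8(uint8_t a, uint8_t b) {
    uint32_t wa = a, wb = b;               // zero-extended operands
    uint32_t wide = wa > wb ? wa - wb : 0; // USUBSAT in the wide type
    return (uint8_t)wide;                  // already fits in the original width
  }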
SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
@@ -1217,17 +1242,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
case TargetLowering::TypeSplitVector: {
EVT InVT = InOp.getValueType();
assert(InVT.isVector() && "Cannot split scalar types");
- unsigned NumElts = InVT.getVectorNumElements();
- assert(NumElts == NVT.getVectorNumElements() &&
+ ElementCount NumElts = InVT.getVectorElementCount();
+ assert(NumElts == NVT.getVectorElementCount() &&
"Dst and Src must have the same number of elements");
- assert(isPowerOf2_32(NumElts) &&
+ assert(isPowerOf2_32(NumElts.getKnownMinValue()) &&
"Promoted vector type must be a power of two");
SDValue EOp1, EOp2;
GetSplitVector(InOp, EOp1, EOp2);
EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
- NumElts/2);
+ NumElts.divideCoefficientBy(2));
EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
@@ -1535,6 +1560,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
+
+ case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -1963,8 +1990,37 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
- SDValue Op = SExtPromotedInteger(N->getOperand(1));
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+ // FIXME: Support for promotion of STRICT_FPOWI is not implemented yet.
+ assert(N->getOpcode() == ISD::FPOWI && "No STRICT_FPOWI support here yet.");
+
+ // The integer operand is the last operand in FPOWI (so the result and
+ // floating point operand are already type legalized).
+
+ // We can't just promote the exponent type in FPOWI, since we want to lower
+ // the node to a libcall, and if we promote to a type larger than
+ // sizeof(int) the libcall might not match the target's ABI. Instead
+ // we rewrite to a libcall here directly, letting makeLibCall handle promotion
+ // if the target accepts it according to shouldSignExtendTypeInLibCall.
+ RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
+ if (!TLI.getLibcallName(LC)) {
+ // Some targets don't have a powi libcall; use pow instead.
+ // FIXME: Implement this if some target needs it.
+ DAG.getContext()->emitError("Don't know how to promote fpowi to fpow");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+ // The exponent should fit in a sizeof(int) type for the libcall to be valid.
+ assert(DAG.getLibInfo().getIntSize() ==
+ N->getOperand(1).getValueType().getSizeInBits() &&
+ "POWI exponent should match with sizeof(int) when doing the libcall.");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops,
+ CallOptions, SDLoc(N), SDValue());
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
@@ -2000,6 +2056,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
+ SDValue Op = ZExtPromotedInteger(N->getOperand(1));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
@@ -2186,6 +2247,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
ExpandIntRes_FunnelShift(N, Lo, Hi);
break;
+
+ case ISD::VSCALE:
+ ExpandIntRes_VSCALE(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2197,7 +2262,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- AtomicOrdering order = cast<AtomicSDNode>(Node)->getOrdering();
+ AtomicOrdering order = cast<AtomicSDNode>(Node)->getMergedOrdering();
// Lower to outline atomic libcall if outline atomics enabled,
// or to sync libcall otherwise
RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT);
@@ -3598,18 +3663,16 @@ void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
+ assert((Node->getOpcode() == ISD::SADDO || Node->getOpcode() == ISD::SSUBO) &&
+ "Node has unexpected Opcode");
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDLoc dl(Node);
SDValue Ovf;
- unsigned CarryOp;
- switch(Node->getOpcode()) {
- default: llvm_unreachable("Node has unexpected Opcode");
- case ISD::SADDO: CarryOp = ISD::SADDO_CARRY; break;
- case ISD::SSUBO: CarryOp = ISD::SSUBO_CARRY; break;
- }
+ bool IsAdd = Node->getOpcode() == ISD::SADDO;
+ unsigned CarryOp = IsAdd ? ISD::SADDO_CARRY : ISD::SSUBO_CARRY;
bool HasCarryOp = TLI.isOperationLegalOrCustom(
CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
@@ -3621,8 +3684,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
GetExpandedInteger(RHS, RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1));
- Lo = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
- ISD::UADDO : ISD::USUBO, dl, VTList, { LHSL, RHSL });
+ Lo = DAG.getNode(IsAdd ? ISD::UADDO : ISD::USUBO, dl, VTList, {LHSL, RHSL});
Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
Ovf = Hi.getValue(1);
@@ -3636,28 +3698,36 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
// Compute the overflow.
//
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Sum >= 0
+ // LHSSign -> LHS < 0
+ // RHSSign -> RHS < 0
+ // SumSign -> Sum < 0
//
// Add:
// Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
// Sub:
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
//
+ // To get better codegen we can rewrite this by doing bitwise math on
+ // the integers and extracting the final sign bit at the end. So the
+ // above becomes:
+ //
+ // Add:
+ // Overflow -> (~(LHS ^ RHS) & (LHS ^ Sum)) < 0
+ // Sub:
+ // Overflow -> ((LHS ^ RHS) & (LHS ^ Sum)) < 0
+ //
+ // NOTE: This is different from the expansion we do in expandSADDSUBO
+ // because it is more costly to determine whether the RHS is > 0 for SSUBO
+ // with the integers split.
+ EVT VT = LHS.getValueType();
+ SDValue SignsMatch = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ if (IsAdd)
+ SignsMatch = DAG.getNOT(dl, SignsMatch, VT);
+
+ SDValue SumSignNE = DAG.getNode(ISD::XOR, dl, VT, LHS, Sum);
+ Ovf = DAG.getNode(ISD::AND, dl, VT, SignsMatch, SumSignNE);
EVT OType = Node->getValueType(1);
- SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
-
- SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
- SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
- SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
- Node->getOpcode() == ISD::SADDO ?
- ISD::SETEQ : ISD::SETNE);
-
- SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
- SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
-
- Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Ovf = DAG.getSetCC(dl, OType, Ovf, DAG.getConstant(0, dl, VT), ISD::SETLT);
}
// Use the calculated overflow everywhere.
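A scalar check of the sign-bit formulas in the comment above, written with 64-bit values the way each half of the expanded wide add/sub behaves (the DAG version builds the same XOR/AND/SETLT at the wide type and lets later expansion split them across the Hi/Lo halves):

  #include <cstdint>

  bool saddo(int64_t lhs, int64_t rhs) {
    int64_t sum = (int64_t)((uint64_t)lhs + (uint64_t)rhs); // wrapping add
    // Overflow iff lhs and rhs have the same sign and sum's sign differs.
    return ((~(lhs ^ rhs)) & (lhs ^ sum)) < 0;
  }

  bool ssubo(int64_t lhs, int64_t rhs) {
    int64_t diff = (int64_t)((uint64_t)lhs - (uint64_t)rhs); // wrapping sub
    // Overflow iff lhs and rhs have different signs and diff's sign differs
    // from lhs's sign.
    return ((lhs ^ rhs) & (lhs ^ diff)) < 0;
  }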
@@ -3909,33 +3979,32 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
// %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
// %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
// %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
- // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh
- // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 )
+ // %4 = add iNh %1.0, %2.0 as iN
+ // %5 = { iNh, i1 } @uadd.with.overflow.iNh(iNh %4, iNh %3.HIGH)
//
- // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 }
+ // %lo = %3.LO
+ // %hi = %5.0
+ // %ovf = %0 || %1.1 || %2.1 || %5.1
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
- SplitInteger(LHS, LHSLow, LHSHigh);
- SplitInteger(RHS, RHSLow, RHSHigh);
- EVT HalfVT = LHSLow.getValueType()
- , BitVT = N->getValueType(1);
- SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT);
- SDVTList VTFullAddO = DAG.getVTList(VT, BitVT);
+ GetExpandedInteger(LHS, LHSLow, LHSHigh);
+ GetExpandedInteger(RHS, RHSLow, RHSHigh);
+ EVT HalfVT = LHSLow.getValueType();
+ EVT BitVT = N->getValueType(1);
+ SDVTList VTHalfWithO = DAG.getVTList(HalfVT, BitVT);
SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
- SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow);
+ SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, LHSHigh, RHSLow);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
- SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
- One.getValue(0));
- SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow);
+ SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, RHSHigh, LHSLow);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
- SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
- Two.getValue(0));
+
+ SDValue HighSum = DAG.getNode(ISD::ADD, dl, HalfVT, One, Two);
// Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
// know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this
@@ -3946,10 +4015,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
- SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh);
- SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four);
- Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1));
- SplitInteger(Five, Lo, Hi);
+ SplitInteger(Three, Lo, Hi);
+
+ Hi = DAG.getNode(ISD::UADDO, dl, VTHalfWithO, Hi, HighSum);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}
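The %0..%5 recipe in the comment at the top of this hunk can be exercised in isolation. Below is a minimal standalone C++ sketch that follows the same steps at 32 bits with 16-bit halves, so the reference check fits in uint64_t; it is illustrative only and not the legalizer code. Note that the plain half-width add for %4 is safe because any wrap there requires both high halves to be non-zero, which %0 already flags.

    // Standalone sketch of the wide-UMULO expansion built from half-width ops.
    #include <cassert>
    #include <cstdint>

    struct MulO { uint32_t Lo; bool Ovf; };

    static MulO umulo32From16(uint32_t LHS, uint32_t RHS) {
      uint16_t LHSLo = LHS & 0xFFFF, LHSHi = LHS >> 16;
      uint16_t RHSLo = RHS & 0xFFFF, RHSHi = RHS >> 16;

      // %0: if both high halves are non-zero the product needs > 32 bits.
      bool Ovf = LHSHi != 0 && RHSHi != 0;

      // %1 and %2: half-width multiplies; each overflows iff its 32-bit
      // product does not fit in 16 bits.
      uint32_t One = (uint32_t)LHSHi * RHSLo;
      Ovf |= One > 0xFFFF;
      uint32_t Two = (uint32_t)RHSHi * LHSLo;
      Ovf |= Two > 0xFFFF;

      // %4: plain half-width add; a wrap here is only possible when both
      // high halves are non-zero, which %0 already reported.
      uint16_t HighSum = (uint16_t)(One + Two);

      // %3: full-width product of the low halves, then %5: add HighSum into
      // its high half with overflow.
      uint32_t Three = (uint32_t)LHSLo * RHSLo;
      uint16_t Lo = Three & 0xFFFF;
      uint32_t HiWide = (Three >> 16) + HighSum;
      Ovf |= HiWide > 0xFFFF;
      uint16_t Hi = (uint16_t)HiWide;

      return {((uint32_t)Hi << 16) | Lo, Ovf};
    }

    int main() {
      uint32_t Tests[] = {0, 1, 3, 0xFFFF, 0x10000, 0x12345678, 0xFFFFFFFFu};
      for (uint32_t A : Tests)
        for (uint32_t B : Tests) {
          uint64_t Wide = (uint64_t)A * B;
          MulO R = umulo32From16(A, B);
          assert(R.Lo == (uint32_t)Wide);
          assert(R.Ovf == (Wide > 0xFFFFFFFFull));
        }
      return 0;
    }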
@@ -4133,6 +4202,21 @@ void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
SplitInteger(Res, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT HalfVT =
+ EVT::getIntegerVT(*DAG.getContext(), N->getValueSizeInBits(0) / 2);
+ SDLoc dl(N);
+
+ // We assume VSCALE(1) fits into a legal integer.
+ APInt One(HalfVT.getSizeInBits(), 1);
+ SDValue VScaleBase = DAG.getVScale(dl, HalfVT, One);
+ VScaleBase = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, VScaleBase);
+ SDValue Res = DAG.getNode(ISD::MUL, dl, VT, VScaleBase, N->getOperand(0));
+ SplitInteger(Res, Lo, Hi);
+}
+
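A minimal standalone sketch of the ExpandIntRes_VSCALE strategy above, illustrative only: compute vscale in the legal half-width type, zero-extend it, multiply by the requested amount, and split the product into lo/hi halves. readVScale is a hypothetical stand-in for the target's runtime vector-length query.

    // Standalone sketch: expanding an illegal i64 VSCALE on a 32-bit target.
    #include <cassert>
    #include <cstdint>

    static uint32_t readVScale() { return 4; } // hypothetical runtime value

    static void expandVScale(uint64_t Amount, uint32_t &Lo, uint32_t &Hi) {
      // VSCALE(1) is assumed to fit the legal half-width integer.
      uint64_t VScaleBase = (uint64_t)readVScale(); // zero-extend i32 -> i64
      uint64_t Res = VScaleBase * Amount;           // i64 multiply
      Lo = (uint32_t)Res;                           // SplitInteger: low half
      Hi = (uint32_t)(Res >> 32);                   //               high half
    }

    int main() {
      uint32_t Lo, Hi;
      expandVScale(0x100000010ULL, Lo, Hi);
      assert(Lo == 0x40 && Hi == 0x4); // 4 * 0x1'0000'0010
      return 0;
    }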
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
//===----------------------------------------------------------------------===//
@@ -4163,6 +4247,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
@@ -4418,6 +4503,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
LowCmp.getValue(1), Cond);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_SPLAT_VECTOR(SDNode *N) {
+ // Split the operand and replace with SPLAT_VECTOR_PARTS.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return DAG.getNode(ISD::SPLAT_VECTOR_PARTS, SDLoc(N), N->getValueType(0), Lo,
+ Hi);
+}
+
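Conceptually, ExpandIntOp_SPLAT_VECTOR turns a splat of an illegal-width scalar into a splat built from its legal halves. A minimal standalone sketch, with plain arrays standing in for DAG nodes (illustrative only):

    // Standalone sketch: splatting an i64 value on a 32-bit target from its
    // two expanded i32 parts.
    #include <cassert>
    #include <cstdint>
    #include <vector>

    static std::vector<uint64_t> splatVectorParts(uint32_t Lo, uint32_t Hi,
                                                  unsigned NumElts) {
      // Each element is reassembled from the expanded halves.
      uint64_t Elt = ((uint64_t)Hi << 32) | Lo;
      return std::vector<uint64_t>(NumElts, Elt);
    }

    int main() {
      uint64_t Val = 0x0123456789ABCDEFULL;
      uint32_t Lo = (uint32_t)Val;         // GetExpandedInteger: low half
      uint32_t Hi = (uint32_t)(Val >> 32); //                     high half
      for (uint64_t E : splatVectorParts(Lo, Hi, 4))
        assert(E == Val);
      return 0;
    }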
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// The value being shifted is legal, but the shift amount is too big.
// It follows that either the result of the shift is undefined, or the
@@ -4587,6 +4680,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
return Swap.getValue(1);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getNode(ISD::VECTOR_SPLICE, dl, OutVT, V0, V1, N->getOperand(2));
+}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
@@ -4648,6 +4750,35 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getBuildVector(NOutVT, dl, Ops);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_SUBVECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ SDLoc dl(N);
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+
+ EVT SubVecVT = SubVec.getValueType();
+ EVT NSubVT =
+ EVT::getVectorVT(*DAG.getContext(), NOutVT.getVectorElementType(),
+ SubVecVT.getVectorElementCount());
+
+ Vec = GetPromotedInteger(Vec);
+ SubVec = DAG.getNode(ISD::ANY_EXTEND, dl, NSubVT, SubVec);
+
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NOutVT, Vec, SubVec, Idx);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0);
+}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
@@ -4725,6 +4856,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+ APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+ return DAG.getStepVector(dl, NOutVT,
+ StepVal.sext(NOutVT.getScalarSizeInBits()));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index a59f03854775..05a974af3b55 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -182,9 +182,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
// Checked that NewNodes are only used by other NewNodes.
for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
SDNode *N = NewNodes[i];
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- UI != UE; ++UI)
- assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ for (SDNode *U : N->uses())
+ assert(U->getNodeId() == NewNode && "NewNode used by non-NewNode!");
}
#endif
}
@@ -396,9 +395,7 @@ NodeDone:
assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
N->setNodeId(Processed);
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
- UI != E; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : N->uses()) {
int NodeId = User->getNodeId();
// This node has two options: it can either be a new node or its Node ID
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 630a0a9adaf7..8d17d8fc68b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -298,10 +298,14 @@ private:
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_INSERT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
@@ -390,6 +394,7 @@ private:
SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_FPOWI(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
+ SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -460,6 +465,8 @@ private:
void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi);
+
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -478,6 +485,7 @@ private:
SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_RETURNADDR(SDNode *N);
SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
+ SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
@@ -832,9 +840,12 @@ private:
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
+ void SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -920,6 +931,7 @@ private:
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTEND(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 4015a5a0ce70..ebe3bfc4b75a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -138,6 +138,7 @@ class VectorLegalizer {
SDValue ExpandStore(SDNode *N);
SDValue ExpandFNEG(SDNode *Node);
void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -396,7 +397,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SELECT:
case ISD::VSELECT:
case ISD::SELECT_CC:
- case ISD::SETCC:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
@@ -457,6 +457,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::USHLSAT:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
+ case ISD::MGATHER:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::SMULFIX:
@@ -495,6 +496,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(1).getValueType());
break;
+ case ISD::SETCC: {
+ MVT OpVT = Node->getOperand(0).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal)
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ }
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -529,7 +538,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return RecursivelyLegalizeResults(Op, ResultVals);
}
-// FIME: This is very similar to the X86 override of
+// FIXME: This is very similar to the X86 override of
// TargetLowering::LowerOperationWrapper. Can we merge them somehow?
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
@@ -762,7 +771,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandFSUB(Node, Results);
return;
case ISD::SETCC:
- Results.push_back(UnrollVSETCC(Node));
+ ExpandSETCC(Node, Results);
return;
case ISD::ABS:
if (TLI.expandABS(Node, Tmp, DAG)) {
@@ -916,11 +925,16 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
// AND,OR,XOR, we will have to scalarize the op.
// Notice that the operation may be 'promoted' which means that it is
// 'bitcasted' to another type which is handled.
- // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+ // Also, we need to be able to construct a splat vector using either
+ // BUILD_VECTOR or SPLAT_VECTOR.
+ // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
+ // BUILD_VECTOR?
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
+ : ISD::SPLAT_VECTOR,
+ VT) == TargetLowering::Expand)
return DAG.UnrollVectorOp(Node);
// Generate a mask operand.
@@ -934,8 +948,11 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
BitTy),
DAG.getConstant(0, DL, BitTy));
- // Broadcast the mask so that the entire vector is all-one or all zero.
- Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
+  // Broadcast the mask so that the entire vector is all ones or all zeros.
+ if (VT.isFixedLengthVector())
+ Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
+ else
+ Mask = DAG.getSplatVector(MaskTy, DL, Mask);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
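The mask-and-blend expansion described in this hunk can be shown on plain scalar lanes. A minimal standalone sketch (illustrative only): broadcast the i1 condition into an all-ones/all-zeros lane mask, then blend the two operands with AND/OR.

    // Standalone sketch of the SELECT expansion via a broadcast mask.
    #include <cassert>
    #include <cstdint>

    static uint32_t selectViaMask(bool Cond, uint32_t TrueVal,
                                  uint32_t FalseVal) {
      // Broadcast the i1 condition to a full-width mask: 0x0 or 0xFFFFFFFF.
      uint32_t Mask = Cond ? 0xFFFFFFFFu : 0u;
      // Op1 & Mask keeps the true lanes, Op2 & ~Mask keeps the false lanes.
      return (TrueVal & Mask) | (FalseVal & ~Mask);
    }

    int main() {
      assert(selectViaMask(true, 0xAAAAAAAA, 0x55555555) == 0xAAAAAAAA);
      assert(selectViaMask(false, 0xAAAAAAAA, 0x55555555) == 0x55555555);
      return 0;
    }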
@@ -1163,14 +1180,19 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
// AND,OR,XOR, we will have to scalarize the op.
// Notice that the operation may be 'promoted' which means that it is
// 'bitcasted' to another type which is handled.
- // This operation also isn't safe with AND, OR, XOR when the boolean
- // type is 0/1 as we need an all ones vector constant to mask with.
- // FIXME: Sign extend 1 to all ones if thats legal on the target.
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
- TLI.getBooleanContents(Op1.getValueType()) !=
- TargetLowering::ZeroOrNegativeOneBooleanContent)
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Node);
+
+ // This operation also isn't safe with AND, OR, XOR when the boolean type is
+ // 0/1 and the select operands aren't also booleans, as we need an all-ones
+ // vector constant to mask with.
+ // FIXME: Sign extend 1 to all ones if that's legal on the target.
+ auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
+ if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
+ !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
+ Op1.getValueType().getVectorElementType() == MVT::i1))
return DAG.UnrollVectorOp(Node);
// If the mask and the type are different sizes, unroll the vector op. This
@@ -1331,6 +1353,50 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node,
Results.push_back(Tmp);
}
+void VectorLegalizer::ExpandSETCC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ bool NeedInvert = false;
+ SDLoc dl(Node);
+ MVT OpVT = Node->getOperand(0).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+
+ if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
+ Results.push_back(UnrollVSETCC(Node));
+ return;
+ }
+
+ SDValue Chain;
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue CC = Node->getOperand(2);
+ bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS,
+ RHS, CC, NeedInvert, dl, Chain);
+
+ if (Legalized) {
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
+ if (CC.getNode())
+ LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
+ Node->getFlags());
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
+ } else {
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ LHS =
+ DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
+ DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
+ DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
+ LHS->setFlags(Node->getFlags());
+ }
+
+ Results.push_back(LHS);
+}
+
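The two rewrites ExpandSETCC above leans on, swapping the operands or evaluating the inverse predicate and negating, can be illustrated with scalar compares. A minimal standalone sketch assuming a toy target that only supports SETLT (illustrative only):

    // Standalone sketch of condition-code legalization by swap or invert.
    #include <cassert>
    #include <cstdint>

    static bool setLT(int32_t L, int32_t R) { return L < R; } // only legal CC

    static bool setGT(int32_t L, int32_t R) {
      // SETGT(L, R) == SETLT(R, L): handled by swapping LHS and RHS.
      return setLT(R, L);
    }

    static bool setGE(int32_t L, int32_t R) {
      // SETGE(L, R) == NOT SETLT(L, R): the NeedInvert path wraps the legal
      // compare in a logical NOT.
      return !setLT(L, R);
    }

    int main() {
      assert(setGT(3, 2) && !setGT(2, 3));
      assert(setGE(2, 2) && !setGE(1, 2));
      return 0;
    }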
void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 57cb364f1939..91242bbf866f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -90,6 +90,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FREEZE:
+ case ISD::ARITH_FENCE:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
@@ -318,10 +319,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
- EVT NewVT = N->getValueType(0).getVectorElementType();
- SDValue Op = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
- NewVT, Op, N->getOperand(1));
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ // The result needs scalarizing, but it's not a given that the source does.
+ // See similar logic in ScalarizeVecRes_UnaryOp.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+ return DAG.getNode(ISD::FP_ROUND, DL,
+ N->getValueType(0).getVectorElementType(), Op,
+ N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
@@ -917,6 +929,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SCALAR_TO_VECTOR:
SplitVecRes_ScalarOp(N, Lo, Hi);
break;
+ case ISD::STEP_VECTOR:
+ SplitVecRes_STEP_VECTOR(N, Lo, Hi);
+ break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
@@ -930,9 +945,15 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
+ case ISD::VECTOR_REVERSE:
+ SplitVecRes_VECTOR_REVERSE(N, Lo, Hi);
+ break;
case ISD::VECTOR_SHUFFLE:
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
+ case ISD::VECTOR_SPLICE:
+ SplitVecRes_VECTOR_SPLICE(N, Lo, Hi);
+ break;
case ISD::VAARG:
SplitVecRes_VAARG(N, Lo, Hi);
break;
@@ -963,6 +984,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FREEZE:
+ case ISD::ARITH_FENCE:
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
@@ -1243,7 +1265,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
Hi = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
- DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorNumElements(), dl));
+ DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorMinNumElements(), dl));
}
void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
@@ -1255,22 +1277,29 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
GetSplitVector(Vec, Lo, Hi);
EVT VecVT = Vec.getValueType();
- unsigned VecElems = VecVT.getVectorNumElements();
- unsigned SubElems = SubVec.getValueType().getVectorNumElements();
-
- // If we know the index is 0, and we know the subvector doesn't cross the
- // boundary between the halves, we can avoid spilling the vector, and insert
- // into the lower half of the split vector directly.
- // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever
- // there is no boundary crossing. But those cases don't seem to get hit in
- // practice.
+ EVT LoVT = Lo.getValueType();
+ EVT SubVecVT = SubVec.getValueType();
+ unsigned VecElems = VecVT.getVectorMinNumElements();
+ unsigned SubElems = SubVecVT.getVectorMinNumElements();
+ unsigned LoElems = LoVT.getVectorMinNumElements();
+
+ // If we know the index is in the first half, and we know the subvector
+ // doesn't cross the boundary between the halves, we can avoid spilling the
+ // vector, and insert into the lower half of the split vector directly.
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ if (IdxVal + SubElems <= LoElems) {
Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
return;
}
+  // Similarly, if the subvector lies entirely in the high half, insert it
+  // there directly. Note that we can't tell whether a fixed-length subvector
+  // lies entirely within the high half of a scalable vector.
+ if (VecVT.isScalableVector() == SubVecVT.isScalableVector() &&
+ IdxVal >= LoElems && IdxVal + SubElems <= VecElems) {
+ Hi = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Hi.getValueType(), Hi, SubVec,
+ DAG.getVectorIdxConstant(IdxVal - LoElems, dl));
+ return;
+ }
// Spill the vector to the stack.
// In cases where the vector is illegal it will be broken down into parts
@@ -1286,7 +1315,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
SmallestAlign);
// Store the new subvector into the specified index.
- SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+ SDValue SubVecPtr =
+ TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
Store = DAG.getStore(Store, dl, SubVec, SubVecPtr,
MachinePointerInfo::getUnknownStack(MF));
@@ -1295,13 +1325,12 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
SmallestAlign);
// Increment the pointer to the other part.
- unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
- StackPtr =
- DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
+ auto *Load = cast<LoadSDNode>(Lo);
+ MachinePointerInfo MPI = Load->getPointerInfo();
+ IncrementPointer(Load, LoVT, MPI, StackPtr);
// Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr,
- PtrInfo.getWithOffset(IncrementSize), SmallestAlign);
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign);
}
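The half-selection logic above boils down to simple index arithmetic. A minimal standalone sketch using fixed-length std::vector halves (illustrative only; a plain element copy stands in for the spill-to-stack fallback):

    // Standalone sketch: inserting a subvector into a vector split into halves.
    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    using Vec = std::vector<int32_t>;

    static void insertSubvector(Vec &Lo, Vec &Hi, const Vec &Sub, size_t Idx) {
      size_t LoElems = Lo.size(), VecElems = Lo.size() + Hi.size();
      if (Idx + Sub.size() <= LoElems) {
        // Entirely within the low half.
        std::copy(Sub.begin(), Sub.end(), Lo.begin() + Idx);
        return;
      }
      if (Idx >= LoElems && Idx + Sub.size() <= VecElems) {
        // Entirely within the high half; rebase the index.
        std::copy(Sub.begin(), Sub.end(), Hi.begin() + (Idx - LoElems));
        return;
      }
      // Crosses the boundary: generic fallback.
      for (size_t i = 0; i < Sub.size(); ++i) {
        size_t Pos = Idx + i;
        (Pos < LoElems ? Lo[Pos] : Hi[Pos - LoElems]) = Sub[i];
      }
    }

    int main() {
      Vec Lo{0, 1, 2, 3}, Hi{4, 5, 6, 7};
      insertSubvector(Lo, Hi, {9, 9}, 1); // low half only
      assert(Lo == (Vec{0, 9, 9, 3}));
      insertSubvector(Lo, Hi, {8, 8}, 5); // high half only
      assert(Hi == (Vec{4, 8, 8, 7}));
      insertSubvector(Lo, Hi, {7, 7}, 3); // crosses the boundary
      assert(Lo[3] == 7 && Hi[0] == 7);
      return 0;
    }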
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -1616,6 +1645,29 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
+void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ assert(N->getValueType(0).isScalableVector() &&
+ "Only scalable vectors are supported for STEP_VECTOR");
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDValue Step = N->getOperand(0);
+
+ Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step);
+
+ // Hi = Lo + (EltCnt * Step)
+ EVT EltVT = Step.getValueType();
+ APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue();
+ SDValue StartOfHi =
+ DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements());
+ StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
+ StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi);
+
+ Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step);
+ Hi = DAG.getNode(ISD::ADD, dl, HiVT, Hi, StartOfHi);
+}
+
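The "Hi = Lo + (EltCnt * Step)" identity above can be checked directly. A minimal standalone sketch with a fixed element count standing in for vscale times the low half's minimum element count (illustrative only):

    // Standalone sketch: the high half of a split step vector is another step
    // vector offset by (elements-in-low-half * Step).
    #include <cassert>
    #include <cstdint>
    #include <vector>

    static std::vector<int64_t> stepVector(int64_t Step, size_t NumElts) {
      std::vector<int64_t> V(NumElts);
      for (size_t i = 0; i < NumElts; ++i)
        V[i] = Step * (int64_t)i;
      return V;
    }

    int main() {
      const int64_t Step = 3;
      const size_t LoElts = 4, HiElts = 4;

      std::vector<int64_t> Lo = stepVector(Step, LoElts);
      // StartOfHi = Step * LoElts, splatted and added to a fresh step vector.
      int64_t StartOfHi = Step * (int64_t)LoElts;
      std::vector<int64_t> Hi = stepVector(Step, HiElts);
      for (int64_t &E : Hi)
        E += StartOfHi;

      // Concatenating Lo and Hi reproduces the full step vector.
      std::vector<int64_t> Full = stepVector(Step, LoElts + HiElts);
      for (size_t i = 0; i < LoElts; ++i)
        assert(Lo[i] == Full[i]);
      for (size_t i = 0; i < HiElts; ++i)
        assert(Hi[i] == Full[LoElts + i]);
      return 0;
    }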
void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
@@ -2802,6 +2854,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
}
+
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
@@ -3097,6 +3150,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ_ZERO_UNDEF:
case ISD::FNEG:
case ISD::FREEZE:
+ case ISD::ARITH_FENCE:
case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
break;
@@ -3911,7 +3965,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
@@ -3926,7 +3979,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (IdxVal == 0 && InVT == WidenVT)
return InOp;
+ if (VT.isScalableVector())
+ report_fatal_error("Don't know how to widen the result of "
+ "EXTRACT_SUBVECTOR for scalable vectors");
+
// Check if we can extract from the vector.
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned InNumElts = InVT.getVectorNumElements();
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
@@ -4201,6 +4259,12 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
return SDValue();
EVT VSelVT = N->getValueType(0);
+
+ // This method can't handle scalable vector types.
+ // FIXME: This support could be added in the future.
+ if (VSelVT.isScalableVector())
+ return SDValue();
+
// Only handle vector types which are a power of 2.
if (!isPowerOf2_64(VSelVT.getSizeInBits()))
return SDValue();
@@ -4471,6 +4535,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = WidenVecOp_INSERT_SUBVECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
@@ -4569,8 +4634,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
EVT InVT = InOp.getValueType();
if (InVT.getSizeInBits() != VT.getSizeInBits()) {
EVT InEltVT = InVT.getVectorElementType();
- for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) {
- EVT FixedVT = (MVT::SimpleValueType)i;
+ for (EVT FixedVT : MVT::vector_valuetypes()) {
EVT FixedEltVT = FixedVT.getVectorElementType();
if (TLI.isTypeLegal(FixedVT) &&
FixedVT.getSizeInBits() == VT.getSizeInBits() &&
@@ -4785,6 +4849,24 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(VT, dl, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
+ SDValue SubVec = N->getOperand(1);
+ SDValue InVec = N->getOperand(0);
+
+ if (getTypeAction(InVec.getValueType()) == TargetLowering::TypeWidenVector)
+ InVec = GetWidenedVector(InVec);
+
+ if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
+ SubVec = GetWidenedVector(SubVec);
+
+ if (SubVec.getValueType() == InVec.getValueType() && InVec.isUndef() &&
+ N->getConstantOperandVal(2) == 0)
+ return SubVec;
+
+ report_fatal_error("Don't know how to widen the operands for "
+ "INSERT_SUBVECTOR");
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
@@ -5079,14 +5161,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
if (!Scalable && Width == WidenEltWidth)
return RetVT;
- // See if there is larger legal integer than the element type to load/store.
- unsigned VT;
// Don't bother looking for an integer type if the vector is scalable, skip
// to vector types.
if (!Scalable) {
- for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
- VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
- EVT MemVT((MVT::SimpleValueType) VT);
+ // See if there is larger legal integer than the element type to load/store.
+ for (EVT MemVT : reverse(MVT::integer_valuetypes())) {
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
@@ -5107,9 +5186,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
// See if there is a larger vector type to load/store that has the same vector
// element type and is evenly divisible with the WidenVT.
- for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
- EVT MemVT = (MVT::SimpleValueType) VT;
+ for (EVT MemVT : reverse(MVT::vector_valuetypes())) {
// Skip vector MVTs which don't match the scalable property of WidenVT.
if (Scalable != MemVT.isScalableVector())
continue;
@@ -5492,3 +5569,29 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
Ops[Idx] = FillVal;
return DAG.getBuildVector(NVT, dl, Ops);
}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue InLo, InHi;
+ GetSplitVector(N->getOperand(0), InLo, InHi);
+ SDLoc DL(N);
+
+ Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
+ Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
+}
+
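SplitVecRes_VECTOR_REVERSE above relies on reverse(concat(InLo, InHi)) == concat(reverse(InHi), reverse(InLo)), so each result half is the reverse of the other input half. A minimal standalone sketch with fixed-length vectors (illustrative only):

    // Standalone sketch of the split-reverse identity.
    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> InLo{0, 1, 2, 3}, InHi{4, 5, 6, 7};

      std::vector<int> Lo(InHi.rbegin(), InHi.rend()); // reverse of high half
      std::vector<int> Hi(InLo.rbegin(), InLo.rend()); // reverse of low half

      // Reference: reverse the concatenated vector directly.
      std::vector<int> Full = InLo;
      Full.insert(Full.end(), InHi.begin(), InHi.end());
      std::reverse(Full.begin(), Full.end());

      std::vector<int> Split = Lo;
      Split.insert(Split.end(), Hi.begin(), Hi.end());
      assert(Split == Full);
      return 0;
    }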
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ SDValue Expanded = TLI.expandVectorSplice(N, DAG);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded,
+ DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 65b9d017fc5c..75b4242a415c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -25,69 +25,155 @@ class SDNode;
class Value;
class raw_ostream;
-/// Holds the information from a dbg_value node through SDISel.
-/// We do not use SDValue here to avoid including its header.
-class SDDbgValue {
+/// Holds the information for a single machine location through SDISel; either
+/// an SDNode, a constant, a stack location, or a virtual register.
+class SDDbgOperand {
public:
- enum DbgValueKind {
- SDNODE = 0, ///< Value is the result of an expression.
- CONST = 1, ///< Value is a constant.
- FRAMEIX = 2, ///< Value is contents of a stack location.
- VREG = 3 ///< Value is a virtual register.
+ enum Kind {
+ SDNODE = 0, ///< Value is the result of an expression.
+ CONST = 1, ///< Value is a constant.
+ FRAMEIX = 2, ///< Value is contents of a stack location.
+ VREG = 3 ///< Value is a virtual register.
};
+ Kind getKind() const { return kind; }
+
+ /// Returns the SDNode* for a register ref
+ SDNode *getSDNode() const {
+ assert(kind == SDNODE);
+ return u.s.Node;
+ }
+
+ /// Returns the ResNo for a register ref
+ unsigned getResNo() const {
+ assert(kind == SDNODE);
+ return u.s.ResNo;
+ }
+
+ /// Returns the Value* for a constant
+ const Value *getConst() const {
+ assert(kind == CONST);
+ return u.Const;
+ }
+
+ /// Returns the FrameIx for a stack object
+ unsigned getFrameIx() const {
+ assert(kind == FRAMEIX);
+ return u.FrameIx;
+ }
+
+ /// Returns the Virtual Register for a VReg
+ unsigned getVReg() const {
+ assert(kind == VREG);
+ return u.VReg;
+ }
+
+ static SDDbgOperand fromNode(SDNode *Node, unsigned ResNo) {
+ return SDDbgOperand(Node, ResNo);
+ }
+ static SDDbgOperand fromFrameIdx(unsigned FrameIdx) {
+ return SDDbgOperand(FrameIdx, FRAMEIX);
+ }
+ static SDDbgOperand fromVReg(unsigned VReg) {
+ return SDDbgOperand(VReg, VREG);
+ }
+ static SDDbgOperand fromConst(const Value *Const) {
+ return SDDbgOperand(Const);
+ }
+
+ bool operator!=(const SDDbgOperand &Other) const { return !(*this == Other); }
+ bool operator==(const SDDbgOperand &Other) const {
+ if (kind != Other.kind)
+ return false;
+ switch (kind) {
+ case SDNODE:
+ return getSDNode() == Other.getSDNode() && getResNo() == Other.getResNo();
+ case CONST:
+ return getConst() == Other.getConst();
+ case VREG:
+ return getVReg() == Other.getVReg();
+ case FRAMEIX:
+ return getFrameIx() == Other.getFrameIx();
+ }
+ return false;
+ }
+
private:
+ Kind kind;
union {
struct {
- SDNode *Node; ///< Valid for expressions.
- unsigned ResNo; ///< Valid for expressions.
+ SDNode *Node; ///< Valid for expressions.
+ unsigned ResNo; ///< Valid for expressions.
} s;
- const Value *Const; ///< Valid for constants.
- unsigned FrameIx; ///< Valid for stack objects.
- unsigned VReg; ///< Valid for registers.
+ const Value *Const; ///< Valid for constants.
+ unsigned FrameIx; ///< Valid for stack objects.
+ unsigned VReg; ///< Valid for registers.
} u;
- DIVariable *Var;
- DIExpression *Expr;
- DebugLoc DL;
- unsigned Order;
- enum DbgValueKind kind;
- bool IsIndirect;
- bool Invalid = false;
- bool Emitted = false;
-public:
/// Constructor for non-constants.
- SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R,
- bool indir, DebugLoc dl, unsigned O)
- : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) {
- kind = SDNODE;
+ SDDbgOperand(SDNode *N, unsigned R) : kind(SDNODE) {
u.s.Node = N;
u.s.ResNo = R;
}
-
/// Constructor for constants.
- SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl,
- unsigned O)
- : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
- kind = CONST;
- u.Const = C;
- }
-
+ SDDbgOperand(const Value *C) : kind(CONST) { u.Const = C; }
/// Constructor for virtual registers and frame indices.
- SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VRegOrFrameIdx,
- bool IsIndirect, DebugLoc DL, unsigned Order,
- enum DbgValueKind Kind)
- : Var(Var), Expr(Expr), DL(DL), Order(Order), IsIndirect(IsIndirect) {
+ SDDbgOperand(unsigned VRegOrFrameIdx, Kind Kind) : kind(Kind) {
assert((Kind == VREG || Kind == FRAMEIX) &&
"Invalid SDDbgValue constructor");
- kind = Kind;
if (kind == VREG)
u.VReg = VRegOrFrameIdx;
else
u.FrameIx = VRegOrFrameIdx;
}
+};
- /// Returns the kind.
- DbgValueKind getKind() const { return kind; }
+/// Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+class SDDbgValue {
+public:
+
+private:
+ // SDDbgValues are allocated by a BumpPtrAllocator, which means the destructor
+ // may not be called; therefore all member arrays must also be allocated by
+ // that BumpPtrAllocator, to ensure that they are correctly freed.
+ size_t NumLocationOps;
+ SDDbgOperand *LocationOps;
+ // SDNode dependencies will be calculated as SDNodes that appear in
+ // LocationOps plus these AdditionalDependencies.
+ size_t NumAdditionalDependencies;
+ SDNode **AdditionalDependencies;
+ DIVariable *Var;
+ DIExpression *Expr;
+ DebugLoc DL;
+ unsigned Order;
+ bool IsIndirect;
+ bool IsVariadic;
+ bool Invalid = false;
+ bool Emitted = false;
+
+public:
+ SDDbgValue(BumpPtrAllocator &Alloc, DIVariable *Var, DIExpression *Expr,
+ ArrayRef<SDDbgOperand> L, ArrayRef<SDNode *> Dependencies,
+ bool IsIndirect, DebugLoc DL, unsigned O, bool IsVariadic)
+ : NumLocationOps(L.size()),
+ LocationOps(Alloc.Allocate<SDDbgOperand>(L.size())),
+ NumAdditionalDependencies(Dependencies.size()),
+ AdditionalDependencies(Alloc.Allocate<SDNode *>(Dependencies.size())),
+ Var(Var), Expr(Expr), DL(DL), Order(O), IsIndirect(IsIndirect),
+ IsVariadic(IsVariadic) {
+ assert(IsVariadic || L.size() == 1);
+ assert(!(IsVariadic && IsIndirect));
+ std::copy(L.begin(), L.end(), LocationOps);
+ std::copy(Dependencies.begin(), Dependencies.end(), AdditionalDependencies);
+ }
+
+  // The LocationOps and AdditionalDependencies arrays are allocated with the
+  // BumpPtrAllocator and are never freed or copied, as we never expect to copy
+  // or destroy an SDDbgValue. If we ever start copying or destroying instances,
+  // we should manage the allocated memory appropriately.
+ SDDbgValue(const SDDbgValue &Other) = delete;
+ SDDbgValue &operator=(const SDDbgValue &Other) = delete;
+ ~SDDbgValue() = delete;
/// Returns the DIVariable pointer for the variable.
DIVariable *getVariable() const { return Var; }
@@ -95,26 +181,37 @@ public:
/// Returns the DIExpression pointer for the expression.
DIExpression *getExpression() const { return Expr; }
- /// Returns the SDNode* for a register ref
- SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; }
-
- /// Returns the ResNo for a register ref
- unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; }
+ ArrayRef<SDDbgOperand> getLocationOps() const {
+ return ArrayRef<SDDbgOperand>(LocationOps, NumLocationOps);
+ }
- /// Returns the Value* for a constant
- const Value *getConst() const { assert (kind==CONST); return u.Const; }
+ SmallVector<SDDbgOperand> copyLocationOps() const {
+ return SmallVector<SDDbgOperand>(LocationOps, LocationOps + NumLocationOps);
+ }
- /// Returns the FrameIx for a stack object
- unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
+  /// Returns the SDNodes which this SDDbgValue depends on.
+ SmallVector<SDNode *> getSDNodes() const {
+ SmallVector<SDNode *> Dependencies;
+ for (SDDbgOperand DbgOp : getLocationOps())
+ if (DbgOp.getKind() == SDDbgOperand::SDNODE)
+ Dependencies.push_back(DbgOp.getSDNode());
+ for (SDNode *Node : getAdditionalDependencies())
+ Dependencies.push_back(Node);
+ return Dependencies;
+ }
- /// Returns the Virtual Register for a VReg
- unsigned getVReg() const { assert (kind==VREG); return u.VReg; }
+ ArrayRef<SDNode *> getAdditionalDependencies() const {
+ return ArrayRef<SDNode *>(AdditionalDependencies,
+ NumAdditionalDependencies);
+ }
/// Returns whether this is an indirect value.
bool isIndirect() const { return IsIndirect; }
+ bool isVariadic() const { return IsVariadic; }
+
/// Returns the DebugLoc.
- DebugLoc getDebugLoc() const { return DL; }
+ const DebugLoc &getDebugLoc() const { return DL; }
/// Returns the SDNodeOrder. This is the order of the preceding node in the
/// input.
@@ -154,7 +251,7 @@ public:
MDNode *getLabel() const { return Label; }
/// Returns the DebugLoc.
- DebugLoc getDebugLoc() const { return DL; }
+ const DebugLoc &getDebugLoc() const { return DL; }
/// Returns the SDNodeOrder. This is the order of the preceding node in the
/// input.
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index debfdda90e1e..b2a8c8bdd78c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -739,6 +739,17 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
if (!N->getHasDebugValue())
return;
+ /// Returns true if \p DV has any VReg operand locations which don't exist in
+ /// VRBaseMap.
+ auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) {
+ for (SDDbgOperand L : DV->getLocationOps()) {
+ if (L.getKind() == SDDbgOperand::SDNODE &&
+ VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0)
+ return true;
+ }
+ return false;
+ };
+
// Opportunistically insert immediate dbg_value uses, i.e. those with the same
// source order number as N.
MachineBasicBlock *BB = Emitter.getBlock();
@@ -747,13 +758,20 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
if (DV->isEmitted())
continue;
unsigned DVOrder = DV->getOrder();
- if (!Order || DVOrder == Order) {
- MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
- if (DbgMI) {
- Orders.push_back({DVOrder, DbgMI});
- BB->insert(InsertPos, DbgMI);
- }
- }
+ if (Order != 0 && DVOrder != Order)
+ continue;
+ // If DV has any VReg location operands which haven't been mapped then
+ // either that node is no longer available or we just haven't visited the
+ // node yet. In the former case we should emit an undef dbg_value, but we
+ // can do it later. And for the latter we'll want to wait until all
+ // dependent nodes have been visited.
+ if (!DV->isInvalidated() && HasUnknownVReg(DV))
+ continue;
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
+ if (!DbgMI)
+ continue;
+ Orders.push_back({DVOrder, DbgMI});
+ BB->insert(InsertPos, DbgMI);
}
}
@@ -790,20 +808,21 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
void ScheduleDAGSDNodes::
EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
MachineBasicBlock::iterator InsertPos) {
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
- if (I->getSUnit()->CopyDstRC) {
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue; // ignore chain preds
+ if (Pred.getSUnit()->CopyDstRC) {
// Copy to physical register.
- DenseMap<SUnit*, Register>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ DenseMap<SUnit *, Register>::iterator VRI =
+ VRBaseMap.find(Pred.getSUnit());
assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
// Find the destination physical register.
Register Reg;
- for (SUnit::const_succ_iterator II = SU->Succs.begin(),
- EE = SU->Succs.end(); II != EE; ++II) {
- if (II->isCtrl()) continue; // ignore chain preds
- if (II->getReg()) {
- Reg = II->getReg();
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ continue; // ignore chain preds
+ if (Succ.getReg()) {
+ Reg = Succ.getReg();
break;
}
}
@@ -811,13 +830,13 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
.addReg(VRI->second);
} else {
// Copy from physical register.
- assert(I->getReg() && "Unknown physical register!");
+ assert(Pred.getReg() && "Unknown physical register!");
Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
(void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
- .addReg(I->getReg());
+ .addReg(Pred.getReg());
}
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index e7bac73678a7..540a6e3efbe1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -136,12 +136,11 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
// Top down: release successors.
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- assert(!I->isAssignedRegDep() &&
+ for (SDep &Succ : SU->Succs) {
+ assert(!Succ.isAssignedRegDep() &&
"The list-td scheduler doesn't yet support physreg dependencies!");
- releaseSucc(SU, *I);
+ releaseSucc(SU, Succ);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2090762e2ff4..2a98464425c4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -146,6 +146,10 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
SplatVal = Op0->getAPIntValue().truncOrSelf(EltSize);
return true;
}
+ if (auto *Op0 = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
+ SplatVal = Op0->getValueAPF().bitcastToAPInt().truncOrSelf(EltSize);
+ return true;
+ }
}
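The new ConstantFPSDNode case above reports a floating-point splat through the integer SplatVal by taking the bit pattern of the value rather than converting it. A minimal standalone sketch where std::memcpy stands in for APFloat::bitcastToAPInt (illustrative only):

    // Standalone sketch: reading an FP splat constant as its integer bits.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint32_t fpSplatBits(float SplatElt) {
      uint32_t Bits;
      static_assert(sizeof(Bits) == sizeof(SplatElt), "same width required");
      std::memcpy(&Bits, &SplatElt, sizeof(Bits)); // bit-cast, not a conversion
      return Bits;
    }

    int main() {
      assert(fpSplatBits(1.0f) == 0x3F800000u);
      assert(fpSplatBits(-0.0f) == 0x80000000u);
      return 0;
    }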
auto *BV = dyn_cast<BuildVectorSDNode>(N);
@@ -338,8 +342,9 @@ bool ISD::matchBinaryPredicate(
return Match(LHSCst, RHSCst);
// TODO: Add support for vector UNDEF cases?
- if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
- ISD::BUILD_VECTOR != RHS.getOpcode())
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ (LHS.getOpcode() != ISD::BUILD_VECTOR &&
+ LHS.getOpcode() != ISD::SPLAT_VECTOR))
return false;
EVT SVT = LHS.getValueType().getScalarType();
@@ -879,6 +884,17 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
DeallocateNode(N);
}
+void SDDbgInfo::add(SDDbgValue *V, bool isParameter) {
+ assert(!(V->isVariadic() && isParameter));
+ if (isParameter)
+ ByvalParmDbgValues.push_back(V);
+ else
+ DbgValues.push_back(V);
+ for (const SDNode *Node : V->getSDNodes())
+ if (Node)
+ DbgValMap[Node].push_back(V);
+}
+
void SDDbgInfo::erase(const SDNode *Node) {
DbgValMapType::iterator I = DbgValMap.find(Node);
if (I == DbgValMap.end())
@@ -932,12 +948,12 @@ static void VerifySDNode(SDNode *N) {
assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
"Wrong number of operands!");
EVT EltVT = N->getValueType(0).getVectorElementType();
- for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
- assert((I->getValueType() == EltVT ||
- (EltVT.isInteger() && I->getValueType().isInteger() &&
- EltVT.bitsLE(I->getValueType()))) &&
- "Wrong operand type!");
- assert(I->getValueType() == N->getOperand(0).getValueType() &&
+ for (const SDUse &Op : N->ops()) {
+ assert((Op.getValueType() == EltVT ||
+ (EltVT.isInteger() && Op.getValueType().isInteger() &&
+ EltVT.bitsLE(Op.getValueType()))) &&
+ "Wrong operand type!");
+ assert(Op.getValueType() == N->getOperand(0).getValueType() &&
"Operands must all have the same type");
}
break;
@@ -1372,6 +1388,22 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
const APInt &NewVal = Elt->getValue();
EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
+
+ // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
+ if (VT.isScalableVector()) {
+ assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
+ "Can only handle an even split!");
+ unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;
+
+ SmallVector<SDValue, 2> ScalarParts;
+ for (unsigned i = 0; i != Parts; ++i)
+ ScalarParts.push_back(getConstant(
+ NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
+ ViaEltVT, isT, isO));
+
+ return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts);
+ }
+
unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
@@ -1381,11 +1413,10 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
SmallVector<SDValue, 2> EltParts;
- for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
+ for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i)
EltParts.push_back(getConstant(
- NewVal.lshr(i * ViaEltSizeInBits).zextOrTrunc(ViaEltSizeInBits), DL,
+ NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));
- }
// EltParts is currently in little endian order. If we actually want
// big-endian order then reverse it now.
@@ -1498,17 +1529,17 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
EVT EltVT = VT.getScalarType();
if (EltVT == MVT::f32)
return getConstantFP(APFloat((float)Val), DL, VT, isTarget);
- else if (EltVT == MVT::f64)
+ if (EltVT == MVT::f64)
return getConstantFP(APFloat(Val), DL, VT, isTarget);
- else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
- EltVT == MVT::f16 || EltVT == MVT::bf16) {
+ if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
+ EltVT == MVT::f16 || EltVT == MVT::bf16) {
bool Ignored;
APFloat APF = APFloat(Val);
APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&Ignored);
return getConstantFP(APF, DL, VT, isTarget);
- } else
- llvm_unreachable("Unsupported type in getConstantFP");
+ }
+ llvm_unreachable("Unsupported type in getConstantFP");
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
@@ -1717,6 +1748,25 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
return SDValue(CondCodeNodes[Cond], 0);
}
+SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
+ APInt One(ResVT.getScalarSizeInBits(), 1);
+ return getStepVector(DL, ResVT, One);
+}
+
+SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal) {
+ assert(ResVT.getScalarSizeInBits() == StepVal.getBitWidth());
+ if (ResVT.isScalableVector())
+ return getNode(
+ ISD::STEP_VECTOR, DL, ResVT,
+ getTargetConstant(StepVal, DL, ResVT.getVectorElementType()));
+
+ SmallVector<SDValue, 16> OpsStepConstants;
+ for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++)
+ OpsStepConstants.push_back(
+ getConstant(StepVal * i, DL, ResVT.getVectorElementType()));
+ return getBuildVector(ResVT, DL, OpsStepConstants);
+}
+
/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
/// point at N1 to point at N2 and indices that point at N2 to point at N1.
static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
@@ -1727,7 +1777,7 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
SDValue N2, ArrayRef<int> Mask) {
assert(VT.getVectorNumElements() == Mask.size() &&
- "Must have the same number of vector elements as mask elements!");
+ "Must have the same number of vector elements as mask elements!");
assert(VT == N1.getValueType() && VT == N2.getValueType() &&
"Invalid VECTOR_SHUFFLE");
@@ -2430,7 +2480,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
return true;
case ISD::ADD:
case ISD::SUB:
- case ISD::AND: {
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR: {
APInt UndefLHS, UndefRHS;
SDValue LHS = V.getOperand(0);
SDValue RHS = V.getOperand(1);
@@ -2439,8 +2491,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
UndefElts = UndefLHS | UndefRHS;
return true;
}
- break;
+ return false;
}
+ case ISD::ABS:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@@ -2495,6 +2548,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
SDValue Src = V.getOperand(0);
+ // We don't support scalable vectors at the moment.
+ if (Src.getValueType().isScalableVector())
+ return false;
uint64_t Idx = V.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt UndefSrcElts;
@@ -2578,12 +2634,21 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
return SDValue();
}
-SDValue SelectionDAG::getSplatValue(SDValue V) {
+SDValue SelectionDAG::getSplatValue(SDValue V, bool LegalTypes) {
int SplatIdx;
- if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
- return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
- SrcVector.getValueType().getScalarType(), SrcVector,
+ if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) {
+ EVT SVT = SrcVector.getValueType().getScalarType();
+ EVT LegalSVT = SVT;
+ if (LegalTypes && !TLI->isTypeLegal(SVT)) {
+ if (!SVT.isInteger())
+ return SDValue();
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(SVT))
+ return SDValue();
+ }
+ return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), LegalSVT, SrcVector,
getVectorIdxConstant(SplatIdx, SDLoc(V)));
+ }
return SDValue();
}
@@ -2791,8 +2856,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
unsigned NumSubVectors = Op.getNumOperands();
for (unsigned i = 0; i != NumSubVectors; ++i) {
- APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
- DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+ APInt DemandedSub =
+ DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts);
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
@@ -2888,8 +2953,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(N0, SubDemandedElts.shl(i),
Depth + 1);
unsigned Shifts = IsLE ? i : SubScale - 1 - i;
- Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts);
- Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts);
+ Known.insertBits(Known2, SubBitWidth * Shifts);
}
}
@@ -2913,8 +2977,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (DemandedElts[i]) {
unsigned Shifts = IsLE ? i : NumElts - 1 - i;
unsigned Offset = (Shifts % SubScale) * BitWidth;
- Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
- Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
+ Known = KnownBits::commonBits(Known,
+ Known2.extractBits(BitWidth, Offset));
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
@@ -2943,7 +3007,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::MUL: {
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known = KnownBits::computeForMul(Known, Known2);
+ Known = KnownBits::mul(Known, Known2);
+ break;
+ }
+ case ISD::MULHU: {
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = KnownBits::mulhu(Known, Known2);
+ break;
+ }
+ case ISD::MULHS: {
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = KnownBits::mulhs(Known, Known2);
+ break;
+ }
+ case ISD::UMUL_LOHI: {
+ assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Op.getResNo() == 0)
+ Known = KnownBits::mul(Known, Known2);
+ else
+ Known = KnownBits::mulhu(Known, Known2);
+ break;
+ }
+ case ISD::SMUL_LOHI: {
+ assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Op.getResNo() == 0)
+ Known = KnownBits::mul(Known, Known2);
+ else
+ Known = KnownBits::mulhs(Known, Known2);
break;
}
case ISD::UDIV: {
@@ -2975,7 +3071,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::SMULO:
case ISD::UMULO:
- case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents.
@@ -3373,6 +3468,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = Known2.abs();
break;
}
+ case ISD::USUBSAT: {
+ // The result of usubsat will never be larger than the LHS.
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known.Zero.setHighBits(Known2.countMinLeadingZeros());
+ break;
+ }
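The USUBSAT fact used above, that the result is never larger than the LHS and therefore inherits its known leading zero bits, can be spot-checked in isolation. A minimal standalone sketch (illustrative only):

    // Standalone sketch: usubsat(x, y) <= x, so it has at least as many
    // leading zeros as x.
    #include <cassert>
    #include <cstdint>

    static uint32_t usubsat(uint32_t X, uint32_t Y) { return X > Y ? X - Y : 0; }

    static unsigned leadingZeros(uint32_t V) {
      unsigned N = 0;
      for (uint32_t Bit = 0x80000000u; Bit && !(V & Bit); Bit >>= 1)
        ++N;
      return N;
    }

    int main() {
      uint32_t Tests[] = {0u, 1u, 0x7Fu, 0x1234u, 0xFFFFFFFFu};
      for (uint32_t X : Tests)
        for (uint32_t Y : Tests) {
          uint32_t R = usubsat(X, Y);
          assert(R <= X);
          assert(leadingZeros(R) >= leadingZeros(X));
        }
      return 0;
    }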
case ISD::UMIN: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -3424,6 +3525,42 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::smin(Known, Known2);
break;
}
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ if (Op.getResNo() == 1) {
+ // The boolean result conforms to getBooleanContents.
+ // If we know the result of a setcc has the top bits zero, use this info.
+ // We know that we have an integer-based boolean since these operations
+ // are only available for integer.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD: {
+ unsigned MemBits =
+ cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits();
+ // If we are looking at the loaded value.
+ if (Op.getResNo() == 0) {
+ if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ Known.Zero.setBitsFrom(MemBits);
+ }
+ break;
+ }
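The atomic known-bits rule above can be illustrated for a target that zero-extends narrow atomic results: every bit at or above the memory width of the loaded value is known zero. A minimal standalone sketch treating an i8 atomic result seen as an i32 (illustrative only):

    // Standalone sketch: zero-extended narrow atomic results have their high
    // bits known zero.
    #include <cassert>
    #include <cstdint>

    static uint32_t atomicLoadZExt8(uint8_t MemValue) {
      // The 8-bit memory value is zero-extended into the 32-bit result.
      return (uint32_t)MemValue;
    }

    int main() {
      const unsigned MemBits = 8;
      for (unsigned V = 0; V <= 0xFF; ++V) {
        uint32_t Res = atomicLoadZExt8((uint8_t)V);
        // Known.Zero.setBitsFrom(MemBits): bits [8, 32) are always clear.
        assert((Res >> MemBits) == 0);
      }
      return 0;
    }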
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(),
@@ -3867,6 +4004,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
(VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
}
+ case ISD::SREM:
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero. The magnitude of the result should be less than or
+ // equal to the magnitude of the LHS. Therefore, the result should have
+ // at least as many sign bits as the left hand side.
+ return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
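The SREM property used above can be spot-checked: because the magnitude of the remainder never exceeds the magnitude of the dividend, and the remainder takes the dividend's sign (or is zero), the result has at least as many sign bits as the LHS. A minimal standalone sketch (illustrative only; INT32_MIN % -1 is skipped since it is undefined in C++):

    // Standalone sketch: numSignBits(a % b) >= numSignBits(a).
    #include <cassert>
    #include <cstdint>

    static unsigned numSignBits(int32_t X) {
      // Count how many copies of the sign bit sit at the top, sign bit included.
      uint32_t U = X < 0 ? ~(uint32_t)X : (uint32_t)X;
      unsigned N = 0;
      for (uint32_t Bit = 0x80000000u; Bit && !(U & Bit); Bit >>= 1)
        ++N;
      return N;
    }

    int main() {
      int32_t LHS[] = {0, 1, -1, 100, -100, INT32_MAX, INT32_MIN};
      int32_t RHS[] = {1, -1, 3, -7, 1000};
      for (int32_t A : LHS)
        for (int32_t B : RHS) {
          if (A == INT32_MIN && B == -1)
            continue;
          assert(numSignBits(A % B) >= numSignBits(A));
        }
      return 0;
    }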
case ISD::TRUNCATE: {
// Check if the sign bits of source go down as far as the truncated value.
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
@@ -3922,6 +4065,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
+ // ComputeNumSignBits not yet implemented for scalable vectors.
+ if (VecVT.isScalableVector())
+ break;
const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
@@ -3961,8 +4107,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
unsigned NumSubVectors = Op.getNumOperands();
for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) {
- APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
- DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+ APInt DemandedSub =
+ DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts);
if (!DemandedSub)
continue;
Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1);
@@ -3995,6 +4141,33 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD: {
+ Tmp = cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits();
+ // If we are looking at the loaded value.
+ if (Op.getResNo() == 0) {
+ if (Tmp == VTBits)
+ return 1; // early-out
+ if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND)
+ return VTBits - Tmp + 1;
+ if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ return VTBits - Tmp;
+ }
+ break;
+ }
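
A concrete instance of those two bounds, as a standalone C++ sketch (not part of this patch; signBits32 is a made-up helper): an 8-bit memory value widened into a 32-bit result has at least 32 - 8 + 1 = 25 sign bits when the target sign-extends atomic results, and at least 32 - 8 = 24 when it zero-extends them.

    #include <cassert>
    #include <cstdint>

    // Leading bits equal to the sign bit of a 32-bit value.
    static unsigned signBits32(int32_t v) {
      uint32_t u = uint32_t(v), sign = u >> 31;
      unsigned n = 1;
      for (int bit = 30; bit >= 0 && ((u >> bit) & 1) == sign; --bit)
        ++n;
      return n;
    }

    int main() {
      for (int v = -128; v <= 127; ++v) {
        int32_t sext = int32_t(int8_t(v));  // sign-extending target: VTBits - MemBits + 1
        int32_t zext = int32_t(uint8_t(v)); // zero-extending target: VTBits - MemBits
        assert(signBits32(sext) >= 32 - 8 + 1);
        assert(signBits32(zext) >= 32 - 8);
      }
      return 0;
    }
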
}
// If we are looking at the loaded value of the SDNode.
@@ -4075,6 +4248,61 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::max(FirstAnswer, Mask.countLeadingOnes());
}
+bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
+ unsigned Depth) const {
+ // Early out for FREEZE.
+ if (Op.getOpcode() == ISD::FREEZE)
+ return true;
+
+ // TODO: Assume we don't know anything for now.
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ return false;
+
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
+}
+
+bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
+ const APInt &DemandedElts,
+ bool PoisonOnly,
+ unsigned Depth) const {
+ unsigned Opcode = Op.getOpcode();
+
+ // Early out for FREEZE.
+ if (Opcode == ISD::FREEZE)
+ return true;
+
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
+ if (isIntOrFPConstant(Op))
+ return true;
+
+ switch (Opcode) {
+ case ISD::UNDEF:
+ return PoisonOnly;
+
+ // TODO: ISD::BUILD_VECTOR handling
+
+ // TODO: Search for noundef attributes from library functions.
+
+ // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
+
+ default:
+ // Allow the target to implement this method for its nodes.
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
+ return TLI->isGuaranteedNotToBeUndefOrPoisonForTargetNode(
+ Op, DemandedElts, *this, PoisonOnly, Depth);
+ break;
+ }
+
+ return false;
+}
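
One way this query is meant to be used, shown as a hedged sketch rather than an in-tree combine (simplifyFreeze is a hypothetical helper, and the snippet assumes the LLVM headers that declare SelectionDAG):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // freeze(x) is a no-op whenever x is already guaranteed not to be
    // undef or poison, so a combine can simply return the operand.
    static SDValue simplifyFreeze(SDNode *N, SelectionDAG &DAG) {
      SDValue N0 = N->getOperand(0);
      if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly=*/false,
                                               /*Depth=*/0))
        return N0;
      return SDValue();
    }
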
+
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
!isa<ConstantSDNode>(Op.getOperand(1)))
@@ -4256,7 +4484,16 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
- return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
+ return KnownBits::haveNoCommonBitsSet(computeKnownBits(A),
+ computeKnownBits(B));
+}
+
+static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
+ SelectionDAG &DAG) {
+ if (cast<ConstantSDNode>(Step)->isNullValue())
+ return DAG.getConstant(0, DL, VT);
+
+ return SDValue();
}
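
What the new fold relies on, as a standalone C++ sketch (not part of this patch; stepVector is a made-up model of the node): step_vector(step) materializes the sequence <0, step, 2*step, ...>, so a zero step is exactly the zero splat.

    #include <cassert>
    #include <vector>

    static std::vector<int> stepVector(int step, unsigned numLanes) {
      std::vector<int> v(numLanes);
      for (unsigned i = 0; i < numLanes; ++i)
        v[i] = int(i) * step; // lane i holds i * step
      return v;
    }

    int main() {
      assert(stepVector(0, 8) == std::vector<int>(8, 0));
      return 0;
    }
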
static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
@@ -4408,6 +4645,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue Operand, const SDNodeFlags Flags) {
+ assert(Operand.getOpcode() != ISD::DELETED_NODE &&
+ "Operand is DELETED_NODE!");
// Constant fold unary operations with an integer constant operand. Even
// opaque constant will be folded, because the folding of unary operations
// doesn't create new constants with different values. Nevertheless, the
@@ -4424,10 +4663,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (C->isOpaque())
break;
LLVM_FALLTHROUGH;
- case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
+ case ISD::ANY_EXTEND:
+ // Some targets like RISCV prefer to sign extend some types.
+ if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT))
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
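
A minimal illustration of why either extension is a legal fold here, as a standalone C++ sketch (not part of this patch): ANY_EXTEND only defines the low bits of the result, and a sign- and a zero-extended constant agree on those.

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t c = -128;                        // i8 constant 0x80, sign bit set
      uint32_t asSExt = uint32_t(int32_t(c)); // 0xFFFFFF80: what a sext-preferring target folds to
      uint32_t asZExt = uint32_t(uint8_t(c)); // 0x00000080: the previous behaviour
      assert(asSExt == 0xFFFFFF80u && asZExt == 0x80u);
      assert((asSExt & 0xFF) == (asZExt & 0xFF)); // the only bits ANY_EXTEND promises
      return 0;
    }
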
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
APFloat apf(EVTToAPFloatSemantics(VT),
@@ -4478,6 +4723,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
APFloat::rmNearestTiesToEven, &Ignored);
return getConstantFP(FPV, DL, VT);
}
+ case ISD::STEP_VECTOR: {
+ if (SDValue V = FoldSTEP_VECTOR(DL, VT, Operand, *this))
+ return V;
+ break;
+ }
}
}
@@ -4531,9 +4781,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::BITCAST:
if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
break;
case ISD::FP_TO_FP16: {
@@ -4548,45 +4800,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
// Constant fold unary operations with a vector integer or float operand.
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) {
- if (BV->isConstant()) {
- switch (Opcode) {
- default:
- // FIXME: Entirely reasonable to perform folding of other unary
- // operations here as the need arises.
- break;
- case ISD::FNEG:
- case ISD::FABS:
- case ISD::FCEIL:
- case ISD::FTRUNC:
- case ISD::FFLOOR:
- case ISD::FP_EXTEND:
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- case ISD::TRUNCATE:
- case ISD::ANY_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::UINT_TO_FP:
- case ISD::SINT_TO_FP:
- case ISD::ABS:
- case ISD::BITREVERSE:
- case ISD::BSWAP:
- case ISD::CTLZ:
- case ISD::CTLZ_ZERO_UNDEF:
- case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF:
- case ISD::CTPOP: {
- SDValue Ops = { Operand };
- if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
- return Fold;
- }
- }
- }
+ switch (Opcode) {
+ default:
+ // FIXME: Entirely reasonable to perform folding of other unary
+ // operations here as the need arises.
+ break;
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::TRUNCATE:
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::ABS:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP: {
+ SDValue Ops = {Operand};
+ if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return Fold;
+ }
}
unsigned OpOpcode = Operand.getNode()->getOpcode();
switch (Opcode) {
+ case ISD::STEP_VECTOR:
+ assert(VT.isScalableVector() &&
+ "STEP_VECTOR can only be used with scalable types");
+ assert(OpOpcode == ISD::TargetConstant &&
+ VT.getVectorElementType() == Operand.getValueType() &&
+ "Unexpected step operand");
+ break;
case ISD::FREEZE:
assert(VT == Operand.getValueType() && "Unexpected VT!");
break;
@@ -4641,7 +4896,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
- else if (OpOpcode == ISD::UNDEF)
+ if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
break;
@@ -4660,7 +4915,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Invalid zext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0));
- else if (OpOpcode == ISD::UNDEF)
+ if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
break;
@@ -4682,7 +4937,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
- else if (OpOpcode == ISD::UNDEF)
+ if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// (ext (trunc x)) -> x
@@ -4728,8 +4983,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(Operand.getValueType().bitsLE(VT) &&
"The input must be the same size or smaller than the result.");
- assert(VT.getVectorNumElements() <
- Operand.getValueType().getVectorNumElements() &&
+ assert(VT.getVectorMinNumElements() <
+ Operand.getValueType().getVectorMinNumElements() &&
"The destination vector type must have fewer lanes than the input.");
break;
case ISD::ABS:
@@ -4879,6 +5134,18 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
if (!C2.getBoolValue())
break;
return C1.srem(C2);
+ case ISD::MULHS: {
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
+ }
+ case ISD::MULHU: {
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
+ }
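
The same widen-multiply-extract recipe on plain 8-bit integers, as a standalone C++ sanity check (not part of this patch; mulhu8/mulhs8 are made-up helpers):

    #include <cassert>
    #include <cstdint>

    // High half of an 8x8 multiply: extend to double width, multiply,
    // then take the top 8 bits (extractBits(8, 8) in APInt terms).
    static uint8_t mulhu8(uint8_t a, uint8_t b) {
      uint16_t full = uint16_t(a) * uint16_t(b); // zext and multiply
      return uint8_t(full >> 8);
    }

    static int8_t mulhs8(int8_t a, int8_t b) {
      int16_t full = int16_t(int(a) * int(b));   // sext and multiply
      return int8_t(uint16_t(full) >> 8);        // assumes two's-complement narrowing
    }

    int main() {
      assert(mulhu8(200, 100) == 78);   // 20000 = 0x4E20 -> high byte 0x4E
      assert(mulhs8(-100, 100) == -40); // -10000 = 0xD8F0 -> high byte 0xD8
      return 0;
    }
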
}
return llvm::None;
}
@@ -4933,7 +5200,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
- if (Opcode >= ISD::BUILTIN_OP_END)
+ // We can't create a scalar CONCAT_VECTORS so skip it. It will break
+ // for concats involving SPLAT_VECTOR. Concats of BUILD_VECTORS are handled by
+ // foldCONCAT_VECTORS in getNode before this is called.
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
return SDValue();
// For now, the array Ops should only contain two values.
@@ -4973,27 +5243,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
return FoldSymbolOffset(Opcode, VT, GA, N1);
- // TODO: All the folds below are performed lane-by-lane and assume a fixed
- // vector width, however we should be able to do constant folds involving
- // splat vector nodes too.
- if (VT.isScalableVector())
- return SDValue();
-
// For fixed width vectors, extract each constant element and fold them
// individually. Either input may be an undef value.
- auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
- if (!BV1 && !N1->isUndef())
+ bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
+ N1->getOpcode() == ISD::SPLAT_VECTOR;
+ if (!IsBVOrSV1 && !N1->isUndef())
return SDValue();
- auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
- if (!BV2 && !N2->isUndef())
+ bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||
+ N2->getOpcode() == ISD::SPLAT_VECTOR;
+ if (!IsBVOrSV2 && !N2->isUndef())
return SDValue();
// If both operands are undef, that's handled the same way as scalars.
- if (!BV1 && !BV2)
+ if (!IsBVOrSV1 && !IsBVOrSV2)
return SDValue();
- assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) &&
- "Vector binop with different number of elements in operands?");
-
EVT SVT = VT.getScalarType();
EVT LegalSVT = SVT;
if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
@@ -5001,19 +5264,46 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (LegalSVT.bitsLT(SVT))
return SDValue();
}
+
SmallVector<SDValue, 4> Outputs;
- unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands();
+ unsigned NumOps = 0;
+ if (IsBVOrSV1)
+ NumOps = std::max(NumOps, N1->getNumOperands());
+ if (IsBVOrSV2)
+ NumOps = std::max(NumOps, N2->getNumOperands());
+ assert(NumOps != 0 && "Expected non-zero operands");
+ // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need
+ // one iteration for that.
+ assert((!VT.isScalableVector() || NumOps == 1) &&
+ "Scalable vector should only have one scalar");
+
for (unsigned I = 0; I != NumOps; ++I) {
- SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT);
- SDValue V2 = BV2 ? BV2->getOperand(I) : getUNDEF(SVT);
+ // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need
+ // to use operand 0 of the SPLAT_VECTOR for each fixed element.
+ SDValue V1;
+ if (N1->getOpcode() == ISD::BUILD_VECTOR)
+ V1 = N1->getOperand(I);
+ else if (N1->getOpcode() == ISD::SPLAT_VECTOR)
+ V1 = N1->getOperand(0);
+ else
+ V1 = getUNDEF(SVT);
+
+ SDValue V2;
+ if (N2->getOpcode() == ISD::BUILD_VECTOR)
+ V2 = N2->getOperand(I);
+ else if (N2->getOpcode() == ISD::SPLAT_VECTOR)
+ V2 = N2->getOperand(0);
+ else
+ V2 = getUNDEF(SVT);
+
if (SVT.isInteger()) {
- if (V1->getValueType(0).bitsGT(SVT))
+ if (V1.getValueType().bitsGT(SVT))
V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
- if (V2->getValueType(0).bitsGT(SVT))
+ if (V2.getValueType().bitsGT(SVT))
V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
}
- if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ if (V1.getValueType() != SVT || V2.getValueType() != SVT)
return SDValue();
// Fold one vector element.
@@ -5028,14 +5318,21 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
Outputs.push_back(ScalarResult);
}
- assert(VT.getVectorNumElements() == Outputs.size() &&
- "Vector size mismatch!");
+ if (N1->getOpcode() == ISD::BUILD_VECTOR ||
+ N2->getOpcode() == ISD::BUILD_VECTOR) {
+ assert(VT.getVectorNumElements() == Outputs.size() &&
+ "Vector size mismatch!");
+
+ // Build a big vector out of the scalar elements we generated.
+ return getBuildVector(VT, SDLoc(), Outputs);
+ }
- // We may have a vector type but a scalar result. Create a splat.
- Outputs.resize(VT.getVectorNumElements(), Outputs.back());
+ assert((N1->getOpcode() == ISD::SPLAT_VECTOR ||
+ N2->getOpcode() == ISD::SPLAT_VECTOR) &&
+ "One operand should be a splat vector");
- // Build a big vector out of the scalar elements we generated.
- return getBuildVector(VT, SDLoc(), Outputs);
+ assert(Outputs.size() == 1 && "Vector size mismatch!");
+ return getSplatVector(VT, SDLoc(), Outputs[0]);
}
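
A toy model of the lane loop above when a BUILD_VECTOR meets a fixed-length SPLAT_VECTOR, as a standalone C++ sketch (not part of this patch): the splat contributes its single scalar operand to every lane.

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> buildVector = {1, 2, 3, 4}; // per-lane constants
      int splatScalar = 10;                        // the SPLAT_VECTOR's operand 0
      std::vector<int> folded;
      for (size_t lane = 0; lane < buildVector.size(); ++lane)
        folded.push_back(buildVector[lane] + splatScalar); // e.g. folding an ADD
      assert((folded == std::vector<int>{11, 12, 13, 14}));
      return 0;
    }
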
// TODO: Merge with FoldConstantArithmetic
@@ -5056,30 +5353,26 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
if (!VT.isVector())
return SDValue();
- // TODO: All the folds below are performed lane-by-lane and assume a fixed
- // vector width, however we should be able to do constant folds involving
- // splat vector nodes too.
- if (VT.isScalableVector())
- return SDValue();
-
- // From this point onwards all vectors are assumed to be fixed width.
- unsigned NumElts = VT.getVectorNumElements();
+ ElementCount NumElts = VT.getVectorElementCount();
- auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
+ auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
return !Op.getValueType().isVector() ||
- Op.getValueType().getVectorNumElements() == NumElts;
+ Op.getValueType().getVectorElementCount() == NumElts;
};
- auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
+ auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
+ APInt SplatVal;
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
- return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) ||
- (BV && BV->isConstant());
+ return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
+ (BV && BV->isConstant()) ||
+ (Op.getOpcode() == ISD::SPLAT_VECTOR &&
+ ISD::isConstantSplatVector(Op.getNode(), SplatVal));
};
// All operands must be vector types with the same number of elements as
// the result type and must be either UNDEF or a build vector of constant
// or UNDEF scalars.
- if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) ||
+ if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
@@ -5096,14 +5389,19 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
return SDValue();
}
+ // For scalable vector types we know we're dealing with SPLAT_VECTORs. We
+ // only have one operand to check. For fixed-length vector types we may have
+ // a combination of BUILD_VECTOR and SPLAT_VECTOR.
+ unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
+
// Constant fold each scalar lane separately.
SmallVector<SDValue, 4> ScalarResults;
- for (unsigned i = 0; i != NumElts; i++) {
+ for (unsigned I = 0; I != NumOperands; I++) {
SmallVector<SDValue, 4> ScalarOps;
for (SDValue Op : Ops) {
EVT InSVT = Op.getValueType().getScalarType();
- BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
- if (!InBV) {
+ if (Op.getOpcode() != ISD::BUILD_VECTOR &&
+ Op.getOpcode() != ISD::SPLAT_VECTOR) {
// We've checked that this is UNDEF or a constant of some kind.
if (Op.isUndef())
ScalarOps.push_back(getUNDEF(InSVT));
@@ -5112,7 +5410,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
continue;
}
- SDValue ScalarOp = InBV->getOperand(i);
+ SDValue ScalarOp =
+ Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
EVT ScalarVT = ScalarOp.getValueType();
// Build vector (integer) scalar operands may need implicit
@@ -5137,7 +5436,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
ScalarResults.push_back(ScalarResult);
}
- SDValue V = getBuildVector(VT, DL, ScalarResults);
+ SDValue V = NumElts.isScalable() ? getSplatVector(VT, DL, ScalarResults[0])
+ : getBuildVector(VT, DL, ScalarResults);
NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
return V;
}
@@ -5243,6 +5543,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
+ assert(N1.getOpcode() != ISD::DELETED_NODE &&
+ N2.getOpcode() != ISD::DELETED_NODE &&
+ "Operand is DELETED_NODE!");
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
@@ -5304,14 +5607,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// it's worth handling here.
if (N2C && N2C->isNullValue())
return N1;
+ if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::XOR, DL, VT, N1, N2);
break;
case ISD::MUL:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::AND, DL, VT, N1, N2);
if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
- APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue();
- APInt N2CImm = N2C->getAPIntValue();
+ const APInt &MulImm = N1->getConstantOperandAPInt(0);
+ const APInt &N2CImm = N2C->getAPIntValue();
return getVScale(DL, VT, MulImm * N2CImm);
}
break;
@@ -5328,6 +5636,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
+ // fold (add_sat x, y) -> (or x, y) for bool types.
+ if (Opcode == ISD::SADDSAT || Opcode == ISD::UADDSAT)
+ return getNode(ISD::OR, DL, VT, N1, N2);
+ // fold (sub_sat x, y) -> (and x, ~y) for bool types.
+ if (Opcode == ISD::SSUBSAT || Opcode == ISD::USUBSAT)
+ return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT));
+ }
break;
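
A quick truth-table check of these i1 folds (including the ADD/SUB -> XOR and MUL -> AND folds just above), as a standalone C++ sketch that is not part of this patch, treating each lane as a single bit and using the unsigned interpretation; the signed cases produce the same bits.

    #include <cassert>

    int main() {
      for (int x = 0; x <= 1; ++x)
        for (int y = 0; y <= 1; ++y) {
          assert(((x + y) & 1) == (x ^ y));     // add -> xor
          assert(((x - y) & 1) == (x ^ y));     // sub -> xor
          assert(((x * y) & 1) == (x & y));     // mul -> and
          int addSat = (x + y > 1) ? 1 : x + y; // clamp at the 1-bit maximum
          int subSat = (x - y < 0) ? 0 : x - y; // clamp at the 1-bit minimum
          assert(addSat == (x | y));            // add_sat -> or
          assert(subSat == (x & ~y & 1));       // sub_sat -> and (x, not y)
        }
      return 0;
    }
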
case ISD::SMIN:
case ISD::UMAX:
@@ -5364,8 +5680,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::SHL:
if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
- APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue();
- APInt ShiftImm = N2C->getAPIntValue();
+ const APInt &MulImm = N1->getConstantOperandAPInt(0);
+ const APInt &ShiftImm = N2C->getAPIntValue();
return getVScale(DL, VT, MulImm << ShiftImm);
}
LLVM_FALLTHROUGH;
@@ -5444,6 +5760,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
const APInt &Val = N1C->getAPIntValue();
return SignExtendInReg(Val, VT);
}
+
if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
SmallVector<SDValue, 8> Ops;
llvm::EVT OpVT = N1.getOperand(0).getValueType();
@@ -5461,6 +5778,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
break;
}
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: {
+ assert(VT.isInteger() && cast<VTSDNode>(N2)->getVT().isInteger() &&
+ N1.getValueType().isFloatingPoint() && "Invalid FP_TO_*INT_SAT");
+ assert(N1.getValueType().isVector() == VT.isVector() &&
+ "FP_TO_*INT_SAT type should be vector iff the operand type is "
+ "vector!");
+ assert((!VT.isVector() || VT.getVectorNumElements() ==
+ N1.getValueType().getVectorNumElements()) &&
+ "Vector element counts must match in FP_TO_*INT_SAT");
+ assert(!cast<VTSDNode>(N2)->getVT().isVector() &&
+ "Type to saturate to must be a scalar.");
+ assert(cast<VTSDNode>(N2)->getVT().bitsLE(VT.getScalarType()) &&
+ "Not extending!");
+ break;
+ }
case ISD::EXTRACT_VECTOR_ELT:
assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
"The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
@@ -5523,10 +5856,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
if (VT == N1.getOperand(1).getValueType())
return N1.getOperand(1);
- else
- return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
}
-
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
}
}
@@ -5563,11 +5894,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1C) {
unsigned ElementSize = VT.getSizeInBits();
unsigned Shift = ElementSize * N2C->getZExtValue();
- APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift);
- return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
+ const APInt &Val = N1C->getAPIntValue();
+ return getConstant(Val.extractBits(ElementSize, Shift), DL, VT);
}
break;
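
For reference, the lshr+trunc pattern being replaced and the extractBits form compute the same thing; a standalone C++ sketch (not part of this patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t c = 0x1122334455667788ULL; // the paired constant
      unsigned elementSize = 32, index = 1;
      // extractBits(32, 32): shift the element down, keep the low 32 bits.
      uint32_t elt = uint32_t(c >> (elementSize * index));
      assert(elt == 0x11223344u);
      return 0;
    }
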
- case ISD::EXTRACT_SUBVECTOR:
+ case ISD::EXTRACT_SUBVECTOR: {
EVT N1VT = N1.getValueType();
assert(VT.isVector() && N1VT.isVector() &&
"Extract subvector VTs must be vectors!");
@@ -5584,9 +5915,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1VT.getVectorMinNumElements()) &&
"Extract subvector overflow!");
assert(N2C->getAPIntValue().getBitWidth() ==
- TLI->getVectorIdxTy(getDataLayout())
- .getSizeInBits()
- .getFixedSize() &&
+ TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() &&
"Constant index for EXTRACT_SUBVECTOR has an invalid size");
// Trivial extraction.
@@ -5612,6 +5941,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return N1.getOperand(1);
break;
}
+ }
// Perform trivial constant folding.
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
@@ -5707,6 +6037,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3,
const SDNodeFlags Flags) {
+ assert(N1.getOpcode() != ISD::DELETED_NODE &&
+ N2.getOpcode() != ISD::DELETED_NODE &&
+ N3.getOpcode() != ISD::DELETED_NODE &&
+ "Operand is DELETED_NODE!");
// Perform various simplifications.
switch (Opcode) {
case ISD::FMA: {
@@ -5806,6 +6140,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
cast<ConstantSDNode>(N3)->getZExtValue()) <=
VT.getVectorMinNumElements()) &&
"Insert subvector overflow!");
+ assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() ==
+ TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() &&
+ "Constant index for INSERT_SUBVECTOR has an invalid size");
// Trivial insertion.
if (VT == N2VT)
@@ -5939,17 +6276,17 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
if (Slice.Array == nullptr) {
if (VT.isInteger())
return DAG.getConstant(0, dl, VT);
- else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
+ if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
return DAG.getConstantFP(0.0, dl, VT);
- else if (VT.isVector()) {
+ if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getConstant(0, dl,
EVT::getVectorVT(*DAG.getContext(),
EltVT, NumElts)));
- } else
- llvm_unreachable("Expected type!");
+ }
+ llvm_unreachable("Expected type!");
}
assert(!VT.isVector() && "Can't handle vector type here!");
@@ -6056,7 +6393,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
uint64_t Size, Align Alignment,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) {
+ MachinePointerInfo SrcPtrInfo,
+ const AAMDNodes &AAInfo) {
// Turn a memcpy of undef to nop.
// FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
@@ -6103,7 +6441,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->needsStackRealignment(MF))
+ if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign = NewAlign / 2;
@@ -6115,6 +6453,10 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
}
}
+ // Prepare AAInfo for loads/stores after lowering this memcpy.
+ AAMDNodes NewAAInfo = AAInfo;
+ NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
SmallVector<SDValue, 16> OutLoadChains;
@@ -6157,7 +6499,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Store = DAG.getStore(
Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
OutChains.push_back(Store);
}
}
@@ -6181,13 +6523,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
- commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags);
+ commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
OutLoadChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
- DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
OutStoreChains.push_back(Store);
}
SrcOff += VTSize;
@@ -6246,7 +6588,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
uint64_t Size, Align Alignment,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) {
+ MachinePointerInfo SrcPtrInfo,
+ const AAMDNodes &AAInfo) {
// Turn a memmove of undef to nop.
// FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
@@ -6289,6 +6632,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
}
}
+ // Prepare AAInfo for loads/stores after lowering this memmove.
+ AAMDNodes NewAAInfo = AAInfo;
+ NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
uint64_t SrcOff = 0, DstOff = 0;
@@ -6307,10 +6654,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
- Value =
- DAG.getLoad(VT, dl, Chain,
- DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
- SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags);
+ Value = DAG.getLoad(
+ VT, dl, Chain,
+ DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -6322,10 +6669,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Store;
- Store =
- DAG.getStore(Chain, dl, LoadValues[i],
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
+ Store = DAG.getStore(
+ Chain, dl, LoadValues[i],
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -6354,7 +6701,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, Align Alignment, bool isVol,
- MachinePointerInfo DstPtrInfo) {
+ MachinePointerInfo DstPtrInfo,
+ const AAMDNodes &AAInfo) {
// Turn a memset of undef to nop.
// FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
@@ -6401,6 +6749,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
LargestVT = MemOps[i];
SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+ // Prepare AAInfo for loads/stores after lowering this memset.
+ AAMDNodes NewAAInfo = AAInfo;
+ NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
@@ -6426,7 +6778,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment,
- isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
+ NewAAInfo);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
Size -= VTSize;
@@ -6449,7 +6802,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, Align Alignment,
bool isVol, bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) {
+ MachinePointerInfo SrcPtrInfo,
+ const AAMDNodes &AAInfo) {
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6460,7 +6814,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Result = getMemcpyLoadsAndStores(
*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
- isVol, false, DstPtrInfo, SrcPtrInfo);
+ isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo);
if (Result.getNode())
return Result;
}
@@ -6481,7 +6835,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Alignment,
- isVol, true, DstPtrInfo, SrcPtrInfo);
+ isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo);
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
@@ -6563,7 +6917,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, Align Alignment,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) {
+ MachinePointerInfo SrcPtrInfo,
+ const AAMDNodes &AAInfo) {
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6574,7 +6929,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Result = getMemmoveLoadsAndStores(
*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
- isVol, false, DstPtrInfo, SrcPtrInfo);
+ isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo);
if (Result.getNode())
return Result;
}
@@ -6664,7 +7019,8 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, Align Alignment,
bool isVol, bool isTailCall,
- MachinePointerInfo DstPtrInfo) {
+ MachinePointerInfo DstPtrInfo,
+ const AAMDNodes &AAInfo) {
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6675,7 +7031,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Alignment,
- isVol, DstPtrInfo);
+ isVol, DstPtrInfo, AAInfo);
if (Result.getNode())
return Result;
@@ -6839,8 +7195,8 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SmallVector<EVT, 4> VTs;
VTs.reserve(Ops.size());
- for (unsigned i = 0; i < Ops.size(); ++i)
- VTs.push_back(Ops[i].getValueType());
+ for (const SDValue &Op : Ops)
+ VTs.push_back(Op.getValueType());
return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops);
}
@@ -7355,7 +7711,7 @@ SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
AM, ST->isTruncatingStore(), ST->isCompressingStore());
}
-SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
+SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
ISD::MemIndexType IndexType,
@@ -7364,9 +7720,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
- ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
- dl.getIROrder(), VTs, VT, MMO, IndexType, ExtTy));
+ dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7374,9 +7730,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
return SDValue(E, 0);
}
- IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]);
+ IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]);
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO, IndexType, ExtTy);
+ VTs, MemVT, MMO, IndexType, ExtTy);
createOperands(N, Ops);
assert(N->getPassThru().getValueType() == N->getValueType(0) &&
@@ -7402,7 +7758,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
return V;
}
-SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
+SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
ISD::MemIndexType IndexType,
@@ -7411,9 +7767,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
- ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
- dl.getIROrder(), VTs, VT, MMO, IndexType, IsTrunc));
+ dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7421,9 +7777,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
return SDValue(E, 0);
}
- IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]);
+ IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]);
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO, IndexType, IsTrunc);
+ VTs, MemVT, MMO, IndexType, IsTrunc);
createOperands(N, Ops);
assert(N->getMask().getValueType().getVectorElementCount() ==
@@ -7588,6 +7944,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
default: break;
}
+#ifndef NDEBUG
+ for (auto &Op : Ops)
+ assert(Op.getOpcode() != ISD::DELETED_NODE &&
+ "Operand is DELETED_NODE!");
+#endif
+
switch (Opcode) {
default: break;
case ISD::BUILD_VECTOR:
@@ -7661,6 +8023,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
+#ifndef NDEBUG
+ for (auto &Op : Ops)
+ assert(Op.getOpcode() != ISD::DELETED_NODE &&
+ "Operand is DELETED_NODE!");
+#endif
+
switch (Opcode) {
case ISD::STRICT_FP_EXTEND:
assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
@@ -8397,7 +8765,9 @@ SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr,
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc())
- SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O);
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromNode(N, R),
+ {}, IsIndirect, DL, O,
+ /*IsVariadic=*/false);
}
/// Constant
@@ -8407,7 +8777,10 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O);
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromConst(C), {},
+ /*IsIndirect=*/false, DL, O,
+ /*IsVariadic=*/false);
}
/// FrameIndex
@@ -8418,19 +8791,46 @@ SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
+ return getFrameIndexDbgValue(Var, Expr, FI, {}, IsIndirect, DL, O);
+}
+
+/// FrameIndex with dependencies
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
+ DIExpression *Expr, unsigned FI,
+ ArrayRef<SDNode *> Dependencies,
+ bool IsIndirect,
+ const DebugLoc &DL,
+ unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc())
- SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX);
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromFrameIdx(FI),
+ Dependencies, IsIndirect, DL, O,
+ /*IsVariadic=*/false);
}
/// VReg
-SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var,
- DIExpression *Expr,
+SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, DIExpression *Expr,
unsigned VReg, bool IsIndirect,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc())
- SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG);
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromVReg(VReg),
+ {}, IsIndirect, DL, O,
+ /*IsVariadic=*/false);
+}
+
+SDDbgValue *SelectionDAG::getDbgValueList(DIVariable *Var, DIExpression *Expr,
+ ArrayRef<SDDbgOperand> Locs,
+ ArrayRef<SDNode *> Dependencies,
+ bool IsIndirect, const DebugLoc &DL,
+ unsigned O, bool IsVariadic) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(DbgInfo->getAlloc(), Var, Expr, Locs, Dependencies, IsIndirect,
+ DL, O, IsVariadic);
}
void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
@@ -8449,15 +8849,31 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
if (!FromNode->getHasDebugValue())
return;
+ SDDbgOperand FromLocOp =
+ SDDbgOperand::fromNode(From.getNode(), From.getResNo());
+ SDDbgOperand ToLocOp = SDDbgOperand::fromNode(To.getNode(), To.getResNo());
+
SmallVector<SDDbgValue *, 2> ClonedDVs;
for (SDDbgValue *Dbg : GetDbgValues(FromNode)) {
- if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
+ if (Dbg->isInvalidated())
continue;
// TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value");
- // Just transfer the dbg value attached to From.
- if (Dbg->getResNo() != From.getResNo())
+ // Create a new location ops vector that is equal to the old vector, but
+ // with each instance of FromLocOp replaced with ToLocOp.
+ bool Changed = false;
+ auto NewLocOps = Dbg->copyLocationOps();
+ std::replace_if(
+ NewLocOps.begin(), NewLocOps.end(),
+ [&Changed, FromLocOp](const SDDbgOperand &Op) {
+ bool Match = Op == FromLocOp;
+ Changed |= Match;
+ return Match;
+ },
+ ToLocOp);
+ // Ignore this SDDbgValue if we didn't find a matching location.
+ if (!Changed)
continue;
DIVariable *Var = Dbg->getVariable();
@@ -8476,10 +8892,13 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
continue;
Expr = *Fragment;
}
+
+ auto AdditionalDependencies = Dbg->getAdditionalDependencies();
// Clone the SDDbgValue and move it to To.
- SDDbgValue *Clone = getDbgValue(
- Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), Dbg->getDebugLoc(),
- std::max(ToNode->getIROrder(), Dbg->getOrder()));
+ SDDbgValue *Clone = getDbgValueList(
+ Var, Expr, NewLocOps, AdditionalDependencies, Dbg->isIndirect(),
+ Dbg->getDebugLoc(), std::max(ToNode->getIROrder(), Dbg->getOrder()),
+ Dbg->isVariadic());
ClonedDVs.push_back(Clone);
if (InvalidateDbg) {
@@ -8489,8 +8908,11 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
}
}
- for (SDDbgValue *Dbg : ClonedDVs)
- AddDbgValue(Dbg, ToNode, false);
+ for (SDDbgValue *Dbg : ClonedDVs) {
+ assert(is_contained(Dbg->getSDNodes(), ToNode) &&
+ "Transferred DbgValues should depend on the new SDNode");
+ AddDbgValue(Dbg, false);
+ }
}
void SelectionDAG::salvageDebugInfo(SDNode &N) {
@@ -8510,16 +8932,35 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
if (!isConstantIntBuildVectorOrConstantInt(N0) &&
isConstantIntBuildVectorOrConstantInt(N1)) {
uint64_t Offset = N.getConstantOperandVal(1);
+
// Rewrite an ADD constant node into a DIExpression. Since we are
// performing arithmetic to compute the variable's *value* in the
// DIExpression, we need to mark the expression with a
// DW_OP_stack_value.
auto *DIExpr = DV->getExpression();
- DIExpr =
- DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset);
- SDDbgValue *Clone =
- getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
- DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
+ auto NewLocOps = DV->copyLocationOps();
+ bool Changed = false;
+ for (size_t i = 0; i < NewLocOps.size(); ++i) {
+ // We're not given a ResNo to compare against because the whole
+ // node is going away. We know that any ISD::ADD only has one
+ // result, so we can assume any node match is using the result.
+ if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE ||
+ NewLocOps[i].getSDNode() != &N)
+ continue;
+ NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
+ SmallVector<uint64_t, 3> ExprOps;
+ DIExpression::appendOffset(ExprOps, Offset);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+ Changed = true;
+ }
+ (void)Changed;
+ assert(Changed && "Salvage target doesn't use N");
+
+ auto AdditionalDependencies = DV->getAdditionalDependencies();
+ SDDbgValue *Clone = getDbgValueList(DV->getVariable(), DIExpr,
+ NewLocOps, AdditionalDependencies,
+ DV->isIndirect(), DV->getDebugLoc(),
+ DV->getOrder(), DV->isVariadic());
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
DV->setIsEmitted();
@@ -8530,8 +8971,11 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
}
}
- for (SDDbgValue *Dbg : ClonedDVs)
- AddDbgValue(Dbg, Dbg->getSDNode(), false);
+ for (SDDbgValue *Dbg : ClonedDVs) {
+ assert(!Dbg->getSDNodes().empty() &&
+ "Salvaged DbgValue should depend on a new SDNode");
+ AddDbgValue(Dbg, false);
+ }
}
/// Creates a SDDbgLabel node.
@@ -8965,9 +9409,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
checkForCycles(N, this);
// N is in sorted position, so all its uses have one less operand
// that needs to be sorted.
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- UI != UE; ++UI) {
- SDNode *P = *UI;
+ for (SDNode *P : N->uses()) {
unsigned Degree = P->getNodeId();
assert(Degree != 0 && "Invalid node degree");
--Degree;
@@ -9014,17 +9456,17 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
-void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
- if (SD) {
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) {
+ for (SDNode *SD : DB->getSDNodes()) {
+ if (!SD)
+ continue;
assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
SD->setHasDebugValue(true);
}
- DbgInfo->add(DB, SD, isParameter);
+ DbgInfo->add(DB, isParameter);
}
-void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) {
- DbgInfo->add(DB);
-}
+void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { DbgInfo->add(DB); }
SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
SDValue NewMemOpChain) {
@@ -9226,21 +9668,22 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
// TODO: may want to use peekThroughBitcast() here.
- ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
+ ConstantSDNode *C =
+ isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true);
return C && C->isNullValue();
}
-bool llvm::isOneOrOneSplat(SDValue N) {
+bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
// TODO: may want to use peekThroughBitcast() here.
unsigned BitWidth = N.getScalarValueSizeInBits();
- ConstantSDNode *C = isConstOrConstSplat(N);
+ ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
}
-bool llvm::isAllOnesOrAllOnesSplat(SDValue N) {
+bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
N = peekThroughBitcasts(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
- ConstantSDNode *C = isConstOrConstSplat(N);
+ ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
}
@@ -9290,8 +9733,8 @@ namespace {
std::vector<EVT> VTs;
EVTArray() {
- VTs.reserve(MVT::LAST_VALUETYPE);
- for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+ VTs.reserve(MVT::VALUETYPE_SIZE);
+ for (unsigned i = 0; i < MVT::VALUETYPE_SIZE; ++i)
VTs.push_back(MVT((MVT::SimpleValueType)i));
}
};
@@ -9308,11 +9751,9 @@ const EVT *SDNode::getValueTypeList(EVT VT) {
if (VT.isExtended()) {
sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
- } else {
- assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
- "Value type out of range!");
- return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
+ assert(VT.getSimpleVT() < MVT::VALUETYPE_SIZE && "Value type out of range!");
+ return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
@@ -9890,10 +10331,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
// FIXME: This does not work for vectors with elements less than 8 bits.
while (VecWidth > 8) {
unsigned HalfSize = VecWidth / 2;
- APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
- APInt LowValue = SplatValue.trunc(HalfSize);
- APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
- APInt LowUndef = SplatUndef.trunc(HalfSize);
+ APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize);
+ APInt LowValue = SplatValue.extractBits(HalfSize, 0);
+ APInt HighUndef = SplatUndef.extractBits(HalfSize, HalfSize);
+ APInt LowUndef = SplatUndef.extractBits(HalfSize, 0);
// If the two halves do not match (ignoring undef bits), stop here.
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6638ff6a6358..d56d4bcc9169 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -436,14 +436,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
} else if (ValueVT.bitsLT(PartEVT)) {
- // Bitcast Val back the original type and extract the corresponding
- // vector we want.
- unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
- EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
- ValueVT.getVectorElementType(), Elts);
- Val = DAG.getBitcast(WiderVecType, Val);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getVectorIdxConstant(0, DL));
+ const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
+ EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
+ // Drop the extra bits.
+ Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
+ return DAG.getBitcast(ValueVT, Val);
}
diagnosePossiblyInvalidConstraint(
@@ -610,30 +607,39 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
std::reverse(Parts, Parts + OrigNumParts);
}
-static SDValue widenVectorToPartType(SelectionDAG &DAG,
- SDValue Val, const SDLoc &DL, EVT PartVT) {
- if (!PartVT.isFixedLengthVector())
+static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
+ const SDLoc &DL, EVT PartVT) {
+ if (!PartVT.isVector())
return SDValue();
EVT ValueVT = Val.getValueType();
- unsigned PartNumElts = PartVT.getVectorNumElements();
- unsigned ValueNumElts = ValueVT.getVectorNumElements();
- if (PartNumElts > ValueNumElts &&
- PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
- EVT ElementVT = PartVT.getVectorElementType();
- // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
- // undef elements.
- SmallVector<SDValue, 16> Ops;
- DAG.ExtractVectorElements(Val, Ops);
- SDValue EltUndef = DAG.getUNDEF(ElementVT);
- for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i)
- Ops.push_back(EltUndef);
+ ElementCount PartNumElts = PartVT.getVectorElementCount();
+ ElementCount ValueNumElts = ValueVT.getVectorElementCount();
+
+ // We only support widening vectors with equivalent element types and
+ // fixed/scalable properties. If a target needs to widen a fixed-length type
+ // to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
+ if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
+ PartNumElts.isScalable() != ValueNumElts.isScalable() ||
+ PartVT.getVectorElementType() != ValueVT.getVectorElementType())
+ return SDValue();
- // FIXME: Use CONCAT for 2x -> 4x.
- return DAG.getBuildVector(PartVT, DL, Ops);
- }
+ // Widening a scalable vector to another scalable vector is done by inserting
+ // the vector into a larger undef one.
+ if (PartNumElts.isScalable())
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
+ Val, DAG.getVectorIdxConstant(0, DL));
- return SDValue();
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ DAG.ExtractVectorElements(Val, Ops);
+ SDValue EltUndef = DAG.getUNDEF(ElementVT);
+ Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+ return DAG.getBuildVector(PartVT, DL, Ops);
}
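
A rough picture of the fixed-length path, as a standalone C++ sketch (not part of this patch; kUndef is just a placeholder standing in for undef lanes): the original lanes are kept and the extra part lanes are padded.

    #include <cassert>
    #include <vector>

    int main() {
      const int kUndef = -1;           // placeholder for an undef lane
      std::vector<int> value = {7, 9}; // e.g. a <2 x i32> value
      std::vector<int> part = value;   // widen to the <4 x i32> part type
      part.resize(4, kUndef);
      assert(part.size() == 4 && part[0] == 7 && part[1] == 9);
      assert(part[2] == kUndef && part[3] == kUndef);
      return 0;
    }
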
/// getCopyToPartsVector - Create a series of nodes that contain the specified
@@ -714,13 +720,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
EVT BuiltVectorTy = EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue());
- if (ValueVT != BuiltVectorTy) {
- if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
- Val = Widened;
+ if (ValueVT == BuiltVectorTy) {
+ // Nothing to do.
+ } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
+ // Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+ } else if (SDValue Widened =
+ widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
+ Val = Widened;
+ } else if (BuiltVectorTy.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ BuiltVectorTy.getVectorElementCount() ==
+ ValueVT.getVectorElementCount()) {
+ // Promoted vector extract
+ Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy);
}
+ assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
+
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -970,8 +988,9 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
- unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
MVT RegisterVT = RegVTs[Value];
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
+ RegisterVT);
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
unsigned TheReg = Regs[Reg++];
@@ -1119,6 +1138,33 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
+ DebugLoc DL, unsigned Order) {
+ // We treat variadic dbg_values differently at this stage.
+ if (DI->hasArgList()) {
+ // For variadic dbg_values we will now insert an undef.
+ // FIXME: We can potentially recover these!
+ SmallVector<SDDbgOperand, 2> Locs;
+ for (const Value *V : DI->getValues()) {
+ auto Undef = UndefValue::get(V->getType());
+ Locs.push_back(SDDbgOperand::fromConst(Undef));
+ }
+ SDDbgValue *SDV = DAG.getDbgValueList(
+ DI->getVariable(), DI->getExpression(), Locs, {},
+ /*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true);
+ DAG.AddDbgValue(SDV, /*isParameter=*/false);
+ } else {
+ // TODO: Dangling debug info will eventually either be resolved or produce
+ // an Undef DBG_VALUE. However in the resolution case, a gap may appear
+ // between the original dbg.value location and its resolved DBG_VALUE,
+ // which we should ideally fill with an extra Undef DBG_VALUE.
+ assert(DI->getNumVariableLocationOps() == 1 &&
+ "DbgValueInst without an ArgList should have a single location "
+ "operand.");
+ DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order);
+ }
+}
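
addDanglingDebugInfo above lowers variadic dbg.values to undef locations immediately, while non-variadic ones are recorded keyed by the single IR value they refer to, so they can be re-emitted once that value gets an SDNode. A simplified standalone model of that bookkeeping pattern — plain C++ with string keys as stand-ins, not the actual SelectionDAG data structures:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Stand-ins for the IR value key and the deferred debug record.
struct PendingDbgValue {
  std::string Variable;
  unsigned Order; // node order at the time the dbg.value was seen
};

// Keyed by the not-yet-lowered value; resolved once a node appears for it.
static std::map<std::string, std::vector<PendingDbgValue>> DanglingMap;

static void addDangling(const std::string &Val, PendingDbgValue DV) {
  DanglingMap[Val].push_back(std::move(DV));
}

static void resolveDangling(const std::string &Val) {
  auto It = DanglingMap.find(Val);
  if (It == DanglingMap.end())
    return;
  for (const PendingDbgValue &DV : It->second)
    std::printf("emit debug value for %s (order %u) now that %s is lowered\n",
                DV.Variable.c_str(), DV.Order, Val.c_str());
  It->second.clear();
}

int main() {
  addDangling("%x", {"myvar", 7});
  resolveDangling("%x"); // would happen when %x receives an SDValue
}
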
+
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr) {
auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
@@ -1156,6 +1202,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
for (auto &DDI : DDIV) {
const DbgValueInst *DI = DDI.getDI();
+ assert(!DI->hasArgList() && "Not implemented for variadic dbg_values");
assert(DI && "Ill-formed DanglingDebugInfo");
DebugLoc dl = DDI.getdl();
unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
@@ -1185,37 +1232,41 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
<< ValSDNodeOrder << "\n");
SDV = getDbgValue(Val, Variable, Expr, dl,
std::max(DbgSDNodeOrder, ValSDNodeOrder));
- DAG.AddDbgValue(SDV, Val.getNode(), false);
+ DAG.AddDbgValue(SDV, false);
} else
LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
<< "in EmitFuncArgumentDbgValue\n");
} else {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
- auto Undef =
- UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+ auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV =
DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
+ DAG.AddDbgValue(SDV, false);
}
}
DDIV.clear();
}
void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
- Value *V = DDI.getDI()->getValue();
+  // TODO: For the variadic implementation, instead of only checking the fail
+  // state of `handleDebugValue`, we need to know specifically which values were
+  // invalid, so that we attempt to salvage only those values when processing
+ // a DIArgList.
+ assert(!DDI.getDI()->hasArgList() &&
+ "Not implemented for variadic dbg_values");
+ Value *V = DDI.getDI()->getValue(0);
DILocalVariable *Var = DDI.getDI()->getVariable();
DIExpression *Expr = DDI.getDI()->getExpression();
DebugLoc DL = DDI.getdl();
DebugLoc InstDL = DDI.getDI()->getDebugLoc();
unsigned SDOrder = DDI.getSDNodeOrder();
-
// Currently we consider only dbg.value intrinsics -- we tell the salvager
// that DW_OP_stack_value is desired.
assert(isa<DbgValueInst>(DDI.getDI()));
bool StackValue = true;
  // Can this Value be encoded without any further work?
- if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder))
+ if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false))
return;
// Attempt to salvage back through as many instructions as possible. Bail if
@@ -1223,20 +1274,27 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// variable. FIXME: Further work could recover those too.
while (isa<Instruction>(V)) {
Instruction &VAsInst = *cast<Instruction>(V);
- DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue);
+ // Temporary "0", awaiting real implementation.
+ SmallVector<Value *, 4> AdditionalValues;
+ DIExpression *SalvagedExpr =
+ salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
- if (!NewExpr)
+ // TODO: If AdditionalValues isn't empty, then the salvage can only be
+ // represented with a DBG_VALUE_LIST, so we give up. When we have support
+ // here for variadic dbg_values, remove that condition.
+ if (!SalvagedExpr || !AdditionalValues.empty())
break;
// New value and expr now represent this debuginfo.
V = VAsInst.getOperand(0);
- Expr = NewExpr;
+ Expr = SalvagedExpr;
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
- if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) {
+ if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
+ /*IsVariadic=*/false)) {
LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
<< DDI.getDI() << "\nBy stripping back to:\n " << V);
return;
@@ -1246,9 +1304,9 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// This was the final opportunity to salvage this debug information, and it
// couldn't be done. Place an undef DBG_VALUE at this location to terminate
// any earlier variable location.
- auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+ auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
+ DAG.AddDbgValue(SDV, false);
LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI()
<< "\n");
@@ -1256,53 +1314,72 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
<< "\n");
}
-bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
+bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
+ DILocalVariable *Var,
DIExpression *Expr, DebugLoc dl,
- DebugLoc InstDL, unsigned Order) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDDbgValue *SDV;
- if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
- isa<ConstantPointerNull>(V)) {
- SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
+ DebugLoc InstDL, unsigned Order,
+ bool IsVariadic) {
+ if (Values.empty())
return true;
- }
+ SmallVector<SDDbgOperand> LocationOps;
+ SmallVector<SDNode *> Dependencies;
+ for (const Value *V : Values) {
+ // Constant value.
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+ isa<ConstantPointerNull>(V)) {
+ LocationOps.emplace_back(SDDbgOperand::fromConst(V));
+ continue;
+ }
- // If the Value is a frame index, we can create a FrameIndex debug value
- // without relying on the DAG at all.
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- auto SI = FuncInfo.StaticAllocaMap.find(AI);
- if (SI != FuncInfo.StaticAllocaMap.end()) {
- auto SDV =
- DAG.getFrameIndexDbgValue(Var, Expr, SI->second,
- /*IsIndirect*/ false, dl, SDNodeOrder);
- // Do not attach the SDNodeDbgValue to an SDNode: this variable location
- // is still available even if the SDNode gets optimized out.
- DAG.AddDbgValue(SDV, nullptr, false);
- return true;
+ // If the Value is a frame index, we can create a FrameIndex debug value
+ // without relying on the DAG at all.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second));
+ continue;
+ }
}
- }
- // Do not use getValue() in here; we don't want to generate code at
- // this point if it hasn't been done yet.
- SDValue N = NodeMap[V];
- if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
- N = UnusedArgNodeMap[V];
- if (N.getNode()) {
- if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
- return true;
- SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, N.getNode(), false);
- return true;
- }
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ // Only emit func arg dbg value for non-variadic dbg.values for now.
+ if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
+ return true;
+ if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
+ // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
+ // describe stack slot locations.
+ //
+ // Consider "int x = 0; int *px = &x;". There are two kinds of
+ // interesting debug values here after optimization:
+ //
+ // dbg.value(i32* %px, !"int *px", !DIExpression()), and
+ // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
+ //
+ // Both describe the direct values of their associated variables.
+ Dependencies.push_back(N.getNode());
+ LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex()));
+ continue;
+ }
+ LocationOps.emplace_back(
+ SDDbgOperand::fromNode(N.getNode(), N.getResNo()));
+ continue;
+ }
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Special rules apply for the first dbg.values of parameter variables in a
+ // function. Identify them by the fact they reference Argument Values, that
+ // they're parameters, and they are parameters of the current function. We
+ // need to let them dangle until they get an SDNode.
+ bool IsParamOfFunc =
+ isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt();
+ if (IsParamOfFunc)
+ return false;
- // Special rules apply for the first dbg.values of parameter variables in a
- // function. Identify them by the fact they reference Argument Values, that
- // they're parameters, and they are parameters of the current function. We
- // need to let them dangle until they get an SDNode.
- bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() &&
- !InstDL.getInlinedAt();
- if (!IsParamOfFunc) {
// The value is not used in this block yet (or it would have an SDNode).
// We still want the value to appear for the user if possible -- if it has
// an associated VReg, we can refer to that instead.
@@ -1314,6 +1391,9 @@ bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
+ // FIXME: We could potentially support variadic dbg_values here.
+ if (IsVariadic)
+ return false;
unsigned Offset = 0;
unsigned BitsToDescribe = 0;
if (auto VarSize = Var->getSizeInBits())
@@ -1321,31 +1401,41 @@ bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
if (auto Fragment = Expr->getFragmentInfo())
BitsToDescribe = Fragment->SizeInBits;
for (auto RegAndSize : RFV.getRegsAndSizes()) {
- unsigned RegisterSize = RegAndSize.second;
// Bail out if all bits are described already.
if (Offset >= BitsToDescribe)
break;
+ // TODO: handle scalable vectors.
+ unsigned RegisterSize = RegAndSize.second;
unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
- ? BitsToDescribe - Offset
- : RegisterSize;
+ ? BitsToDescribe - Offset
+ : RegisterSize;
auto FragmentExpr = DIExpression::createFragmentExpression(
Expr, Offset, FragmentSize);
if (!FragmentExpr)
- continue;
- SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first,
- false, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
+ continue;
+ SDDbgValue *SDV = DAG.getVRegDbgValue(
+ Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
- } else {
- SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
+ return true;
}
- return true;
+ // We can use simple vreg locations for variadic dbg_values as well.
+ LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg));
+ continue;
}
+ // We failed to create a SDDbgOperand for V.
+ return false;
}
- return false;
+ // We have created a SDDbgOperand for each Value in Values.
+ // Should use Order instead of SDNodeOrder?
+ assert(!LocationOps.empty());
+ SDDbgValue *SDV =
+ DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
+ /*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic);
+ DAG.AddDbgValue(SDV, /*isParameter=*/false);
+ return true;
}
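
When a value is spread across several registers, the multi-register branch above describes it with one fragment per register and clips the final fragment to the variable's size in bits. A standalone sketch of that arithmetic — plain C++, with illustrative numbers that are not taken from the patch:

#include <cstdio>
#include <utility>
#include <vector>

// Split a variable of VarSizeBits into (offset, size) fragments, one per
// register, clipping the last fragment as the multi-register path above does.
static std::vector<std::pair<unsigned, unsigned>>
splitIntoFragments(unsigned VarSizeBits, const std::vector<unsigned> &RegSizes) {
  std::vector<std::pair<unsigned, unsigned>> Fragments;
  unsigned Offset = 0;
  for (unsigned RegisterSize : RegSizes) {
    if (Offset >= VarSizeBits)
      break; // all bits are described already
    unsigned FragmentSize = (Offset + RegisterSize > VarSizeBits)
                                ? VarSizeBits - Offset
                                : RegisterSize;
    Fragments.emplace_back(Offset, FragmentSize);
    Offset += RegisterSize;
  }
  return Fragments;
}

int main() {
  // A 96-bit variable held in two 64-bit registers yields fragments
  // (offset 0, size 64) and (offset 64, size 32).
  for (auto [Off, Size] : splitIntoFragments(96, {64, 64}))
    std::printf("fragment offset=%u size=%u\n", Off, Size);
}
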
void SelectionDAGBuilder::resolveOrClearDbgInfo() {
@@ -1458,9 +1548,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
SmallVector<SDValue, 4> Constants;
- for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
- OI != OE; ++OI) {
- SDNode *Val = getValue(*OI).getNode();
+ for (const Use &U : C->operands()) {
+ SDNode *Val = getValue(U).getNode();
// If the operand is an empty aggregate, there are no values.
if (!Val) continue;
// Add each leaf value from the operand to the Constants list
@@ -1592,6 +1681,8 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// Update machine-CFG edge.
MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
FuncInfo.MBB->addSuccessor(TargetMBB);
+ TargetMBB->setIsEHCatchretTarget(true);
+ DAG.getMachineFunction().setHasEHCatchret(true);
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsSEH = isAsynchronousEHPersonality(Pers);
@@ -1851,7 +1942,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
I.getOperand(0)->getType(), F->getCallingConv(),
- /*IsVarArg*/ false);
+ /*IsVarArg*/ false, DL);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
@@ -1991,7 +2082,7 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
// If this is an argument, we can export it if the BB is the entry block or
// if it is already exported.
if (isa<Argument>(V)) {
- if (FromBB == &FromBB->getParent()->getEntryBlock())
+ if (FromBB->isEntryBlock())
return true;
// Otherwise, can only export this if it is already exported.
@@ -2782,23 +2873,27 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
- assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
- LLVMContext::OB_gc_transition,
- LLVMContext::OB_gc_live,
- LLVMContext::OB_funclet,
- LLVMContext::OB_cfguardtarget}) &&
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
+ LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget,
+ LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
const Value *Callee(I.getCalledOperand());
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
- visitInlineAsm(I);
+ visitInlineAsm(I, EHPadBB);
else if (Fn && Fn->isIntrinsic()) {
switch (Fn->getIntrinsicID()) {
default:
llvm_unreachable("Cannot invoke this intrinsic");
case Intrinsic::donothing:
// Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ case Intrinsic::seh_try_begin:
+ case Intrinsic::seh_scope_begin:
+ case Intrinsic::seh_try_end:
+ case Intrinsic::seh_scope_end:
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
@@ -2829,7 +2924,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// with deopt state.
LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
} else {
- LowerCallTo(I, getValue(Callee), false, EHPadBB);
+ LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
}
// If the value of the invoke is used outside of its defining block, make it
@@ -4273,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
Base = SDB->getValue(C);
- unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements();
+ ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
IndexType = ISD::SIGNED_SCALED;
@@ -4314,7 +4409,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
EVT VT = Src0.getValueType();
Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
->getMaybeAlignValue()
- .getValueOr(DAG.getEVTAlign(VT));
+ .getValueOr(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
AAMDNodes AAInfo;
@@ -4339,6 +4434,14 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
+
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
+ }
+
SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO, IndexType, false);
@@ -4424,7 +4527,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
->getMaybeAlignValue()
- .getValueOr(DAG.getEVTAlign(VT));
+ .getValueOr(DAG.getEVTAlign(VT.getScalarType()));
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -4450,6 +4553,14 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
+
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
+ }
+
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO, IndexType, ISD::NON_EXTLOAD);
@@ -4702,6 +4813,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
SDVTList VTs = DAG.getVTList(ValueVTs);
+ // Propagate fast-math-flags from IR to node(s).
+ SDNodeFlags Flags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPMO);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
// Create the node.
SDValue Result;
if (IsTgtIntrinsic) {
@@ -5377,6 +5494,8 @@ getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
/// If the DbgValueInst is a dbg_value of a function argument, create the
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
+/// We don't currently support this for variadic dbg_values, as they shouldn't
+/// appear for function arguments or in the prologue.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
@@ -5384,6 +5503,35 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Arg)
return false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+
+ // Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
+ // we've been asked to pursue.
+ auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
+ bool Indirect) {
+ if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) {
+ // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
+ // pointing at the VReg, which will be patched up later.
+ auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
+ auto MIB = BuildMI(MF, DL, Inst);
+ MIB.addReg(Reg, RegState::Debug);
+ MIB.addImm(0);
+ MIB.addMetadata(Variable);
+ auto *NewDIExpr = FragExpr;
+ // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
+ // the DIExpression.
+ if (Indirect)
+ NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
+ MIB.addMetadata(NewDIExpr);
+ return MIB;
+ } else {
+ // Create a completely standard DBG_VALUE.
+ auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
+ return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr);
+ }
+ };
+
if (!IsDbgDeclare) {
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic is found in
@@ -5449,9 +5597,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
}
- MachineFunction &MF = DAG.getMachineFunction();
- const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
-
bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
@@ -5518,13 +5663,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!FragmentExpr) {
SDDbgValue *SDV = DAG.getConstantDbgValue(
Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
+ DAG.AddDbgValue(SDV, false);
continue;
}
- assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
- RegAndSize.first, Variable, *FragmentExpr));
+ MachineInstr *NewMI =
+ MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, IsDbgDeclare);
+ FuncInfo.ArgDbgValues.push_back(NewMI);
}
};
@@ -5555,11 +5699,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- IsIndirect = (Op->isReg()) ? IsIndirect : true;
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
- *Op, Variable, Expr));
+ MachineInstr *NewMI = nullptr;
+ if (Op->isReg())
+ NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect);
+ else
+ NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op,
+ Variable, Expr);
+
+ FuncInfo.ArgDbgValues.push_back(NewMI);
return true;
}
@@ -5616,7 +5764,7 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
SDValue Callee = DAG.getExternalSymbol(
FunctionName,
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
- LowerCallTo(I, Callee, I.isTailCall());
+ LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
/// Given a @llvm.call.preallocated.setup, return the corresponding
@@ -5718,10 +5866,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
/* AlwaysInline */ false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)));
+ MachinePointerInfo(I.getArgOperand(1)), AAInfo);
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5739,10 +5889,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
/* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)));
+ MachinePointerInfo(I.getArgOperand(1)), AAInfo);
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5756,8 +5908,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
- MachinePointerInfo(I.getArgOperand(0)));
+ MachinePointerInfo(I.getArgOperand(0)), AAInfo);
updateDAGForMaybeTailCall(MS);
return;
}
@@ -5775,9 +5929,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)));
+ MachinePointerInfo(I.getArgOperand(1)), AAInfo);
updateDAGForMaybeTailCall(MM);
return;
}
@@ -5859,7 +6015,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
+    // Assume dbg.addr and dbg.declare cannot currently use DIArgList, i.e.
+ // they are non-variadic.
const auto &DI = cast<DbgVariableIntrinsic>(I);
+ assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
@@ -5867,7 +6026,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
<< "\n");
// Check if address has undef value.
- const Value *Address = DI.getVariableLocation();
+ const Value *Address = DI.getVariableLocationOp(0);
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
@@ -5898,8 +6057,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (FI != std::numeric_limits<int>::max()) {
if (Intrinsic == Intrinsic::dbg_addr) {
SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
- Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
+ Variable, Expression, FI, getRoot().getNode(), /*IsIndirect*/ true,
+ dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, isParameter);
} else {
LLVM_DEBUG(dbgs() << "Skipping " << DI
<< " (variable info stashed in MF side table)\n");
@@ -5931,7 +6091,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, dl, SDNodeOrder);
}
- DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+ DAG.AddDbgValue(SDV, isParameter);
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
@@ -5960,20 +6120,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
- const Value *V = DI.getValue();
- if (!V)
+ SmallVector<Value *, 4> Values(DI.getValues());
+ if (Values.empty())
return;
- if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(),
- SDNodeOrder))
+ if (std::count(Values.begin(), Values.end(), nullptr))
return;
- // TODO: Dangling debug info will eventually either be resolved or produce
- // an Undef DBG_VALUE. However in the resolution case, a gap may appear
- // between the original dbg.value location and its resolved DBG_VALUE, which
- // we should ideally fill with an extra Undef DBG_VALUE.
-
- DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
+ bool IsVariadic = DI.hasArgList();
+ if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(),
+ SDNodeOrder, IsVariadic))
+ addDanglingDebugInfo(&DI, dl, SDNodeOrder);
return;
}
@@ -6165,6 +6322,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
+ case Intrinsic::arithmetic_fence: {
+ setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)), Flags));
+ return;
+ }
case Intrinsic::fma:
setValue(&I, DAG.getNode(
ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
@@ -6215,19 +6378,25 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)))));
return;
case Intrinsic::fptosi_sat: {
- EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType());
- SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32);
- setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, Type,
- getValue(I.getArgOperand(0)), SatW));
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT,
+ getValue(I.getArgOperand(0)),
+ DAG.getValueType(VT.getScalarType())));
return;
}
case Intrinsic::fptoui_sat: {
- EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType());
- SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32);
- setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, Type,
- getValue(I.getArgOperand(0)), SatW));
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT,
+ getValue(I.getArgOperand(0)),
+ DAG.getValueType(VT.getScalarType())));
return;
}
+ case Intrinsic::set_rounding:
+ Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
+ {getRoot(), getValue(I.getArgOperand(0))});
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(0));
+ return;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
@@ -6642,9 +6811,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SmallVector<const Value *, 4> Allocas;
getUnderlyingObjects(ObjectPtr, Allocas);
- for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(),
- E = Allocas.end(); Object != E; ++Object) {
- const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+ for (const Value *Alloca : Allocas) {
+ const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Alloca);
// Could not find an Alloca.
if (!LifetimeObject)
@@ -6688,6 +6856,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
lowerCallToExternalSymbol(I, FunctionName);
return;
case Intrinsic::donothing:
+ case Intrinsic::seh_try_begin:
+ case Intrinsic::seh_scope_begin:
+ case Intrinsic::seh_try_end:
+ case Intrinsic::seh_scope_end:
// ignore
return;
case Intrinsic::experimental_stackmap:
@@ -6849,7 +7021,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return;
-
+ case Intrinsic::experimental_stepvector:
+ visitStepVector(I);
+ return;
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul:
case Intrinsic::vector_reduce_add:
@@ -6986,6 +7160,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
+
+ // The intrinsic's index type is i64, but the SDNode requires an index type
+ // suitable for the target. Convert the index as required.
+ MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ if (Index.getValueType() != VectorIdxTy)
+ Index = DAG.getVectorIdxConstant(
+ cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
Index));
@@ -6998,9 +7180,22 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ // The intrinsic's index type is i64, but the SDNode requires an index type
+ // suitable for the target. Convert the index as required.
+ MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ if (Index.getValueType() != VectorIdxTy)
+ Index = DAG.getVectorIdxConstant(
+ cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+
setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
return;
}
+ case Intrinsic::experimental_vector_reverse:
+ visitVectorReverse(I);
+ return;
+ case Intrinsic::experimental_vector_splice:
+ visitVectorSplice(I);
+ return;
}
}
@@ -7104,7 +7299,10 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
- Opers.push_back(DAG.getCondCode(getFCmpCondCode(FPCmp->getPredicate())));
+ ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ Opers.push_back(DAG.getCondCode(Condition));
break;
}
}
@@ -7134,6 +7332,7 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
+ SDLoc DL = getCurSDLoc();
unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
SmallVector<EVT, 4> ValueVTs;
@@ -7141,46 +7340,92 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
SDVTList VTs = DAG.getVTList(ValueVTs);
+ auto EVLParamPos =
+ VPIntrinsic::getVectorLengthParamPos(VPIntrin.getIntrinsicID());
+
+ MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
+ assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
+ "Unexpected target EVL type");
+
// Request operands.
SmallVector<SDValue, 7> OpValues;
- for (int i = 0; i < (int)VPIntrin.getNumArgOperands(); ++i)
- OpValues.push_back(getValue(VPIntrin.getArgOperand(i)));
+ for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) {
+ auto Op = getValue(VPIntrin.getArgOperand(I));
+ if (I == EVLParamPos)
+ Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
+ OpValues.push_back(Op);
+ }
- SDLoc DL = getCurSDLoc();
SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
setValue(&VPIntrin, Result);
}
-std::pair<SDValue, SDValue>
-SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
- const BasicBlock *EHPadBB) {
+SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
+ const BasicBlock *EHPadBB,
+ MCSymbol *&BeginLabel) {
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
- MCSymbol *BeginLabel = nullptr;
- if (EHPadBB) {
- // Insert a label before the invoke call to mark the try range. This can be
- // used to detect deletion of the invoke via the MachineModuleInfo.
- BeginLabel = MMI.getContext().createTempSymbol();
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI.getContext().createTempSymbol();
- // For SjLj, keep track of which landing pads go with which invokes
- // so as to maintain the ordering of pads in the LSDA.
- unsigned CallSiteIndex = MMI.getCurrentCallSite();
- if (CallSiteIndex) {
- MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
- LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI.getCurrentCallSite();
+ if (CallSiteIndex) {
+ MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
- // Now that the call site is handled, stop tracking it.
- MMI.setCurrentCallSite(0);
- }
+ // Now that the call site is handled, stop tracking it.
+ MMI.setCurrentCallSite(0);
+ }
+
+ return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel);
+}
+
+SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
+ const BasicBlock *EHPadBB,
+ MCSymbol *BeginLabel) {
+ assert(BeginLabel && "BeginLabel should've been set");
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
+ Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel);
+
+ // Inform MachineModuleInfo of range.
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ // There is a platform (e.g. wasm) that uses funclet style IR but does not
+ // actually use outlined funclets and their LSDA info style.
+ if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
+ assert(II && "II should've been set");
+ WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
+ EHInfo->addIPToStateRange(II, BeginLabel, EndLabel);
+ } else if (!isScopedEHPersonality(Pers)) {
+ assert(EHPadBB);
+ MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ }
+
+ return Chain;
+}
+
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB) {
+ MCSymbol *BeginLabel = nullptr;
+
+ if (EHPadBB) {
// Both PendingLoads and PendingExports must be flushed here;
// this call might not return.
(void)getRoot();
- DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel));
-
+ DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel));
CLI.setChain(getRoot());
}
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
@@ -7202,22 +7447,8 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
}
if (EHPadBB) {
- // Insert a label at the end of the invoke call to mark the try range. This
- // can be used to detect deletion of the invoke via the MachineModuleInfo.
- MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
- DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
-
- // Inform MachineModuleInfo of range.
- auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
- // There is a platform (e.g. wasm) that uses funclet style IR but does not
- // actually use outlined funclets and their LSDA info style.
- if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
- assert(CLI.CB);
- WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
- EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CB), BeginLabel, EndLabel);
- } else if (!isScopedEHPersonality(Pers)) {
- MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
- }
+ DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB,
+ BeginLabel));
}
return Result;
@@ -7225,6 +7456,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
bool isTailCall,
+ bool isMustTailCall,
const BasicBlock *EHPadBB) {
auto &DL = DAG.getDataLayout();
FunctionType *FTy = CB.getFunctionType();
@@ -7241,7 +7473,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
// attribute.
auto *Caller = CB.getParent()->getParent();
if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
- "true")
+ "true" && !isMustTailCall)
isTailCall = false;
// We can't tail call inside a function with a swifterror argument. Lowering
@@ -7528,10 +7760,12 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)));
+ MachinePointerInfo(I.getArgOperand(1)), AAInfo);
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
@@ -7851,7 +8085,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// CFGuardTarget bundles are lowered in LowerCallTo.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
- LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
+ LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
+ LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
@@ -7862,7 +8097,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check if we can potentially perform a tail call. More detailed checking
  // is done within LowerCallTo, after more information about the call is
// known.
- LowerCallTo(I, Callee, I.isTailCall());
+ LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
namespace {
@@ -8055,7 +8290,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// remember that AX is actually i16 to get the right extension.
const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
- if (OpInfo.ConstraintVT != MVT::Other) {
+ if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
// If this is an FP operand in an integer register (or visa versa), or more
// generally if the operand value disagrees with the register class we plan
// to stick it in, fix the operand type.
@@ -8102,7 +8337,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// Initialize NumRegs.
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other)
- NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT);
// If this is a constraint for a specific physical register, like {r17},
// assign it now.
@@ -8186,7 +8421,8 @@ public:
} // end anonymous namespace
/// visitInlineAsm - Handle a call to an InlineAsm object.
-void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
+void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
+ const BasicBlock *EHPadBB) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
/// ConstraintOperands - Information about all of the constraints.
@@ -8274,19 +8510,28 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
ExtraInfo.update(T);
}
-
// We won't need to flush pending loads if this asm doesn't touch
// memory and is nonvolatile.
SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
+ bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow();
+ if (EmitEHLabels) {
+ assert(EHPadBB && "InvokeInst must have an EHPadBB");
+ }
bool IsCallBr = isa<CallBrInst>(Call);
- if (IsCallBr) {
- // If this is a callbr we need to flush pending exports since inlineasm_br
- // is a terminator. We need to do this before nodes are glued to
- // the inlineasm_br node.
+
+ if (IsCallBr || EmitEHLabels) {
+ // If this is a callbr or invoke we need to flush pending exports since
+ // inlineasm_br and invoke are terminators.
+ // We need to do this before nodes are glued to the inlineasm_br node.
Chain = getControlRoot();
}
+ MCSymbol *BeginLabel = nullptr;
+ if (EmitEHLabels) {
+ Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
+ }
+
// Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
@@ -8425,21 +8670,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
return;
}
- MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
SmallVector<unsigned, 4> Regs;
-
- if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) {
- unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
- MachineRegisterInfo &RegInfo =
- DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0; i != NumRegs; ++i)
- Regs.push_back(RegInfo.createVirtualRegister(RC));
- } else {
- emitInlineAsmError(Call,
- "inline asm error: This value type register "
- "class is not natively supported!");
- return;
- }
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
+ Register TiedReg = R->getReg();
+ MVT RegVT = R->getSimpleValueType(0);
+ const TargetRegisterClass *RC = TiedReg.isVirtual() ?
+ MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg);
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(MRI.createVirtualRegister(RC));
RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
@@ -8677,8 +8919,13 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
if (!OutChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
+ if (EmitEHLabels) {
+ Chain = lowerEndEH(Chain, cast<InvokeInst>(&Call), EHPadBB, BeginLabel);
+ }
+
// Only Update Root if inline assembly has a memory effect.
- if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr)
+ if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
+ EmitEHLabels)
DAG.setRoot(Chain);
}
@@ -9217,6 +9464,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.IsByRef = false;
Entry.IsReturned = false;
Entry.IsSwiftSelf = false;
+ Entry.IsSwiftAsync = false;
Entry.IsSwiftError = false;
Entry.IsCFGuardTarget = false;
Entry.Alignment = Alignment;
@@ -9229,7 +9477,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
CLI.IsTailCall = false;
} else {
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
- CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
+ CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
ISD::ArgFlagsTy Flags;
if (NeedsRegBlock) {
@@ -9287,9 +9535,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// FIXME: Split arguments if CLI.IsPostTypeLegalization
Type *FinalType = Args[i].Ty;
if (Args[i].IsByVal)
- FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
+ FinalType = Args[i].IndirectType;
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
- FinalType, CLI.CallConv, CLI.IsVarArg);
+ FinalType, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
++Value) {
EVT VT = ValueVTs[Value];
@@ -9302,6 +9550,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
+ Flags.setOrigAlign(OriginalAlignment);
if (Args[i].Ty->isPointerTy()) {
Flags.setPointer();
@@ -9329,6 +9578,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setSRet();
if (Args[i].IsSwiftSelf)
Flags.setSwiftSelf();
+ if (Args[i].IsSwiftAsync)
+ Flags.setSwiftAsync();
if (Args[i].IsSwiftError)
Flags.setSwiftError();
if (Args[i].IsCFGuardTarget)
@@ -9355,27 +9606,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// in the various CC lowering callbacks.
Flags.setByVal();
}
+ Align MemAlign;
if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
- PointerType *Ty = cast<PointerType>(Args[i].Ty);
- Type *ElementTy = Ty->getElementType();
-
- unsigned FrameSize = DL.getTypeAllocSize(
- Args[i].ByValType ? Args[i].ByValType : ElementTy);
+ unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType);
Flags.setByValSize(FrameSize);
// info is not there but there are cases it cannot get right.
- Align FrameAlign;
if (auto MA = Args[i].Alignment)
- FrameAlign = *MA;
+ MemAlign = *MA;
else
- FrameAlign = Align(getByValTypeAlignment(ElementTy, DL));
- Flags.setByValAlign(FrameAlign);
+ MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL));
+ } else if (auto MA = Args[i].Alignment) {
+ MemAlign = *MA;
+ } else {
+ MemAlign = OriginalAlignment;
}
+ Flags.setMemAlign(MemAlign);
if (Args[i].IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- Flags.setOrigAlign(OriginalAlignment);
MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
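
The memory alignment selected in the hunk above follows a simple priority: byval-style arguments take an explicit alignment if present and otherwise the pointee type's alignment, while other arguments take an explicit alignment if present and otherwise the ABI alignment computed earlier. A tiny standalone sketch of that chain — plain C++, hypothetical names, not part of the patch:

#include <cstdio>
#include <optional>

// Pick the in-memory alignment for an outgoing argument, mirroring the
// priority used above.
static unsigned chooseMemAlign(bool IsByValLike,
                               std::optional<unsigned> ExplicitAlign,
                               unsigned ByValTypeAlign, unsigned ABIAlign) {
  if (IsByValLike)
    return ExplicitAlign ? *ExplicitAlign : ByValTypeAlign;
  return ExplicitAlign ? *ExplicitAlign : ABIAlign;
}

int main() {
  // A byval argument with no explicit align falls back to the byval type align.
  std::printf("%u\n", chooseMemAlign(true, std::nullopt, 8, 4));  // prints 8
  // An ordinary argument with no explicit align uses the ABI alignment.
  std::printf("%u\n", chooseMemAlign(false, std::nullopt, 8, 4)); // prints 4
}
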
@@ -9660,8 +9910,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
// We will look through cast uses, so ignore them completely.
if (I.isCast())
continue;
- // Ignore debug info intrinsics, they don't escape or store to allocas.
- if (isa<DbgInfoIntrinsic>(I))
+ // Ignore debug info and pseudo op intrinsics, they don't escape or store
+ // to allocas.
+ if (I.isDebugOrPseudoInst())
continue;
// This is an unknown instruction. Assume it escapes or writes to all
// static alloca operands.
@@ -9688,13 +9939,17 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
continue;
// Check if the stored value is an argument, and that this store fully
- // initializes the alloca. Don't elide copies from the same argument twice.
+ // initializes the alloca.
+ // If the argument type has padding bits we can't directly forward a pointer
+ // as the upper bits may contain garbage.
+ // Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
+ !DL.typeSizeEqualsStoreSize(Arg->getType()) ||
ArgCopyElisionCandidates.count(Arg)) {
*Info = StaticAllocaInfo::Clobbered;
continue;
@@ -9829,18 +10084,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (Arg.hasAttribute(Attribute::ByVal))
FinalType = Arg.getParamByValType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
- FinalType, F.getCallingConv(), F.isVarArg());
+ FinalType, F.getCallingConv(), F.isVarArg(), DL);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
- // Certain targets (such as MIPS), may have a different ABI alignment
- // for a type depending on the context. Give the target a chance to
- // specify the alignment it wants.
- const Align OriginalAlignment(
- TLI->getABIAlignmentForCallingConv(ArgTy, DL));
if (Arg.getType()->isPointerTy()) {
Flags.setPointer();
@@ -9868,6 +10118,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setSRet();
if (Arg.hasAttribute(Attribute::SwiftSelf))
Flags.setSwiftSelf();
+ if (Arg.hasAttribute(Attribute::SwiftAsync))
+ Flags.setSwiftAsync();
if (Arg.hasAttribute(Attribute::SwiftError))
Flags.setSwiftError();
if (Arg.hasAttribute(Attribute::ByVal))
@@ -9893,6 +10145,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setByVal();
}
+ // Certain targets (such as MIPS), may have a different ABI alignment
+ // for a type depending on the context. Give the target a chance to
+ // specify the alignment it wants.
+ const Align OriginalAlignment(
+ TLI->getABIAlignmentForCallingConv(ArgTy, DL));
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Align MemAlign;
Type *ArgMemTy = nullptr;
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
Flags.isByRef()) {
@@ -9904,24 +10164,27 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// For in-memory arguments, size and alignment should be passed from FE.
// BE will guess if this info is not there but there are cases it cannot
// get right.
- MaybeAlign MemAlign = Arg.getParamAlign();
- if (!MemAlign)
+ if (auto ParamAlign = Arg.getParamStackAlign())
+ MemAlign = *ParamAlign;
+ else if ((ParamAlign = Arg.getParamAlign()))
+ MemAlign = *ParamAlign;
+ else
MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
-
- if (Flags.isByRef()) {
+ if (Flags.isByRef())
Flags.setByRefSize(MemSize);
- Flags.setByRefAlign(*MemAlign);
- } else {
+ else
Flags.setByValSize(MemSize);
- Flags.setByValAlign(*MemAlign);
- }
+ } else if (auto ParamAlign = Arg.getParamStackAlign()) {
+ MemAlign = *ParamAlign;
+ } else {
+ MemAlign = OriginalAlignment;
}
+ Flags.setMemAlign(MemAlign);
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- Flags.setOrigAlign(OriginalAlignment);
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
if (Arg.hasAttribute(Attribute::Returned))
@@ -10807,6 +11070,36 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
}
}
+void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto DL = getCurSDLoc();
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getStepVector(DL, ResultVT));
+}
+
+void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ SDLoc DL = getCurSDLoc();
+ SDValue V = getValue(I.getOperand(0));
+ assert(VT == V.getValueType() && "Malformed vector.reverse!");
+
+ if (VT.isScalableVector()) {
+ setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
+ return;
+ }
+
+ // Use VECTOR_SHUFFLE for the fixed-length vector
+ // to maintain existing behavior.
+ SmallVector<int, 8> Mask;
+ unsigned NumElts = VT.getVectorMinNumElements();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(NumElts - 1 - i);
+
+ setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
+}
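
For fixed-length vectors, the reverse above becomes a shuffle whose mask simply counts down from the last lane. A standalone sketch of that mask construction in plain C++:

#include <cstdio>
#include <vector>

// Build the shuffle mask used for a fixed-length vector.reverse:
// lane i of the result reads lane (NumElts - 1 - i) of the input.
static std::vector<int> buildReverseMask(unsigned NumElts) {
  std::vector<int> Mask;
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(NumElts - 1 - i);
  return Mask;
}

int main() {
  // For a 4-element vector the mask is 3 2 1 0.
  for (int M : buildReverseMask(4))
    std::printf("%d ", M);
  std::printf("\n");
}
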
+
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
@@ -10824,3 +11117,37 @@ void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
}
+
+void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ SDLoc DL = getCurSDLoc();
+ SDValue V1 = getValue(I.getOperand(0));
+ SDValue V2 = getValue(I.getOperand(1));
+ int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();
+
+ // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
+ if (VT.isScalableVector()) {
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
+ DAG.getConstant(Imm, DL, IdxVT)));
+ return;
+ }
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ if ((-Imm > NumElts) || (Imm >= NumElts)) {
+ // Result is undefined if immediate is out-of-bounds.
+ setValue(&I, DAG.getUNDEF(VT));
+ return;
+ }
+
+ uint64_t Idx = (NumElts + Imm) % NumElts;
+
+ // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Mask.push_back(Idx + i);
+ setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
+}
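
The fixed-length lowering of vector.splice above converts the possibly negative immediate into a starting lane, Idx = (NumElts + Imm) % NumElts, and then builds a mask of NumElts consecutive lane indices, where indices of NumElts and above select from the second operand. A standalone sketch of that arithmetic — plain C++, with illustrative values:

#include <cstdint>
#include <cstdio>
#include <vector>

// Compute the shuffle mask for a fixed-length vector.splice: for Imm >= 0
// start at lane Imm of the first vector; for Imm < 0 start |Imm| lanes from
// its end. Mask entries >= NumElts refer to lanes of the second vector.
static std::vector<int> buildSpliceMask(unsigned NumElts, int64_t Imm) {
  uint64_t Idx = (NumElts + Imm) % NumElts;
  std::vector<int> Mask;
  for (unsigned i = 0; i < NumElts; ++i)
    Mask.push_back(Idx + i);
  return Mask;
}

int main() {
  // NumElts = 4, Imm = 1  -> mask 1 2 3 4 (last three lanes of V1, first of V2)
  // NumElts = 4, Imm = -1 -> mask 3 4 5 6 (last lane of V1, first three of V2)
  for (int64_t Imm : {int64_t(1), int64_t(-1)}) {
    for (int M : buildSpliceMask(4, Imm))
      std::printf("%d ", M);
    std::printf("\n");
  }
}
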
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8f6e98c40161..df5be156821f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -492,6 +492,10 @@ public:
/// of the specified type Ty. Return empty SDValue() otherwise.
SDValue getCopyFromRegs(const Value *V, Type *Ty);
+ /// Register a dbg_value which relies on a Value which we have not yet seen.
+ void addDanglingDebugInfo(const DbgValueInst *DI, DebugLoc DL,
+ unsigned Order);
+
/// If we have dangling debug info that describes \p Variable, or an
/// overlapping part of variable considering the \p Expr, then this method
/// will drop that debug info as it isn't valid any longer.
@@ -507,11 +511,11 @@ public:
/// this cannot be done, produce an Undef debug value record.
void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI);
- /// For a given Value, attempt to create and record a SDDbgValue in the
- /// SelectionDAG.
- bool handleDebugValue(const Value *V, DILocalVariable *Var,
- DIExpression *Expr, DebugLoc CurDL,
- DebugLoc InstDL, unsigned Order);
+ /// For a given list of Values, attempt to create and record a SDDbgValue in
+ /// the SelectionDAG.
+ bool handleDebugValue(ArrayRef<const Value *> Values, DILocalVariable *Var,
+ DIExpression *Expr, DebugLoc CurDL, DebugLoc InstDL,
+ unsigned Order, bool IsVariadic);
/// Evict any dangling debug information, attempting to salvage it first.
void resolveOrClearDbgInfo();
@@ -549,7 +553,7 @@ public:
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall,
- const BasicBlock *EHPadBB = nullptr);
+ bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr);
// Lower range metadata from 0 to N to assert zext to an integer of nearest
// floor power of two.
@@ -755,7 +759,8 @@ private:
void visitStoreToSwiftError(const StoreInst &I);
void visitFreeze(const FreezeInst &I);
- void visitInlineAsm(const CallBase &Call);
+ void visitInlineAsm(const CallBase &Call,
+ const BasicBlock *EHPadBB = nullptr);
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
@@ -773,6 +778,9 @@ private:
void visitGCResult(const GCResultInst &I);
void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
+ void visitVectorReverse(const CallInst &I);
+ void visitVectorSplice(const CallInst &I);
+ void visitStepVector(const CallInst &I);
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
@@ -809,6 +817,11 @@ private:
/// Lowers CallInst to an external symbol.
void lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName);
+
+ SDValue lowerStartEH(SDValue Chain, const BasicBlock *EHPadBB,
+ MCSymbol *&BeginLabel);
+ SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
+ const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
};
/// This struct represents the registers (physical or virtual)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index d867f3e09e9c..40083c614a6c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -145,7 +145,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
- return Intrinsic::getName((Intrinsic::ID)IID, None);
+ return Intrinsic::getBaseName((Intrinsic::ID)IID).str();
else if (!G)
return "Unknown intrinsic";
else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
@@ -231,6 +231,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::MUL: return "mul";
case ISD::MULHU: return "mulhu";
case ISD::MULHS: return "mulhs";
+ case ISD::ABDS: return "abds";
+ case ISD::ABDU: return "abdu";
case ISD::SDIV: return "sdiv";
case ISD::UDIV: return "udiv";
case ISD::SREM: return "srem";
@@ -288,7 +290,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::VECTOR_SPLICE: return "vector_splice";
case ISD::SPLAT_VECTOR: return "splat_vector";
+ case ISD::SPLAT_VECTOR_PARTS: return "splat_vector_parts";
+ case ISD::VECTOR_REVERSE: return "vector_reverse";
+ case ISD::STEP_VECTOR: return "step_vector";
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
@@ -336,7 +342,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::TRUNCATE: return "truncate";
case ISD::FP_ROUND: return "fp_round";
case ISD::STRICT_FP_ROUND: return "strict_fp_round";
- case ISD::FLT_ROUNDS_: return "flt_rounds";
case ISD::FP_EXTEND: return "fp_extend";
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
@@ -410,6 +415,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::PREALLOCATED_ARG:
return "call_alloc";
+ // Floating point environment manipulation
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::SET_ROUNDING: return "set_rounding";
+
// Bit manipulation
case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
@@ -828,26 +837,38 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const {
OS << " DbgVal(Order=" << getOrder() << ')';
- if (isInvalidated()) OS << "(Invalidated)";
- if (isEmitted()) OS << "(Emitted)";
- switch (getKind()) {
- case SDNODE:
- if (getSDNode())
- OS << "(SDNODE=" << PrintNodeId(*getSDNode()) << ':' << getResNo() << ')';
- else
- OS << "(SDNODE)";
- break;
- case CONST:
- OS << "(CONST)";
- break;
- case FRAMEIX:
- OS << "(FRAMEIX=" << getFrameIx() << ')';
- break;
- case VREG:
- OS << "(VREG=" << getVReg() << ')';
- break;
+ if (isInvalidated())
+ OS << "(Invalidated)";
+ if (isEmitted())
+ OS << "(Emitted)";
+ OS << "(";
+ bool Comma = false;
+ for (const SDDbgOperand &Op : getLocationOps()) {
+ if (Comma)
+ OS << ", ";
+ switch (Op.getKind()) {
+ case SDDbgOperand::SDNODE:
+ if (Op.getSDNode())
+ OS << "SDNODE=" << PrintNodeId(*Op.getSDNode()) << ':' << Op.getResNo();
+ else
+ OS << "SDNODE";
+ break;
+ case SDDbgOperand::CONST:
+ OS << "CONST";
+ break;
+ case SDDbgOperand::FRAMEIX:
+ OS << "FRAMEIX=" << Op.getFrameIx();
+ break;
+ case SDDbgOperand::VREG:
+ OS << "VREG=" << Op.getVReg();
+ break;
+ }
+ Comma = true;
}
+ OS << ")";
if (isIndirect()) OS << "(Indirect)";
+ if (isVariadic())
+ OS << "(Variadic)";
OS << ":\"" << Var->getName() << '"';
#ifndef NDEBUG
if (Expr->getNumElements())
@@ -892,12 +913,10 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
LLVM_DUMP_METHOD void SelectionDAG::dump() const {
dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
- for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
- I != E; ++I) {
- const SDNode *N = &*I;
- if (!N->hasOneUse() && N != getRoot().getNode() &&
- (!shouldPrintInline(*N, this) || N->use_empty()))
- DumpNodes(N, 2, this);
+ for (const SDNode &N : allnodes()) {
+ if (!N.hasOneUse() && &N != getRoot().getNode() &&
+ (!shouldPrintInline(N, this) || N.use_empty()))
+ DumpNodes(&N, 2, this);
}
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 7bae5048fc0e..1415cce3b1df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -270,6 +270,10 @@ namespace llvm {
return createHybridListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::VLIW)
return createVLIWDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::Fast)
+ return createFastDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::Linearize)
+ return createDAGLinearizer(IS, OptLevel);
assert(TLI->getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
@@ -571,8 +575,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
+ bool InstrRef = TM.Options.ValueTrackingVariableLocations;
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
- MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+ MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];
+ assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
+ "Function parameters should not be described by DBG_VALUE_LIST.");
bool hasFI = MI->getOperand(0).isFI();
Register Reg =
hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
@@ -589,6 +596,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
<< Register::virtReg2Index(Reg) << "\n");
}
+ // Don't try and extend through copies in instruction referencing mode.
+ if (InstrRef)
+ continue;
+
// If Reg is live-in then update debug info to track its copy in a vreg.
DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
if (LDI != LiveInMap.end()) {
@@ -605,6 +616,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
"DBG_VALUE with nonzero offset");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
+ assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
+ "Didn't expect to see a DBG_VALUE_LIST here");
// Def is never a terminator here, so it is ok to increment InsertPos.
BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
IsIndirect, LDI->second, Variable, Expr);
@@ -638,6 +651,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
}
+ // For debug-info, in instruction referencing mode, we need to perform some
+ // post-isel maintenance.
+ MF->finalizeDebugInstrRefs();
+
// Determine if there are any calls in this machine function.
MachineFrameInfo &MFI = MF->getFrameInfo();
for (const auto &MBB : *MF) {
@@ -1419,9 +1436,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
for (const BasicBlock *LLVMBB : RPOT) {
if (OptLevel != CodeGenOpt::None) {
bool AllPredsVisited = true;
- for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
- PI != PE; ++PI) {
- if (!FuncInfo->VisitedBBs.count(*PI)) {
+ for (const BasicBlock *Pred : predecessors(LLVMBB)) {
+ if (!FuncInfo->VisitedBBs.count(Pred)) {
AllPredsVisited = false;
break;
}
@@ -1691,9 +1707,9 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
/// terminator, but additionally the copies that move the vregs into the
/// physical registers.
static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB) {
+FindSplitPointForStackProtector(MachineBasicBlock *BB,
+ const TargetInstrInfo &TII) {
MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
- //
if (SplitPoint == BB->begin())
return SplitPoint;
@@ -1701,6 +1717,31 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) {
MachineBasicBlock::iterator Previous = SplitPoint;
--Previous;
+ if (TII.isTailCall(*SplitPoint) &&
+ Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // If this call frame sequence belongs to the tail call itself, then we must insert before the sequence even starts. For

+ // example:
+ // <split point>
+ // ADJCALLSTACKDOWN ...
+ // <Moves>
+ // ADJCALLSTACKUP ...
+ // TAILJMP somewhere
+ // On the other hand, it could be an unrelated call in which case this tail call
+ // call has register moves of its own and should be the split point. For example:
+ // ADJCALLSTACKDOWN
+ // CALL something_else
+ // ADJCALLSTACKUP
+ // <split point>
+ // TAILJMP somewhere
+ do {
+ --Previous;
+ if (Previous->isCall())
+ return SplitPoint;
+ } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
+
+ return Previous;
+ }
+
while (MIIsInTerminatorSequence(*Previous)) {
SplitPoint = Previous;
if (Previous == Start)
@@ -1740,7 +1781,7 @@ SelectionDAGISel::FinishBasicBlock() {
// Add load and check to the basicblock.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt =
- FindSplitPointForStackProtector(ParentMBB);
+ FindSplitPointForStackProtector(ParentMBB, *TII);
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
@@ -1759,7 +1800,7 @@ SelectionDAGISel::FinishBasicBlock() {
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
- FindSplitPointForStackProtector(ParentMBB);
+ FindSplitPointForStackProtector(ParentMBB, *TII);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
@@ -2293,6 +2334,11 @@ void SelectionDAGISel::Select_FREEZE(SDNode *N) {
N->getOperand(0));
}
+void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::ARITH_FENCE, N->getValueType(0),
+ N->getOperand(0));
+}
+
/// GetVBR - decode a vbr encoding whose top bit is set.
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -2579,6 +2625,17 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
}
+// Bit 0 stores the sign of the immediate. The upper bits contain the magnitude
+// shifted left by 1.
+static uint64_t decodeSignRotatedValue(uint64_t V) {
+ if ((V & 1) == 0)
+ return V >> 1;
+ if (V != 1)
+ return -(V >> 1);
+ // There is no such thing as -0 with integers. "-0" really means MININT.
+ return 1ULL << 63;
+}
+
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
@@ -2586,6 +2643,8 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ Val = decodeSignRotatedValue(Val);
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
return C && C->getSExtValue() == Val;
}
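
As a side note on the hunk above: the matcher table stores immediates with the sign in bit 0 and the magnitude above it, so small negative numbers stay small and VBR-encode compactly. A standalone sketch of the corresponding encoder (the name encodeSignRotatedValue is hypothetical; the decoder simply mirrors the one added above), with a round-trip check:

#include <cassert>
#include <cstdint>
#include <limits>

static uint64_t encodeSignRotatedValue(int64_t V) {
  if (V == std::numeric_limits<int64_t>::min())
    return 1;                          // "-0": the one leftover encoding
  if (V < 0)
    return (uint64_t(-V) << 1) | 1;    // sign in bit 0, magnitude above it
  return uint64_t(V) << 1;
}

static uint64_t decodeSignRotatedValue(uint64_t V) { // same as above
  if ((V & 1) == 0)
    return V >> 1;
  if (V != 1)
    return -(V >> 1);
  return 1ULL << 63;
}

int main() {
  for (int64_t V : {int64_t(0), int64_t(5), int64_t(-5),
                    std::numeric_limits<int64_t>::min()})
    assert(int64_t(decodeSignRotatedValue(encodeSignRotatedValue(V))) == V);
  return 0;
}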
@@ -2831,6 +2890,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::FREEZE:
Select_FREEZE(NodeToMatch);
return;
+ case ISD::ARITH_FENCE:
+ Select_ARITH_FENCE(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
@@ -3239,12 +3301,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
- case OPC_EmitInteger: {
+ case OPC_EmitInteger:
+ case OPC_EmitStringInteger: {
MVT::SimpleValueType VT =
(MVT::SimpleValueType)MatcherTable[MatcherIndex++];
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ if (Opcode == OPC_EmitInteger)
+ Val = decodeSignRotatedValue(Val);
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch),
VT), nullptr));
@@ -3729,7 +3794,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
unsigned iid =
cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
if (iid < Intrinsic::num_intrinsics)
- Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None);
+ Msg << "intrinsic %" << Intrinsic::getBaseName((Intrinsic::ID)iid);
else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
Msg << "target intrinsic %" << TII->getName(iid);
else
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 059a6baf967a..d022e2a23ea0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -204,7 +204,7 @@ void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
/// Used from getNodeAttributes.
-const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
#ifndef NDEBUG
std::map<const SDNode *, std::string>::const_iterator I =
NodeGraphAttrs.find(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 0172646c22ec..a903c2401264 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -21,7 +21,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -33,6 +32,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
@@ -72,10 +72,6 @@ cl::opt<unsigned> MaxRegistersForGCPointers(
"max-registers-for-gc-values", cl::Hidden, cl::init(0),
cl::desc("Max number of VRegs allowed to pass GC pointer meta args in"));
-cl::opt<bool> AlwaysSpillBase("statepoint-always-spill-base", cl::Hidden,
- cl::init(true),
- cl::desc("Force spilling of base GC pointers"));
-
typedef FunctionLoweringInfo::StatepointRelocationRecord RecordType;
static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
@@ -113,7 +109,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
unsigned SpillSize = ValueType.getStoreSize();
- assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
+ assert((SpillSize * 8) ==
+ (-8u & (7 + ValueType.getSizeInBits())) && // Round up to a multiple of 8.
+ "Size not in bytes?");
// First look for a previously created stack slot which is not in
// use (accounting for the fact arbitrary slots may already be
@@ -386,7 +384,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// (i.e. change the '==' in the assert below to a '>=').
MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
assert((MFI.getObjectSize(Index) * 8) ==
- (int64_t)Incoming.getValueSizeInBits() &&
+ (-8 & (7 + // Round up to a multiple of 8.
+ (int64_t)Incoming.getValueSizeInBits())) &&
"Bad spill: stack slot does not match!");
// Note: Using the alignment of the spill slot (rather than the abi or
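
A short standalone sketch of the rounding trick used in the asserts above: since -8 in two's complement is ~7, the expression -8u & (x + 7) is just (x + 7) & ~7u, i.e. x rounded up to the next multiple of 8.

#include <cassert>
#include <cstdint>

// Round a bit count up to a whole number of bytes' worth of bits.
static uint32_t roundUpTo8(uint32_t Bits) { return -8u & (Bits + 7); }

int main() {
  assert(roundUpTo8(1) == 8);
  assert(roundUpTo8(8) == 8);
  assert(roundUpTo8(17) == 24);
  assert(roundUpTo8(64) == 64);
  return 0;
}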
@@ -489,6 +488,18 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
}
+/// Return true if value V represents a GC value. The behavior is conservative:
+/// if it cannot be proven that V is not a GC value, the function returns true.
+static bool isGCValue(const Value *V, SelectionDAGBuilder &Builder) {
+ auto *Ty = V->getType();
+ if (!Ty->isPtrOrPtrVectorTy())
+ return false;
+ if (auto *GFI = Builder.GFI)
+ if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
+ return *IsManaged;
+ return true; // conservative
+}
+
/// Lower deopt state and gc pointer arguments of the statepoint. The actual
/// lowering is described in lowerIncomingStatepointValue. This function is
/// responsible for lowering everything in the right position and playing some
@@ -607,18 +618,11 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
LLVM_DEBUG(dbgs() << LowerAsVReg.size() << " pointers will go in vregs\n");
- auto isGCValue = [&](const Value *V) {
- auto *Ty = V->getType();
- if (!Ty->isPtrOrPtrVectorTy())
- return false;
- if (auto *GFI = Builder.GFI)
- if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
- return *IsManaged;
- return true; // conservative
- };
-
auto requireSpillSlot = [&](const Value *V) {
- if (isGCValue(V))
+ if (!Builder.DAG.getTargetLoweringInfo().isTypeLegal(
+ Builder.getValue(V).getValueType()))
+ return true;
+ if (isGCValue(V, Builder))
return !LowerAsVReg.count(Builder.getValue(V));
return !(LiveInDeopt || UseRegistersForDeoptValues);
};
@@ -727,8 +731,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
NumOfStatepoints++;
// Clear state
StatepointLowering.startNewStatepoint(*this);
- assert(SI.Bases.size() == SI.Ptrs.size() &&
- SI.Ptrs.size() <= SI.GCRelocates.size());
+ assert(SI.Bases.size() == SI.Ptrs.size());
LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
#ifndef NDEBUG
@@ -1042,6 +1045,21 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
}
}
+ // If we find a deopt value which isn't explicitly added, we need to
+ // ensure it gets lowered such that gc cycles occurring before the
+ // deoptimization event during the lifetime of the call don't invalidate
+ // the pointer we're deopting with. Note that we assume that all
+ // pointers passed to deopt are base pointers; relaxing that assumption
+ // would require relatively large changes to how we represent relocations.
+ for (Value *V : I.deopt_operands()) {
+ if (!isGCValue(V, *this))
+ continue;
+ if (Seen.insert(getValue(V)).second) {
+ SI.Bases.push_back(V);
+ SI.Ptrs.push_back(V);
+ }
+ }
+
SI.GCArgs = ArrayRef<const Use>(I.gc_args_begin(), I.gc_args_end());
SI.StatepointInstr = &I;
SI.ID = I.getID();
@@ -1057,23 +1075,25 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
SDValue ReturnValue = LowerAsSTATEPOINT(SI);
// Export the result value if needed
- const GCResultInst *GCResult = I.getGCResult();
+ const std::pair<bool, bool> GCResultLocality = I.getGCResultLocality();
Type *RetTy = I.getActualReturnType();
- if (RetTy->isVoidTy() || !GCResult) {
+ if (RetTy->isVoidTy() ||
+ (!GCResultLocality.first && !GCResultLocality.second)) {
// The return value is not needed, just generate a poison value.
setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc()));
return;
}
- if (GCResult->getParent() == I.getParent()) {
+ if (GCResultLocality.first) {
// Result value will be used in a same basic block. Don't export it or
// perform any explicit register copies. The gc_result will simply grab
// this value.
setValue(&I, ReturnValue);
- return;
}
+ if (!GCResultLocality.second)
+ return;
// Result value will be used in a different basic block so we need to export
// it now. Default exporting mechanism will not work here because statepoint
// call has a different type than the actual call. It means that by default
@@ -1191,7 +1211,40 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
setValue(&Relocate, Relocation);
return;
}
-
+
+ if (Record.type == RecordType::Spill) {
+ unsigned Index = Record.payload.FI;
+ SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
+
+ // All the reloads are independent and are reading memory only modified by
+ // statepoints (i.e. no other aliasing stores); informing SelectionDAG of
+ // this lets CSE kick in for free and allows reordering of
+ // instructions if possible. The lowering for statepoint sets the root,
+ // so this is ordering all reloads with either
+ // a) the statepoint node itself, or
+ // b) the entry of the current block for an invoke statepoint.
+ const SDValue Chain = DAG.getRoot(); // != Builder.getRoot()
+
+ auto &MF = DAG.getMachineFunction();
+ auto &MFI = MF.getFrameInfo();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlign(Index));
+
+ auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ Relocate.getType());
+
+ SDValue SpillLoad =
+ DAG.getLoad(LoadVT, getCurSDLoc(), Chain, SpillSlot, LoadMMO);
+ PendingLoads.push_back(SpillLoad.getValue(1));
+
+ assert(SpillLoad.getNode());
+ setValue(&Relocate, SpillLoad);
+ return;
+ }
+
+ assert(Record.type == RecordType::NoRelocate);
SDValue SD = getValue(DerivedPtr);
if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) {
@@ -1201,43 +1254,9 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
return;
}
-
// We didn't need to spill these special cases (constants and allocas).
// See the handling in spillIncomingValueForStatepoint for detail.
- if (Record.type == RecordType::NoRelocate) {
- setValue(&Relocate, SD);
- return;
- }
-
- assert(Record.type == RecordType::Spill);
-
- unsigned Index = Record.payload.FI;;
- SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
-
- // All the reloads are independent and are reading memory only modified by
- // statepoints (i.e. no other aliasing stores); informing SelectionDAG of
- // this this let's CSE kick in for free and allows reordering of instructions
- // if possible. The lowering for statepoint sets the root, so this is
- // ordering all reloads with the either a) the statepoint node itself, or b)
- // the entry of the current block for an invoke statepoint.
- const SDValue Chain = DAG.getRoot(); // != Builder.getRoot()
-
- auto &MF = DAG.getMachineFunction();
- auto &MFI = MF.getFrameInfo();
- auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
- auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- MFI.getObjectSize(Index),
- MFI.getObjectAlign(Index));
-
- auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
- Relocate.getType());
-
- SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain,
- SpillSlot, LoadMMO);
- PendingLoads.push_back(SpillLoad.getValue(1));
-
- assert(SpillLoad.getNode());
- setValue(&Relocate, SpillLoad);
+ setValue(&Relocate, SD);
}
void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
index 634ef87f3840..addc0a7eef3a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -18,11 +18,11 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/IntrinsicInst.h"
#include <cassert>
namespace llvm {
-class CallInst;
class SelectionDAGBuilder;
/// This class tracks both per-statepoint and per-selectiondag information.
@@ -63,7 +63,7 @@ public:
/// Record the fact that we expect to encounter a given gc_relocate
/// before the next statepoint. If we don't see it, we'll report
/// an assertion.
- void scheduleRelocCall(const CallInst &RelocCall) {
+ void scheduleRelocCall(const GCRelocateInst &RelocCall) {
// We are not interested in lowering dead instructions.
if (!RelocCall.use_empty())
PendingGCRelocateCalls.push_back(&RelocCall);
@@ -72,7 +72,7 @@ public:
/// Remove this gc_relocate from the list we're expecting to see
/// before the next statepoint. If we weren't expecting to see
/// it, we'll report an assertion.
- void relocCallVisited(const CallInst &RelocCall) {
+ void relocCallVisited(const GCRelocateInst &RelocCall) {
// We are not interested in lowering dead instructions.
if (RelocCall.use_empty())
return;
@@ -118,7 +118,7 @@ private:
unsigned NextSlotToAllocate = 0;
/// Keep track of pending gcrelocate calls for consistency check
- SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
+ SmallVector<const GCRelocateInst *, 10> PendingGCRelocateCalls;
};
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5760132e44a0..1c1dae8f953f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -53,22 +53,24 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
const Function &F = DAG.getMachineFunction().getFunction();
// First, check if tail calls have been disabled in this function.
- if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+ if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
return false;
// Conservatively require the attributes of the call to match those of
- // the return. Ignore NoAlias and NonNull because they don't affect the
+ // the return. Ignore the following attributes because they don't affect the
// call sequence.
- AttributeList CallerAttrs = F.getAttributes();
- if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
- .removeAttribute(Attribute::NoAlias)
- .removeAttribute(Attribute::NonNull)
- .hasAttributes())
+ AttrBuilder CallerAttrs(F.getAttributes(), AttributeList::ReturnIndex);
+ for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
+ Attribute::DereferenceableOrNull, Attribute::NoAlias,
+ Attribute::NonNull})
+ CallerAttrs.removeAttribute(Attr);
+
+ if (CallerAttrs.hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (CallerAttrs.contains(Attribute::ZExt) ||
+ CallerAttrs.contains(Attribute::SExt))
return false;
// Check if the only use is a function return node.
@@ -114,14 +116,21 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
+ IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
- Alignment = Call->getParamAlign(ArgIdx);
- ByValType = nullptr;
- if (IsByVal)
- ByValType = Call->getParamByValType(ArgIdx);
- PreallocatedType = nullptr;
+ Alignment = Call->getParamStackAlign(ArgIdx);
+ IndirectType = nullptr;
+ assert(IsByVal + IsPreallocated + IsInAlloca <= 1 &&
+ "multiple ABI attributes?");
+ if (IsByVal) {
+ IndirectType = Call->getParamByValType(ArgIdx);
+ if (!Alignment)
+ Alignment = Call->getParamAlign(ArgIdx);
+ }
if (IsPreallocated)
- PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
+ IndirectType = Call->getParamPreallocatedType(ArgIdx);
+ if (IsInAlloca)
+ IndirectType = Call->getParamInAllocaType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
@@ -194,9 +203,8 @@ bool TargetLowering::findOptimalMemOpLowering(
// equal to DstAlign (or zero).
VT = MVT::i64;
if (Op.isFixedDstAlign())
- while (
- Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
- !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value()))
+ while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
+ !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
@@ -250,7 +258,7 @@ bool TargetLowering::findOptimalMemOpLowering(
bool Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
- VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 1,
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
@@ -502,7 +510,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
case ISD::AND:
case ISD::OR: {
auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!Op1C)
+ if (!Op1C || Op1C->isOpaque())
return false;
// If this is a 'not' op, don't touch it because that's a canonical form.
@@ -971,11 +979,12 @@ bool TargetLowering::SimplifyDemandedBits(
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
case ISD::LOAD: {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
+ auto *LD = cast<LoadSDNode>(Op);
if (getTargetConstantFromLoad(LD)) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
- } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ }
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
// If this is a ZEXTLoad and we are looking at the loaded value.
EVT MemVT = LD->getMemoryVT();
unsigned MemBits = MemVT.getScalarSizeInBits();
@@ -2012,7 +2021,7 @@ bool TargetLowering::SimplifyDemandedBits(
const APInt *ShAmtC =
TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
- if (!ShAmtC)
+ if (!ShAmtC || ShAmtC->uge(BitWidth))
break;
uint64_t ShVal = ShAmtC->getZExtValue();
@@ -2267,10 +2276,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
- for (SDNodeIterator I = SDNodeIterator::begin(N),
- E = SDNodeIterator::end(N);
- I != E; ++I) {
- SDNode *Op = *I;
+ for (SDNode *Op :
+ llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
if (C->isOpaque())
return false;
@@ -2417,6 +2424,27 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef.setAllBits();
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
}
+ SDValue ScalarSrc = Op.getOperand(0);
+ if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue Src = ScalarSrc.getOperand(0);
+ SDValue Idx = ScalarSrc.getOperand(1);
+ EVT SrcVT = Src.getValueType();
+
+ ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
+
+ if (SrcEltCnt.isScalable())
+ return false;
+
+ unsigned NumSrcElts = SrcEltCnt.getFixedValue();
+ if (isNullConstant(Idx)) {
+ APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
+ APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
+ APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+ }
+ }
KnownUndef.setHighBits(NumElts - 1);
break;
}
@@ -3028,6 +3056,19 @@ const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
return nullptr;
}
+bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ bool PoisonOnly, unsigned Depth) const {
+ assert(
+ (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
@@ -4502,40 +4543,39 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
GA->getValueType(0),
Offset + GA->getOffset()));
return;
- } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
- ConstraintLetter != 's') {
+ }
+ if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
// gcc prints these as sign extended. Sign extend value to 64 bits
// now; without this it would get ZExt'd later in
// ScheduleDAGSDNodes::EmitNode, which is very generic.
bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
BooleanContent BCont = getBooleanContents(MVT::i64);
- ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
- : ISD::SIGN_EXTEND;
- int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
- : C->getSExtValue();
- Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
- SDLoc(C), MVT::i64));
+ ISD::NodeType ExtOpc =
+ IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
+ int64_t ExtVal =
+ ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
+ Ops.push_back(
+ DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
return;
- } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
- ConstraintLetter != 'n') {
+ }
+ if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && ConstraintLetter != 'n') {
Ops.push_back(DAG.getTargetBlockAddress(
BA->getBlockAddress(), BA->getValueType(0),
Offset + BA->getOffset(), BA->getTargetFlags()));
return;
- } else {
- const unsigned OpCode = Op.getOpcode();
- if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
- if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
- Op = Op.getOperand(1);
- // Subtraction is not commutative.
- else if (OpCode == ISD::ADD &&
- (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
- Op = Op.getOperand(0);
- else
- return;
- Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
- continue;
- }
+ }
+ const unsigned OpCode = Op.getOpcode();
+ if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
+ if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
+ Op = Op.getOperand(1);
+ // Subtraction is not commutative.
+ else if (OpCode == ISD::ADD &&
+ (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
+ Op = Op.getOperand(0);
+ else
+ return;
+ Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
+ continue;
}
return;
}
@@ -4565,11 +4605,10 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
if (!isLegalRC(*RI, *RC))
continue;
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I) {
- if (RegName.equals_lower(RI->getRegAsmName(*I))) {
+ for (const MCPhysReg &PR : *RC) {
+ if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
std::pair<unsigned, const TargetRegisterClass *> S =
- std::make_pair(*I, RC);
+ std::make_pair(PR, RC);
// If this register class has the requested value type, return it,
// otherwise keep searching and return the first class found
@@ -5033,16 +5072,17 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
return SDValue();
SDValue Shift, Factor;
- if (VT.isFixedLengthVector()) {
+ if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
Factor = DAG.getBuildVector(VT, dl, Factors);
- } else if (VT.isScalableVector()) {
+ } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(Shifts.size() == 1 && Factors.size() == 1 &&
"Expected matchUnaryPredicate to return one element for scalable "
"vectors");
Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
Factor = DAG.getSplatVector(VT, dl, Factors[0]);
} else {
+ assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
Shift = Shifts[0];
Factor = Factors[0];
}
@@ -5084,11 +5124,25 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
unsigned EltBits = VT.getScalarSizeInBits();
+ EVT MulVT;
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
- if (!isTypeLegal(VT))
- return SDValue();
+ if (!isTypeLegal(VT)) {
+ // Limit this to simple scalars for now.
+ if (VT.isVector() || !VT.isSimple())
+ return SDValue();
+
+ // If this type will be promoted to a large enough type with a legal
+ // multiply operation, we can go ahead and do this transform.
+ if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
+ return SDValue();
+
+ MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ if (MulVT.getSizeInBits() < (2 * EltBits) ||
+ !isOperationLegal(ISD::MUL, MulVT))
+ return SDValue();
+ }
// If the sdiv has an 'exact' bit we can use a simpler lowering.
if (N->getFlags().hasExact())
@@ -5134,12 +5188,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue MagicFactor, Factor, Shift, ShiftMask;
- if (VT.isFixedLengthVector()) {
+ if (N1.getOpcode() == ISD::BUILD_VECTOR) {
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
Factor = DAG.getBuildVector(VT, dl, Factors);
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
- } else if (VT.isScalableVector()) {
+ } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
Shifts.size() == 1 && ShiftMasks.size() == 1 &&
"Expected matchUnaryPredicate to return one element for scalable "
@@ -5149,6 +5203,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
} else {
+ assert(isa<ConstantSDNode>(N1) && "Expected a constant");
MagicFactor = MagicFactors[0];
Factor = Factors[0];
Shift = Shifts[0];
@@ -5157,17 +5212,32 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value.
// FIXME: We should support doing a MUL in a wider type.
- SDValue Q;
- if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
- : isOperationLegalOrCustom(ISD::MULHS, VT))
- Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
- else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
- : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
- SDValue LoHi =
- DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
- Q = SDValue(LoHi.getNode(), 1);
- } else
- return SDValue(); // No mulhs or equivalent.
+ auto GetMULHS = [&](SDValue X, SDValue Y) {
+ // If the type isn't legal, use a wider mul of the type calculated
+ // earlier.
+ if (!isTypeLegal(VT)) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
+ DAG.getShiftAmountConstant(EltBits, MulVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
+
+ if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
+ return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
+ if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
+ return SDValue(LoHi.getNode(), 1);
+ }
+ return SDValue();
+ };
+
+ SDValue Q = GetMULHS(N0, MagicFactor);
+ if (!Q)
+ return SDValue();
+
Created.push_back(Q.getNode());
// (Optionally) Add/subtract the numerator using Factor.
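
A standalone sketch (not LLVM code) of what the GetMULHS lambda above does when the type is merely promoted rather than legal: sign-extend both operands into the wider type, multiply there, shift the product right by the original element width, and truncate. The i8-to-i16 pairing is only an assumed example.

#include <cassert>
#include <cstdint>

static int8_t mulhs8(int8_t X, int8_t Y) {
  int16_t Wide = int16_t(X) * int16_t(Y);   // SIGN_EXTEND into MulVT, then MUL
  // Shift right by EltBits and truncate; after the truncate it does not
  // matter whether the shift was arithmetic or logical.
  return int8_t(Wide >> 8);
}

int main() {
  assert(mulhs8(100, 100) == 39);   // 10000 = 0x2710, high byte 0x27
  assert(mulhs8(-128, 2) == -1);    // -256 = 0xFF00, high byte 0xFF
  return 0;
}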
@@ -5202,11 +5272,25 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
unsigned EltBits = VT.getScalarSizeInBits();
+ EVT MulVT;
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
- if (!isTypeLegal(VT))
- return SDValue();
+ if (!isTypeLegal(VT)) {
+ // Limit this to simple scalars for now.
+ if (VT.isVector() || !VT.isSimple())
+ return SDValue();
+
+ // If this type will be promoted to a large enough type with a legal
+ // multiply operation, we can go ahead and do this transform.
+ if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
+ return SDValue();
+
+ MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ if (MulVT.getSizeInBits() < (2 * EltBits) ||
+ !isOperationLegal(ISD::MUL, MulVT))
+ return SDValue();
+ }
bool UseNPQ = false;
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
@@ -5216,7 +5300,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return false;
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
- APInt Divisor = C->getAPIntValue();
+ const APInt& Divisor = C->getAPIntValue();
APInt::mu magics = Divisor.magicu();
unsigned PreShift = 0, PostShift = 0;
@@ -5261,12 +5345,12 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
- if (VT.isFixedLengthVector()) {
+ if (N1.getOpcode() == ISD::BUILD_VECTOR) {
PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
- } else if (VT.isScalableVector()) {
+ } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
NPQFactors.size() == 1 && PostShifts.size() == 1 &&
"Expected matchUnaryPredicate to return one for scalable vectors");
@@ -5275,6 +5359,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
} else {
+ assert(isa<ConstantSDNode>(N1) && "Expected a constant");
PreShift = PreShifts[0];
MagicFactor = MagicFactors[0];
PostShift = PostShifts[0];
@@ -5286,11 +5371,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHU = [&](SDValue X, SDValue Y) {
- if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
- : isOperationLegalOrCustom(ISD::MULHU, VT))
+ // If the type isn't legal, use a wider mul of the type calculated
+ // earlier.
+ if (!isTypeLegal(VT)) {
+ X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
+ Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
+ DAG.getShiftAmountConstant(EltBits, MulVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
+
+ if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
- if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
- : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
+ if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
SDValue LoHi =
DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
@@ -5398,11 +5492,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
EVT VT = REMNode.getValueType();
EVT SVT = VT.getScalarType();
- EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
EVT ShSVT = ShVT.getScalarType();
// If MUL is unavailable, we cannot proceed in any case.
- if (!isOperationLegalOrCustom(ISD::MUL, VT))
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
bool ComparingWithAllZeros = true;
@@ -5511,7 +5605,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
return SDValue();
SDValue PVal, KVal, QVal;
- if (VT.isVector()) {
+ if (D.getOpcode() == ISD::BUILD_VECTOR) {
if (HadTautologicalLanes) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep '0'`s.
@@ -5525,6 +5619,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
PVal = DAG.getBuildVector(VT, DL, PAmts);
KVal = DAG.getBuildVector(ShVT, DL, KAmts);
QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
+ "Expected matchBinaryPredicate to return one element for "
+ "SPLAT_VECTORs");
+ PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
+ KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
+ QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
} else {
PVal = PAmts[0];
KVal = KAmts[0];
@@ -5532,7 +5633,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
}
if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
- if (!isOperationLegalOrCustom(ISD::SUB, VT))
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
return SDValue(); // FIXME: Could/should use `ISD::ADD`?
assert(CompTargetNode.getValueType() == N.getValueType() &&
"Expecting that the types on LHS and RHS of comparisons match.");
@@ -5547,12 +5648,10 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
// divisors as a performance improvement, since rotating by 0 is a no-op.
if (HadEvenDivisor) {
// We need ROTR to do this.
- if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
- SDNodeFlags Flags;
- Flags.setExact(true);
// UREM: (rotr (mul N, P), K)
- Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
Created.push_back(Op0.getNode());
}
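
For reference, a self-checking sketch of the identity behind the "(rotr (mul N, P), K)" form above, in the Hacker's Delight style. The constants P, Q and the rotate amount are illustrative values for d = 6, not taken from the patch: with d = d0 * 2^K and d0 odd, x % d == 0 exactly when rotr(x * P, K) <= Q, where P is the multiplicative inverse of d0 mod 2^32 and Q = (2^32 - 1) / d.

#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t V, unsigned K) {
  return K ? (V >> K) | (V << (32 - K)) : V;
}

int main() {
  const uint32_t D = 6, P = 0xAAAAAAABu /* inverse of 3 mod 2^32 */, K = 1,
                 Q = 0xFFFFFFFFu / D;
  for (uint32_t X = 0; X < 10000; ++X)
    assert((X % D == 0) == (rotr32(X * P, K) <= Q));
  return 0;
}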
@@ -5577,6 +5676,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
Created.push_back(TautologicalInvertedChannels.getNode());
+ // NOTE: we avoid letting illegal types through even if we're before legalize
+ // ops – legalization has a hard time producing good code for this.
if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
// If we have a vector select, let's replace the comparison results in the
// affected lanes with the correct tautological result.
@@ -5587,6 +5688,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
}
// Else, we can just invert the comparison result in the appropriate lanes.
+ //
+ // NOTE: see the VSELECT note above.
if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
TautologicalInvertedChannels);
@@ -5638,11 +5741,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
EVT VT = REMNode.getValueType();
EVT SVT = VT.getScalarType();
- EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
EVT ShSVT = ShVT.getScalarType();
- // If MUL is unavailable, we cannot proceed in any case.
- if (!isOperationLegalOrCustom(ISD::MUL, VT))
+ // If we are after ops legalization and MUL is unavailable, we cannot
+ // proceed.
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
// TODO: Could support comparing with non-zero too.
@@ -5755,7 +5859,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
return SDValue();
SDValue PVal, AVal, KVal, QVal;
- if (VT.isFixedLengthVector()) {
+ if (D.getOpcode() == ISD::BUILD_VECTOR) {
if (HadOneDivisor) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep '0'`s.
@@ -5774,7 +5878,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
AVal = DAG.getBuildVector(VT, DL, AAmts);
KVal = DAG.getBuildVector(ShVT, DL, KAmts);
QVal = DAG.getBuildVector(VT, DL, QAmts);
- } else if (VT.isScalableVector()) {
+ } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
QAmts.size() == 1 &&
"Expected matchUnaryPredicate to return one element for scalable "
@@ -5784,6 +5888,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
} else {
+ assert(isa<ConstantSDNode>(D) && "Expected a constant");
PVal = PAmts[0];
AVal = AAmts[0];
KVal = KAmts[0];
@@ -5796,7 +5901,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
if (NeedToApplyOffset) {
// We need ADD to do this.
- if (!isOperationLegalOrCustom(ISD::ADD, VT))
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
return SDValue();
// (add (mul N, P), A)
@@ -5808,12 +5913,10 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// divisors as a performance improvement, since rotating by 0 is a no-op.
if (HadEvenDivisor) {
// We need ROTR to do this.
- if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
- SDNodeFlags Flags;
- Flags.setExact(true);
// SREM: (rotr (add (mul N, P), A), K)
- Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
Created.push_back(Op0.getNode());
}
@@ -5831,10 +5934,13 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// we must fix-up results for said lanes.
assert(VT.isVector() && "Can/should only get here for vectors.");
+ // NOTE: we avoid letting illegal types through even if we're before legalize
+ // ops – legalization has a hard time producing good code for the code that
+ // follows.
if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
!isOperationLegalOrCustom(ISD::AND, VT) ||
!isOperationLegalOrCustom(Cond, VT) ||
- !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
return SDValue();
Created.push_back(Fold.getNode());
@@ -5860,8 +5966,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
// from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
// constant-folded, select can get lowered to a shuffle with constant mask.
- SDValue Blended =
- DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
+ SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
+ MaskedIsZero, Fold);
return Blended;
}
@@ -5935,6 +6041,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
+ // Because getNegatedExpression can delete nodes we need a handle to keep
+ // temporary nodes alive in case the recursion manages to create an identical
+ // node.
+ std::list<HandleSDNode> Handles;
+
switch (Opcode) {
case ISD::ConstantFP: {
// Don't invert constant FP values after legalization unless the target says
@@ -6003,11 +6114,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // Prevent this node from being deleted by the next call.
+ if (NegX)
+ Handles.emplace_back(NegX);
+
// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+ // We're done with the handles.
+ Handles.clear();
+
// Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
@@ -6052,11 +6170,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // Prevent this node from being deleted by the next call.
+ if (NegX)
+ Handles.emplace_back(NegX);
+
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+ // We're done with the handles.
+ Handles.clear();
+
// Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
@@ -6094,15 +6219,25 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
if (!NegZ)
break;
+ // Prevent this node from being deleted by the next two calls.
+ Handles.emplace_back(NegZ);
+
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // Prevent this node from being deleted by the next call.
+ if (NegX)
+ Handles.emplace_back(NegX);
+
// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+ // We're done with the handles.
+ Handles.clear();
+
// Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) {
Cost = std::min(CostX, CostZ);
@@ -6493,6 +6628,58 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
return true;
}
+void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
+ SelectionDAG &DAG) const {
+ assert(Node->getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Node->getValueType(0);
+ unsigned VTBits = VT.getScalarSizeInBits();
+ assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
+
+ bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
+ bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
+ SDValue ShOpLo = Node->getOperand(0);
+ SDValue ShOpHi = Node->getOperand(1);
+ SDValue ShAmt = Node->getOperand(2);
+ EVT ShAmtVT = ShAmt.getValueType();
+ EVT ShAmtCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
+ SDLoc dl(Node);
+
+ // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
+ // ISD::SRA/L nodes haven't. Insert an AND to be safe; it's usually optimized
+ // away during isel.
+ SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(VTBits - 1, dl, ShAmtVT));
+ SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, dl, ShAmtVT))
+ : DAG.getConstant(0, dl, VT);
+
+ SDValue Tmp2, Tmp3;
+ if (IsSHL) {
+ Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
+ } else {
+ Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
+ }
+
+ // If the shift amount is larger than or equal to the width of a part, we don't
+ // use the result from the FSHL/FSHR. Insert a test and select the appropriate
+ // values for large shift amounts.
+ SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(VTBits, dl, ShAmtVT));
+ SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
+ DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
+
+ if (IsSHL) {
+ Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
+ Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
+ } else {
+ Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
+ Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
+ }
+}
+
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
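
A standalone sketch of the SHL_PARTS case of expandShiftParts above, with assumed 32-bit parts: a funnel shift produces the high half for amounts below the part width, a plain shift of the low part by (amt & 31) plus a zeroed low half covers amounts of 32 and up, and a compare-and-select picks between the two. The SRL/SRA_PARTS cases follow the same shape.

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt &= 31;
  return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
}

static void shlParts(uint32_t &Lo, uint32_t &Hi, unsigned Amt) {
  uint32_t Tmp2 = fshl32(Hi, Lo, Amt);  // candidate Hi for Amt < 32
  uint32_t Tmp3 = Lo << (Amt & 31);     // candidate Lo (or Hi when Amt >= 32)
  bool Big = (Amt & 32) != 0;           // Amt >= 32, assuming Amt < 64
  Hi = Big ? Tmp3 : Tmp2;
  Lo = Big ? 0 : Tmp3;
}

int main() {
  const uint64_t Val = 0x0123456789ABCDEFull;
  for (unsigned Amt = 0; Amt < 64; ++Amt) {
    uint32_t Lo = uint32_t(Val), Hi = uint32_t(Val >> 32);
    shlParts(Lo, Hi, Amt);
    assert(((uint64_t(Hi) << 32) | Lo) == (Val << Amt));
  }
  return 0;
}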
@@ -6514,7 +6701,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
- // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
+ // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
EVT IntVT = SrcVT.changeTypeToInteger();
EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
@@ -7006,6 +7193,129 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
return true;
}
+SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+
+ if (!VT.isSimple())
+ return SDValue();
+
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().getScalarType().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::i16:
+ // Use a rotate by 8. This can be further expanded if necessary.
+ return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ case MVT::i32:
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+ DAG.getConstant(0xFF0000, dl, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
+ DAG.getConstant(255ULL<<48, dl, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
+ DAG.getConstant(255ULL<<40, dl, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
+ DAG.getConstant(255ULL<<32, dl, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
+ DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+ DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
+ DAG.getConstant(255ULL<<8 , dl, VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
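For reference, the MVT::i32 arm of this expansion is the familiar shift/mask/or byte swap; a small C++ sketch that mirrors the Tmp1..Tmp4 sequence above (function name is illustrative):

#include <cstdint>

uint32_t bswap32_expanded(uint32_t V) {
  uint32_t T4 = V << 24;
  uint32_t T3 = (V << 8) & 0x00FF0000u;
  uint32_t T2 = (V >> 8) & 0x0000FF00u;
  uint32_t T1 = V >> 24;
  return (T4 | T3) | (T2 | T1);   // 0x11223344 -> 0x44332211
}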
+
+SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2, Tmp3;
+
+ // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
+ // pairs, and finally the i1 pairs.
+ // TODO: We can easily support i4/i2 legal types if any target ever does.
+ if (Sz >= 8 && isPowerOf2_32(Sz)) {
+ // Create the masks - repeating the pattern every byte.
+ APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
+ APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
+ APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
+ APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
+
+ // BSWAP if the type is wider than a single byte.
+ Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
+
+ // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ return Tmp;
+ }
+
+ Tmp = DAG.getConstant(0, dl, VT);
+ for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
+ if (I < J)
+ Tmp2 =
+ DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
+ else
+ Tmp2 =
+ DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
+
+ APInt Shift(Sz, 1);
+ Shift <<= J;
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
+ }
+
+ return Tmp;
+}
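The power-of-two path corresponds to the classic mask-and-shift bit reversal; a C++ sketch for a 32-bit value, assuming the GCC/Clang __builtin_bswap32 builtin for the initial byte swap:

#include <cstdint>

uint32_t bitreverse32(uint32_t V) {
  V = __builtin_bswap32(V);                                  // reverse byte order first
  V = ((V & 0xF0F0F0F0u) >> 4) | ((V & 0x0F0F0F0Fu) << 4);   // swap nibbles
  V = ((V & 0xCCCCCCCCu) >> 2) | ((V & 0x33333333u) << 2);   // swap bit pairs
  V = ((V & 0xAAAAAAAAu) >> 1) | ((V & 0x55555555u) << 1);   // swap adjacent bits
  return V;
}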
+
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SelectionDAG &DAG) const {
@@ -7490,39 +7800,51 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}
-static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
- SDValue Idx,
- EVT VecVT,
- const SDLoc &dl) {
+static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
+ EVT VecVT, const SDLoc &dl,
+ unsigned NumSubElts) {
if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
return Idx;
EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorMinNumElements();
if (VecVT.isScalableVector()) {
- SDValue VS = DAG.getVScale(dl, IdxVT,
- APInt(IdxVT.getFixedSizeInBits(),
- NElts));
- SDValue Sub = DAG.getNode(ISD::SUB, dl, IdxVT, VS,
- DAG.getConstant(1, dl, IdxVT));
-
+ // If this is a constant index and we know that the index plus the number of
+ // elements in the subvector minus one is less than the minimum number of
+ // elements, then it's safe to return Idx.
+ if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
+ if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
+ return Idx;
+ SDValue VS =
+ DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
+ unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
+ SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
+ DAG.getConstant(NumSubElts, dl, IdxVT));
return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
- } else {
- if (isPowerOf2_32(NElts)) {
- APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
- Log2_32(NElts));
- return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
- DAG.getConstant(Imm, dl, IdxVT));
- }
}
-
+ if (isPowerOf2_32(NElts) && NumSubElts == 1) {
+ APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
+ return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
+ DAG.getConstant(Imm, dl, IdxVT));
+ }
+ unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
- DAG.getConstant(NElts - 1, dl, IdxVT));
+ DAG.getConstant(MaxIndex, dl, IdxVT));
}
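As a scalar model of the clamping rule for scalable vectors, where the runtime element count is vscale * MinNElts (helper name illustrative):

#include <algorithm>
#include <cstdint>

// Clamp Idx so that Idx .. Idx + NumSubElts - 1 stays inside the vector.
uint64_t clampIndex(uint64_t Idx, uint64_t VScale, uint64_t MinNElts,
                    uint64_t NumSubElts) {
  uint64_t NElts = VScale * MinNElts;
  // Saturating subtract (the USUBSAT above) when NumSubElts may exceed MinNElts.
  uint64_t Max = NElts >= NumSubElts ? NElts - NumSubElts : 0;
  return std::min(Idx, Max);   // the final UMIN
}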
SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
SDValue VecPtr, EVT VecVT,
SDValue Index) const {
+ return getVectorSubVecPointer(
+ DAG, VecPtr, VecVT,
+ EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
+ Index);
+}
+
+SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
+ SDValue VecPtr, EVT VecVT,
+ EVT SubVecVT,
+ SDValue Index) const {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
@@ -7534,7 +7856,13 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
- Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
+ // Scalable vectors don't need clamping as these are checked at compile time
+ if (SubVecVT.isFixedLengthVector()) {
+ assert(SubVecVT.getVectorElementType() == EltVT &&
+ "Sub-vector must be a fixed vector with matching element type");
+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
+ SubVecVT.getVectorNumElements());
+ }
EVT IdxVT = Index.getValueType();
@@ -7717,8 +8045,7 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
unsigned BitWidth = LHS.getScalarValueSizeInBits();
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
- LHS, RHS);
+ SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue SumDiff = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
SDValue Zero = DAG.getConstant(0, dl, VT);
@@ -7732,7 +8059,9 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
}
// Overflow ? 0xffff.... : (LHS + RHS)
return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
- } else if (Opcode == ISD::USUBSAT) {
+ }
+
+ if (Opcode == ISD::USUBSAT) {
if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
// (LHS - RHS) & ~OverflowMask
SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
@@ -7741,17 +8070,17 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
}
// Overflow ? 0 : (LHS - RHS)
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
- } else {
- // SatMax -> Overflow && SumDiff < 0
- // SatMin -> Overflow && SumDiff >= 0
- APInt MinVal = APInt::getSignedMinValue(BitWidth);
- APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
- SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
- return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
+
+ // SatMax -> Overflow && SumDiff < 0
+ // SatMin -> Overflow && SumDiff >= 0
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
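The three saturation cases in this function map onto the following scalar C++ sketch, using the GCC/Clang overflow builtins as stand-ins for the UADDO/USUBO/SADDO result pair (function names illustrative):

#include <climits>
#include <cstdint>

uint32_t uaddsat(uint32_t A, uint32_t B) {
  uint32_t Sum;
  return __builtin_add_overflow(A, B, &Sum) ? UINT32_MAX : Sum;  // overflow -> all-ones
}

uint32_t usubsat(uint32_t A, uint32_t B) {
  uint32_t Diff;
  return __builtin_sub_overflow(A, B, &Diff) ? 0u : Diff;        // overflow -> zero
}

int32_t saddsat(int32_t A, int32_t B) {
  int32_t Sum;
  if (!__builtin_add_overflow(A, B, &Sum))
    return Sum;
  // On overflow the builtin stores the wrapped sum: negative means the true
  // result was too large (SatMax), non-negative means too small (SatMin).
  return Sum < 0 ? INT32_MAX : INT32_MIN;
}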
SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
@@ -8309,7 +8638,8 @@ bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
SDVTList VTs = DAG.getVTList(VT, VT);
Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
return true;
- } else if (isOperationLegalOrCustom(DivOpc, VT)) {
+ }
+ if (isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
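In scalar terms the DIV-based fallback is just the identity below; a sketch assuming Y != 0 and no INT_MIN / -1 overflow:

#include <cstdint>

int32_t rem_via_div(int32_t X, int32_t Y) {
  return X - (X / Y) * Y;   // X % Y == X - X/Y*Y
}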
@@ -8329,7 +8659,8 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
- unsigned SatWidth = Node->getConstantOperandVal(1);
+ EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ unsigned SatWidth = SatVT.getScalarSizeInBits();
unsigned DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth &&
"Expected saturation width smaller than result width");
@@ -8420,3 +8751,210 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
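The saturating conversion selected here clamps to the destination range and maps NaN to zero (the final SETUO select on Src). A scalar C++ model for f32 -> i32, assuming the saturation width equals the result width:

#include <climits>
#include <cstdint>

int32_t fptosi_sat_f32_i32(float X) {
  if (X != X) return 0;                        // NaN -> 0
  if (X <= -2147483648.0f) return INT32_MIN;   // saturate below
  if (X >= 2147483648.0f) return INT32_MAX;    // saturate above (2^31 as float)
  return (int32_t)X;                           // in range: ordinary truncation
}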
+
+SDValue TargetLowering::expandVectorSplice(SDNode *Node,
+ SelectionDAG &DAG) const {
+ assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
+ assert(Node->getValueType(0).isScalableVector() &&
+ "Fixed length vector types expected to use SHUFFLE_VECTOR!");
+
+ EVT VT = Node->getValueType(0);
+ SDValue V1 = Node->getOperand(0);
+ SDValue V2 = Node->getOperand(1);
+ int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
+ SDLoc DL(Node);
+
+ // Expand through memory as follows:
+ // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
+ // Store V1, Ptr
+ // Store V2, Ptr + sizeof(V1)
+ // If (Imm < 0)
+ // TrailingElts = -Imm
+ // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
+ // else
+ // Ptr = Ptr + (Imm * sizeof(VT.Elt))
+ // Res = Load Ptr
+
+ Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
+
+ EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorElementCount() * 2);
+ SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
+ EVT PtrVT = StackPtr.getValueType();
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ // Store the lo part of CONCAT_VECTORS(V1, V2)
+ SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
+ // Store the hi part of CONCAT_VECTORS(V1, V2)
+ SDValue OffsetToV2 = DAG.getVScale(
+ DL, PtrVT,
+ APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
+ SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
+
+ if (Imm >= 0) {
+ // Load back the required element. getVectorElementPointer takes care of
+ // clamping the index if it's out-of-bounds.
+ StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
+ // Load the spliced result
+ return DAG.getLoad(VT, DL, StoreV2, StackPtr,
+ MachinePointerInfo::getUnknownStack(MF));
+ }
+
+ uint64_t TrailingElts = -Imm;
+
+ // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
+ TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
+ SDValue TrailingBytes =
+ DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
+
+ if (TrailingElts > VT.getVectorMinNumElements()) {
+ SDValue VLBytes = DAG.getVScale(
+ DL, PtrVT,
+ APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
+ TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
+ }
+
+ // Calculate the start address of the spliced result.
+ StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
+
+ // Load the spliced result
+ return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
+ MachinePointerInfo::getUnknownStack(MF));
+}
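Ignoring the clamping of out-of-range immediates, the splice materialized through the stack slot has this scalar model over std::vector, where N is the common element count and -N <= Imm <= N (function name illustrative):

#include <cstddef>
#include <vector>

std::vector<int> splice(const std::vector<int> &V1, const std::vector<int> &V2,
                        long Imm) {
  size_t N = V1.size();                        // == V2.size()
  std::vector<int> Concat(V1);                 // the two stores above
  Concat.insert(Concat.end(), V2.begin(), V2.end());
  // Imm >= 0 starts Imm elements into V1; Imm < 0 keeps the last -Imm
  // (trailing) elements of V1 followed by elements of V2.
  size_t Start = Imm >= 0 ? (size_t)Imm : N - (size_t)(-Imm);
  return std::vector<int>(Concat.begin() + Start, Concat.begin() + Start + N);
}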
+
+bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC, bool &NeedInvert,
+ const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ NeedInvert = false;
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default:
+ llvm_unreachable("Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(InvCC);
+ return true;
+ }
+ // Swapping operands didn't work. Try inverting the condition.
+ bool NeedSwap = false;
+ InvCC = getSetCCInverse(CCCode, OpVT);
+ if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ // If inverting the condition is not enough, try swapping operands
+ // on top of it.
+ InvCC = ISD::getSetCCSwappedOperands(InvCC);
+ NeedSwap = true;
+ }
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ if (NeedSwap)
+ std::swap(LHS, RHS);
+ return true;
+ }
+
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default:
+ llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETUO:
+ if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
+ CC1 = ISD::SETUNE;
+ CC2 = ISD::SETUNE;
+ Opc = ISD::OR;
+ break;
+ }
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
+ NeedInvert = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETO:
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ;
+ CC2 = ISD::SETOEQ;
+ Opc = ISD::AND;
+ break;
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ // If the SETUO or SETO CC isn't legal, we might be able to use
+ // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
+ // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
+ // the operands.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ if (!TLI.isCondCodeLegal(CC2, OpVT) &&
+ (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
+ TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
+ CC1 = ISD::SETOGT;
+ CC2 = ISD::SETOLT;
+ Opc = ISD::OR;
+ NeedInvert = ((unsigned)CCCode & 0x8U);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break, otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fallthrough if we are unsigned integer.
+ LLVM_FALLTHROUGH;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ // If all combinations of inverting the condition and swapping operands
+ // didn't work then we have no means to expand the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ }
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
+ SetCC2.getValue(1));
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ return true;
+ }
+ }
+ return false;
+}
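For IEEE floating point, the two-compare patterns this routine emits reduce to ordinary (ordered) C++ comparisons; a sketch of the SETO/SETUO/SETONE/SETUEQ cases:

bool setO(double A, double B)   { return A == A && B == B; }        // neither operand is NaN
bool setUO(double A, double B)  { return !(A == A) || !(B == B); }  // at least one NaN
bool setONE(double A, double B) { return A < B || A > B; }          // SETOGT || SETOLT
bool setUEQ(double A, double B) { return !(A < B || A > B); }       // inverted SETONE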
diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 45427dc41e6e..86b559fd6413 100644
--- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -17,11 +17,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -67,6 +69,7 @@ public:
ShadowStackGCLowering();
bool doInitialization(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
private:
@@ -86,10 +89,12 @@ private:
} // end anonymous namespace
char ShadowStackGCLowering::ID = 0;
+char &llvm::ShadowStackGCLoweringID = ShadowStackGCLowering::ID;
INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE,
"Shadow Stack GC Lowering", false, false)
INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE,
"Shadow Stack GC Lowering", false, false)
@@ -234,8 +239,8 @@ void ShadowStackGCLowering::CollectRoots(Function &F) {
SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ for (BasicBlock &BB : F)
+ for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;)
if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
if (Function *F = CI->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::gcroot) {
@@ -280,6 +285,10 @@ GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
return dyn_cast<GetElementPtrInst>(Val);
}
+void ShadowStackGCLowering::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
/// runOnFunction - Insert code to maintain the shadow stack.
bool ShadowStackGCLowering::runOnFunction(Function &F) {
// Quick exit for functions that do not use the shadow stack GC.
@@ -297,6 +306,10 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
if (Roots.empty())
return false;
+ Optional<DomTreeUpdater> DTU;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
+
// Build the constant map and figure the type of the shadow stack entry.
Value *FrameMap = GetFrameMap(F);
Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
@@ -348,7 +361,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
AtEntry.CreateStore(NewHeadVal, Head);
// For each instruction that escapes...
- EscapeEnumerator EE(F, "gc_cleanup");
+ EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true,
+ DTU.hasValue() ? DTU.getPointer() : nullptr);
while (IRBuilder<> *AtExit = EE.Next()) {
// Pop the entry from the shadow stack. Don't reuse CurrentHead from
// AtEntry, since that would make the value live for the entire function.
diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index d2fd4a6d8fd9..8211e3d6a9dd 100644
--- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -472,8 +472,12 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
// Finally, for any returns from this function, if this function contains an
// invoke, add a call to unregister the function context.
- for (ReturnInst *Return : Returns)
- CallInst::Create(UnregisterFn, FuncCtx, "", Return);
+ for (ReturnInst *Return : Returns) {
+ Instruction *InsertPoint = Return;
+ if (CallInst *CI = Return->getParent()->getTerminatingMustTailCall())
+ InsertPoint = CI;
+ CallInst::Create(UnregisterFn, FuncCtx, "", InsertPoint);
+ }
return true;
}
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
index d2bfdc663edb..c933031ef37d 100644
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -83,7 +83,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
for (MachineInstr &MI : MBB) {
- if (MI.isDebugInstr())
+ if (MI.isDebugOrPseudoInstr())
continue;
// Insert a store index for the instr.
@@ -241,19 +241,18 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
- if (!MI.isDebugInstr() && mi2iMap.find(&MI) == mi2iMap.end())
+ if (!MI.isDebugOrPseudoInstr() && mi2iMap.find(&MI) == mi2iMap.end())
insertMachineInstrInMaps(MI);
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SlotIndexes::dump() const {
- for (IndexList::const_iterator itr = indexList.begin();
- itr != indexList.end(); ++itr) {
- dbgs() << itr->getIndex() << " ";
+ for (const IndexListEntry &ILE : indexList) {
+ dbgs() << ILE.getIndex() << " ";
- if (itr->getInstr()) {
- dbgs() << *itr->getInstr();
+ if (ILE.getInstr()) {
+ dbgs() << *ILE.getInstr();
} else {
dbgs() << "\n";
}
@@ -280,4 +279,3 @@ LLVM_DUMP_METHOD void SlotIndex::dump() const {
dbgs() << "\n";
}
#endif
-
diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp
index 4bb50a285497..91da5e49713c 100644
--- a/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -121,9 +121,9 @@ struct SpillPlacement::Node {
SumLinkWeights += w;
// There can be multiple links to the same bundle, add them up.
- for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
- if (I->second == b) {
- I->first += w;
+ for (std::pair<BlockFrequency, unsigned> &L : Links)
+ if (L.second == b) {
+ L.first += w;
return;
}
// This must be the first link to b.
@@ -153,11 +153,11 @@ struct SpillPlacement::Node {
// Compute the weighted sum of inputs.
BlockFrequency SumN = BiasN;
BlockFrequency SumP = BiasP;
- for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) {
- if (nodes[I->second].Value == -1)
- SumN += I->first;
- else if (nodes[I->second].Value == 1)
- SumP += I->first;
+ for (std::pair<BlockFrequency, unsigned> &L : Links) {
+ if (nodes[L.second].Value == -1)
+ SumN += L.first;
+ else if (nodes[L.second].Value == 1)
+ SumP += L.first;
}
// Each weighted sum is going to be less than the total frequency of the
@@ -258,35 +258,33 @@ void SpillPlacement::setThreshold(const BlockFrequency &Entry) {
/// addConstraints - Compute node biases and weights from a set of constraints.
/// Set a bit in NodeMask for each active node.
void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
- for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(),
- E = LiveBlocks.end(); I != E; ++I) {
- BlockFrequency Freq = BlockFrequencies[I->Number];
+ for (const BlockConstraint &LB : LiveBlocks) {
+ BlockFrequency Freq = BlockFrequencies[LB.Number];
// Live-in to block?
- if (I->Entry != DontCare) {
- unsigned ib = bundles->getBundle(I->Number, false);
+ if (LB.Entry != DontCare) {
+ unsigned ib = bundles->getBundle(LB.Number, false);
activate(ib);
- nodes[ib].addBias(Freq, I->Entry);
+ nodes[ib].addBias(Freq, LB.Entry);
}
// Live-out from block?
- if (I->Exit != DontCare) {
- unsigned ob = bundles->getBundle(I->Number, true);
+ if (LB.Exit != DontCare) {
+ unsigned ob = bundles->getBundle(LB.Number, true);
activate(ob);
- nodes[ob].addBias(Freq, I->Exit);
+ nodes[ob].addBias(Freq, LB.Exit);
}
}
}
/// addPrefSpill - Same as addConstraints(PrefSpill)
void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
- for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
- I != E; ++I) {
- BlockFrequency Freq = BlockFrequencies[*I];
+ for (unsigned B : Blocks) {
+ BlockFrequency Freq = BlockFrequencies[B];
if (Strong)
Freq += Freq;
- unsigned ib = bundles->getBundle(*I, false);
- unsigned ob = bundles->getBundle(*I, true);
+ unsigned ib = bundles->getBundle(B, false);
+ unsigned ob = bundles->getBundle(B, true);
activate(ib);
activate(ob);
nodes[ib].addBias(Freq, PrefSpill);
@@ -295,9 +293,7 @@ void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
}
void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
- for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
- ++I) {
- unsigned Number = *I;
+ for (unsigned Number : Links) {
unsigned ib = bundles->getBundle(Number, false);
unsigned ob = bundles->getBundle(Number, true);
@@ -377,3 +373,26 @@ SpillPlacement::finish() {
ActiveNodes = nullptr;
return Perfect;
}
+
+void SpillPlacement::BlockConstraint::print(raw_ostream &OS) const {
+ auto toString = [](BorderConstraint C) -> StringRef {
+ switch(C) {
+ case DontCare: return "DontCare";
+ case PrefReg: return "PrefReg";
+ case PrefSpill: return "PrefSpill";
+ case PrefBoth: return "PrefBoth";
+ case MustSpill: return "MustSpill";
+ };
+ llvm_unreachable("uncovered switch");
+ };
+
+ dbgs() << "{" << Number << ", "
+ << toString(Entry) << ", "
+ << toString(Exit) << ", "
+ << (ChangesValue ? "changes" : "no change") << "}";
+}
+
+void SpillPlacement::BlockConstraint::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
diff --git a/llvm/lib/CodeGen/SpillPlacement.h b/llvm/lib/CodeGen/SpillPlacement.h
index aa0e07ef92e3..d2273a163025 100644
--- a/llvm/lib/CodeGen/SpillPlacement.h
+++ b/llvm/lib/CodeGen/SpillPlacement.h
@@ -95,6 +95,9 @@ public:
/// the block has a non-PHI def. When this is false, a live-in value on
/// the stack can be live-out on the stack without inserting a spill.
bool ChangesValue;
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
};
/// prepare - Reset state and prepare for a new spill placement computation.
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index a6a3149ae25b..c70620fd7532 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -94,10 +94,10 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
// instructions in the block.
if (ExceptionalSuccessors.empty())
return LIP.first;
- for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
- if ((EHPadSuccessor && I->isCall()) ||
- I->getOpcode() == TargetOpcode::INLINEASM_BR) {
- LIP.second = LIS.getInstructionIndex(*I);
+ for (const MachineInstr &MI : llvm::reverse(MBB)) {
+ if ((EHPadSuccessor && MI.isCall()) ||
+ MI.getOpcode() == TargetOpcode::INLINEASM_BR) {
+ LIP.second = LIS.getInstructionIndex(MI);
break;
}
}
@@ -118,6 +118,13 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
if (!VNI)
return LIP.first;
+ // The def of a statepoint instruction is a gc relocation and it should be
+ // live in the landing pad, so we cannot split the interval after the
+ // statepoint instruction.
+ if (SlotIndex::isSameInstr(VNI->def, LIP.second))
+ if (auto *I = LIS.getInstructionFromIndex(LIP.second))
+ if (I->getOpcode() == TargetOpcode::STATEPOINT)
+ return LIP.second;
+
// If the value leaving MBB was defined after the call in MBB, it can't
// really be live-in to the landing pad. This can happen if the landing pad
// has a PHI, and this register is undef on the exceptional edge.
@@ -357,15 +364,15 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
//===----------------------------------------------------------------------===//
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa,
- LiveIntervals &lis, VirtRegMap &vrm,
- MachineDominatorTree &mdt,
- MachineBlockFrequencyInfo &mbfi)
- : SA(sa), AA(aa), LIS(lis), VRM(vrm),
- MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt),
- TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()),
- TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()),
- MBFI(mbfi), RegAssign(Allocator) {}
+SplitEditor::SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA,
+ LiveIntervals &LIS, VirtRegMap &VRM,
+ MachineDominatorTree &MDT,
+ MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI)
+ : SA(SA), AA(AA), LIS(LIS), VRM(VRM),
+ MRI(VRM.getMachineFunction().getRegInfo()), MDT(MDT),
+ TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()),
+ TRI(*VRM.getMachineFunction().getSubtarget().getRegisterInfo()),
+ MBFI(MBFI), VRAI(VRAI), RegAssign(Allocator) {}
void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
Edit = &LRE;
@@ -557,71 +564,19 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
// First pass: Try to find a perfectly matching subregister index. If none
// exists find the one covering the most lanemask bits.
- SmallVector<unsigned, 8> PossibleIndexes;
- unsigned BestIdx = 0;
- unsigned BestCover = 0;
const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
- for (unsigned Idx = 1, E = TRI.getNumSubRegIndices(); Idx < E; ++Idx) {
- // Is this index even compatible with the given class?
- if (TRI.getSubClassWithSubReg(RC, Idx) != RC)
- continue;
- LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
- // Early exit if we found a perfect match.
- if (SubRegMask == LaneMask) {
- BestIdx = Idx;
- break;
- }
- // The index must not cover any lanes outside \p LaneMask.
- if ((SubRegMask & ~LaneMask).any())
- continue;
-
- unsigned PopCount = SubRegMask.getNumLanes();
- PossibleIndexes.push_back(Idx);
- if (PopCount > BestCover) {
- BestCover = PopCount;
- BestIdx = Idx;
- }
- }
+ SmallVector<unsigned, 8> Indexes;
// Abort if we cannot possibly implement the COPY with the given indexes.
- if (BestIdx == 0)
+ if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes))
report_fatal_error("Impossible to implement partial COPY");
- SlotIndex Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore,
- BestIdx, DestLI, Late, SlotIndex());
-
- // Greedy heuristic: Keep iterating keeping the best covering subreg index
- // each time.
- LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
- while (LanesLeft.any()) {
- unsigned BestIdx = 0;
- int BestCover = std::numeric_limits<int>::min();
- for (unsigned Idx : PossibleIndexes) {
- LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
- // Early exit if we found a perfect match.
- if (SubRegMask == LanesLeft) {
- BestIdx = Idx;
- break;
- }
-
- // Try to cover as much of the remaining lanes as possible but
- // as few of the already covered lanes as possible.
- int Cover = (SubRegMask & LanesLeft).getNumLanes()
- - (SubRegMask & ~LanesLeft).getNumLanes();
- if (Cover > BestCover) {
- BestCover = Cover;
- BestIdx = Idx;
- }
- }
-
- if (BestIdx == 0)
- report_fatal_error("Impossible to implement partial COPY");
-
- buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
- DestLI, Late, Def);
- LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx);
+ SlotIndex Def;
+ for (unsigned BestIdx : Indexes) {
+ Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
+ DestLI, Late, Def);
}
return Def;
@@ -747,6 +702,23 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << ": not live\n");
return End;
}
+ SlotIndex LSP = SA.getLastSplitPoint(&MBB);
+ if (LSP < Last) {
+ // It could be that the use after LSP is a def, and thus the ParentVNI
+ // just selected starts at that def. For this case to exist, the def
+ // must be part of a tied def/use pair (as otherwise we'd have split
+ // distinct live ranges into individual live intervals), and thus we
+ // can insert the def into the VNI of the use and the tied def/use
+ // pair can live in the resulting interval.
+ Last = LSP;
+ ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ // undef use --> undef tied def
+ LLVM_DEBUG(dbgs() << ": tied use not live\n");
+ return End;
+ }
+ }
+
LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id);
VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
SA.getLastSplitPointIter(&MBB));
@@ -836,6 +808,12 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
return VNI->def;
}
+static bool hasTiedUseOf(MachineInstr &MI, unsigned Reg) {
+ return any_of(MI.defs(), [Reg](const MachineOperand &MO) {
+ return MO.isReg() && MO.isTied() && MO.getReg() == Reg;
+ });
+}
+
void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
assert(OpenIdx && "openIntv not called before overlapIntv");
const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
@@ -847,6 +825,16 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
// The complement interval will be extended as needed by LICalc.extend().
if (ParentVNI)
forceRecompute(0, *ParentVNI);
+
+ // If the last use is tied to a def, we can't mark it as live for the
+ // interval which includes only the use. That would cause the tied pair
+ // to end up in two different intervals.
+ if (auto *MI = LIS.getInstructionFromIndex(End))
+ if (hasTiedUseOf(*MI, Edit->getReg())) {
+ LLVM_DEBUG(dbgs() << "skip overlap due to tied def at end\n");
+ return;
+ }
+
LLVM_DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
RegAssign.insert(Start, End, OpenIdx);
LLVM_DEBUG(dump());
@@ -862,8 +850,8 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
RegAssignMap::iterator AssignI;
AssignI.setMap(RegAssign);
- for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
- SlotIndex Def = Copies[i]->def;
+ for (const VNInfo *C : Copies) {
+ SlotIndex Def = C->def;
MachineInstr *MI = LIS.getInstructionFromIndex(Def);
assert(MI && "No instruction for back-copy");
@@ -871,7 +859,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
MachineBasicBlock::iterator MBBI(MI);
bool AtBegin;
do AtBegin = MBBI == MBB->begin();
- while (!AtBegin && (--MBBI)->isDebugInstr());
+ while (!AtBegin && (--MBBI)->isDebugOrPseudoInstr());
LLVM_DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
LIS.removeVRegDefAt(*LI, Def);
@@ -887,12 +875,18 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
if (AssignI.stop() != Def)
continue;
unsigned RegIdx = AssignI.value();
- if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
+ // We could hoist a back-copy right after another back-copy. As a result,
+ // MBBI points to a copy instruction which is actually dead now.
+ // We cannot set its stop to MBBI, which would be the same as start, and
+ // the interval does not support that.
+ SlotIndex Kill =
+ AtBegin ? SlotIndex() : LIS.getInstructionIndex(*MBBI).getRegSlot();
+ if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg()) ||
+ Kill <= AssignI.start()) {
LLVM_DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx
<< '\n');
forceRecompute(RegIdx, *Edit->getParent().getVNInfoAt(Def));
} else {
- SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot();
LLVM_DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
AssignI.setStop(Kill);
}
@@ -1098,10 +1092,13 @@ void SplitEditor::hoistCopies() {
NotToHoistSet.insert(ParentVNI->id);
continue;
}
- SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
- Dom.second =
- defFromParent(0, ParentVNI, Last, *Dom.first,
- SA.getLastSplitPointIter(Dom.first))->def;
+ SlotIndex LSP = SA.getLastSplitPoint(Dom.first);
+ if (LSP <= ParentVNI->def) {
+ NotToHoistSet.insert(ParentVNI->id);
+ continue;
+ }
+ Dom.second = defFromParent(0, ParentVNI, LSP, *Dom.first,
+ SA.getLastSplitPointIter(Dom.first))->def;
}
// Remove redundant back-copies that are now known to be dominated by another
@@ -1322,11 +1319,9 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
SmallVector<ExtPoint,4> ExtPoints;
- for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
- RE = MRI.reg_end(); RI != RE;) {
- MachineOperand &MO = *RI;
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI.reg_operands(Edit->getReg()))) {
MachineInstr *MI = MO.getParent();
- ++RI;
// LiveDebugVariables should have handled all DBG_VALUE instructions.
if (MI->isDebugValue()) {
LLVM_DEBUG(dbgs() << "Zapping " << *MI);
@@ -1416,8 +1411,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
void SplitEditor::deleteRematVictims() {
SmallVector<MachineInstr*, 8> Dead;
- for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
- LiveInterval *LI = &LIS.getInterval(*I);
+ for (const Register &R : *Edit) {
+ LiveInterval *LI = &LIS.getInterval(R);
for (const LiveRange::Segment &S : LI->segments) {
// Dead defs end at the dead slot.
if (S.end != S.valno->def.getDeadSlot())
@@ -1554,7 +1549,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
}
// Calculate spill weight and allocation hints for new intervals.
- Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops, MBFI);
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), VRAI);
assert(!LRMap || LRMap->size() == Edit->size());
}
@@ -1583,7 +1578,7 @@ bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
openIntv();
- SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB);
SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr,
LastSplitPoint));
if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) {
@@ -1737,7 +1732,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
return;
}
- SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB);
if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) {
//
@@ -1814,7 +1809,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
<< ", enter after " << EnterAfter
<< (BI.LiveIn ? ", stack-in" : ", defined in block"));
- SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB);
assert(IntvOut && "Must have register out");
assert(BI.LiveOut && "Must be live-out");
@@ -1864,3 +1859,16 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr));
useIntv(From, Idx);
}
+
+void SplitAnalysis::BlockInfo::print(raw_ostream &OS) const {
+ OS << "{" << printMBBReference(*MBB) << ", "
+ << "uses " << FirstInstr << " to " << LastInstr << ", "
+ << "1st def " << FirstDef << ", "
+ << (LiveIn ? "live in" : "dead in") << ", "
+ << (LiveOut ? "live out" : "dead out") << "}";
+}
+
+void SplitAnalysis::BlockInfo::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h
index a94518f5a4fc..fbcffacb49ab 100644
--- a/llvm/lib/CodeGen/SplitKit.h
+++ b/llvm/lib/CodeGen/SplitKit.h
@@ -44,6 +44,7 @@ class MachineRegisterInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
class VirtRegMap;
+class VirtRegAuxInfo;
/// Determines the latest safe point in a block in which we can insert a split,
/// spill or other instruction related with CurLI.
@@ -131,6 +132,9 @@ public:
bool isOneInstr() const {
return SlotIndex::isSameInstr(FirstInstr, LastInstr);
}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
};
private:
@@ -235,6 +239,10 @@ public:
return IPA.getLastInsertPoint(*CurLI, *MF.getBlockNumbered(Num));
}
+ SlotIndex getLastSplitPoint(MachineBasicBlock *BB) {
+ return IPA.getLastInsertPoint(*CurLI, *BB);
+ }
+
MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {
return IPA.getLastInsertPointIter(*CurLI, *BB);
}
@@ -265,6 +273,7 @@ class LLVM_LIBRARY_VISIBILITY SplitEditor {
const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
const MachineBlockFrequencyInfo &MBFI;
+ VirtRegAuxInfo &VRAI;
public:
/// ComplementSpillMode - Select how the complement live range should be
@@ -450,9 +459,9 @@ private:
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
- SplitEditor(SplitAnalysis &sa, AAResults &aa, LiveIntervals &lis,
- VirtRegMap &vrm, MachineDominatorTree &mdt,
- MachineBlockFrequencyInfo &mbfi);
+ SplitEditor(SplitAnalysis &SA, AAResults &AA, LiveIntervals &LIS,
+ VirtRegMap &VRM, MachineDominatorTree &MDT,
+ MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI);
/// reset - Prepare for a new split.
void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
@@ -502,7 +511,8 @@ public:
SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
/// overlapIntv - Indicate that all instructions in range should use the open
- /// interval, but also let the complement interval be live.
+ /// interval. If End has a tied-def use of the register, the complement
+ /// interval is used instead. Let the complement interval be live.
///
/// This doubles the register pressure, but is sometimes required to deal with
/// register uses after the last valid split point.
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index af58204f6db5..162f3aab024d 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -678,9 +678,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
// to this bb).
BitVector BetweenStartEnd;
BetweenStartEnd.resize(NumSlot);
- for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ BlockBitVecMap::const_iterator I = SeenStartMap.find(Pred);
if (I != SeenStartMap.end()) {
BetweenStartEnd |= I->second;
}
@@ -819,9 +818,8 @@ void StackColoring::calculateLocalLiveness() {
// Compute LiveIn by unioning together the LiveOut sets of all preds.
BitVector LocalLiveIn;
- for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
- PE = BB->pred_end(); PI != PE; ++PI) {
- LivenessMap::const_iterator I = BlockLiveness.find(*PI);
+ for (MachineBasicBlock *Pred : BB->predecessors()) {
+ LivenessMap::const_iterator I = BlockLiveness.find(Pred);
// PR37130: transformations prior to stack coloring can
// sometimes leave behind statically unreachable blocks; these
// can be safely skipped here.
diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
index faf07e90c39c..36e8f129ea15 100644
--- a/llvm/lib/CodeGen/StackMaps.cpp
+++ b/llvm/lib/CodeGen/StackMaps.cpp
@@ -511,7 +511,7 @@ void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
const MachineFrameInfo &MFI = AP.MF->getFrameInfo();
const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo();
bool HasDynamicFrameSize =
- MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF));
+ MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(*(AP.MF));
uint64_t FrameSize = HasDynamicFrameSize ? UINT64_MAX : MFI.getStackSize();
auto CurrentIt = FnInfos.find(AP.CurrentFnSym);
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 0411faabbcc3..9f229d51b985 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -70,6 +70,7 @@ StackProtector::StackProtector() : FunctionPass(ID), SSPBufferSize(8) {
INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE,
"Insert stack protectors", false, true)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE,
"Insert stack protectors", false, true)
@@ -192,7 +193,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// Ignore intrinsics that do not become real instructions.
// TODO: Narrow this to intrinsics that have store-like effects.
const auto *CI = cast<CallInst>(I);
- if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
+ if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd())
return true;
break;
}
@@ -379,9 +380,8 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
IRBuilder<> &B,
bool *SupportsSelectionDAGSP = nullptr) {
Value *Guard = TLI->getIRStackGuard(B);
- auto GuardMode = TLI->getTargetMachine().Options.StackProtectorGuard;
- if ((GuardMode == llvm::StackProtectorGuards::TLS ||
- GuardMode == llvm::StackProtectorGuards::None) && Guard)
+ StringRef GuardMode = M->getStackProtectorGuard();
+ if ((GuardMode == "tls" || GuardMode.empty()) && Guard)
return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard");
// Use SelectionDAG SSP handling, since there isn't an IR guard.
@@ -470,21 +470,36 @@ bool StackProtector::InsertStackProtectors() {
// instrumentation has already been generated.
HasIRCheck = true;
+ // If we're instrumenting a block with a musttail call, the check has to be
+ // inserted before the call rather than between it and the return. The
+ // verifier guarantees that a musttail call is either directly before the
+ // return or separated from it only by a single bitcast of the return value,
+ // so we don't need to worry about many situations here.
+ Instruction *CheckLoc = RI;
+ Instruction *Prev = RI->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ CheckLoc = Prev;
+ else if (Prev) {
+ Prev = Prev->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ CheckLoc = Prev;
+ }
+
// Generate epilogue instrumentation. The epilogue instrumentation can be
// function-based or inlined depending on which mechanism the target is
// providing.
if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
// Generate the function-based epilogue instrumentation.
// The target provides a guard check function, generate a call to it.
- IRBuilder<> B(RI);
+ IRBuilder<> B(CheckLoc);
LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
Call->setAttributes(GuardCheck->getAttributes());
Call->setCallingConv(GuardCheck->getCallingConv());
} else {
// Generate the epilogue with inline instrumentation.
- // If we do not support SelectionDAG based tail calls, generate IR level
- // tail calls.
+ // If we do not support SelectionDAG based calls, generate IR level
+ // calls.
//
// For each block with a return instruction, convert this:
//
@@ -514,7 +529,8 @@ bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = CreateFailBB();
// Split the basic block before the return instruction.
- BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
+ BasicBlock *NewBB =
+ BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return");
// Update the dominator tree if we need to.
if (DT && DT->isReachableFromEntry(BB)) {
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index a6f8974f3343..ebe00bd7402f 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -76,7 +76,7 @@ namespace {
// OrigAlignments - Alignments of stack objects before coloring.
SmallVector<Align, 16> OrigAlignments;
- // OrigSizes - Sizess of stack objects before coloring.
+ // OrigSizes - Sizes of stack objects before coloring.
SmallVector<unsigned, 16> OrigSizes;
// AllColors - If index is set, it's a spill slot, i.e. color.
@@ -157,12 +157,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
SSRefs.resize(MFI->getObjectIndexEnd());
// FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = &*MBBI;
- for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
- MII != EE; ++MII) {
- MachineInstr &MI = *MII;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isFI())
@@ -474,9 +470,8 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
++I;
}
- for (SmallVectorImpl<MachineInstr *>::iterator I = toErase.begin(),
- E = toErase.end(); I != E; ++I)
- (*I)->eraseFromParent();
+ for (MachineInstr *MI : toErase)
+ MI->eraseFromParent();
return changed;
}
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 575bf555c489..af735f2a0216 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -216,6 +216,9 @@ bool TailDuplicator::tailDuplicateAndUpdate(
// Rewrite uses that are outside of the original def's block.
MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ // Only remove instructions after loop, as DBG_VALUE_LISTs with multiple
+ // uses of VReg may invalidate the use iterator when erased.
+ SmallPtrSet<MachineInstr *, 4> InstrsToRemove;
while (UI != MRI->use_end()) {
MachineOperand &UseMO = *UI;
MachineInstr *UseMI = UseMO.getParent();
@@ -225,13 +228,15 @@ bool TailDuplicator::tailDuplicateAndUpdate(
// a debug instruction that is a kill.
// FIXME: Should it SSAUpdate job to delete debug instructions
// instead of replacing the use with undef?
- UseMI->eraseFromParent();
+ InstrsToRemove.insert(UseMI);
continue;
}
if (UseMI->getParent() == DefBB && !UseMI->isPHI())
continue;
SSAUpdate.RewriteUse(UseMO);
}
+ for (auto *MI : InstrsToRemove)
+ MI->eraseFromParent();
}
SSAUpdateVRs.clear();
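The fix above is the usual collect-then-erase pattern; a generic C++ sketch of why deferring the erase keeps the walk valid (unrelated to MachineInstr, purely illustrative):

#include <list>
#include <vector>

void eraseEven(std::list<int> &L) {
  std::vector<std::list<int>::iterator> Dead;
  for (auto I = L.begin(); I != L.end(); ++I)
    if (*I % 2 == 0)
      Dead.push_back(I);   // erasing here would invalidate I before ++I
  for (auto I : Dead)
    L.erase(I);            // erase after the walk; remaining iterators stay valid
}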
@@ -683,7 +688,7 @@ bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) {
return false;
if (TailBB->pred_empty())
return false;
- MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr();
+ MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(true);
if (I == TailBB->end())
return true;
return I->isUnconditionalBranch();
@@ -1035,10 +1040,9 @@ void TailDuplicator::removeDeadBlock(
MachineFunction *MF = MBB->getParent();
// Update the call site info.
- std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) {
+ for (const MachineInstr &MI : *MBB)
if (MI.shouldUpdateCallSiteInfo())
MF->eraseCallSiteInfo(&MI);
- });
if (RemovalCallback)
(*RemovalCallback)(MBB);
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 165860ef1aa8..2e4a656ea0c8 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -472,36 +472,33 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
return nullptr;
}
-void TargetInstrInfo::getNoop(MCInst &NopInst) const {
- llvm_unreachable("Not implemented");
-}
+MCInst TargetInstrInfo::getNop() const { llvm_unreachable("Not implemented"); }
-static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
- ArrayRef<unsigned> Ops, int FrameIndex,
- const TargetInstrInfo &TII) {
- unsigned StartIdx = 0;
- unsigned NumDefs = 0;
+std::pair<unsigned, unsigned>
+TargetInstrInfo::getPatchpointUnfoldableRange(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
- case TargetOpcode::STACKMAP: {
+ case TargetOpcode::STACKMAP:
// StackMapLiveValues are foldable
- StartIdx = StackMapOpers(&MI).getVarIdx();
- break;
- }
- case TargetOpcode::PATCHPOINT: {
+ return std::make_pair(0, StackMapOpers(&MI).getVarIdx());
+ case TargetOpcode::PATCHPOINT:
// For PatchPoint, the call args are not foldable (even if reported in the
// stackmap e.g. via anyregcc).
- StartIdx = PatchPointOpers(&MI).getVarIdx();
- break;
- }
- case TargetOpcode::STATEPOINT: {
+ return std::make_pair(0, PatchPointOpers(&MI).getVarIdx());
+ case TargetOpcode::STATEPOINT:
// For statepoints, fold deopt and gc arguments, but not call arguments.
- StartIdx = StatepointOpers(&MI).getVarIdx();
- NumDefs = MI.getNumDefs();
- break;
- }
+ return std::make_pair(MI.getNumDefs(), StatepointOpers(&MI).getVarIdx());
default:
llvm_unreachable("unexpected stackmap opcode");
}
+}
+
+static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
+ ArrayRef<unsigned> Ops, int FrameIndex,
+ const TargetInstrInfo &TII) {
+ unsigned StartIdx = 0;
+ unsigned NumDefs = 0;
+ // getPatchpointUnfoldableRange reaches llvm_unreachable if MI is not a
+ // stackmap, patchpoint, or statepoint.
+ std::tie(NumDefs, StartIdx) = TII.getPatchpointUnfoldableRange(MI);
unsigned DefToFoldIdx = MI.getNumOperands();
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 28c8bd0a7ded..3c5dd29036db 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -45,7 +45,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -114,17 +113,6 @@ static bool darwinHasSinCos(const Triple &TT) {
return true;
}
-// Although this default value is arbitrary, it is not random. It is assumed
-// that a condition that evaluates the same way by a higher percentage than this
-// is best represented as control flow. Therefore, the default value N should be
-// set such that the win from N% correct executions is greater than the loss
-// from (100 - N)% mispredicted executions for the majority of intended targets.
-static cl::opt<int> MinPercentageForPredictableBranch(
- "min-predictable-branch", cl::init(99),
- cl::desc("Minimum percentage (0-100) that a condition must be either true "
- "or false to assume that the condition is predictable"),
- cl::Hidden);
-
void TargetLoweringBase::InitLibcalls(const Triple &TT) {
#define HANDLE_LIBCALL(code, name) \
setLibcallName(RTLIB::code, name);
@@ -223,6 +211,23 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
}
}
+/// GetFPLibCall - Helper to return the right libcall for the given floating
+/// point type, or UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
@@ -481,6 +486,11 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
return UNKNOWN_LIBCALL;
}
+RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) {
+ return getFPLibCall(RetVT, POWI_F32, POWI_F64, POWI_F80, POWI_F128,
+ POWI_PPCF128);
+}
+
RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
MVT VT) {
unsigned ModeN, ModelN;
@@ -803,6 +813,10 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::SUBE, VT, Expand);
+ // Absolute difference
+ setOperationAction(ISD::ABDS, VT, Expand);
+ setOperationAction(ISD::ABDU, VT, Expand);
+
// These default to Expand so they will be expanded to CTLZ/CTTZ by default.
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
@@ -849,6 +863,9 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand);
setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand);
+
+ // Named vector shuffles default to expand.
+ setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -985,9 +1002,6 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
if (NumElts.isScalar())
return LegalizeKind(TypeScalarizeVector, EltVT);
- if (VT.getVectorElementCount() == ElementCount::getScalable(1))
- report_fatal_error("Cannot legalize this vector");
-
// Try to widen vector elements until the element type is a power of two and
// promote it to a legal type later on, for example:
// <3 x i8> -> <4 x i8> -> <4 x i32>
@@ -1005,9 +1019,12 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// If type is to be expanded, split the vector.
// <4 x i140> -> <2 x i140>
- if (LK.first == TypeExpandInteger)
+ if (LK.first == TypeExpandInteger) {
+ if (VT.getVectorElementCount().isScalable())
+ return LegalizeKind(TypeScalarizeScalableVector, EltVT);
return LegalizeKind(TypeSplitVector,
VT.getHalfNumVectorElementsVT(Context));
+ }
// Promote the integer element types until a legal vector type is found
// or until the element integer type is too big. If a legal type was not
@@ -1066,6 +1083,9 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
return LegalizeKind(TypeWidenVector, NVT);
}
+ if (VT.getVectorElementCount() == ElementCount::getScalable(1))
+ return LegalizeKind(TypeScalarizeScalableVector, EltVT);
+
// Vectors with illegal element types are expanded.
EVT NVT = EVT::getVectorVT(Context, EltVT,
VT.getVectorElementCount().divideCoefficientBy(2));
@@ -1263,11 +1283,11 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
/// this allows us to compute derived properties we expose.
void TargetLoweringBase::computeRegisterProperties(
const TargetRegisterInfo *TRI) {
- static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+ static_assert(MVT::VALUETYPE_SIZE <= MVT::MAX_ALLOWED_VALUETYPE,
"Too many value types for ValueTypeActions to hold!");
// Everything defaults to needing one register.
- for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) {
NumRegistersForVT[i] = 1;
RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
}
@@ -1479,7 +1499,7 @@ void TargetLoweringBase::computeRegisterProperties(
// not a sub-register class / subreg register class) legal register class for
// a group of value types. For example, on i386, i8, i16, and i32
// representative would be GR32; while on x86_64 it's GR64.
- for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) {
const TargetRegisterClass* RRC;
uint8_t Cost;
std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
@@ -1506,10 +1526,10 @@ MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
/// This method returns the number of registers needed, and the VT for each
/// register. It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
-unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
- EVT &IntermediateVT,
- unsigned &NumIntermediates,
- MVT &RegisterVT) const {
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
+ EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
ElementCount EltCnt = VT.getVectorElementCount();
// If there is a wider vector type with the same element type as this one,
@@ -1518,7 +1538,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
// This handles things like <2 x float> -> <4 x float> and
// <4 x i1> -> <4 x i32>.
LegalizeTypeAction TA = getTypeAction(Context, VT);
- if (EltCnt.getKnownMinValue() != 1 &&
+ if (!EltCnt.isScalar() &&
(TA == TypeWidenVector || TA == TypePromoteInteger)) {
EVT RegisterEVT = getTypeToTransformTo(Context, VT);
if (isTypeLegal(RegisterEVT)) {
@@ -1690,7 +1710,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment(
// For example, the ABI alignment may change based on software platform while
// this function should only be affected by hardware implementation.
Type *Ty = VT.getTypeForEVT(Context);
- if (Alignment >= DL.getABITypeAlign(Ty)) {
+ if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) {
// Assume that an access that meets the ABI-specified alignment is fast.
if (Fast != nullptr)
*Fast = true;
@@ -1698,8 +1718,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment(
}
// This is a misaligned access.
- return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment.value(), Flags,
- Fast);
+ return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
}
bool TargetLoweringBase::allowsMemoryAccessForAlignment(
@@ -1734,10 +1753,6 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
MMO.getAlign(), MMO.getFlags(), Fast);
}
-BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
- return BranchProbability(MinPercentageForPredictableBranch, 100);
-}
-
//===----------------------------------------------------------------------===//
// TargetTransformInfo Helpers
//===----------------------------------------------------------------------===//
@@ -1821,19 +1836,22 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
llvm_unreachable("Unknown instruction type encountered!");
}
-std::pair<int, MVT>
+std::pair<InstructionCost, MVT>
TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const {
LLVMContext &C = Ty->getContext();
EVT MTy = getValueType(DL, Ty);
- int Cost = 1;
+ InstructionCost Cost = 1;
// We keep legalizing the type until we find a legal kind. We assume that
// the only operation that costs anything is the split. After splitting
// we need to handle two types.
while (true) {
LegalizeKind LK = getTypeConversion(C, MTy);
+ if (LK.first == TypeScalarizeScalableVector)
+ return std::make_pair(InstructionCost::getInvalid(), MVT::getVT(Ty));
+
if (LK.first == TypeLegal)
return std::make_pair(Cost, MTy.getSimpleVT());
@@ -1849,8 +1867,9 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
}
}
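Hedged sketch of a cost-model call site (the surrounding hook is assumed): with the return type changed to InstructionCost, scalable vectors that would have to be scalarized now surface an explicitly invalid cost rather than an arbitrary number.

    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
    if (!LT.first.isValid())
      return InstructionCost::getInvalid(); // cannot be legalized; propagate
    return LT.first;                        // 1 if already legal, doubled per split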
-Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
- bool UseTLS) const {
+Value *
+TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
+ bool UseTLS) const {
// compiler-rt provides a variable with a magic name. Targets that do not
// link with compiler-rt may also provide such a variable.
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
@@ -1881,7 +1900,8 @@ Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
return UnsafeStackPtr;
}
-Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+Value *
+TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
if (!TM.getTargetTriple().isAndroid())
return getDefaultSafeStackPointerLocation(IRB, true);
@@ -1941,7 +1961,7 @@ bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
// For OpenBSD return its special guard variable. Otherwise return nullptr,
// so that SelectionDAG handle SSP.
-Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
+Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const {
if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
@@ -2243,6 +2263,24 @@ TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI,
return Flags;
}
+Instruction *TargetLoweringBase::emitLeadingFence(IRBuilderBase &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
+ return Builder.CreateFence(Ord);
+ else
+ return nullptr;
+}
+
+Instruction *TargetLoweringBase::emitTrailingFence(IRBuilderBase &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ if (isAcquireOrStronger(Ord))
+ return Builder.CreateFence(Ord);
+ else
+ return nullptr;
+}
+
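Illustrative call sequence for the relocated fence hooks (the expansion-pass wiring and variables are assumed; only the signatures come from this change): for a target whose shouldInsertFencesForAtomic() returns true, the lowered operation is bracketed by explicit fences.

    AtomicOrdering Ord = AtomicOrdering::SequentiallyConsistent; // ordering of the original op
    IRBuilder<> Builder(AI);                                     // AI: atomic instruction being expanded
    TLI->emitLeadingFence(Builder, AI, Ord);
    // ... re-emit the operation itself with monotonic ordering ...
    TLI->emitTrailingFence(Builder, AI, Ord);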
//===----------------------------------------------------------------------===//
// GlobalISel Hooks
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index fe64b38cf0be..add34eccc1f3 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -21,6 +21,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -47,6 +48,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionGOFF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSectionXCOFF.h"
@@ -153,7 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
((CM == CodeModel::Small || CM == CodeModel::Medium)
- ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
} else {
PersonalityEncoding =
(CM == CodeModel::Small || CM == CodeModel::Medium)
@@ -293,6 +295,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
}
}
+void TargetLoweringObjectFileELF::getModuleMetadata(Module &M) {
+ SmallVector<GlobalValue *, 4> Vec;
+ collectUsedGlobalVariables(M, Vec, false);
+ for (GlobalValue *GV : Vec)
+ if (auto *GO = dyn_cast<GlobalObject>(GV))
+ Used.insert(GO);
+}
+
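A hedged note on intent, based on how Used is consumed later in this file: globals reachable from @llvm.used are treated as retained, and the section selectors below give them a unique section tagged SHF_GNU_RETAIN when the assembler supports it, roughly:

    const bool Retain = Used.count(GO);
    if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() ||
                   Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)))
      Flags |= ELF::SHF_GNU_RETAIN; // see calcUniqueIDUpdateFlagsAndSize below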
void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
Module &M) const {
auto &C = getContext();
@@ -315,7 +325,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
if (NamedMDNode *DependentLibraries = M.getNamedMetadata("llvm.dependent-libraries")) {
auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES,
- ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1);
Streamer.SwitchSection(S);
@@ -522,8 +532,10 @@ static const Comdat *getELFComdat(const GlobalValue *GV) {
if (!C)
return nullptr;
- if (C->getSelectionKind() != Comdat::Any)
- report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" +
+ if (C->getSelectionKind() != Comdat::Any &&
+ C->getSelectionKind() != Comdat::NoDeduplicate)
+ report_fatal_error("ELF COMDATs only support SelectionKind::Any and "
+ "SelectionKind::NoDeduplicate, '" +
C->getName() + "' cannot be lowered.");
return C;
@@ -624,6 +636,8 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
Name.push_back('.');
TM.getNameWithPrefix(Name, GO, Mang, /*MayAlwaysUsePrivate*/true);
} else if (HasPrefix)
+ // For distinguishing between .text.${text-section-prefix}. (with trailing
+ // dot) and .text.${function-name}
Name.push_back('.');
return Name;
}
@@ -640,8 +654,85 @@ public:
};
}
-MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
- const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+/// Calculate an appropriate unique ID for a section, and update Flags,
+/// EntrySize and NextUniqueID where appropriate.
+static unsigned
+calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
+ SectionKind Kind, const TargetMachine &TM,
+ MCContext &Ctx, Mangler &Mang, unsigned &Flags,
+ unsigned &EntrySize, unsigned &NextUniqueID,
+ const bool Retain, const bool ForceUnique) {
+ // Increment uniqueID if we are forced to emit a unique section.
+ // This works perfectly fine with section attribute or pragma section as the
+ // sections with the same name are grouped together by the assembler.
+ if (ForceUnique)
+ return NextUniqueID++;
+
+ // A section can have at most one associated section. Put each global with
+ // MD_associated in a unique section.
+ const bool Associated = GO->getMetadata(LLVMContext::MD_associated);
+ if (Associated) {
+ Flags |= ELF::SHF_LINK_ORDER;
+ return NextUniqueID++;
+ }
+
+ if (Retain) {
+ if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))
+ Flags |= ELF::SHF_GNU_RETAIN;
+ return NextUniqueID++;
+ }
+
+  // If two symbols with differing sizes end up in the same mergeable section,
+  // that section can be assigned an incorrect entry size. To avoid this we
+  // usually put symbols of the same size into distinct mergeable sections with
+  // the same name. Doing so relies on the ",unique ," assembly feature. This
+  // feature is not available until binutils version 2.35
+ // (https://sourceware.org/bugzilla/show_bug.cgi?id=25380).
+ const bool SupportsUnique = Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 35);
+ if (!SupportsUnique) {
+ Flags &= ~ELF::SHF_MERGE;
+ EntrySize = 0;
+ return MCContext::GenericSectionID;
+ }
+
+ const bool SymbolMergeable = Flags & ELF::SHF_MERGE;
+ const bool SeenSectionNameBefore =
+ Ctx.isELFGenericMergeableSection(SectionName);
+  // If this is the first occurrence of this section name, treat it as the
+  // generic section.
+ if (!SymbolMergeable && !SeenSectionNameBefore)
+ return MCContext::GenericSectionID;
+
+ // Symbols must be placed into sections with compatible entry sizes. Generate
+ // unique sections for symbols that have not been assigned to compatible
+ // sections.
+ const auto PreviousID =
+ Ctx.getELFUniqueIDForEntsize(SectionName, Flags, EntrySize);
+ if (PreviousID)
+ return *PreviousID;
+
+ // If the user has specified the same section name as would be created
+ // implicitly for this symbol e.g. .rodata.str1.1, then we don't need
+ // to unique the section as the entry size for this symbol will be
+ // compatible with implicitly created sections.
+ SmallString<128> ImplicitSectionNameStem =
+ getELFSectionNameForGlobal(GO, Kind, Mang, TM, EntrySize, false);
+ if (SymbolMergeable &&
+ Ctx.isELFImplicitMergeableSectionNamePrefix(SectionName) &&
+ SectionName.startswith(ImplicitSectionNameStem))
+ return MCContext::GenericSectionID;
+
+  // We have seen this section name before, but with different flags or entry
+  // size. Create a new unique ID.
+ return NextUniqueID++;
+}
+
+static MCSection *selectExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM,
+ MCContext &Ctx, Mangler &Mang, unsigned &NextUniqueID,
+ bool Retain, bool ForceUnique) {
StringRef SectionName = GO->getSection();
// Check if '#pragma clang section' name is applicable.
@@ -669,76 +760,30 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
Kind = getELFKindForNamedSection(SectionName, Kind);
StringRef Group = "";
+ bool IsComdat = false;
unsigned Flags = getELFSectionFlags(Kind);
if (const Comdat *C = getELFComdat(GO)) {
Group = C->getName();
+ IsComdat = C->getSelectionKind() == Comdat::Any;
Flags |= ELF::SHF_GROUP;
}
unsigned EntrySize = getEntrySizeForKind(Kind);
+ const unsigned UniqueID = calcUniqueIDUpdateFlagsAndSize(
+ GO, SectionName, Kind, TM, Ctx, Mang, Flags, EntrySize, NextUniqueID,
+ Retain, ForceUnique);
- // A section can have at most one associated section. Put each global with
- // MD_associated in a unique section.
- unsigned UniqueID = MCContext::GenericSectionID;
const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM);
- if (GO->getMetadata(LLVMContext::MD_associated)) {
- UniqueID = NextUniqueID++;
- Flags |= ELF::SHF_LINK_ORDER;
- } else {
- if (getContext().getAsmInfo()->useIntegratedAssembler() ||
- getContext().getAsmInfo()->binutilsIsAtLeast(2, 35)) {
- // Symbols must be placed into sections with compatible entry
- // sizes. Generate unique sections for symbols that have not
- // been assigned to compatible sections.
- if (Flags & ELF::SHF_MERGE) {
- auto maybeID = getContext().getELFUniqueIDForEntsize(SectionName, Flags,
- EntrySize);
- if (maybeID)
- UniqueID = *maybeID;
- else {
- // If the user has specified the same section name as would be created
- // implicitly for this symbol e.g. .rodata.str1.1, then we don't need
- // to unique the section as the entry size for this symbol will be
- // compatible with implicitly created sections.
- SmallString<128> ImplicitSectionNameStem = getELFSectionNameForGlobal(
- GO, Kind, getMangler(), TM, EntrySize, false);
- if (!(getContext().isELFImplicitMergeableSectionNamePrefix(
- SectionName) &&
- SectionName.startswith(ImplicitSectionNameStem)))
- UniqueID = NextUniqueID++;
- }
- } else {
- // We need to unique the section if the user has explicity
- // assigned a non-mergeable symbol to a section name for
- // a generic mergeable section.
- if (getContext().isELFGenericMergeableSection(SectionName)) {
- auto maybeID = getContext().getELFUniqueIDForEntsize(
- SectionName, Flags, EntrySize);
- UniqueID = maybeID ? *maybeID : NextUniqueID++;
- }
- }
- } else {
- // If two symbols with differing sizes end up in the same mergeable
- // section that section can be assigned an incorrect entry size. To avoid
- // this we usually put symbols of the same size into distinct mergeable
- // sections with the same name. Doing so relies on the ",unique ,"
- // assembly feature. This feature is not avalible until bintuils
- // version 2.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=25380).
- Flags &= ~ELF::SHF_MERGE;
- EntrySize = 0;
- }
- }
-
- MCSectionELF *Section = getContext().getELFSection(
- SectionName, getELFSectionType(SectionName, Kind), Flags,
- EntrySize, Group, UniqueID, LinkedToSym);
+ MCSectionELF *Section = Ctx.getELFSection(
+ SectionName, getELFSectionType(SectionName, Kind), Flags, EntrySize,
+ Group, IsComdat, UniqueID, LinkedToSym);
// Make sure that we did not get some other section with incompatible sh_link.
// This should not be possible due to UniqueID code above.
assert(Section->getLinkedToSymbol() == LinkedToSym &&
"Associated symbol mismatch between sections");
- if (!(getContext().getAsmInfo()->useIntegratedAssembler() ||
- getContext().getAsmInfo()->binutilsIsAtLeast(2, 35))) {
+ if (!(Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 35))) {
// If we are using GNU as before 2.35, then this symbol might have
// been placed in an incompatible mergeable section. Emit an error if this
// is the case to avoid creating broken output.
@@ -757,15 +802,24 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
return Section;
}
+MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ return selectExplicitSectionGlobal(GO, Kind, TM, getContext(), getMangler(),
+ NextUniqueID, Used.count(GO),
+ /* ForceUnique = */false);
+}
+
static MCSectionELF *selectELFSectionForGlobal(
MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) {
StringRef Group = "";
+ bool IsComdat = false;
if (const Comdat *C = getELFComdat(GO)) {
Flags |= ELF::SHF_GROUP;
Group = C->getName();
+ IsComdat = C->getSelectionKind() == Comdat::Any;
}
// Get the section entry size based on the kind.
@@ -788,7 +842,30 @@ static MCSectionELF *selectELFSectionForGlobal(
if (Kind.isExecuteOnly())
UniqueID = 0;
return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
- EntrySize, Group, UniqueID, AssociatedSymbol);
+ EntrySize, Group, IsComdat, UniqueID,
+ AssociatedSymbol);
+}
+
+static MCSection *selectELFSectionForGlobal(
+ MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool Retain, bool EmitUniqueSection,
+ unsigned Flags, unsigned *NextUniqueID) {
+ const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM);
+ if (LinkedToSym) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_LINK_ORDER;
+ }
+ if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_GNU_RETAIN;
+ }
+
+ MCSectionELF *Section = selectELFSectionForGlobal(
+ Ctx, GO, Kind, Mang, TM, EmitUniqueSection, Flags,
+ NextUniqueID, LinkedToSym);
+ assert(Section->getLinkedToSymbol() == LinkedToSym);
+ return Section;
}
MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
@@ -805,18 +882,25 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
EmitUniqueSection = TM.getDataSections();
}
EmitUniqueSection |= GO->hasComdat();
+ return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
+ Used.count(GO), EmitUniqueSection, Flags,
+ &NextUniqueID);
+}
- const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM);
- if (LinkedToSym) {
- EmitUniqueSection = true;
- Flags |= ELF::SHF_LINK_ORDER;
- }
-
- MCSectionELF *Section = selectELFSectionForGlobal(
- getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags,
- &NextUniqueID, LinkedToSym);
- assert(Section->getLinkedToSymbol() == LinkedToSym);
- return Section;
+MCSection *TargetLoweringObjectFileELF::getUniqueSectionForFunction(
+ const Function &F, const TargetMachine &TM) const {
+ SectionKind Kind = SectionKind::getText();
+ unsigned Flags = getELFSectionFlags(Kind);
+  // If the function's section name is pre-determined via pragma or a
+ // section attribute, call selectExplicitSectionGlobal.
+ if (F.hasSection() || F.hasFnAttribute("implicit-section-name"))
+ return selectExplicitSectionGlobal(
+ &F, Kind, TM, getContext(), getMangler(), NextUniqueID,
+ Used.count(&F), /* ForceUnique = */true);
+ else
+ return selectELFSectionForGlobal(
+ getContext(), &F, Kind, getMangler(), TM, Used.count(&F),
+ /*EmitUniqueSection=*/true, Flags, &NextUniqueID);
}
MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
@@ -834,9 +918,8 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
/* AssociatedSymbol */ nullptr);
}
-MCSection *
-TargetLoweringObjectFileELF::getSectionForLSDA(const Function &F,
- const TargetMachine &TM) const {
+MCSection *TargetLoweringObjectFileELF::getSectionForLSDA(
+ const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const {
// If neither COMDAT nor function sections, use the monolithic LSDA section.
// Re-use this path if LSDASection is null as in the Arm EHABI.
if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections()))
@@ -844,31 +927,30 @@ TargetLoweringObjectFileELF::getSectionForLSDA(const Function &F,
const auto *LSDA = cast<MCSectionELF>(LSDASection);
unsigned Flags = LSDA->getFlags();
+ const MCSymbolELF *LinkedToSym = nullptr;
StringRef Group;
- if (F.hasComdat()) {
- Group = F.getComdat()->getName();
+ bool IsComdat = false;
+ if (const Comdat *C = getELFComdat(&F)) {
Flags |= ELF::SHF_GROUP;
+ Group = C->getName();
+ IsComdat = C->getSelectionKind() == Comdat::Any;
+ }
+ // Use SHF_LINK_ORDER to facilitate --gc-sections if we can use GNU ld>=2.36
+ // or LLD, which support mixed SHF_LINK_ORDER & non-SHF_LINK_ORDER.
+ if (TM.getFunctionSections() &&
+ (getContext().getAsmInfo()->useIntegratedAssembler() &&
+ getContext().getAsmInfo()->binutilsIsAtLeast(2, 36))) {
+ Flags |= ELF::SHF_LINK_ORDER;
+ LinkedToSym = cast<MCSymbolELF>(&FnSym);
}
// Append the function name as the suffix like GCC, assuming
// -funique-section-names applies to .gcc_except_table sections.
- if (TM.getUniqueSectionNames())
- return getContext().getELFSection(LSDA->getName() + "." + F.getName(),
- LSDA->getType(), Flags, 0, Group,
- MCSection::NonUniqueID, nullptr);
-
- // Allocate a unique ID if function sections && (integrated assembler or GNU
- // as>=2.35). Note we could use SHF_LINK_ORDER to facilitate --gc-sections but
- // that would require that we know the linker is a modern LLD (12.0 or later).
- // GNU ld as of 2.35 does not support mixed SHF_LINK_ORDER &
- // non-SHF_LINK_ORDER components in an output section
- // https://sourceware.org/bugzilla/show_bug.cgi?id=26256
- unsigned ID = TM.getFunctionSections() &&
- getContext().getAsmInfo()->useIntegratedAssembler()
- ? NextUniqueID++
- : MCSection::NonUniqueID;
- return getContext().getELFSection(LSDA->getName(), LSDA->getType(), Flags, 0,
- Group, ID, nullptr);
+ return getContext().getELFSection(
+ (TM.getUniqueSectionNames() ? LSDA->getName() + "." + F.getName()
+ : LSDA->getName()),
+ LSDA->getType(), Flags, 0, Group, IsComdat, MCSection::NonUniqueID,
+ LinkedToSym);
}
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
@@ -920,7 +1002,8 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
} else {
Name += MBB.getParent()->getSection()->getName();
if (TM.getUniqueBasicBlockSectionNames()) {
- Name += ".";
+ if (!Name.endswith("."))
+ Name += ".";
Name += MBB.getSymbol()->getName();
} else {
UniqueID = NextUniqueID++;
@@ -934,8 +1017,8 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
GroupName = F.getComdat()->getName().str();
}
return getContext().getELFSection(Name, ELF::SHT_PROGBITS, Flags,
- 0 /* Entry Size */, GroupName, UniqueID,
- nullptr);
+ 0 /* Entry Size */, GroupName,
+ F.hasComdat(), UniqueID, nullptr);
}
static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray,
@@ -944,7 +1027,7 @@ static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray,
std::string Name;
unsigned Type;
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
- StringRef COMDAT = KeySym ? KeySym->getName() : "";
+ StringRef Comdat = KeySym ? KeySym->getName() : "";
if (KeySym)
Flags |= ELF::SHF_GROUP;
@@ -973,7 +1056,7 @@ static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray,
Type = ELF::SHT_PROGBITS;
}
- return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT);
+ return Ctx.getELFSection(Name, Type, Flags, 0, Comdat, /*IsComdat=*/true);
}
MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
@@ -1027,7 +1110,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const {
// -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the
// same name.
return getContext().getELFSection(".GCC.command.line", ELF::SHT_PROGBITS,
- ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1);
}
void
@@ -1107,13 +1190,12 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
StringRef Segment, Section;
unsigned TAA = 0, StubSize = 0;
bool TAAParsed;
- std::string ErrorCode =
- MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section,
- TAA, TAAParsed, StubSize);
- if (!ErrorCode.empty())
+ if (Error E = MCSectionMachO::ParseSectionSpecifier(
+ SectionVal, Segment, Section, TAA, TAAParsed, StubSize)) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Invalid section specifier '" + Section + "': " +
- ErrorCode + ".");
+ report_fatal_error("Invalid section specifier '" + Section +
+ "': " + toString(std::move(E)) + ".");
+ }
// Get the section.
MCSectionMachO *S = getContext().getMachOSection(
@@ -1137,6 +1219,14 @@ static void checkMachOComdat(const GlobalValue *GV) {
MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+
+ StringRef SectionName = GO->getSection();
+
+ const Function *F = dyn_cast<Function>(GO);
+ if (F && F->hasFnAttribute("implicit-section-name")) {
+ SectionName = F->getFnAttribute("implicit-section-name").getValueAsString();
+ }
+
// Parse the section specifier and create it if valid.
StringRef Segment, Section;
unsigned TAA = 0, StubSize = 0;
@@ -1144,14 +1234,12 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
checkMachOComdat(GO);
- std::string ErrorCode =
- MCSectionMachO::ParseSectionSpecifier(GO->getSection(), Segment, Section,
- TAA, TAAParsed, StubSize);
- if (!ErrorCode.empty()) {
+ if (Error E = MCSectionMachO::ParseSectionSpecifier(
+ SectionName, Segment, Section, TAA, TAAParsed, StubSize)) {
// If invalid, report the error with report_fatal_error.
report_fatal_error("Global variable '" + GO->getName() +
"' has an invalid section specifier '" +
- GO->getSection() + "': " + ErrorCode + ".");
+ GO->getSection() + "': " + toString(std::move(E)) + ".");
}
// Get the section.
@@ -1393,11 +1481,10 @@ static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
if (!AsmInfo.isSectionAtomizableBySymbols(Section))
return true;
- // If it is not dead stripped, it is safe to use private labels.
- const MCSectionMachO &SMO = cast<MCSectionMachO>(Section);
- if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
- return true;
-
+ // FIXME: we should be able to use private labels for sections that can't be
+ // dead-stripped (there's no issue with blocking atomization there), but `ld
+ // -r` sometimes drops the no_dead_strip attribute from sections so for safety
+ // we don't allow it.
return false;
}
@@ -1485,7 +1572,7 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH;
case Comdat::Largest:
return COFF::IMAGE_COMDAT_SELECT_LARGEST;
- case Comdat::NoDuplicates:
+ case Comdat::NoDeduplicate:
return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
case Comdat::SameSize:
return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE;
@@ -1576,7 +1663,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
// Append "$symbol" to the section name *before* IR-level mangling is
// applied when targetting mingw. This is what GCC does, and the ld.bfd
// COFF linker will not properly handle comdats otherwise.
- if (getTargetTriple().isWindowsGNUEnvironment())
+ if (getContext().getTargetTriple().isWindowsGNUEnvironment())
raw_svector_ostream(Name) << '$' << ComdatGV->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
@@ -1693,7 +1780,8 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
std::string Flags;
for (const GlobalValue &GV : M.global_values()) {
raw_string_ostream OS(Flags);
- emitLinkerFlagsForGlobalCOFF(OS, &GV, getTargetTriple(), getMangler());
+ emitLinkerFlagsForGlobalCOFF(OS, &GV, getContext().getTargetTriple(),
+ getMangler());
OS.flush();
if (!Flags.empty()) {
Streamer.SwitchSection(getDrectveSection());
@@ -1717,7 +1805,8 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
continue;
raw_string_ostream OS(Flags);
- emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler());
+ emitLinkerFlagsForUsedCOFF(OS, GV, getContext().getTargetTriple(),
+ getMangler());
OS.flush();
if (!Flags.empty()) {
@@ -1796,16 +1885,16 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- return getCOFFStaticStructorSection(getContext(), getTargetTriple(), true,
- Priority, KeySym,
- cast<MCSectionCOFF>(StaticCtorSection));
+ return getCOFFStaticStructorSection(
+ getContext(), getContext().getTargetTriple(), true, Priority, KeySym,
+ cast<MCSectionCOFF>(StaticCtorSection));
}
MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- return getCOFFStaticStructorSection(getContext(), getTargetTriple(), false,
- Priority, KeySym,
- cast<MCSectionCOFF>(StaticDtorSection));
+ return getCOFFStaticStructorSection(
+ getContext(), getContext().getTargetTriple(), false, Priority, KeySym,
+ cast<MCSectionCOFF>(StaticDtorSection));
}
const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
@@ -1841,7 +1930,7 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
static std::string APIntToHexString(const APInt &AI) {
unsigned Width = (AI.getBitWidth() / 8) * 2;
- std::string HexString = AI.toString(16, /*Signed=*/false);
+ std::string HexString = toString(AI, 16, /*Signed=*/false);
llvm::transform(HexString, HexString.begin(), tolower);
unsigned Size = HexString.size();
assert(Width >= Size && "hex string is too large!");
@@ -1934,6 +2023,20 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) {
return C;
}
+static unsigned getWasmSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (K.isThreadLocal())
+ Flags |= wasm::WASM_SEG_FLAG_TLS;
+
+ if (K.isMergeableCString())
+ Flags |= wasm::WASM_SEG_FLAG_STRINGS;
+
+  // TODO(sbc): Add support for K.isMergeableConst()
+
+ return Flags;
+}
+
MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// We don't support explicit section names for functions in the wasm object
@@ -1957,9 +2060,9 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
Group = C->getName();
}
- MCSectionWasm* Section =
- getContext().getWasmSection(Name, Kind, Group,
- MCContext::GenericSectionID);
+ unsigned Flags = getWasmSectionFlags(Kind);
+ MCSectionWasm *Section = getContext().getWasmSection(
+ Name, Kind, Flags, Group, MCContext::GenericSectionID);
return Section;
}
@@ -1991,7 +2094,8 @@ static MCSectionWasm *selectWasmSectionForGlobal(
(*NextUniqueID)++;
}
- return Ctx.getWasmSection(Name, Kind, Group, UniqueID);
+ unsigned Flags = getWasmSectionFlags(Kind);
+ return Ctx.getWasmSection(Name, Kind, Flags, Group, UniqueID);
}
MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
@@ -2075,14 +2179,26 @@ bool TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(
if (!F.hasPersonalityFn() || !F.needsUnwindTableEntry())
return false;
- const Function *Per =
- dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ const GlobalValue *Per =
+ dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
+ assert(Per && "Personality routine is not a GlobalValue type.");
if (isNoOpWithoutInvoke(classifyEHPersonality(Per)))
return false;
return true;
}
+bool TargetLoweringObjectFileXCOFF::ShouldSetSSPCanaryBitInTB(
+ const MachineFunction *MF) {
+ const Function &F = MF->getFunction();
+ if (!F.hasStackProtectorFnAttr())
+ return false;
+ // FIXME: check presence of canary word
+  // There are cases where the stack protectors are not actually inserted even
+  // if the attributes are set.
+ return true;
+}
+
MCSymbol *
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) {
return MF->getMMI().getContext().getOrCreateSymbol(
@@ -2101,6 +2217,12 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
// function entry point. We choose to always return a function descriptor
// here.
if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) {
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasAttribute("toc-data"))
+ return cast<MCSectionXCOFF>(
+ SectionForGlobal(GVar, SectionKind::getData(), TM))
+ ->getQualNameSymbol();
+
if (GO->isDeclarationForLinker())
return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM))
->getQualNameSymbol();
@@ -2110,8 +2232,8 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
return cast<MCSectionXCOFF>(
getSectionForFunctionDescriptor(cast<Function>(GO), TM))
->getQualNameSymbol();
- if ((TM.getDataSections() && !GO->hasSection()) || GOKind.isCommon() ||
- GOKind.isBSSLocal())
+ if ((TM.getDataSections() && !GO->hasSection()) || GO->hasCommonLinkage() ||
+ GOKind.isBSSLocal() || GOKind.isThreadBSSLocal())
return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM))
->getQualNameSymbol();
}
@@ -2126,6 +2248,15 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
report_fatal_error("#pragma clang section is not yet supported");
StringRef SectionName = GO->getSection();
+
+ // Handle the XCOFF::TD case first, then deal with the rest.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO))
+ if (GVar->hasAttribute("toc-data"))
+ return getContext().getXCOFFSection(
+ SectionName, Kind,
+ XCOFF::CsectProperties(/*MappingClass*/ XCOFF::XMC_TD, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
+
XCOFF::StorageMappingClass MappingClass;
if (Kind.isText())
MappingClass = XCOFF::XMC_PR;
@@ -2136,8 +2267,9 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
else
report_fatal_error("XCOFF other section types not yet implemented.");
- return getContext().getXCOFFSection(SectionName, MappingClass, XCOFF::XTY_SD,
- Kind, /* MultiSymbolsAllowed*/ true);
+ return getContext().getXCOFFSection(
+ SectionName, Kind, XCOFF::CsectProperties(MappingClass, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
@@ -2148,22 +2280,41 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
+ XCOFF::StorageMappingClass SMC =
+ isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA;
+ if (GO->isThreadLocal())
+ SMC = XCOFF::XMC_UL;
+
// Externals go into a csect of type ER.
return getContext().getXCOFFSection(
- Name, isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA, XCOFF::XTY_ER,
- SectionKind::getMetadata());
+ Name, SectionKind::getMetadata(),
+ XCOFF::CsectProperties(SMC, XCOFF::XTY_ER));
}
MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ // Handle the XCOFF::TD case first, then deal with the rest.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO))
+ if (GVar->hasAttribute("toc-data")) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(
+ Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TD, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
+ }
+
// Common symbols go into a csect with matching name which will get mapped
// into the .bss section.
- if (Kind.isBSSLocal() || Kind.isCommon()) {
+ // Zero-initialized local TLS symbols go into a csect with matching name which
+ // will get mapped into the .tbss section.
+ if (Kind.isBSSLocal() || GO->hasCommonLinkage() || Kind.isThreadBSSLocal()) {
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
+ XCOFF::StorageMappingClass SMC = Kind.isBSSLocal() ? XCOFF::XMC_BS
+ : Kind.isCommon() ? XCOFF::XMC_RW
+ : XCOFF::XMC_UL;
return getContext().getXCOFFSection(
- Name, Kind.isBSSLocal() ? XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM,
- Kind);
+ Name, Kind, XCOFF::CsectProperties(SMC, XCOFF::XTY_CM));
}
if (Kind.isMergeableCString()) {
@@ -2179,7 +2330,7 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
getNameWithPrefix(Name, GO, TM);
return getContext().getXCOFFSection(
- Name, XCOFF::XMC_RO, XCOFF::XTY_SD, Kind,
+ Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD),
/* MultiSymbolsAllowed*/ !TM.getDataSections());
}
@@ -2202,8 +2353,9 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
if (TM.getDataSections()) {
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
- return getContext().getXCOFFSection(Name, XCOFF::XMC_RW, XCOFF::XTY_SD,
- SectionKind::getData());
+ return getContext().getXCOFFSection(
+ Name, SectionKind::getData(),
+ XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD));
}
return DataSection;
}
@@ -2212,12 +2364,27 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
if (TM.getDataSections()) {
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
- return getContext().getXCOFFSection(Name, XCOFF::XMC_RO, XCOFF::XTY_SD,
- SectionKind::getReadOnly());
+ return getContext().getXCOFFSection(
+ Name, SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD));
}
return ReadOnlySection;
}
+ // External/weak TLS data and initialized local TLS data are not eligible
+ // to be put into common csect. If data sections are enabled, thread
+ // data are emitted into separate sections. Otherwise, thread data
+ // are emitted into the .tdata section.
+ if (Kind.isThreadLocal()) {
+ if (TM.getDataSections()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(
+ Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TL, XCOFF::XTY_SD));
+ }
+ return TLSDataSection;
+ }
+
report_fatal_error("XCOFF other section types not yet implemented.");
}
@@ -2232,8 +2399,9 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable(
// the table doesn't prevent the removal.
SmallString<128> NameStr(".rodata.jmp..");
getNameWithPrefix(NameStr, &F, TM);
- return getContext().getXCOFFSection(NameStr, XCOFF::XMC_RO, XCOFF::XTY_SD,
- SectionKind::getReadOnly());
+ return getContext().getXCOFFSection(
+ NameStr, SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD));
}
bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
@@ -2324,9 +2492,11 @@ MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol(
Func->isDeclaration()) &&
isa<Function>(Func)) {
return getContext()
- .getXCOFFSection(NameStr, XCOFF::XMC_PR,
- Func->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD,
- SectionKind::getText())
+ .getXCOFFSection(
+ NameStr, SectionKind::getText(),
+ XCOFF::CsectProperties(XCOFF::XMC_PR, Func->isDeclaration()
+ ? XCOFF::XTY_ER
+ : XCOFF::XTY_SD))
->getQualNameSymbol();
}
@@ -2337,8 +2507,9 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
const Function *F, const TargetMachine &TM) const {
SmallString<128> NameStr;
getNameWithPrefix(NameStr, F, TM);
- return getContext().getXCOFFSection(NameStr, XCOFF::XMC_DS, XCOFF::XTY_SD,
- SectionKind::getData());
+ return getContext().getXCOFFSection(
+ NameStr, SectionKind::getData(),
+ XCOFF::CsectProperties(XCOFF::XMC_DS, XCOFF::XTY_SD));
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
@@ -2346,7 +2517,29 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
// Use TE storage-mapping class when large code model is enabled so that
// the chance of needing -bbigtoc is decreased.
return getContext().getXCOFFSection(
- cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(),
- TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC,
- XCOFF::XTY_SD, SectionKind::getData());
+ cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
+ XCOFF::CsectProperties(
+ TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC,
+ XCOFF::XTY_SD));
+}
+
+//===----------------------------------------------------------------------===//
+// GOFF
+//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF()
+ : TargetLoweringObjectFile() {}
+
+MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ return SelectSectionForGlobal(GO, Kind, TM);
+}
+
+MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ auto *Symbol = TM.getSymbol(GO);
+ if (Kind.isBSS())
+ return getContext().getGOFFSection(Symbol->getName(),
+ SectionKind::getBSS());
+
+ return getContext().getObjectFileInfo()->getTextSection();
}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index e844d03854e2..4024fd452fc4 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Discriminator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Threading.h"
@@ -165,6 +166,13 @@ static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2",
"Disable the abort but emit a diagnostic on failure")));
+// An option that disables inserting FS-AFDO discriminators before emit.
+// This is mainly for debugging and tuning purposes.
+static cl::opt<bool>
+ FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
+ cl::desc("Do not insert FS-AFDO discriminators before "
+ "emit."));
+
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
@@ -334,6 +342,8 @@ struct InsertedPass {
namespace llvm {
+extern cl::opt<bool> EnableFSDiscriminator;
+
class PassConfigImpl {
public:
// List of passes explicitly substituted by this target. Normally this is
@@ -847,8 +857,8 @@ void TargetPassConfig::addIRPasses() {
// Run GC lowering passes for builtin collectors
// TODO: add a pass insertion point here
- addPass(createGCLoweringPass());
- addPass(createShadowStackGCLoweringPass());
+ addPass(&GCLoweringID);
+ addPass(&ShadowStackGCLoweringID);
addPass(createLowerConstantIntrinsicsPass());
// Make sure that no unreachable blocks are instruction selected.
@@ -858,11 +868,16 @@ void TargetPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting)
addPass(createConstantHoistingPass());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createReplaceWithVeclibLegacyPass());
+
if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
addPass(createPartiallyInlineLibCallsPass());
- // Instrument function entry and exit, e.g. with calls to mcount().
- addPass(createPostInlineEntryExitInstrumenterPass());
+ // Expand vector predication intrinsics into standard IR instructions.
+ // This pass has to run before ScalarizeMaskedMemIntrin and ExpandReduction
+ // passes since it emits those kinds of intrinsics.
+ addPass(createExpandVectorPredicationPass());
// Add scalarization of target's unsupported masked memory intrinsics pass.
// the unsupported intrinsic will be replaced with a chain of basic blocks,
@@ -924,7 +939,6 @@ void TargetPassConfig::addPassesToHandleExceptions() {
void TargetPassConfig::addCodeGenPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
addPass(createCodeGenPreparePass());
- addPass(createRewriteSymbolsPass());
}
/// Add common passes that perform LLVM IR to IR transforms in preparation for
@@ -1109,6 +1123,8 @@ void TargetPassConfig::addMachinePasses() {
// Run post-ra passes.
addPostRegAlloc();
+ addPass(&RemoveRedundantDebugValuesID, false);
+
addPass(&FixupStatepointCallerSavedID);
// Insert prolog/epilog code. Eliminate abstract frame index references...
@@ -1162,6 +1178,14 @@ void TargetPassConfig::addMachinePasses() {
addPass(&XRayInstrumentationID);
addPass(&PatchableFunctionID);
+ if (EnableFSDiscriminator && !FSNoFinalDiscrim)
+ // Add FS discriminators here so that all the instruction duplicates
+ // in different BBs get their own discriminators. With this, we can "sum"
+ // the SampleFDO counters instead of using MAX. This will improve the
+ // SampleFDO profile quality.
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::PassLast));
+
addPreEmitPass();
if (TM->Options.EnableIPRA)
@@ -1187,12 +1211,14 @@ void TargetPassConfig::addMachinePasses() {
}
// Machine function splitter uses the basic block sections feature. Both
- // cannot be enabled at the same time.
- if (TM->Options.EnableMachineFunctionSplitter ||
- EnableMachineFunctionSplitter) {
- addPass(createMachineFunctionSplitterPass());
- } else if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
+ // cannot be enabled at the same time. Basic block sections takes precedence.
+  // FIXME: In principle, BasicBlockSection::Labels and splitting can be used
+ // together. Update this check once we have addressed any issues.
+ if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf()));
+ } else if (TM->Options.EnableMachineFunctionSplitter ||
+ EnableMachineFunctionSplitter) {
+ addPass(createMachineFunctionSplitterPass());
}
// Add passes that directly emit MI after all other MI passes.
@@ -1309,11 +1335,15 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
}
bool TargetPassConfig::addRegAssignAndRewriteFast() {
- if (RegAlloc != &useDefaultRegisterAllocator &&
- RegAlloc != &createFastRegisterAllocator)
+ if (RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator &&
+ RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&createFastRegisterAllocator)
report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
addPass(createRegAllocPass(false));
+
+ // Allow targets to change the register assignments after
+ // fast register allocation.
+ addPostFastRegAllocRewrite();
return true;
}
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 5fd7eef5808f..f4bb71535f7f 100644
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -225,6 +225,23 @@ TargetRegisterInfo::getMinimalPhysRegClass(MCRegister reg, MVT VT) const {
return BestRC;
}
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClassLLT(MCRegister reg, LLT Ty) const {
+ assert(Register::isPhysicalRegister(reg) &&
+ "reg must be a physical register");
+
+  // Pick the most specific (smallest) register class of the right type that
+  // contains this physreg.
+ const TargetRegisterClass *BestRC = nullptr;
+ for (const TargetRegisterClass *RC : regclasses()) {
+ if ((!Ty.isValid() || isTypeLegalForClass(*RC, Ty)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ return BestRC;
+}
+
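A hypothetical GlobalISel-flavoured use of the new LLT overload (the register and type are assumed): find the tightest class that contains a physical register and is legal for a 32-bit scalar.

    const TargetRegisterClass *RC =
        TRI->getMinimalPhysRegClassLLT(Reg, LLT::scalar(32));
    if (!RC)
      return false; // no class of that type contains Reg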
/// getAllocatableSetForRC - Toggle the bits that represent allocatable
/// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF,
@@ -250,8 +267,9 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
}
// Mask out the reserved registers
- BitVector Reserved = getReservedRegs(MF);
- Allocatable &= Reserved.flip();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const BitVector &Reserved = MRI.getReservedRegs();
+ Allocatable.reset(Reserved);
return Allocatable;
}
@@ -461,21 +479,13 @@ bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return !MF.getFunction().hasFnAttribute("no-realign-stack");
}
-bool TargetRegisterInfo::needsStackRealignment(
- const MachineFunction &MF) const {
+bool TargetRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const Function &F = MF.getFunction();
- Align StackAlign = TFI->getStackAlign();
- bool requiresRealignment = ((MFI.getMaxAlign() > StackAlign) ||
- F.hasFnAttribute(Attribute::StackAlignment));
- if (F.hasFnAttribute("stackrealign") || requiresRealignment) {
- if (canRealignStack(MF))
- return true;
- LLVM_DEBUG(dbgs() << "Can't realign function's stack: " << F.getName()
- << "\n");
- }
- return false;
+ return F.hasFnAttribute("stackrealign") ||
+ (MFI.getMaxAlign() > TFI->getStackAlign()) ||
+ F.hasFnAttribute(Attribute::StackAlignment);
}
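Since the old needsStackRealignment() folded the can-realign check into this predicate, a caller that wants the previous behaviour would now be expected to combine the two hooks explicitly (call-site sketch, assumed):

    if (TRI->shouldRealignStack(MF) && !TRI->canRealignStack(MF))
      LLVM_DEBUG(dbgs() << "Can't realign stack for " << MF.getName() << "\n");
    bool Realign = TRI->shouldRealignStack(MF) && TRI->canRealignStack(MF);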
bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
@@ -510,6 +520,77 @@ TargetRegisterInfo::getRegSizeInBits(Register Reg,
return getRegSizeInBits(*RC);
}
+bool TargetRegisterInfo::getCoveringSubRegIndexes(
+ const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
+ LaneBitmask LaneMask, SmallVectorImpl<unsigned> &NeededIndexes) const {
+ SmallVector<unsigned, 8> PossibleIndexes;
+ unsigned BestIdx = 0;
+ unsigned BestCover = 0;
+
+ for (unsigned Idx = 1, E = getNumSubRegIndices(); Idx < E; ++Idx) {
+ // Is this index even compatible with the given class?
+ if (getSubClassWithSubReg(RC, Idx) != RC)
+ continue;
+ LaneBitmask SubRegMask = getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LaneMask) {
+ BestIdx = Idx;
+ break;
+ }
+
+ // The index must not cover any lanes outside \p LaneMask.
+ if ((SubRegMask & ~LaneMask).any())
+ continue;
+
+ unsigned PopCount = SubRegMask.getNumLanes();
+ PossibleIndexes.push_back(Idx);
+ if (PopCount > BestCover) {
+ BestCover = PopCount;
+ BestIdx = Idx;
+ }
+ }
+
+ // Abort if we cannot possibly implement the COPY with the given indexes.
+ if (BestIdx == 0)
+ return 0;
+
+ NeededIndexes.push_back(BestIdx);
+
+  // Greedy heuristic: keep iterating, picking the best covering subreg index
+  // each time.
+ LaneBitmask LanesLeft = LaneMask & ~getSubRegIndexLaneMask(BestIdx);
+ while (LanesLeft.any()) {
+ unsigned BestIdx = 0;
+ int BestCover = std::numeric_limits<int>::min();
+ for (unsigned Idx : PossibleIndexes) {
+ LaneBitmask SubRegMask = getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LanesLeft) {
+ BestIdx = Idx;
+ break;
+ }
+
+      // Try to cover as many of the remaining lanes as possible, but
+      // as few of the already-covered lanes as possible.
+ int Cover = (SubRegMask & LanesLeft).getNumLanes() -
+ (SubRegMask & ~LanesLeft).getNumLanes();
+ if (Cover > BestCover) {
+ BestCover = Cover;
+ BestIdx = Idx;
+ }
+ }
+
+ if (BestIdx == 0)
+ return 0; // Impossible to handle
+
+ NeededIndexes.push_back(BestIdx);
+
+ LanesLeft &= ~getSubRegIndexLaneMask(BestIdx);
+ }
+
+ return BestIdx;
+}
+
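Hedged usage sketch (the caller and helper names are assumed): request a set of subregister indexes whose lane masks exactly cover LaneMask, e.g. before rewriting a partial COPY, and bail out if the greedy search cannot cover every lane.

    SmallVector<unsigned, 8> Indexes;
    if (!TRI->getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes))
      return false;               // some lanes have no covering subreg index
    for (unsigned SubIdx : Indexes)
      emitSubRegCopy(SubIdx);     // hypothetical helper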
Register
TargetRegisterInfo::lookThruCopyLike(Register SrcReg,
const MachineRegisterInfo *MRI) const {
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index ecee4aed7f88..1664b4dadfec 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -527,6 +527,11 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge))
return false;
+ // Look for other target specific commute preference.
+ bool Commute;
+ if (TII->hasCommutePreference(*MI, Commute))
+ return Commute;
+
// Since there are no intervening uses for both registers, then commute
// if the def of RegC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
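Sketch of what a target override of the new hook could look like (XYZInstrInfo and isVectorFMA are hypothetical; only the hook's contract is taken from this change): return true when the target has an opinion and report that opinion through Commute.

    bool XYZInstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
      if (isVectorFMA(MI)) { // hypothetical predicate
        Commute = true;      // prefer the commuted form
        return true;
      }
      return false;          // no preference; use the generic heuristic
    }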
@@ -801,8 +806,8 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
for (MachineInstr &OtherMI : make_range(End, KillPos)) {
- // Debug instructions cannot be counted against the limit.
- if (OtherMI.isDebugInstr())
+ // Debug or pseudo instructions cannot be counted against the limit.
+ if (OtherMI.isDebugOrPseudoInstr())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
@@ -974,8 +979,8 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
unsigned NumVisited = 0;
for (MachineInstr &OtherMI :
make_range(mi, MachineBasicBlock::iterator(KillMI))) {
- // Debug instructions cannot be counted against the limit.
- if (OtherMI.isDebugInstr())
+ // Debug or pseudo instructions cannot be counted against the limit.
+ if (OtherMI.isDebugOrPseudoInstr())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
@@ -1357,11 +1362,9 @@ void
TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
TiedPairList &TiedPairs,
unsigned &Dist) {
- bool IsEarlyClobber = false;
- for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
- const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second);
- IsEarlyClobber |= DstMO.isEarlyClobber();
- }
+ bool IsEarlyClobber = llvm::find_if(TiedPairs, [MI](auto const &TP) {
+ return MI->getOperand(TP.second).isEarlyClobber();
+ }) != TiedPairs.end();
bool RemovedKillFlag = false;
bool AllUsesCopied = true;
@@ -1369,9 +1372,9 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
SlotIndex LastCopyIdx;
Register RegB = 0;
unsigned SubRegB = 0;
- for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
- unsigned SrcIdx = TiedPairs[tpi].first;
- unsigned DstIdx = TiedPairs[tpi].second;
+ for (auto &TP : TiedPairs) {
+ unsigned SrcIdx = TP.first;
+ unsigned DstIdx = TP.second;
const MachineOperand &DstMO = MI->getOperand(DstIdx);
Register RegA = DstMO.getReg();
@@ -1549,9 +1552,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
.set(MachineFunctionProperties::Property::TiedOpsRewritten);
TiedOperandMap TiedOperands;
- for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
- MBBI != MBBE; ++MBBI) {
- MBB = &*MBBI;
+ for (MachineBasicBlock &MBBI : *MF) {
+ MBB = &MBBI;
unsigned Dist = 0;
DistanceMap.clear();
SrcRegMap.clear();
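The early-clobber scan rewritten above uses the find_if(...) != end() idiom as a boolean query; the same predicate could also be phrased with any_of (llvm::any_of in-tree, std::any_of in this standalone sketch, which uses plain STL containers as stand-ins for the tied-pair list and the instruction's operands).

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  // Each pair is (SrcIdx, DstIdx); EarlyClobber[i] stands in for
  // "operand i is early-clobber" on the instruction.
  std::vector<std::pair<unsigned, unsigned>> TiedPairs = {{0, 1}, {2, 3}};
  std::vector<bool> EarlyClobber = {false, false, false, true};

  auto DstIsEarlyClobber = [&](const std::pair<unsigned, unsigned> &TP) {
    return EarlyClobber[TP.second];
  };

  // The form used in the patch: find_if(...) != end().
  bool ViaFindIf = std::find_if(TiedPairs.begin(), TiedPairs.end(),
                                DstIsEarlyClobber) != TiedPairs.end();
  // The equivalent direct boolean query.
  bool ViaAnyOf =
      std::any_of(TiedPairs.begin(), TiedPairs.end(), DstIsEarlyClobber);

  assert(ViaFindIf && ViaAnyOf && ViaFindIf == ViaAnyOf);
  return 0;
}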
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index a42095d8718a..2ce6ea1d4212 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -30,9 +30,6 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
@@ -923,9 +920,6 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
return false;
- if (ToPromote < 2)
- return false;
-
IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth,
CurrentVisited, Sources, Sinks, SafeWrap);
Promoter.Mutate();
@@ -952,7 +946,8 @@ bool TypePromotion::runOnFunction(Function &F) {
const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
const TargetTransformInfo &TII =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- RegisterBitWidth = TII.getRegisterBitWidth(false);
+ RegisterBitWidth =
+ TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize();
Ctx = &F.getParent()->getContext();
// Search up from icmps to try to promote their operands.
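The getRegisterBitWidth call now returns a TypeSize-style value that has to be unwrapped with getFixedSize(), which is only meaningful for non-scalable quantities; asking for RGK_Scalar keeps that unwrap safe. A standalone sketch of the idea, with a hand-rolled Size struct standing in for LLVM's TypeSize:

#include <cassert>
#include <cstdint>

// Stand-in for TypeSize: a quantity plus a "scalable" flag. Asking for a
// fixed size is only valid when the quantity is not scalable.
struct Size {
  uint64_t MinValue;
  bool Scalable;
  uint64_t getFixedSize() const {
    assert(!Scalable && "cannot treat a scalable size as fixed");
    return MinValue;
  }
};

// Stand-in for TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar):
// scalar registers report a fixed width, so unwrapping it is safe.
Size getScalarRegisterBitWidth() { return {32, /*Scalable=*/false}; }

int main() {
  uint64_t RegisterBitWidth = getScalarRegisterBitWidth().getFixedSize();
  assert(RegisterBitWidth == 32);
  return 0;
}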
diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index f5dc589a98cb..c9a19948ff2f 100644
--- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -114,25 +114,23 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// Loop over all dead blocks, remembering them and deleting all instructions
// in them.
std::vector<MachineBasicBlock*> DeadBlocks;
- for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- MachineBasicBlock *BB = &*I;
-
+ for (MachineBasicBlock &BB : F) {
// Test for deadness.
- if (!Reachable.count(BB)) {
- DeadBlocks.push_back(BB);
+ if (!Reachable.count(&BB)) {
+ DeadBlocks.push_back(&BB);
// Update dominator and loop info.
- if (MLI) MLI->removeBlock(BB);
- if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+ if (MLI) MLI->removeBlock(&BB);
+ if (MDT && MDT->getNode(&BB)) MDT->eraseNode(&BB);
- while (BB->succ_begin() != BB->succ_end()) {
- MachineBasicBlock* succ = *BB->succ_begin();
+ while (BB.succ_begin() != BB.succ_end()) {
+ MachineBasicBlock* succ = *BB.succ_begin();
MachineBasicBlock::iterator start = succ->begin();
while (start != succ->end() && start->isPHI()) {
for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
if (start->getOperand(i).isMBB() &&
- start->getOperand(i).getMBB() == BB) {
+ start->getOperand(i).getMBB() == &BB) {
start->RemoveOperand(i);
start->RemoveOperand(i-1);
}
@@ -140,7 +138,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
start++;
}
- BB->removeSuccessor(BB->succ_begin());
+ BB.removeSuccessor(BB.succ_begin());
}
}
}
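The PHI-operand loop kept above walks the operand list backwards and removes (value, block) pairs in place; going backwards means the indices of the pairs not yet visited stay valid after each removal. A standalone sketch of that idiom, with a plain vector standing in for the PHI operand list:

#include <cassert>
#include <string>
#include <vector>

int main() {
  // Flattened PHI-like operand list: a def followed by (value, block) pairs.
  std::vector<std::string> Ops = {"def", "v1", "bb1", "v2", "deadbb",
                                  "v3",  "bb3"};

  // Walk the operands backwards and drop every (value, block) pair whose block
  // is dead; erasing from the back keeps the not-yet-visited indices valid.
  for (unsigned I = static_cast<unsigned>(Ops.size()) - 1; I >= 2; I -= 2)
    if (Ops[I] == "deadbb") {
      Ops.erase(Ops.begin() + I);       // like start->RemoveOperand(i)
      Ops.erase(Ops.begin() + (I - 1)); // like start->RemoveOperand(i - 1)
    }

  assert(Ops.size() == 5 && Ops[3] == "v3");
  return 0;
}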
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 978357d8f539..9daebfd9e63d 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -15,18 +15,20 @@
using namespace llvm;
EVT EVT::changeExtendedTypeToInteger() const {
+ assert(isExtended() && "Type is not extended!");
LLVMContext &Context = LLVMTy->getContext();
return getIntegerVT(Context, getSizeInBits());
}
EVT EVT::changeExtendedVectorElementTypeToInteger() const {
+ assert(isExtended() && "Type is not extended!");
LLVMContext &Context = LLVMTy->getContext();
EVT IntTy = getIntegerVT(Context, getScalarSizeInBits());
- return getVectorVT(Context, IntTy, getVectorNumElements(),
- isScalableVector());
+ return getVectorVT(Context, IntTy, getVectorElementCount());
}
EVT EVT::changeExtendedVectorElementType(EVT EltVT) const {
+ assert(isExtended() && "Type is not extended!");
LLVMContext &Context = LLVMTy->getContext();
return getVectorVT(Context, EltVT, getVectorElementCount());
}
@@ -196,6 +198,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
case MVT::x86amx: return Type::getX86_AMXTy(Context);
+ case MVT::externref:
+ return PointerType::get(StructType::create(Context), 10);
+ case MVT::funcref:
+ return PointerType::get(StructType::create(Context), 20);
case MVT::v1i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1);
case MVT::v2i1:
@@ -236,6 +242,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt8Ty(Context), 128);
case MVT::v256i8:
return FixedVectorType::get(Type::getInt8Ty(Context), 256);
+ case MVT::v512i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 512);
+ case MVT::v1024i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 1024);
case MVT::v1i16:
return FixedVectorType::get(Type::getInt16Ty(Context), 1);
case MVT::v2i16:
@@ -254,6 +264,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt16Ty(Context), 64);
case MVT::v128i16:
return FixedVectorType::get(Type::getInt16Ty(Context), 128);
+ case MVT::v256i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 256);
+ case MVT::v512i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 512);
case MVT::v1i32:
return FixedVectorType::get(Type::getInt32Ty(Context), 1);
case MVT::v2i32:
@@ -264,6 +278,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt32Ty(Context), 4);
case MVT::v5i32:
return FixedVectorType::get(Type::getInt32Ty(Context), 5);
+ case MVT::v6i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 6);
+ case MVT::v7i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 7);
case MVT::v8i32:
return FixedVectorType::get(Type::getInt32Ty(Context), 8);
case MVT::v16i32:
@@ -286,6 +304,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt64Ty(Context), 1);
case MVT::v2i64:
return FixedVectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::v3i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 3);
case MVT::v4i64:
return FixedVectorType::get(Type::getInt64Ty(Context), 4);
case MVT::v8i64:
@@ -302,6 +322,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt64Ty(Context), 256);
case MVT::v1i128:
return FixedVectorType::get(Type::getInt128Ty(Context), 1);
+ case MVT::v1f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 1);
case MVT::v2f16:
return FixedVectorType::get(Type::getHalfTy(Context), 2);
case MVT::v3f16:
@@ -318,6 +340,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getHalfTy(Context), 64);
case MVT::v128f16:
return FixedVectorType::get(Type::getHalfTy(Context), 128);
+ case MVT::v256f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 256);
+ case MVT::v512f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 512);
case MVT::v2bf16:
return FixedVectorType::get(Type::getBFloatTy(Context), 2);
case MVT::v3bf16:
@@ -344,6 +370,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getFloatTy(Context), 4);
case MVT::v5f32:
return FixedVectorType::get(Type::getFloatTy(Context), 5);
+ case MVT::v6f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 6);
+ case MVT::v7f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 7);
case MVT::v8f32:
return FixedVectorType::get(Type::getFloatTy(Context), 8);
case MVT::v16f32:
@@ -366,6 +396,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getDoubleTy(Context), 1);
case MVT::v2f64:
return FixedVectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::v3f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 3);
case MVT::v4f64:
return FixedVectorType::get(Type::getDoubleTy(Context), 4);
case MVT::v8f64:
@@ -456,6 +488,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return ScalableVectorType::get(Type::getHalfTy(Context), 16);
case MVT::nxv32f16:
return ScalableVectorType::get(Type::getHalfTy(Context), 32);
+ case MVT::nxv1bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 1);
case MVT::nxv2bf16:
return ScalableVectorType::get(Type::getBFloatTy(Context), 2);
case MVT::nxv4bf16:
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 5e0ff9d9092c..0f164e2637a2 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -94,12 +95,18 @@ void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) {
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
unsigned Size = TRI->getSpillSize(*RC);
Align Alignment = TRI->getSpillAlign(*RC);
+ // Use the preferred alignment only if we are still able to realign the stack.
+ auto &ST = MF->getSubtarget();
+ Align CurrentAlign = ST.getFrameLowering()->getStackAlign();
+ if (Alignment > CurrentAlign && !ST.getRegisterInfo()->canRealignStack(*MF)) {
+ Alignment = CurrentAlign;
+ }
int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Alignment);
++NumSpillSlots;
return SS;
}
-bool VirtRegMap::hasPreferredPhys(Register VirtReg) {
+bool VirtRegMap::hasPreferredPhys(Register VirtReg) const {
Register Hint = MRI->getSimpleHint(VirtReg);
if (!Hint.isValid())
return false;
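The hunk above clamps the spill-slot alignment: the register class's preferred spill alignment is only honoured when the stack can still be realigned, otherwise the frame's stack alignment is used. A standalone sketch of that decision, with plain integers standing in for Align:

#include <cassert>
#include <cstdint>

// Mirror of the clamp added to createSpillSlot above.
uint64_t chooseSpillAlign(uint64_t PreferredAlign, uint64_t StackAlign,
                          bool CanRealignStack) {
  if (PreferredAlign > StackAlign && !CanRealignStack)
    return StackAlign;
  return PreferredAlign;
}

int main() {
  // A 32-byte spill slot on a 16-byte-aligned stack only gets 32-byte
  // alignment if the stack can be realigned.
  assert(chooseSpillAlign(32, 16, /*CanRealignStack=*/true) == 32);
  assert(chooseSpillAlign(32, 16, /*CanRealignStack=*/false) == 16);
  assert(chooseSpillAlign(8, 16, /*CanRealignStack=*/false) == 8);
  return 0;
}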
@@ -108,7 +115,7 @@ bool VirtRegMap::hasPreferredPhys(Register VirtReg) {
return Register(getPhys(VirtReg)) == Hint;
}
-bool VirtRegMap::hasKnownPreference(Register VirtReg) {
+bool VirtRegMap::hasKnownPreference(Register VirtReg) const {
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
if (Register::isPhysicalRegister(Hint.second))
return true;
@@ -181,27 +188,35 @@ class VirtRegRewriter : public MachineFunctionPass {
SlotIndexes *Indexes;
LiveIntervals *LIS;
VirtRegMap *VRM;
+ LiveDebugVariables *DebugVars;
+ DenseSet<Register> RewriteRegs;
+ bool ClearVirtRegs;
void rewrite();
void addMBBLiveIns();
bool readsUndefSubreg(const MachineOperand &MO) const;
- void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const;
- void handleIdentityCopy(MachineInstr &MI) const;
+ void addLiveInsForSubRanges(const LiveInterval &LI, MCRegister PhysReg) const;
+ void handleIdentityCopy(MachineInstr &MI);
void expandCopyBundle(MachineInstr &MI) const;
bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
public:
static char ID;
-
- VirtRegRewriter() : MachineFunctionPass(ID) {}
+ VirtRegRewriter(bool ClearVirtRegs_ = true) :
+ MachineFunctionPass(ID),
+ ClearVirtRegs(ClearVirtRegs_) {}
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction&) override;
MachineFunctionProperties getSetProperties() const override {
- return MachineFunctionProperties().set(
+ if (ClearVirtRegs) {
+ return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ return MachineFunctionProperties();
}
};
@@ -224,12 +239,17 @@ INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
AU.addRequired<VirtRegMap>();
+
+ if (!ClearVirtRegs)
+ AU.addPreserved<LiveDebugVariables>();
+
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -241,6 +261,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
Indexes = &getAnalysis<SlotIndexes>();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();
+ DebugVars = getAnalysisIfAvailable<LiveDebugVariables>();
LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
<< "********** Function: " << MF->getName() << '\n');
LLVM_DEBUG(VRM->dump());
@@ -254,18 +275,24 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
// Rewrite virtual registers.
rewrite();
- // Write out new DBG_VALUE instructions.
- getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+ if (DebugVars && ClearVirtRegs) {
+ // Write out new DBG_VALUE instructions.
+
+ // We only do this if ClearVirtRegs is specified since this should be the
+ // final run of the pass and we don't want to emit them multiple times.
+ DebugVars->emitDebugValues(VRM);
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
+ }
- // All machine operands and other references to virtual registers have been
- // replaced. Remove the virtual registers and release all the transient data.
- VRM->clearAllVirt();
- MRI->clearVirtRegs();
return true;
}
void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
- Register PhysReg) const {
+ MCRegister PhysReg) const {
assert(!LI.empty());
assert(LI.hasSubRanges());
@@ -321,7 +348,12 @@ void VirtRegRewriter::addMBBLiveIns() {
// This is a virtual register that is live across basic blocks. Its
// assigned PhysReg must be marked as live-in to those blocks.
Register PhysReg = VRM->getPhys(VirtReg);
- assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
+ if (PhysReg == VirtRegMap::NO_PHYS_REG) {
+ // There may be no physical register assigned if only some register
+ // classes were already allocated.
+ assert(!ClearVirtRegs && "Unmapped virtual register");
+ continue;
+ }
if (LI.hasSubRanges()) {
addLiveInsForSubRanges(LI, PhysReg);
@@ -372,12 +404,21 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
return true;
}
-void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
+void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
if (!MI.isIdentityCopy())
return;
LLVM_DEBUG(dbgs() << "Identity copy: " << MI);
++NumIdCopies;
+ Register DstReg = MI.getOperand(0).getReg();
+
+ // We may have deferred allocation of the virtual register, and the rewrite
+ // regs code doesn't handle the liveness update.
+ if (DstReg.isVirtual())
+ return;
+
+ RewriteRegs.insert(DstReg);
+
// Copies like:
// %r0 = COPY undef %r0
// %al = COPY %al, implicit-def %eax
@@ -517,8 +558,12 @@ void VirtRegRewriter::rewrite() {
continue;
Register VirtReg = MO.getReg();
MCRegister PhysReg = VRM->getPhys(VirtReg);
- assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
- "Instruction uses unmapped VirtReg");
+ if (PhysReg == VirtRegMap::NO_PHYS_REG)
+ continue;
+
+ assert(Register(PhysReg).isPhysical());
+
+ RewriteRegs.insert(PhysReg);
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
@@ -590,4 +635,21 @@ void VirtRegRewriter::rewrite() {
handleIdentityCopy(*MI);
}
}
+
+ if (LIS) {
+ // Don't bother maintaining accurate LiveIntervals for registers which were
+ // already allocated.
+ for (Register PhysReg : RewriteRegs) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid();
+ ++Units) {
+ LIS->removeRegUnit(*Units);
+ }
+ }
+ }
+
+ RewriteRegs.clear();
+}
+
+FunctionPass *llvm::createVirtRegRewriter(bool ClearVirtRegs) {
+ return new VirtRegRewriter(ClearVirtRegs);
}
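With ClearVirtRegs == false the rewriter may now run before every register class has been allocated, so operands whose virtual register has no physical assignment are skipped rather than asserted on, and virtual registers are only cleared on the final run. A standalone model of that skip-or-assert behaviour, with std types standing in for the LLVM data structures:

#include <cassert>
#include <map>
#include <optional>
#include <vector>

using VirtReg = int;
using PhysReg = int;

// When this is not the final rewrite (ClearVirtRegs == false), operands whose
// virtual register has no assignment yet are left alone for a later run.
std::vector<std::optional<PhysReg>>
rewriteOperands(const std::vector<VirtReg> &Ops,
                const std::map<VirtReg, PhysReg> &Assignment,
                bool ClearVirtRegs) {
  std::vector<std::optional<PhysReg>> Out;
  for (VirtReg V : Ops) {
    auto It = Assignment.find(V);
    if (It == Assignment.end()) {
      // On the final run every operand must have been assigned.
      assert(!ClearVirtRegs && "instruction uses unmapped VirtReg");
      Out.push_back(std::nullopt); // leave the operand virtual for later
      continue;
    }
    Out.push_back(It->second);
  }
  return Out;
}

int main() {
  std::map<VirtReg, PhysReg> Assignment = {{100, 3}};
  auto Res = rewriteOperands({100, 101}, Assignment, /*ClearVirtRegs=*/false);
  assert(Res[0] && *Res[0] == 3 && !Res[1]);
  return 0;
}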
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 53424556682d..c4c84cd921fa 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -77,21 +77,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/BreadthFirstIterator.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -117,19 +108,14 @@ class WasmEHPrepare : public FunctionPass {
FunctionCallee CallPersonalityF =
nullptr; // _Unwind_CallPersonality() wrapper
- bool prepareEHPads(Function &F);
bool prepareThrows(Function &F);
-
- bool IsEHPadFunctionsSetUp = false;
- void setupEHPadFunctions(Function &F);
- void prepareEHPad(BasicBlock *BB, bool NeedPersonality, bool NeedLSDA = false,
- unsigned Index = 0);
+ bool prepareEHPads(Function &F);
+ void prepareEHPad(BasicBlock *BB, bool NeedPersonality, unsigned Index = 0);
public:
static char ID; // Pass identification, replacement for typeid
WasmEHPrepare() : FunctionPass(ID) {}
- void getAnalysisUsage(AnalysisUsage &AU) const override;
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@@ -142,16 +128,11 @@ public:
char WasmEHPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(WasmEHPrepare, DEBUG_TYPE,
"Prepare WebAssembly exceptions", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions",
false, false)
FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); }
-void WasmEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTreeWrapperPass>();
-}
-
bool WasmEHPrepare::doInitialization(Module &M) {
IRBuilder<> IRB(M.getContext());
LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index
@@ -164,19 +145,18 @@ bool WasmEHPrepare::doInitialization(Module &M) {
// Erase each of the specified BBs if it does not have any remaining
// predecessors, and also erase all of its dead children.
template <typename Container>
-static void eraseDeadBBsAndChildren(const Container &BBs, DomTreeUpdater *DTU) {
+static void eraseDeadBBsAndChildren(const Container &BBs) {
SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
while (!WL.empty()) {
auto *BB = WL.pop_back_val();
if (!pred_empty(BB))
continue;
WL.append(succ_begin(BB), succ_end(BB));
- DeleteDeadBlock(BB, DTU);
+ DeleteDeadBlock(BB);
}
}
bool WasmEHPrepare::runOnFunction(Function &F) {
- IsEHPadFunctionsSetUp = false;
bool Changed = false;
Changed |= prepareThrows(F);
Changed |= prepareEHPads(F);
@@ -184,9 +164,6 @@ bool WasmEHPrepare::runOnFunction(Function &F) {
}
bool WasmEHPrepare::prepareThrows(Function &F) {
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DomTreeUpdater DTU(&DT, /*PostDominatorTree*/ nullptr,
- DomTreeUpdater::UpdateStrategy::Eager);
Module &M = *F.getParent();
IRBuilder<> IRB(F.getContext());
bool Changed = false;
@@ -209,102 +186,30 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end());
IRB.SetInsertPoint(BB);
IRB.CreateUnreachable();
- eraseDeadBBsAndChildren(Succs, &DTU);
+ eraseDeadBBsAndChildren(Succs);
}
return Changed;
}
bool WasmEHPrepare::prepareEHPads(Function &F) {
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- bool Changed = false;
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
- // There are two things to decide: whether we need a personality function call
- // and whether we need a `wasm.lsda()` call and its store.
- //
- // For the personality function call, catchpads with `catch (...)` and
- // cleanuppads don't need it, because exceptions are always caught. Others all
- // need it.
- //
- // For `wasm.lsda()` and its store, in order to minimize the number of them,
- // we need a way to figure out whether we have encountered `wasm.lsda()` call
- // in any of EH pads that dominates the current EH pad. To figure that out, we
- // now visit EH pads in BFS order in the dominator tree so that we visit
- // parent BBs first before visiting its child BBs in the domtree.
- //
- // We keep a set named `ExecutedLSDA`, which basically means "Do we have
- // `wasm.lsda() either in the current EH pad or any of its parent EH pads in
- // the dominator tree?". This is to prevent scanning the domtree up to the
- // root every time we examine an EH pad, in the worst case: each EH pad only
- // needs to check its immediate parent EH pad.
- //
- // - If any of its parent EH pads in the domtree has `wasm.lsda`, this means
- // we don't need `wasm.lsda()` in the current EH pad. We also insert the
- // current EH pad in `ExecutedLSDA` set.
- // - If none of its parent EH pad has `wasm.lsda()`,
- // - If the current EH pad is a `catch (...)` or a cleanuppad, done.
- // - If the current EH pad is neither a `catch (...)` nor a cleanuppad,
- // add `wasm.lsda()` and the store in the current EH pad, and add the
- // current EH pad to `ExecutedLSDA` set.
- //
- // TODO Can we not store LSDA address in user function but make libcxxabi
- // compute it?
- DenseSet<Value *> ExecutedLSDA;
- unsigned Index = 0;
- for (auto DomNode : breadth_first(&DT)) {
- auto *BB = DomNode->getBlock();
- auto *Pad = BB->getFirstNonPHI();
- if (!Pad || (!isa<CatchPadInst>(Pad) && !isa<CleanupPadInst>(Pad)))
+ SmallVector<BasicBlock *, 16> CatchPads;
+ SmallVector<BasicBlock *, 16> CleanupPads;
+ for (BasicBlock &BB : F) {
+ if (!BB.isEHPad())
continue;
- Changed = true;
-
- Value *ParentPad = nullptr;
- if (CatchPadInst *CPI = dyn_cast<CatchPadInst>(Pad)) {
- ParentPad = CPI->getCatchSwitch()->getParentPad();
- if (ExecutedLSDA.count(ParentPad)) {
- ExecutedLSDA.insert(CPI);
- // We insert its associated catchswitch too, because
- // FuncletPadInst::getParentPad() returns a CatchSwitchInst if the child
- // FuncletPadInst is a CleanupPadInst.
- ExecutedLSDA.insert(CPI->getCatchSwitch());
- }
- } else { // CleanupPadInst
- ParentPad = cast<CleanupPadInst>(Pad)->getParentPad();
- if (ExecutedLSDA.count(ParentPad))
- ExecutedLSDA.insert(Pad);
- }
-
- if (CatchPadInst *CPI = dyn_cast<CatchPadInst>(Pad)) {
- if (CPI->getNumArgOperands() == 1 &&
- cast<Constant>(CPI->getArgOperand(0))->isNullValue())
- // In case of a single catch (...), we need neither personality call nor
- // wasm.lsda() call
- prepareEHPad(BB, false);
- else {
- if (ExecutedLSDA.count(CPI))
- // catch (type), but one of parents already has wasm.lsda() call
- prepareEHPad(BB, true, false, Index++);
- else {
- // catch (type), and none of parents has wasm.lsda() call. We have to
- // add the call in this EH pad, and record this EH pad in
- // ExecutedLSDA.
- ExecutedLSDA.insert(CPI);
- ExecutedLSDA.insert(CPI->getCatchSwitch());
- prepareEHPad(BB, true, true, Index++);
- }
- }
- } else if (isa<CleanupPadInst>(Pad)) {
- // Cleanup pads need neither personality call nor wasm.lsda() call
- prepareEHPad(BB, false);
- }
+ auto *Pad = BB.getFirstNonPHI();
+ if (isa<CatchPadInst>(Pad))
+ CatchPads.push_back(&BB);
+ else if (isa<CleanupPadInst>(Pad))
+ CleanupPads.push_back(&BB);
}
+ if (CatchPads.empty() && CleanupPads.empty())
+ return false;
- return Changed;
-}
-
-void WasmEHPrepare::setupEHPadFunctions(Function &F) {
- Module &M = *F.getParent();
- IRBuilder<> IRB(F.getContext());
assert(F.hasPersonalityFn() && "Personality function not found");
// __wasm_lpad_context global variable
@@ -336,16 +241,30 @@ void WasmEHPrepare::setupEHPadFunctions(Function &F) {
"_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy());
if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee()))
F->setDoesNotThrow();
+
+ unsigned Index = 0;
+ for (auto *BB : CatchPads) {
+ auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI());
+ // In case of a single catch (...), we don't need to emit a personality
+ // function call.
+ if (CPI->getNumArgOperands() == 1 &&
+ cast<Constant>(CPI->getArgOperand(0))->isNullValue())
+ prepareEHPad(BB, false);
+ else
+ prepareEHPad(BB, true, Index++);
+ }
+
+ // Cleanup pads don't need a personality function call.
+ for (auto *BB : CleanupPads)
+ prepareEHPad(BB, false);
+
+ return true;
}
// Prepare an EH pad for Wasm EH handling. If NeedPersonality is false, Index is
// ignored.
void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
- bool NeedLSDA, unsigned Index) {
- if (!IsEHPadFunctionsSetUp) {
- IsEHPadFunctionsSetUp = true;
- setupEHPadFunctions(*BB->getParent());
- }
+ unsigned Index) {
assert(BB->isEHPad() && "BB is not an EHPad!");
IRBuilder<> IRB(BB->getContext());
IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
@@ -361,8 +280,8 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
}
}
- // Cleanup pads w/o __clang_call_terminate call do not have any of
- // wasm.get.exception() or wasm.get.ehselector() calls. We need to do nothing.
+ // Cleanup pads do not have any wasm.get.exception() or
+ // wasm.get.ehselector() calls, so there is nothing to do.
if (!GetExnCI) {
assert(!GetSelectorCI &&
"wasm.get.ehselector() cannot exist w/o wasm.get.exception()");
@@ -399,9 +318,11 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);
auto *CPI = cast<CatchPadInst>(FPI);
- if (NeedLSDA)
- // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda();
- IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
+ // TODO Storing the LSDA address is not always necessary: it can be skipped
+ // when it is already set in a dominating EH pad and there is no function call
+ // between that EH pad and here. Consider optimizing those cases.
+ // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda();
+ IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
// Pseudocode: _Unwind_CallPersonality(exn);
CallInst *PersCI = IRB.CreateCall(CallPersonalityF, CatchCI,
@@ -436,9 +357,9 @@ void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
// Currently there should be only one handler per catchswitch.
- EHInfo.setEHPadUnwindDest(&BB, *CatchSwitch->handlers().begin());
+ EHInfo.setUnwindDest(&BB, *CatchSwitch->handlers().begin());
else // cleanuppad
- EHInfo.setEHPadUnwindDest(&BB, UnwindBB);
+ EHInfo.setUnwindDest(&BB, UnwindBB);
}
}
}
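After this change every catchpad that needs a personality call gets the same straight-line sequence: store the landing-pad index and the wasm.lsda() result into __wasm_lpad_context, then call the _Unwind_CallPersonality wrapper, which (in the rest of the pass) leaves the selector for wasm.get.ehselector() in the context. A standalone model of that sequence, with stub functions standing in for the intrinsic and the wrapper:

#include <cassert>
#include <cstdint>

// Stand-in for the __wasm_lpad_context global; the layout mirrors
// LPadContextTy above (lpad_index, lsda, selector).
struct LPadContext {
  int32_t LPadIndex;
  void *LSDA;
  int32_t Selector;
} WasmLPadContext;

// Stand-in for wasm.lsda(): returns the address of the exception table.
void *wasmLSDA() {
  static char LSDAStub;
  return &LSDAStub;
}

// Stand-in for the _Unwind_CallPersonality wrapper: runs the personality
// routine and leaves the selector in the context.
int32_t unwindCallPersonality(void *Exn) {
  (void)Exn;
  WasmLPadContext.Selector = 1; // pretend the personality matched handler 1
  return 0;
}

// The sequence the pass emits into a catchpad that needs a personality call.
int32_t handleCatchPad(int32_t Index, void *Exn) {
  WasmLPadContext.LPadIndex = Index; // __wasm_lpad_context.lpad_index = Index
  WasmLPadContext.LSDA = wasmLSDA(); // __wasm_lpad_context.lsda = wasm.lsda()
  unwindCallPersonality(Exn);        // _Unwind_CallPersonality(exn)
  return WasmLPadContext.Selector;   // value used for wasm.get.ehselector()
}

int main() {
  int Dummy = 0;
  assert(handleCatchPad(0, &Dummy) == 1);
  return 0;
}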
diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp
index 96d256ba57a3..4564aa1c1278 100644
--- a/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -714,16 +714,14 @@ void WinEHPrepare::demotePHIsOnFunclets(Function &F,
bool DemoteCatchSwitchPHIOnly) {
// Strip PHI nodes off of EH pads.
SmallVector<PHINode *, 16> PHINodes;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
- BasicBlock *BB = &*FI++;
- if (!BB->isEHPad())
+ for (BasicBlock &BB : make_early_inc_range(F)) {
+ if (!BB.isEHPad())
continue;
- if (DemoteCatchSwitchPHIOnly && !isa<CatchSwitchInst>(BB->getFirstNonPHI()))
+ if (DemoteCatchSwitchPHIOnly && !isa<CatchSwitchInst>(BB.getFirstNonPHI()))
continue;
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *I = &*BI++;
- auto *PN = dyn_cast<PHINode>(I);
+ for (Instruction &I : make_early_inc_range(BB)) {
+ auto *PN = dyn_cast<PHINode>(&I);
// Stop at the first non-PHI.
if (!PN)
break;
@@ -986,9 +984,9 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
BasicBlock::iterator CallI =
std::prev(BB->getTerminator()->getIterator());
auto *CI = cast<CallInst>(&*CallI);
- changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+ changeToUnreachable(CI);
} else {
- changeToUnreachable(&I, /*UseLLVMTrap=*/false);
+ changeToUnreachable(&I);
}
// There are no more instructions in the block (except for unreachable),
@@ -1009,7 +1007,7 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad;
if (IsUnreachableRet || IsUnreachableCatchret ||
IsUnreachableCleanupret) {
- changeToUnreachable(TI, /*UseLLVMTrap=*/false);
+ changeToUnreachable(TI);
} else if (isa<InvokeInst>(TI)) {
if (Personality == EHPersonality::MSVC_CXX && CleanupPad) {
// Invokes within a cleanuppad for the MSVC++ personality never
@@ -1025,11 +1023,10 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
// Clean up some of the mess we made by removing useless PHI nodes, trivial
// branches, etc.
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
- BasicBlock *BB = &*FI++;
- SimplifyInstructionsInBlock(BB);
- ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
- MergeBlockIntoPredecessor(BB);
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
+ SimplifyInstructionsInBlock(&BB);
+ ConstantFoldTerminator(&BB, /*DeleteDeadConditions=*/true);
+ MergeBlockIntoPredecessor(&BB);
}
// We might have some unreachable blocks after cleaning up some impossible
@@ -1109,9 +1106,7 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
// Otherwise, we have a PHI on a terminator EHPad, and we give up and insert
// loads of the slot before every use.
DenseMap<BasicBlock *, Value *> Loads;
- for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
- UI != UE;) {
- Use &U = *UI++;
+ for (Use &U : llvm::make_early_inc_range(PN->uses())) {
auto *UsingInst = cast<Instruction>(U.getUser());
if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) {
// Use is on an EH pad phi. Leave it alone; we'll insert loads and