aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIMachineScheduler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineScheduler.cpp')
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp168
1 files changed, 53 insertions, 115 deletions
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 1cfa98430020..7125b411c603 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
#include "SIMachineScheduler.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -295,7 +295,7 @@ static bool isDefBetween(unsigned Reg,
const MachineInstr* MI = &*UI;
if (MI->isDebugValue())
continue;
- SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
+ SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
if (InstSlot >= First && InstSlot <= Last)
return true;
}
@@ -327,9 +327,9 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
// Do not Track Physical Registers, because it messes up.
- for (unsigned Reg : RPTracker.getPressure().LiveInRegs) {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- LiveInRegs.insert(Reg);
+ for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
+ if (TargetRegisterInfo::isVirtualRegister(RegMaskPair.RegUnit))
+ LiveInRegs.insert(RegMaskPair.RegUnit);
}
LiveOutRegs.clear();
// There is several possibilities to distinguish:
@@ -354,11 +354,12 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
// The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
// Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7
// The use of findDefBetween removes the case 4.
- for (unsigned Reg : RPTracker.getPressure().LiveOutRegs) {
+ for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
+ unsigned Reg = RegMaskPair.RegUnit;
if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- isDefBetween(Reg, LIS->getInstructionIndex(BeginBlock).getRegSlot(),
- LIS->getInstructionIndex(EndBlock).getRegSlot(),
- MRI, LIS)) {
+ isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
+ LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
+ LIS)) {
LiveOutRegs.insert(Reg);
}
}
@@ -463,6 +464,9 @@ void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
for (SDep& Succ : SU->Succs) {
SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->NodeNum >= DAG->SUnits.size())
+ continue;
+
if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
continue;
@@ -521,12 +525,9 @@ void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
}
Preds.push_back(Pred);
-#ifndef NDEBUG
- for (SIScheduleBlock* S : Succs) {
- if (PredID == S->getID())
- assert(!"Loop in the Block Graph!\n");
- }
-#endif
+ assert(none_of(Succs,
+ [=](SIScheduleBlock *S) { return PredID == S->getID(); }) &&
+ "Loop in the Block Graph!");
}
void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) {
@@ -540,12 +541,9 @@ void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) {
if (Succ->isHighLatencyBlock())
++NumHighLatencySuccessors;
Succs.push_back(Succ);
-#ifndef NDEBUG
- for (SIScheduleBlock* P : Preds) {
- if (SuccID == P->getID())
- assert("Loop in the Block Graph!\n");
- }
-#endif
+ assert(none_of(Preds,
+ [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
+ "Loop in the Block Graph!");
}
#ifndef NDEBUG
@@ -712,8 +710,8 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
// Traverse TopDown, and give different colors to SUs depending
// on which combination of High Latencies they depend on.
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[DAG->TopDownIndex2SU[i]];
+ for (unsigned SUNum : DAG->TopDownIndex2SU) {
+ SUnit *SU = &DAG->SUnits[SUNum];
std::set<unsigned> SUColors;
// Already given.
@@ -754,8 +752,8 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
// Same as before, but BottomUp.
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ for (unsigned SUNum : DAG->BottomUpIndex2SU) {
+ SUnit *SU = &DAG->SUnits[SUNum];
std::set<unsigned> SUColors;
// Already given.
@@ -826,8 +824,8 @@ void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::vector<int> PendingColoring = CurrentColoring;
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ for (unsigned SUNum : DAG->BottomUpIndex2SU) {
+ SUnit *SU = &DAG->SUnits[SUNum];
std::set<unsigned> SUColors;
std::set<unsigned> SUColorsPending;
@@ -893,8 +891,8 @@ void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
unsigned DAGSize = DAG->SUnits.size();
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ for (unsigned SUNum : DAG->BottomUpIndex2SU) {
+ SUnit *SU = &DAG->SUnits[SUNum];
std::set<unsigned> SUColors;
if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
@@ -919,8 +917,8 @@ void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
unsigned DAGSize = DAG->SUnits.size();
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ for (unsigned SUNum : DAG->BottomUpIndex2SU) {
+ SUnit *SU = &DAG->SUnits[SUNum];
std::set<unsigned> SUColors;
if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
@@ -940,8 +938,8 @@ void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
unsigned DAGSize = DAG->SUnits.size();
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ for (unsigned SUNum : DAG->BottomUpIndex2SU) {
+ SUnit *SU = &DAG->SUnits[SUNum];
std::set<unsigned> SUColors;
if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
@@ -962,8 +960,8 @@ void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
unsigned DAGSize = DAG->SUnits.size();
std::map<unsigned, unsigned> ColorCount;
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ for (unsigned SUNum : DAG->BottomUpIndex2SU) {
+ SUnit *SU = &DAG->SUnits[SUNum];
unsigned color = CurrentColoring[SU->NodeNum];
std::map<unsigned, unsigned>::iterator Pos = ColorCount.find(color);
if (Pos != ColorCount.end()) {
@@ -973,8 +971,8 @@ void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
}
}
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ for (unsigned SUNum : DAG->BottomUpIndex2SU) {
+ SUnit *SU = &DAG->SUnits[SUNum];
unsigned color = CurrentColoring[SU->NodeNum];
std::set<unsigned> SUColors;
@@ -1006,8 +1004,8 @@ void SIScheduleBlockCreator::regroupNoUserInstructions() {
unsigned DAGSize = DAG->SUnits.size();
int GroupID = NextNonReservedID++;
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ for (unsigned SUNum : DAG->BottomUpIndex2SU) {
+ SUnit *SU = &DAG->SUnits[SUNum];
bool hasSuccessor = false;
if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
@@ -1223,7 +1221,7 @@ void SIScheduleBlockCreator::scheduleInsideBlocks() {
// is the most cpu intensive operation of the scheduler.
// It would gain a lot if there was a way to recompute the
// LiveIntervals for the entire scheduling region.
- DAG->getLIS()->handleMove(MI, /*UpdateFlags=*/true);
+ DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
PosNew.push_back(CurrentTopFastSched);
}
}
@@ -1249,7 +1247,7 @@ void SIScheduleBlockCreator::scheduleInsideBlocks() {
DAG->getBB()->splice(POld, DAG->getBB(), PNew);
// Update LiveIntervals.
- DAG->getLIS()->handleMove(POld, /*UpdateFlags=*/true);
+ DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
}
}
@@ -1675,70 +1673,10 @@ ScheduleDAGInstrs *llvm::createSIMachineScheduler(MachineSchedContext *C) {
// Does a topological sort over the SUs.
// Both TopDown and BottomUp
void SIScheduleDAGMI::topologicalSort() {
- std::vector<int> TopDownSU2Index;
- unsigned DAGSize = SUnits.size();
- std::vector<SUnit*> WorkList;
-
- DEBUG(dbgs() << "Topological Sort\n");
- WorkList.reserve(DAGSize);
-
- TopDownIndex2SU.resize(DAGSize);
- TopDownSU2Index.resize(DAGSize);
- BottomUpIndex2SU.resize(DAGSize);
-
- WorkList.push_back(&getExitSU());
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- int NodeNum = SU->NodeNum;
- unsigned Degree = SU->Succs.size();
- TopDownSU2Index[NodeNum] = Degree;
- if (Degree == 0) {
- assert(SU->Succs.empty() && "SUnit should have no successors");
- WorkList.push_back(SU);
- }
- }
-
- int Id = DAGSize;
- while (!WorkList.empty()) {
- SUnit *SU = WorkList.back();
- WorkList.pop_back();
- if (SU->NodeNum < DAGSize) {
- TopDownSU2Index[SU->NodeNum] = --Id;
- TopDownIndex2SU[Id] = SU->NodeNum;
- }
- for (SDep& Pred : SU->Preds) {
- SUnit *SU = Pred.getSUnit();
- if (SU->NodeNum < DAGSize && !--TopDownSU2Index[SU->NodeNum])
- WorkList.push_back(SU);
- }
- }
-
- BottomUpIndex2SU = std::vector<int>(TopDownIndex2SU.rbegin(),
- TopDownIndex2SU.rend());
+ Topo.InitDAGTopologicalSorting();
-#ifndef NDEBUG
- // Check correctness of the ordering
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- for (SDep& Pred : SU->Preds) {
- if (Pred.getSUnit()->NodeNum >= DAGSize)
- continue;
- assert(TopDownSU2Index[SU->NodeNum] >
- TopDownSU2Index[Pred.getSUnit()->NodeNum] &&
- "Wrong Top Down topological sorting");
- }
- }
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- for (SDep& Succ : SU->Succs) {
- if (Succ.getSUnit()->NodeNum >= DAGSize)
- continue;
- assert(TopDownSU2Index[SU->NodeNum] <
- TopDownSU2Index[Succ.getSUnit()->NodeNum] &&
- "Wrong Bottom Up topological sorting");
- }
- }
-#endif
+ TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
+ BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
}
// Move low latencies further from their user without
@@ -1759,7 +1697,7 @@ void SIScheduleDAGMI::moveLowLatencies() {
for (SDep& PredDep : SU->Preds) {
SUnit *Pred = PredDep.getSUnit();
- if (SITII->isLowLatencyInstruction(Pred->getInstr())) {
+ if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
IsLowLatencyUser = true;
}
if (Pred->NodeNum >= DAGSize)
@@ -1769,7 +1707,7 @@ void SIScheduleDAGMI::moveLowLatencies() {
MinPos = PredPos + 1;
}
- if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+ if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
unsigned BestPos = LastLowLatencyUser + 1;
if ((int)BestPos <= LastLowLatencyPos)
BestPos = LastLowLatencyPos + 1;
@@ -1794,7 +1732,7 @@ void SIScheduleDAGMI::moveLowLatencies() {
bool CopyForLowLat = false;
for (SDep& SuccDep : SU->Succs) {
SUnit *Succ = SuccDep.getSUnit();
- if (SITII->isLowLatencyInstruction(Succ->getInstr())) {
+ if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
CopyForLowLat = true;
}
}
@@ -1855,7 +1793,6 @@ void SIScheduleDAGMI::schedule()
SU.dumpAll(this)
);
- Topo.InitDAGTopologicalSorting();
topologicalSort();
findRootsAndBiasEdges(TopRoots, BotRoots);
// We reuse several ScheduleDAGMI and ScheduleDAGMILive
@@ -1878,20 +1815,21 @@ void SIScheduleDAGMI::schedule()
for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
SUnit *SU = &SUnits[i];
- unsigned BaseLatReg, OffLatReg;
- if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+ unsigned BaseLatReg;
+ int64_t OffLatReg;
+ if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
IsLowLatencySU[i] = 1;
- if (SITII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseLatReg,
- OffLatReg, TRI))
+ if (SITII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseLatReg, OffLatReg,
+ TRI))
LowLatencyOffset[i] = OffLatReg;
- } else if (SITII->isHighLatencyInstruction(SU->getInstr()))
+ } else if (SITII->isHighLatencyInstruction(*SU->getInstr()))
IsHighLatencySU[i] = 1;
}
SIScheduler Scheduler(this);
Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
-#if 0 // To enable when handleMove fix lands
+
// if VGPR usage is extremely high, try other good performing variants
// which could lead to lower VGPR usage
if (Best.MaxVGPRUsage > 180) {
@@ -1930,7 +1868,7 @@ void SIScheduleDAGMI::schedule()
Best = Temp;
}
}
-#endif
+
ScheduledSUnits = Best.SUs;
ScheduledSUnitsInv.resize(SUnits.size());