diff options
Diffstat (limited to 'lib/CodeGen/MachineScheduler.cpp')
-rw-r--r-- | lib/CodeGen/MachineScheduler.cpp | 144 |
1 files changed, 86 insertions, 58 deletions
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 90dad9d399fe..ae1170ad1be6 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1,9 +1,8 @@ //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -487,13 +486,17 @@ getSchedRegions(MachineBasicBlock *MBB, MachineInstr &MI = *std::prev(I); if (isSchedBoundary(&MI, &*MBB, MF, TII)) break; - if (!MI.isDebugInstr()) + if (!MI.isDebugInstr()) { // MBB::size() uses instr_iterator to count. Here we need a bundle to // count as a single instruction. ++NumRegionInstrs; + } } - Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); + // It's possible we found a scheduling region that only has debug + // instructions. Don't bother scheduling these. + if (NumRegionInstrs != 0) + Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); } if (RegionsTopDown) @@ -605,23 +608,6 @@ LLVM_DUMP_METHOD void ReadyQueue::dump() const { // Provide a vtable anchor. ScheduleDAGMI::~ScheduleDAGMI() = default; -bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { - return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); -} - -bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { - if (SuccSU != &ExitSU) { - // Do not use WillCreateCycle, it assumes SD scheduling. - // If Pred is reachable from Succ, then the edge creates a cycle. - if (Topo.IsReachable(PredDep.getSUnit(), SuccSU)) - return false; - Topo.AddPred(SuccSU, PredDep.getSUnit()); - } - SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial()); - // Return true regardless of whether a new edge needed to be inserted. - return true; -} - /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When /// NumPredsLeft reaches zero, release the successor node. /// @@ -762,8 +748,6 @@ void ScheduleDAGMI::schedule() { // Build the DAG. buildSchedGraph(AA); - Topo.InitDAGTopologicalSorting(); - postprocessDAG(); SmallVector<SUnit*, 8> TopRoots, BotRoots; @@ -1212,8 +1196,6 @@ void ScheduleDAGMILive::schedule() { LLVM_DEBUG(SchedImpl->dumpPolicy()); buildDAGWithRegPressure(); - Topo.InitDAGTopologicalSorting(); - postprocessDAG(); SmallVector<SUnit*, 8> TopRoots, BotRoots; @@ -1484,10 +1466,10 @@ namespace { class BaseMemOpClusterMutation : public ScheduleDAGMutation { struct MemOpInfo { SUnit *SU; - MachineOperand *BaseOp; + const MachineOperand *BaseOp; int64_t Offset; - MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs) + MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs) : SU(su), BaseOp(Op), Offset(ofs) {} bool operator<(const MemOpInfo &RHS) const { @@ -1533,7 +1515,7 @@ public: void apply(ScheduleDAGInstrs *DAGInstrs) override; protected: - void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG); + void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG); }; class StoreClusterMutation : public BaseMemOpClusterMutation { @@ -1570,10 +1552,10 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII, } // end namespace llvm void BaseMemOpClusterMutation::clusterNeighboringMemOps( - ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) { + ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) { SmallVector<MemOpInfo, 32> MemOpRecords; for (SUnit *SU : MemOps) { - MachineOperand *BaseOp; + const MachineOperand *BaseOp; int64_t Offset; if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI)) MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset)); @@ -1610,9 +1592,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( } /// Callback from DAG postProcessing to create cluster edges for loads. -void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { - ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); - +void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { // Map DAG NodeNum to store chain ID. DenseMap<unsigned, unsigned> StoreChainIDs; // Map each store chain to a set of dependent MemOps. @@ -1857,9 +1837,15 @@ SchedBoundary::~SchedBoundary() { delete HazardRec; } /// Given a Count of resource usage and a Latency value, return true if a /// SchedBoundary becomes resource limited. +/// If we are checking after scheduling a node, we should return true when +/// we just reach the resource limit. static bool checkResourceLimit(unsigned LFactor, unsigned Count, - unsigned Latency) { - return (int)(Count - (Latency * LFactor)) > (int)LFactor; + unsigned Latency, bool AfterSchedNode) { + int ResCntFactor = (int)(Count - (Latency * LFactor)); + if (AfterSchedNode) + return ResCntFactor >= (int)LFactor; + else + return ResCntFactor > (int)LFactor; } void SchedBoundary::reset() { @@ -1883,6 +1869,7 @@ void SchedBoundary::reset() { ZoneCritResIdx = 0; IsResourceLimited = false; ReservedCycles.clear(); + ReservedCyclesIndex.clear(); #ifndef NDEBUG // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is @@ -1921,8 +1908,17 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { SchedModel = smodel; Rem = rem; if (SchedModel->hasInstrSchedModel()) { - ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); - ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); + unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); + ReservedCyclesIndex.resize(ResourceCount); + ExecutedResCounts.resize(ResourceCount); + unsigned NumUnits = 0; + + for (unsigned i = 0; i < ResourceCount; ++i) { + ReservedCyclesIndex[i] = NumUnits; + NumUnits += SchedModel->getProcResource(i)->NumUnits; + } + + ReservedCycles.resize(NumUnits, InvalidCycle); } } @@ -1943,11 +1939,11 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { return 0; } -/// Compute the next cycle at which the given processor resource can be -/// scheduled. -unsigned SchedBoundary:: -getNextResourceCycle(unsigned PIdx, unsigned Cycles) { - unsigned NextUnreserved = ReservedCycles[PIdx]; +/// Compute the next cycle at which the given processor resource unit +/// can be scheduled. +unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx, + unsigned Cycles) { + unsigned NextUnreserved = ReservedCycles[InstanceIdx]; // If this resource has never been used, always return cycle zero. if (NextUnreserved == InvalidCycle) return 0; @@ -1957,6 +1953,29 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) { return NextUnreserved; } +/// Compute the next cycle at which the given processor resource can be +/// scheduled. Returns the next cycle and the index of the processor resource +/// instance in the reserved cycles vector. +std::pair<unsigned, unsigned> +SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) { + unsigned MinNextUnreserved = InvalidCycle; + unsigned InstanceIdx = 0; + unsigned StartIndex = ReservedCyclesIndex[PIdx]; + unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; + assert(NumberOfInstances > 0 && + "Cannot have zero instances of a ProcResource"); + + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; + ++I) { + unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); + if (MinNextUnreserved > NextUnreserved) { + InstanceIdx = I; + MinNextUnreserved = NextUnreserved; + } + } + return std::make_pair(MinNextUnreserved, InstanceIdx); +} + /// Does this SU have a hazard within the current instruction group. /// /// The scheduler supports two modes of hazard recognition. The first is the @@ -1998,14 +2017,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) { SchedModel->getWriteProcResEnd(SC))) { unsigned ResIdx = PE.ProcResourceIdx; unsigned Cycles = PE.Cycles; - unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles); + unsigned NRCycle, InstanceIdx; + std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles); if (NRCycle > CurrCycle) { #ifndef NDEBUG MaxObservedStall = std::max(Cycles, MaxObservedStall); #endif LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " - << SchedModel->getResourceName(ResIdx) << "=" - << NRCycle << "c\n"); + << SchedModel->getResourceName(ResIdx) + << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' + << "=" << NRCycle << "c\n"); return true; } } @@ -2119,7 +2140,7 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) { CheckPending = true; IsResourceLimited = checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), - getScheduledLatency()); + getScheduledLatency(), true); LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); @@ -2160,10 +2181,12 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { << "c\n"); } // For reserved resources, record the highest cycle using the resource. - unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); + unsigned NextAvailable, InstanceIdx; + std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles); if (NextAvailable > CurrCycle) { LLVM_DEBUG(dbgs() << " Resource conflict: " - << SchedModel->getProcResource(PIdx)->Name + << SchedModel->getResourceName(PIdx) + << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']' << " reserved until @" << NextAvailable << "\n"); } return NextAvailable; @@ -2179,6 +2202,8 @@ void SchedBoundary::bumpNode(SUnit *SU) { HazardRec->Reset(); } HazardRec->EmitInstruction(SU); + // Scheduling an instruction may have made pending instructions available. + CheckPending = true; } // checkHazard should prevent scheduling multiple instructions per cycle that // exceed the issue width. @@ -2251,12 +2276,13 @@ void SchedBoundary::bumpNode(SUnit *SU) { PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { + unsigned ReservedUntil, InstanceIdx; + std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0); if (isTop()) { - ReservedCycles[PIdx] = - std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); - } - else - ReservedCycles[PIdx] = NextCycle; + ReservedCycles[InstanceIdx] = + std::max(ReservedUntil, NextCycle + PI->Cycles); + } else + ReservedCycles[InstanceIdx] = NextCycle; } } } @@ -2282,7 +2308,7 @@ void SchedBoundary::bumpNode(SUnit *SU) { // resource limited. If a stall occurred, bumpCycle does this. IsResourceLimited = checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), - getScheduledLatency()); + getScheduledLatency(), true); // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle // resets CurrMOps. Loop to handle instructions with more MOps than issue in @@ -2501,7 +2527,7 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, RemLatency = computeRemLatency(CurrZone); RemLatencyComputed = true; OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), - OtherCount, RemLatency); + OtherCount, RemLatency, false); } // Schedule aggressively for latency in PostRA mode. We don't check for @@ -2741,8 +2767,10 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs); // After subtarget overrides, apply command line options. - if (!EnableRegPressure) + if (!EnableRegPressure) { RegionPolicy.ShouldTrackPressure = false; + RegionPolicy.ShouldTrackLaneMasks = false; + } // Check -misched-topdown/bottomup can force or unforce scheduling direction. // e.g. -misched-bottomup=false allows scheduling in both directions. |