diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/CodeGen/RegAllocGreedy.cpp | |
parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
download | src-344a3780b2e33f6ca763666c380202b18aab72a3.tar.gz src-344a3780b2e33f6ca763666c380202b18aab72a3.zip |
Vendor import of llvm-project main 88e66fa60ae5, the last commit before
the upstream release/13.x branch was created.
Tags: vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5, vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e, vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/CodeGen/RegAllocGreedy.cpp')
-rw-r--r-- | llvm/lib/CodeGen/RegAllocGreedy.cpp | 438 |
1 files changed, 283 insertions, 155 deletions
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 166414e4ffa1..4eb12aa30ee9 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -69,6 +69,7 @@ #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/IR/DebugInfoMetadata.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -406,8 +407,12 @@ class RAGreedy : public MachineFunctionPass, /// Set of broken hints that may be reconciled later because of eviction. SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; + /// The register cost values. This list will be recreated for each Machine + /// Function + ArrayRef<uint8_t> RegCosts; + public: - RAGreedy(); + RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); /// Return the pass name. StringRef getPassName() const override { return "Greedy Register Allocator"; } @@ -416,7 +421,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; void releaseMemory() override; Spiller &spiller() override { return *SpillerInstance; } - void enqueue(LiveInterval *LI) override; + void enqueueImpl(LiveInterval *LI) override; LiveInterval *dequeue() override; MCRegister selectOrSplit(LiveInterval &, SmallVectorImpl<Register> &) override; @@ -463,28 +468,29 @@ private: bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(MCRegister, SmallVectorImpl<float> &); - Register canReassign(LiveInterval &VirtReg, Register PrevReg); - bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); + Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; + bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const; bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &, - const SmallVirtRegSet &); - bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg, - SlotIndex Start, 
SlotIndex End, - EvictionCost &MaxCost); + const SmallVirtRegSet &) const; + bool canEvictInterferenceInRange(const LiveInterval &VirtReg, + MCRegister PhysReg, SlotIndex Start, + SlotIndex End, EvictionCost &MaxCost) const; MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, - LiveInterval &VirtReg, SlotIndex Start, - SlotIndex End, float *BestEvictWeight); + const LiveInterval &VirtReg, + SlotIndex Start, SlotIndex End, + float *BestEvictWeight) const; void evictInterference(LiveInterval &, MCRegister, SmallVectorImpl<Register> &); bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters); - Register tryAssign(LiveInterval&, AllocationOrder&, + MCRegister tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl<Register>&, const SmallVirtRegSet&); - unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl<Register>&, unsigned, - const SmallVirtRegSet&); + MCRegister tryEvict(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &, uint8_t, + const SmallVirtRegSet &); MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &); /// Calculate cost of region splitting. @@ -501,7 +507,7 @@ private: /// time. MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg, - unsigned &CostPerUseLimit, + uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs); void initializeCSRCost(); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, @@ -541,19 +547,50 @@ private: bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; - /// Compute and report the number of spills and reloads for a loop. - void reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, - unsigned &FoldedReloads, unsigned &Spills, - unsigned &FoldedSpills); - - /// Report the number of spills and reloads for each loop. 
- void reportNumberOfSplillsReloads() { - for (MachineLoop *L : *Loops) { - unsigned Reloads, FoldedReloads, Spills, FoldedSpills; - reportNumberOfSplillsReloads(L, Reloads, FoldedReloads, Spills, - FoldedSpills); + /// Greedy RA statistic to remark. + struct RAGreedyStats { + unsigned Reloads = 0; + unsigned FoldedReloads = 0; + unsigned ZeroCostFoldedReloads = 0; + unsigned Spills = 0; + unsigned FoldedSpills = 0; + unsigned Copies = 0; + float ReloadsCost = 0.0f; + float FoldedReloadsCost = 0.0f; + float SpillsCost = 0.0f; + float FoldedSpillsCost = 0.0f; + float CopiesCost = 0.0f; + + bool isEmpty() { + return !(Reloads || FoldedReloads || Spills || FoldedSpills || + ZeroCostFoldedReloads || Copies); } - } + + void add(RAGreedyStats other) { + Reloads += other.Reloads; + FoldedReloads += other.FoldedReloads; + ZeroCostFoldedReloads += other.ZeroCostFoldedReloads; + Spills += other.Spills; + FoldedSpills += other.FoldedSpills; + Copies += other.Copies; + ReloadsCost += other.ReloadsCost; + FoldedReloadsCost += other.FoldedReloadsCost; + SpillsCost += other.SpillsCost; + FoldedSpillsCost += other.FoldedSpillsCost; + CopiesCost += other.CopiesCost; + } + + void report(MachineOptimizationRemarkMissed &R); + }; + + /// Compute statistic for a basic block. + RAGreedyStats computeStats(MachineBasicBlock &MBB); + + /// Compute and report statistic through a remark. + RAGreedyStats reportStats(MachineLoop *L); + + /// Report the statistic for each loop. 
+ void reportStats(); }; } // end anonymous namespace @@ -599,7 +636,22 @@ FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -RAGreedy::RAGreedy(): MachineFunctionPass(ID) { +namespace llvm { +FunctionPass* createGreedyRegisterAllocator( + std::function<bool(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC)> Ftor); + +} + +FunctionPass* llvm::createGreedyRegisterAllocator( + std::function<bool(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC)> Ftor) { + return new RAGreedy(Ftor); +} + +RAGreedy::RAGreedy(RegClassFilterFunc F): + MachineFunctionPass(ID), + RegAllocBase(F) { } void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { @@ -656,7 +708,7 @@ void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) { // Register is assigned, put it back on the queue for reassignment. LiveInterval &LI = LIS->getInterval(VirtReg); Matrix->unassign(LI); - enqueue(&LI); + RegAllocBase::enqueue(&LI); } void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) { @@ -679,7 +731,7 @@ void RAGreedy::releaseMemory() { GlobalCand.clear(); } -void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } +void RAGreedy::enqueueImpl(LiveInterval *LI) { enqueue(Queue, LI); } void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. @@ -708,6 +760,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. 
bool ReverseLocal = TRI->reverseLocalAssignment(); + bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges(); const TargetRegisterClass &RC = *MRI->getRegClass(Reg); bool ForceGlobal = !ReverseLocal && (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); @@ -731,6 +784,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. Prio = (1u << 29) + Size; + + if (AddPriorityToGlobal) + Prio |= RC.AllocationPriority << 24; } // Mark a higher bit to prioritize global and local above RS_Split. Prio |= (1u << 31); @@ -759,11 +815,11 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { //===----------------------------------------------------------------------===// /// tryAssign - Try to assign VirtReg to an available register. -Register RAGreedy::tryAssign(LiveInterval &VirtReg, +MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs, const SmallVirtRegSet &FixedRegisters) { - Register PhysReg; + MCRegister PhysReg; for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) { assert(*I); if (!Matrix->checkInterference(VirtReg, *I)) { @@ -797,7 +853,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, } // Try to evict interference from a cheaper alternative. - unsigned Cost = TRI->getCostPerUse(PhysReg); + uint8_t Cost = RegCosts[PhysReg]; // Most registers have 0 additional cost. if (!Cost) @@ -805,7 +861,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost << '\n'); - Register CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters); + MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters); return CheapReg ? 
CheapReg : PhysReg; } @@ -813,7 +869,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// -Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { +Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const { auto Order = AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); MCRegister PhysReg; @@ -853,7 +909,7 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { /// @param B The live range to be evicted. /// @param BreaksHint True when B is already assigned to its preferred register. bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, - LiveInterval &B, bool BreaksHint) { + LiveInterval &B, bool BreaksHint) const { bool CanSplit = getStage(B) < RS_Spill; // Be fairly aggressive about following hints as long as the evictee can be @@ -877,9 +933,9 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg, - bool IsHint, EvictionCost &MaxCost, - const SmallVirtRegSet &FixedRegisters) { +bool RAGreedy::canEvictInterference( + LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, + EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) return false; @@ -975,14 +1031,15 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg, /// \param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// \return True when interference can be evicted cheaper than MaxCost. 
-bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, +bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg, MCRegister PhysReg, SlotIndex Start, SlotIndex End, - EvictionCost &MaxCost) { + EvictionCost &MaxCost) const { EvictionCost Cost; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + Q.collectInterferingVRegs(); // Check if any interfering live range is heavier than MaxWeight. for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { @@ -1027,9 +1084,9 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, /// \return The PhysReg which is the best candidate for eviction and the /// eviction cost in BestEvictweight MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, - LiveInterval &VirtReg, + const LiveInterval &VirtReg, SlotIndex Start, SlotIndex End, - float *BestEvictweight) { + float *BestEvictweight) const { EvictionCost BestEvictCost; BestEvictCost.setMax(); BestEvictCost.MaxWeight = VirtReg.weight(); @@ -1109,10 +1166,9 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const { /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. /// @return Physreg to assign VirtReg, or 0. -unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, - AllocationOrder &Order, +MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs, - unsigned CostPerUseLimit, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) { NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); @@ -1125,13 +1181,13 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // When we are just looking for a reduced cost per use, don't break any // hints, and only evict smaller spill weights. 
- if (CostPerUseLimit < ~0u) { + if (CostPerUseLimit < uint8_t(~0u)) { BestCost.BrokenHints = 0; BestCost.MaxWeight = VirtReg.weight(); // Check of any registers in RC are below CostPerUseLimit. const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg()); - unsigned MinCost = RegClassInfo.getMinCost(RC); + uint8_t MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost << ", no cheaper registers to be found.\n"); @@ -1140,7 +1196,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // It is normal for register classes to have a long tail of registers with // the same cost. We don't need to look at them if they're too expensive. - if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { + if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) { OrderLimit = RegClassInfo.getLastCostChange(RC); LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); @@ -1151,7 +1207,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, ++I) { MCRegister PhysReg = *I; assert(PhysReg); - if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) + if (RegCosts[PhysReg] >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. // Don't start using a CSR when the CostPerUseLimit is low. 
@@ -1175,10 +1231,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, break; } - if (!BestPhys) - return 0; - - evictInterference(VirtReg, BestPhys, NewVRegs); + if (BestPhys.isValid()) + evictInterference(VirtReg, BestPhys, NewVRegs); return BestPhys; } @@ -1289,8 +1343,9 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, // Abort if the spill cannot be inserted at the MBB' start MachineBasicBlock *MBB = MF->getBlockNumbered(Number); - if (!MBB->empty() && - SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()), + auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr(); + if (FirstNonDebugInstr != MBB->end() && + SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr), SA->getFirstSplitPoint(Number))) return false; // Interference for the live-in value. @@ -1331,9 +1386,7 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) { for (unsigned Bundle : NewBundles) { // Look at all blocks connected to Bundle in the full graph. ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); - for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); - I != E; ++I) { - unsigned Block = *I; + for (unsigned Block : Blocks) { if (!Todo.test(Block)) continue; Todo.reset(Block); @@ -1557,25 +1610,9 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit, return false; } - // Check if the local interval will evict a cheaper interval. - float CheapestEvictWeight = 0; - MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight( - Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(), - Cand.Intf.last(), &CheapestEvictWeight); - - // Have we found an interval that can be evicted? - if (FutureEvictedPhysReg) { - float splitArtifactWeight = - VRAI->futureWeight(LIS->getInterval(VirtRegToSplit), - Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); - // Will the weight of the local interval be higher than the cheapest evictee - // weight? If so it will evict it and will not cause a spill. 
- if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight) - return false; - } - - // The local interval is not able to find non interferencing assignment and - // not able to evict a less worthy interval, therfore, it can cause a spill. + // The local interval is not able to find non interferencing assignment + // and not able to evict a less worthy interval, therfore, it can cause a + // spill. return true; } @@ -2650,18 +2687,16 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // with VirtReg on PhysReg (or one of its aliases). // Enqueue them for recoloring and perform the actual recoloring. PQueue RecoloringQueue; - for (SmallLISet::iterator It = RecoloringCandidates.begin(), - EndIt = RecoloringCandidates.end(); - It != EndIt; ++It) { - Register ItVirtReg = (*It)->reg(); - enqueue(RecoloringQueue, *It); + for (LiveInterval *RC : RecoloringCandidates) { + Register ItVirtReg = RC->reg(); + enqueue(RecoloringQueue, RC); assert(VRM->hasPhys(ItVirtReg) && "Interferences are supposed to be with allocated variables"); // Record the current allocation. VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); // unset the related struct. - Matrix->unassign(**It); + Matrix->unassign(*RC); } // Do as if VirtReg was assigned to PhysReg so that the underlying @@ -2695,22 +2730,18 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // don't add it to NewVRegs because its physical register will be restored // below. Other vregs in CurrentNewVRegs are created by calling // selectOrSplit and should be added into NewVRegs. 
- for (SmallVectorImpl<Register>::iterator Next = CurrentNewVRegs.begin(), - End = CurrentNewVRegs.end(); - Next != End; ++Next) { - if (RecoloringCandidates.count(&LIS->getInterval(*Next))) + for (Register &R : CurrentNewVRegs) { + if (RecoloringCandidates.count(&LIS->getInterval(R))) continue; - NewVRegs.push_back(*Next); + NewVRegs.push_back(R); } - for (SmallLISet::iterator It = RecoloringCandidates.begin(), - EndIt = RecoloringCandidates.end(); - It != EndIt; ++It) { - Register ItVirtReg = (*It)->reg(); + for (LiveInterval *RC : RecoloringCandidates) { + Register ItVirtReg = RC->reg(); if (VRM->hasPhys(ItVirtReg)) - Matrix->unassign(**It); + Matrix->unassign(*RC); MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg]; - Matrix->assign(**It, ItPhysReg); + Matrix->assign(*RC, ItPhysReg); } } @@ -2793,7 +2824,7 @@ MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, /// to use the CSR; otherwise return 0. MCRegister RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, - MCRegister PhysReg, unsigned &CostPerUseLimit, + MCRegister PhysReg, uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) { if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { // We choose spill over using the CSR for the first time if the spill cost @@ -2924,7 +2955,12 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { if (Register::isPhysicalRegister(Reg)) continue; - assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); + // This may be a skipped class + if (!VRM->hasPhys(Reg)) { + assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) && + "We have an unallocated variable which should have been handled"); + continue; + } // Get the live interval mapped with this virtual register to be able // to check for the interference with the new color. 
@@ -3024,13 +3060,13 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, unsigned Depth) { - unsigned CostPerUseLimit = ~0u; + uint8_t CostPerUseLimit = uint8_t(~0u); // First try assigning a free register. auto Order = AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); if (MCRegister PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { - // If VirtReg got an assignment, the eviction info is no longre relevant. + // If VirtReg got an assignment, the eviction info is no longer relevant. LastEvicted.clearEvicteeInfo(VirtReg.reg()); // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical @@ -3067,7 +3103,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, if (Hint && Hint != PhysReg) SetOfBrokenHints.insert(&VirtReg); // If VirtReg eviction someone, the eviction info for it as an evictee is - // no longre relevant. + // no longer relevant. LastEvicted.clearEvicteeInfo(VirtReg.reg()); return PhysReg; } @@ -3133,75 +3169,162 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return 0; } -void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, - unsigned &FoldedReloads, - unsigned &Spills, - unsigned &FoldedSpills) { - Reloads = 0; - FoldedReloads = 0; - Spills = 0; - FoldedSpills = 0; - - // Sum up the spill and reloads in subloops. 
- for (MachineLoop *SubLoop : *L) { - unsigned SubReloads; - unsigned SubFoldedReloads; - unsigned SubSpills; - unsigned SubFoldedSpills; - - reportNumberOfSplillsReloads(SubLoop, SubReloads, SubFoldedReloads, - SubSpills, SubFoldedSpills); - Reloads += SubReloads; - FoldedReloads += SubFoldedReloads; - Spills += SubSpills; - FoldedSpills += SubFoldedSpills; +void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) { + using namespace ore; + if (Spills) { + R << NV("NumSpills", Spills) << " spills "; + R << NV("TotalSpillsCost", SpillsCost) << " total spills cost "; + } + if (FoldedSpills) { + R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; + R << NV("TotalFoldedSpillsCost", FoldedSpillsCost) + << " total folded spills cost "; + } + if (Reloads) { + R << NV("NumReloads", Reloads) << " reloads "; + R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost "; + } + if (FoldedReloads) { + R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "; + R << NV("TotalFoldedReloadsCost", FoldedReloadsCost) + << " total folded reloads cost "; + } + if (ZeroCostFoldedReloads) + R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads) + << " zero cost folded reloads "; + if (Copies) { + R << NV("NumVRCopies", Copies) << " virtual registers copies "; + R << NV("TotalCopiesCost", CopiesCost) << " total copies cost "; } +} +RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) { + RAGreedyStats Stats; const MachineFrameInfo &MFI = MF->getFrameInfo(); int FI; + auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) { + return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>( + A->getPseudoValue())->getFrameIndex()); + }; + auto isPatchpointInstr = [](const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::STATEPOINT; + }; + for (MachineInstr &MI : MBB) { + if (MI.isCopy()) { + MachineOperand &Dest 
= MI.getOperand(0); + MachineOperand &Src = MI.getOperand(1); + if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() && + Src.getReg().isVirtual()) + ++Stats.Copies; + continue; + } + + SmallVector<const MachineMemOperand *, 2> Accesses; + if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) { + ++Stats.Reloads; + continue; + } + if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) { + ++Stats.Spills; + continue; + } + if (TII->hasLoadFromStackSlot(MI, Accesses) && + llvm::any_of(Accesses, isSpillSlotAccess)) { + if (!isPatchpointInstr(MI)) { + Stats.FoldedReloads += Accesses.size(); + continue; + } + // For statepoint there may be folded and zero cost folded stack reloads. + std::pair<unsigned, unsigned> NonZeroCostRange = + TII->getPatchpointUnfoldableRange(MI); + SmallSet<unsigned, 16> FoldedReloads; + SmallSet<unsigned, 16> ZeroCostFoldedReloads; + for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) { + MachineOperand &MO = MI.getOperand(Idx); + if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex())) + continue; + if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second) + FoldedReloads.insert(MO.getIndex()); + else + ZeroCostFoldedReloads.insert(MO.getIndex()); + } + // If stack slot is used in folded reload it is not zero cost then. + for (unsigned Slot : FoldedReloads) + ZeroCostFoldedReloads.erase(Slot); + Stats.FoldedReloads += FoldedReloads.size(); + Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size(); + continue; + } + Accesses.clear(); + if (TII->hasStoreToStackSlot(MI, Accesses) && + llvm::any_of(Accesses, isSpillSlotAccess)) { + Stats.FoldedSpills += Accesses.size(); + } + } + // Set cost of collected statistic by multiplication to relative frequency of + // this basic block. 
+ float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB); + Stats.ReloadsCost = RelFreq * Stats.Reloads; + Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads; + Stats.SpillsCost = RelFreq * Stats.Spills; + Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills; + Stats.CopiesCost = RelFreq * Stats.Copies; + return Stats; +} + +RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) { + RAGreedyStats Stats; + + // Sum up the spill and reloads in subloops. + for (MachineLoop *SubLoop : *L) + Stats.add(reportStats(SubLoop)); + for (MachineBasicBlock *MBB : L->getBlocks()) // Handle blocks that were not included in subloops. if (Loops->getLoopFor(MBB) == L) - for (MachineInstr &MI : *MBB) { - SmallVector<const MachineMemOperand *, 2> Accesses; - auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) { - return MFI.isSpillSlotObjectIndex( - cast<FixedStackPseudoSourceValue>(A->getPseudoValue()) - ->getFrameIndex()); - }; - - if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) - ++Reloads; - else if (TII->hasLoadFromStackSlot(MI, Accesses) && - llvm::any_of(Accesses, isSpillSlotAccess)) - ++FoldedReloads; - else if (TII->isStoreToStackSlot(MI, FI) && - MFI.isSpillSlotObjectIndex(FI)) - ++Spills; - else if (TII->hasStoreToStackSlot(MI, Accesses) && - llvm::any_of(Accesses, isSpillSlotAccess)) - ++FoldedSpills; - } + Stats.add(computeStats(*MBB)); - if (Reloads || FoldedReloads || Spills || FoldedSpills) { + if (!Stats.isEmpty()) { using namespace ore; ORE->emit([&]() { - MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies", L->getStartLoc(), L->getHeader()); - if (Spills) - R << NV("NumSpills", Spills) << " spills "; - if (FoldedSpills) - R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; - if (Reloads) - R << NV("NumReloads", Reloads) << " reloads "; - if (FoldedReloads) - R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads 
"; + Stats.report(R); R << "generated in loop"; return R; }); } + return Stats; +} + +void RAGreedy::reportStats() { + if (!ORE->allowExtraAnalysis(DEBUG_TYPE)) + return; + RAGreedyStats Stats; + for (MachineLoop *L : *Loops) + Stats.add(reportStats(L)); + // Process non-loop blocks. + for (MachineBasicBlock &MBB : *MF) + if (!Loops->getLoopFor(&MBB)) + Stats.add(computeStats(MBB)); + if (!Stats.isEmpty()) { + using namespace ore; + + ORE->emit([&]() { + DebugLoc Loc; + if (auto *SP = MF->getFunction().getSubprogram()) + Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP); + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc, + &MF->front()); + Stats.report(R); + R << "generated in function"; + return R; + }); + } } bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { @@ -3232,7 +3355,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); DomTree = &getAnalysis<MachineDominatorTree>(); ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); - SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis<MachineLoopInfo>(); Bundles = &getAnalysis<EdgeBundles>(); SpillPlacer = &getAnalysis<SpillPlacement>(); @@ -3241,14 +3363,17 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); + RegCosts = TRI->getRegisterCosts(*MF); + VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI)); VRAI->calculateSpillWeightsAndHints(); LLVM_DEBUG(LIS->dump()); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); + SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; @@ -3259,8 +3384,11 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { allocatePhysRegs(); 
tryHintsRecoloring(); + + if (VerifyEnabled) + MF->verify(this, "Before post optimization"); postOptimization(); - reportNumberOfSplillsReloads(); + reportStats(); releaseMemory(); return true; |