diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp | 637 |
1 files changed, 637 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp new file mode 100644 index 000000000000..307c9eba9d3b --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp @@ -0,0 +1,637 @@ +//===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass tries to remove unnecessary VGPR live ranges in divergent if-else +/// structures and waterfall loops. +/// +/// When we do structurization, we usually transform an if-else into two +/// successive if-then (with a flow block to do predicate inversion). Consider a +/// simple case after structurization: A divergent value %a was defined before +/// if-else and used in both THEN (use in THEN is optional) and ELSE part: +/// bb.if: +/// %a = ... +/// ... +/// bb.then: +/// ... = op %a +/// ... // %a can be dead here +/// bb.flow: +/// ... +/// bb.else: +/// ... = %a +/// ... +/// bb.endif +/// +/// As the register allocator has no idea of the thread-control-flow, it will just +/// assume %a would be alive in the whole range of bb.then because of a later +/// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect +/// to exec mask. For this if-else case, the lanes active in bb.then will be +/// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead +/// after the last use in bb.then until the end of the block. The reason is +/// the instructions in bb.then will only overwrite lanes that will never be +/// accessed in bb.else. 
+/// +/// This pass aims to tell the register allocator that %a is in fact dead, +/// through inserting a phi-node in bb.flow saying that %a is undef when coming +/// from bb.then, and then replacing the uses in bb.else with the result of +/// the newly inserted phi. +/// +/// Two key conditions must be met to ensure correctness: +/// 1.) The def-point should be in the same loop-level as if-else-endif to make +/// sure the second loop iteration still gets correct data. +/// 2.) There should be no further uses after the IF-ELSE region. +/// +/// +/// Waterfall loops get inserted around instructions that use divergent values +/// but can only be executed with a uniform value. For example an indirect call +/// to a divergent address: +/// bb.start: +/// %a = ... +/// %fun = ... +/// ... +/// bb.loop: +/// call %fun (%a) +/// ... // %a can be dead here +/// loop %bb.loop +/// +/// The loop block is executed multiple times, but it is run exactly once for +/// each active lane. Similar to the if-else case, the register allocator +/// assumes that %a is live throughout the loop as it is used again in the next +/// iteration. If %a is a VGPR that is unused after the loop, it does not need +/// to be live after its last use in the loop block. By inserting a phi-node at +/// the start of bb.loop that is undef when coming from bb.loop, the register +/// allocator knows that the value of %a does not need to be preserved through +/// iterations of the loop. 
+/// +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-opt-vgpr-liverange" + +namespace { + +class SIOptimizeVGPRLiveRange : public MachineFunctionPass { +private: + const SIRegisterInfo *TRI = nullptr; + const SIInstrInfo *TII = nullptr; + LiveVariables *LV = nullptr; + MachineDominatorTree *MDT = nullptr; + const MachineLoopInfo *Loops = nullptr; + MachineRegisterInfo *MRI = nullptr; + +public: + static char ID; + + MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const; + + void collectElseRegionBlocks(MachineBasicBlock *Flow, + MachineBasicBlock *Endif, + SmallSetVector<MachineBasicBlock *, 16> &) const; + + void + collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow, + MachineBasicBlock *Endif, + SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, + SmallVectorImpl<Register> &CandidateRegs) const; + + void collectWaterfallCandidateRegisters( + MachineBasicBlock *Loop, + SmallSetVector<Register, 16> &CandidateRegs) const; + + void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, + SmallVectorImpl<MachineInstr *> &Uses) const; + + void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If, + MachineBasicBlock *Flow) const; + + void updateLiveRangeInElseRegion( + Register Reg, Register NewReg, MachineBasicBlock *Flow, + MachineBasicBlock *Endif, + SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; + + void + optimizeLiveRange(Register Reg, MachineBasicBlock *If, + MachineBasicBlock *Flow, MachineBasicBlock *Endif, + SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; + + void 
optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *If) const; + + SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "SI Optimize VGPR LiveRange"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LiveVariables>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<LiveVariables>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } +}; + +} // end anonymous namespace + +// Check whether the MBB is a else flow block and get the branching target which +// is the Endif block +MachineBasicBlock * +SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const { + for (auto &BR : MBB->terminators()) { + if (BR.getOpcode() == AMDGPU::SI_ELSE) + return BR.getOperand(2).getMBB(); + } + return nullptr; +} + +void SIOptimizeVGPRLiveRange::collectElseRegionBlocks( + MachineBasicBlock *Flow, MachineBasicBlock *Endif, + SmallSetVector<MachineBasicBlock *, 16> &Blocks) const { + assert(Flow != Endif); + + MachineBasicBlock *MBB = Endif; + unsigned Cur = 0; + while (MBB) { + for (auto *Pred : MBB->predecessors()) { + if (Pred != Flow && !Blocks.contains(Pred)) + Blocks.insert(Pred); + } + + if (Cur < Blocks.size()) + MBB = Blocks[Cur++]; + else + MBB = nullptr; + } + + LLVM_DEBUG({ + dbgs() << "Found Else blocks: "; + for (auto *MBB : Blocks) + dbgs() << printMBBReference(*MBB) << ' '; + dbgs() << '\n'; + }); +} + +/// Find the instructions(excluding phi) in \p MBB that uses the \p Reg. 
+void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock( + Register Reg, MachineBasicBlock *MBB, + SmallVectorImpl<MachineInstr *> &Uses) const { + for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) { + if (UseMI.getParent() == MBB && !UseMI.isPHI()) + Uses.push_back(&UseMI); + } +} + +/// Collect the killed registers in the ELSE region which are not alive through +/// the whole THEN region. +void SIOptimizeVGPRLiveRange::collectCandidateRegisters( + MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, + SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, + SmallVectorImpl<Register> &CandidateRegs) const { + + SmallSet<Register, 8> KillsInElse; + + for (auto *Else : ElseBlocks) { + for (auto &MI : Else->instrs()) { + if (MI.isDebugInstr()) + continue; + + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.getReg() || MO.isDef()) + continue; + + Register MOReg = MO.getReg(); + // We can only optimize AGPR/VGPR virtual register + if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) + continue; + + if (MO.readsReg()) { + LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); + const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); + // Make sure two conditions are met: + // a.) the value is defined before/in the IF block + // b.) should be defined in the same loop-level. + if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && + Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) { + // Check if the register is live into the endif block. If not, + // consider it killed in the else region. + LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); + if (!VI.isLiveIn(*Endif, MOReg, *MRI)) { + KillsInElse.insert(MOReg); + } else { + LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI) + << " as Live in Endif\n"); + } + } + } + } + } + } + + // Check the phis in the Endif, looking for value coming from the ELSE + // region. Make sure the phi-use is the last use. 
+ for (auto &MI : Endif->phis()) { + for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { + auto &MO = MI.getOperand(Idx); + auto *Pred = MI.getOperand(Idx + 1).getMBB(); + if (Pred == Flow) + continue; + assert(ElseBlocks.contains(Pred) && "Should be from Else region\n"); + + if (!MO.isReg() || !MO.getReg() || MO.isUndef()) + continue; + + Register Reg = MO.getReg(); + if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg)) + continue; + + LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); + + if (VI.isLiveIn(*Endif, Reg, *MRI)) { + LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI) + << " as Live in Endif\n"); + continue; + } + // Make sure two conditions are met: + // a.) the value is defined before/in the IF block + // b.) should be defined in the same loop-level. + const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent(); + if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && + Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) + KillsInElse.insert(Reg); + } + } + + auto IsLiveThroughThen = [&](Register Reg) { + for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; + ++I) { + if (!I->readsReg()) + continue; + auto *UseMI = I->getParent(); + auto *UseMBB = UseMI->getParent(); + if (UseMBB == Flow || UseMBB == Endif) { + if (!UseMI->isPHI()) + return true; + + auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB(); + // The register is live through the path If->Flow or Flow->Endif. + // we should not optimize for such cases. + if ((UseMBB == Flow && IncomingMBB != If) || + (UseMBB == Endif && IncomingMBB == Flow)) + return true; + } + } + return false; + }; + + for (auto Reg : KillsInElse) { + if (!IsLiveThroughThen(Reg)) + CandidateRegs.push_back(Reg); + } +} + +/// Collect the registers used in the waterfall loop block that are defined +/// before. 
+void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( + MachineBasicBlock *Loop, + SmallSetVector<Register, 16> &CandidateRegs) const { + + for (auto &MI : Loop->instrs()) { + if (MI.isDebugInstr()) + continue; + + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.getReg() || MO.isDef()) + continue; + + Register MOReg = MO.getReg(); + // We can only optimize AGPR/VGPR virtual register + if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) + continue; + + if (MO.readsReg()) { + const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); + // Make sure the value is defined before the LOOP block + if (DefMBB != Loop && !CandidateRegs.contains(MOReg)) { + // If the variable is used after the loop, the register coalescer will + // merge the newly created register and remove the phi node again. + // Just do nothing in that case. + LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg); + bool IsUsed = false; + for (auto *Succ : Loop->successors()) { + if (Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) { + IsUsed = true; + break; + } + } + if (!IsUsed) { + LLVM_DEBUG(dbgs() << "Found candidate reg: " + << printReg(MOReg, TRI, 0, MRI) << '\n'); + CandidateRegs.insert(MOReg); + } else { + LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: " + << printReg(MOReg, TRI, 0, MRI) << '\n'); + } + } + } + } + } +} + +// Re-calculate the liveness of \p Reg in the THEN-region +void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion( + Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const { + + SmallPtrSet<MachineBasicBlock *, 16> PHIIncoming; + + MachineBasicBlock *ThenEntry = nullptr; + for (auto *Succ : If->successors()) { + if (Succ != Flow) { + ThenEntry = Succ; + break; + } + } + assert(ThenEntry && "No successor in Then region?"); + + LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); + df_iterator_default_set<MachineBasicBlock *, 16> Visited; + + for (MachineBasicBlock *MBB : 
depth_first_ext(ThenEntry, Visited)) { + if (MBB == Flow) + break; + + // Clear Live bit, as we will recalculate afterwards + LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB) + << '\n'); + OldVarInfo.AliveBlocks.reset(MBB->getNumber()); + } + + // Get the blocks the Reg should be alive through + for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; + ++I) { + auto *UseMI = I->getParent(); + if (UseMI->isPHI() && I->readsReg()) { + if (Visited.contains(UseMI->getParent())) + PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB()); + } + } + + Visited.clear(); + + for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) { + if (MBB == Flow) + break; + + SmallVector<MachineInstr *> Uses; + // PHI instructions has been processed before. + findNonPHIUsesInBlock(Reg, MBB, Uses); + + if (Uses.size() == 1) { + LLVM_DEBUG(dbgs() << "Found one Non-PHI use in " + << printMBBReference(*MBB) << '\n'); + LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin())); + } else if (Uses.size() > 1) { + // Process the instructions in-order + LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in " + << printMBBReference(*MBB) << '\n'); + for (MachineInstr &MI : *MBB) { + if (llvm::is_contained(Uses, &MI)) + LV->HandleVirtRegUse(Reg, MBB, MI); + } + } + + // Mark Reg alive through the block if this is a PHI incoming block + if (PHIIncoming.contains(MBB)) + LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(), + MBB); + } + + // Set the isKilled flag if we get new Kills in the THEN region. 
+ for (auto *MI : OldVarInfo.Kills) { + if (Visited.contains(MI->getParent())) + MI->addRegisterKilled(Reg, TRI); + } +} + +void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion( + Register Reg, Register NewReg, MachineBasicBlock *Flow, + MachineBasicBlock *Endif, + SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { + LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); + LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); + + // Transfer aliveBlocks from Reg to NewReg + for (auto *MBB : ElseBlocks) { + unsigned BBNum = MBB->getNumber(); + if (OldVarInfo.AliveBlocks.test(BBNum)) { + NewVarInfo.AliveBlocks.set(BBNum); + LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB) + << '\n'); + OldVarInfo.AliveBlocks.reset(BBNum); + } + } + + // Transfer the possible Kills in ElseBlocks from Reg to NewReg + auto I = OldVarInfo.Kills.begin(); + while (I != OldVarInfo.Kills.end()) { + if (ElseBlocks.contains((*I)->getParent())) { + NewVarInfo.Kills.push_back(*I); + I = OldVarInfo.Kills.erase(I); + } else { + ++I; + } + } +} + +void SIOptimizeVGPRLiveRange::optimizeLiveRange( + Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow, + MachineBasicBlock *Endif, + SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { + // Insert a new PHI, marking the value from the THEN region being + // undef. 
+ LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); + const auto *RC = MRI->getRegClass(Reg); + Register NewReg = MRI->createVirtualRegister(RC); + Register UndefReg = MRI->createVirtualRegister(RC); + MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + for (auto *Pred : Flow->predecessors()) { + if (Pred == If) + PHI.addReg(Reg).addMBB(Pred); + else + PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); + } + + // Replace all uses in the ELSE region or the PHIs in ENDIF block + // Use early increment range because setReg() will update the linked list. + for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { + auto *UseMI = O.getParent(); + auto *UseBlock = UseMI->getParent(); + // Replace uses in Endif block + if (UseBlock == Endif) { + assert(UseMI->isPHI() && "Uses should be PHI in Endif block"); + O.setReg(NewReg); + continue; + } + + // Replace uses in Else region + if (ElseBlocks.contains(UseBlock)) + O.setReg(NewReg); + } + + // The optimized Reg is not alive through Flow blocks anymore. + LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); + OldVarInfo.AliveBlocks.reset(Flow->getNumber()); + + updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks); + updateLiveRangeInThenRegion(Reg, If, Flow); +} + +void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( + Register Reg, MachineBasicBlock *Loop) const { + // Insert a new PHI, marking the value from the last loop iteration undef. + LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); + const auto *RC = MRI->getRegClass(Reg); + Register NewReg = MRI->createVirtualRegister(RC); + Register UndefReg = MRI->createVirtualRegister(RC); + + // Replace all uses in the LOOP region + // Use early increment range because setReg() will update the linked list. 
+ for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { + auto *UseMI = O.getParent(); + auto *UseBlock = UseMI->getParent(); + // Replace uses in Loop block + if (UseBlock == Loop) + O.setReg(NewReg); + } + + MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + for (auto *Pred : Loop->predecessors()) { + if (Pred == Loop) + PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); + else + PHI.addReg(Reg).addMBB(Pred); + } + + LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); + LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); + + // collectWaterfallCandidateRegisters only collects registers that are dead + // after the loop. So we know that the old reg is not live throughout the + // whole block anymore. + OldVarInfo.AliveBlocks.reset(Loop->getNumber()); + + // Mark the last use as kill + for (auto &MI : reverse(Loop->instrs())) { + if (MI.readsRegister(NewReg, TRI)) { + MI.addRegisterKilled(NewReg, TRI); + NewVarInfo.Kills.push_back(&MI); + break; + } + } + assert(!NewVarInfo.Kills.empty() && + "Failed to find last usage of register in loop"); +} + +char SIOptimizeVGPRLiveRange::ID = 0; + +INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE, + "SI Optimize VGPR LiveRange", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE, + "SI Optimize VGPR LiveRange", false, false) + +char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID; + +FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() { + return new SIOptimizeVGPRLiveRange(); +} + +bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { + + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + TII = ST.getInstrInfo(); + TRI = &TII->getRegisterInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + Loops = 
&getAnalysis<MachineLoopInfo>(); + LV = &getAnalysis<LiveVariables>(); + MRI = &MF.getRegInfo(); + + if (skipFunction(MF.getFunction())) + return false; + + bool MadeChange = false; + + // TODO: we need to think about the order of visiting the blocks to get + // optimal result for nesting if-else cases. + for (MachineBasicBlock &MBB : MF) { + for (auto &MI : MBB.terminators()) { + // Detect the if-else blocks + if (MI.getOpcode() == AMDGPU::SI_IF) { + MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB(); + auto *Endif = getElseTarget(IfTarget); + if (!Endif) + continue; + + SmallSetVector<MachineBasicBlock *, 16> ElseBlocks; + SmallVector<Register> CandidateRegs; + + LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: " + << printMBBReference(MBB) << ' ' + << printMBBReference(*IfTarget) << ' ' + << printMBBReference(*Endif) << '\n'); + + // Collect all the blocks in the ELSE region + collectElseRegionBlocks(IfTarget, Endif, ElseBlocks); + + // Collect the registers can be optimized + collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks, + CandidateRegs); + MadeChange |= !CandidateRegs.empty(); + // Now we are safe to optimize. + for (auto Reg : CandidateRegs) + optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks); + } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) { + LLVM_DEBUG(dbgs() << "Checking Waterfall loop: " + << printMBBReference(MBB) << '\n'); + + SmallSetVector<Register, 16> CandidateRegs; + collectWaterfallCandidateRegisters(&MBB, CandidateRegs); + MadeChange |= !CandidateRegs.empty(); + // Now we are safe to optimize. + for (auto Reg : CandidateRegs) + optimizeWaterfallLiveRange(Reg, &MBB); + } + } + } + + return MadeChange; +} |