aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp637
1 files changed, 637 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
new file mode 100644
index 000000000000..307c9eba9d3b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -0,0 +1,637 @@
+//===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
+/// structures and waterfall loops.
+///
+/// When we do structurization, we usually transform an if-else into two
+/// successive if-then (with a flow block to do predicate inversion). Consider a
+/// simple case after structurization: A divergent value %a was defined before
+/// if-else and used in both THEN (use in THEN is optional) and ELSE part:
+/// bb.if:
+/// %a = ...
+/// ...
+/// bb.then:
+/// ... = op %a
+/// ... // %a can be dead here
+/// bb.flow:
+/// ...
+/// bb.else:
+/// ... = %a
+/// ...
+/// bb.endif
+///
+/// As the register allocator has no idea of the thread-control-flow, it will just
+/// assume %a would be alive in the whole range of bb.then because of a later
+/// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
+/// to exec mask. For this if-else case, the lanes active in bb.then will be
+/// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
+/// after the last use in bb.then until the end of the block. The reason is
+/// the instructions in bb.then will only overwrite lanes that will never be
+/// accessed in bb.else.
+///
+/// This pass aims to tell the register allocator that %a is in fact dead,
+/// through inserting a phi-node in bb.flow saying that %a is undef when coming
+/// from bb.then, and then replace the uses in the bb.else with the result of
+/// newly inserted phi.
+///
+/// Two key conditions must be met to ensure correctness:
+/// 1.) The def-point should be in the same loop-level as if-else-endif to make
+/// sure the second loop iteration still gets the correct data.
+/// 2.) There should be no further uses after the IF-ELSE region.
+///
+///
+/// Waterfall loops get inserted around instructions that use divergent values
+/// but can only be executed with a uniform value. For example an indirect call
+/// to a divergent address:
+/// bb.start:
+/// %a = ...
+/// %fun = ...
+/// ...
+/// bb.loop:
+/// call %fun (%a)
+/// ... // %a can be dead here
+/// loop %bb.loop
+///
+/// The loop block is executed multiple times, but it is run exactly once for
+/// each active lane. Similar to the if-else case, the register allocator
+/// assumes that %a is live throughout the loop as it is used again in the next
+/// iteration. If %a is a VGPR that is unused after the loop, it does not need
+/// to be live after its last use in the loop block. By inserting a phi-node at
+/// the start of bb.loop that is undef when coming from bb.loop, the register
+/// allocation knows that the value of %a does not need to be preserved through
+/// iterations of the loop.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-opt-vgpr-liverange"
+
+namespace {
+
+/// Machine function pass that handles two terminator-driven patterns (see
+/// runOnMachineFunction): SI_IF blocks whose target carries an SI_ELSE, and
+/// blocks terminated by SI_WATERFALL_LOOP. For both it inserts a PHI with undef
+/// inputs so the original vector register no longer has to stay alive across
+/// the whole region.
+class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
+private:
+  // Per-function state; every pointer is (re)assigned at the start of
+  // runOnMachineFunction().
+  const SIRegisterInfo *TRI = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  LiveVariables *LV = nullptr;
+  MachineDominatorTree *MDT = nullptr;
+  const MachineLoopInfo *Loops = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+
+public:
+  static char ID;
+
+  /// Return the block targeted by an SI_ELSE terminator of \p MBB, or nullptr
+  /// when \p MBB has no SI_ELSE terminator.
+  MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
+
+  /// Collect into \p Blocks the blocks found by walking predecessor edges
+  /// backwards from \p Endif; \p Flow is never added.
+  void collectElseRegionBlocks(
+      MachineBasicBlock *Flow, MachineBasicBlock *Endif,
+      SmallSetVector<MachineBasicBlock *, 16> &Blocks) const;
+
+  /// Append to \p CandidateRegs the virtual vector registers that are read in
+  /// \p ElseBlocks (or by PHIs of \p Endif), are not live into \p Endif, and
+  /// are not live through the THEN region.
+  void
+  collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
+                            MachineBasicBlock *Endif,
+                            SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
+                            SmallVectorImpl<Register> &CandidateRegs) const;
+
+  /// Insert into \p CandidateRegs the virtual vector registers that are read
+  /// in \p Loop, defined in another block and not live into any successor of
+  /// \p Loop other than \p Loop itself.
+  void collectWaterfallCandidateRegisters(
+      MachineBasicBlock *Loop,
+      SmallSetVector<Register, 16> &CandidateRegs) const;
+
+  /// Append to \p Uses the non-debug, non-PHI instructions of \p MBB that use
+  /// \p Reg.
+  void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
+                             SmallVectorImpl<MachineInstr *> &Uses) const;
+
+  /// Recalculate the LiveVariables information of \p Reg in the THEN region
+  /// entered from \p If.
+  void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
+                                   MachineBasicBlock *Flow) const;
+
+  /// Move the alive-block bits and kills that \p Reg has inside \p ElseBlocks
+  /// over to \p NewReg.
+  void updateLiveRangeInElseRegion(
+      Register Reg, Register NewReg, MachineBasicBlock *Flow,
+      MachineBasicBlock *Endif,
+      SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
+
+  /// Insert a PHI for \p Reg in \p Flow (undef on every edge except the one
+  /// from \p If), redirect the uses in \p ElseBlocks and \p Endif to the PHI
+  /// result, and update LiveVariables accordingly.
+  void
+  optimizeLiveRange(Register Reg, MachineBasicBlock *If,
+                    MachineBasicBlock *Flow, MachineBasicBlock *Endif,
+                    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
+
+  /// Insert a PHI for \p Reg at the top of the waterfall block \p Loop (undef
+  /// on the back edge) and redirect the uses inside \p Loop to the PHI result.
+  void optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *Loop) const;
+
+  SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "SI Optimize VGPR LiveRange";
+  }
+
+  /// LiveVariables, the machine dominator tree and the machine loop info are
+  /// all required and all declared preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LiveVariables>();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addRequired<MachineLoopInfo>();
+    AU.addPreserved<LiveVariables>();
+    AU.addPreserved<MachineDominatorTree>();
+    AU.addPreserved<MachineLoopInfo>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  /// The pass requires the machine function to be in SSA form.
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+};
+
+} // end anonymous namespace
+
+// Look for an SI_ELSE terminator in MBB. When one is present, MBB is an else
+// flow block and the block stored in operand 2 of that terminator (the Endif
+// block) is returned. Without such a terminator the result is nullptr.
+MachineBasicBlock *
+SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
+  for (MachineInstr &Term : MBB->terminators()) {
+    if (Term.getOpcode() != AMDGPU::SI_ELSE)
+      continue;
+    return Term.getOperand(2).getMBB();
+  }
+  return nullptr;
+}
+
+/// Gather the blocks of the ELSE region into \p Blocks by following
+/// predecessor edges backwards, starting at \p Endif. \p Flow is never added,
+/// so the walk does not continue past it.
+void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
+    MachineBasicBlock *Flow, MachineBasicBlock *Endif,
+    SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
+  assert(Flow != Endif);
+
+  // Queue every predecessor of Block except Flow. Blocks is a set, so a block
+  // that was queued earlier is not queued a second time.
+  auto QueuePreds = [&](MachineBasicBlock *Block) {
+    for (MachineBasicBlock *Pred : Block->predecessors())
+      if (Pred != Flow)
+        Blocks.insert(Pred);
+  };
+
+  // Blocks doubles as the worklist: it keeps growing while it is scanned by
+  // index, and the scan ends once no unprocessed entry is left.
+  QueuePreds(Endif);
+  for (unsigned Next = 0; Next < Blocks.size(); ++Next)
+    QueuePreds(Blocks[Next]);
+
+  LLVM_DEBUG({
+    dbgs() << "Found Else blocks: ";
+    for (MachineBasicBlock *ElseMBB : Blocks)
+      dbgs() << printMBBReference(*ElseMBB) << ' ';
+    dbgs() << '\n';
+  });
+}
+
+/// Append to \p Uses every non-debug instruction located in \p MBB that uses
+/// \p Reg, leaving out PHI instructions.
+void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
+    Register Reg, MachineBasicBlock *MBB,
+    SmallVectorImpl<MachineInstr *> &Uses) const {
+  for (MachineInstr &User : MRI->use_nodbg_instructions(Reg)) {
+    if (User.isPHI() || User.getParent() != MBB)
+      continue;
+    Uses.push_back(&User);
+  }
+}
+
+/// Collect the killed registers in the ELSE region which are not alive through
+/// the whole THEN region.
+///
+/// \param If            Block holding the SI_IF terminator.
+/// \param Flow          Flow block between the THEN and the ELSE region.
+/// \param Endif         Block the ELSE region joins into.
+/// \param ElseBlocks    Blocks that make up the ELSE region.
+/// \param CandidateRegs Output; receives the registers that may be optimized.
+void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
+    MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
+    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
+    SmallVectorImpl<Register> &CandidateRegs) const {
+
+  SmallSet<Register, 8> KillsInElse;
+
+  // Two conditions on the definition of a register have to hold:
+  //   a.) the value is defined before/in the IF block
+  //   b.) it is defined in the same loop-level as the IF block.
+  auto IsDefinedForIf = [&](Register Reg, const LiveVariables::VarInfo &VI) {
+    const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
+    return (VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
+           Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If);
+  };
+
+  // Pass 1: registers read by non-debug instructions of the ELSE region.
+  for (auto *Else : ElseBlocks) {
+    for (auto &MI : Else->instrs()) {
+      if (MI.isDebugInstr())
+        continue;
+
+      for (auto &MO : MI.operands()) {
+        if (!MO.isReg() || !MO.getReg() || MO.isDef())
+          continue;
+
+        Register MOReg = MO.getReg();
+        // We can only optimize AGPR/VGPR virtual register
+        if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
+          continue;
+
+        if (!MO.readsReg())
+          continue;
+
+        LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
+        if (!IsDefinedForIf(MOReg, VI))
+          continue;
+
+        // Check if the register is live into the endif block. If not,
+        // consider it killed in the else region.
+        if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
+          KillsInElse.insert(MOReg);
+        } else {
+          LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
+                            << " as Live in Endif\n");
+        }
+      }
+    }
+  }
+
+  // Pass 2: check the phis in the Endif, looking for value coming from the
+  // ELSE region. Make sure the phi-use is the last use.
+  for (auto &MI : Endif->phis()) {
+    // PHI operands after the def come in (value, incoming block) pairs.
+    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
+      auto &MO = MI.getOperand(Idx);
+      auto *Pred = MI.getOperand(Idx + 1).getMBB();
+      if (Pred == Flow)
+        continue;
+      assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
+
+      if (!MO.isReg() || !MO.getReg() || MO.isUndef())
+        continue;
+
+      Register Reg = MO.getReg();
+      if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
+        continue;
+
+      LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+
+      if (VI.isLiveIn(*Endif, Reg, *MRI)) {
+        LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
+                          << " as Live in Endif\n");
+        continue;
+      }
+
+      if (IsDefinedForIf(Reg, VI))
+        KillsInElse.insert(Reg);
+    }
+  }
+
+  // A register counts as live through the THEN region when it is read by a
+  // non-PHI instruction in Flow or Endif, or by a PHI there along an edge that
+  // passes through the THEN region.
+  auto IsLiveThroughThen = [&](Register Reg) {
+    for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
+         ++I) {
+      if (!I->readsReg())
+        continue;
+      auto *UseMI = I->getParent();
+      auto *UseMBB = UseMI->getParent();
+      if (UseMBB == Flow || UseMBB == Endif) {
+        if (!UseMI->isPHI())
+          return true;
+
+        // The operand following a PHI value is its incoming block.
+        auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
+        // The register is live through the path If->Flow or Flow->Endif.
+        // we should not optimize for such cases.
+        if ((UseMBB == Flow && IncomingMBB != If) ||
+            (UseMBB == Endif && IncomingMBB == Flow))
+          return true;
+      }
+    }
+    return false;
+  };
+
+  for (auto Reg : KillsInElse) {
+    if (!IsLiveThroughThen(Reg))
+      CandidateRegs.push_back(Reg);
+  }
+}
+
+/// Gather into \p CandidateRegs the virtual AGPR/VGPR registers that are read
+/// inside the waterfall block \p Loop, are defined in a different block and
+/// are not live into any successor of \p Loop other than \p Loop itself.
+void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
+    MachineBasicBlock *Loop,
+    SmallSetVector<Register, 16> &CandidateRegs) const {
+
+  for (MachineInstr &MI : Loop->instrs()) {
+    if (MI.isDebugInstr())
+      continue;
+
+    for (MachineOperand &MO : MI.operands()) {
+      if (!MO.isReg() || !MO.getReg() || MO.isDef())
+        continue;
+
+      Register MOReg = MO.getReg();
+      // Only virtual AGPR/VGPR registers can be optimized.
+      if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
+        continue;
+
+      if (!MO.readsReg())
+        continue;
+
+      // The value has to come from outside the LOOP block, and a register
+      // that was accepted already needs no second look.
+      const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
+      if (DefMBB == Loop || CandidateRegs.contains(MOReg))
+        continue;
+
+      // If the variable is used after the loop, the register coalescer will
+      // merge the newly created register and remove the phi node again, so
+      // such registers are left alone.
+      LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
+      const bool UsedAfterLoop =
+          llvm::any_of(Loop->successors(), [&](MachineBasicBlock *Succ) {
+            return Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI);
+          });
+
+      if (UsedAfterLoop) {
+        LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
+                          << printReg(MOReg, TRI, 0, MRI) << '\n');
+        continue;
+      }
+
+      LLVM_DEBUG(dbgs() << "Found candidate reg: "
+                        << printReg(MOReg, TRI, 0, MRI) << '\n');
+      CandidateRegs.insert(MOReg);
+    }
+  }
+}
+
+// Rebuild the LiveVariables information of Reg for the THEN region, i.e. the
+// blocks reached by a depth-first walk from the non-Flow successor of If up to
+// the point where the walk arrives at Flow.
+void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
+    Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
+
+  // The THEN region is entered through the first successor of If that is not
+  // the Flow block.
+  MachineBasicBlock *ThenEntry = nullptr;
+  for (MachineBasicBlock *Succ : If->successors()) {
+    if (Succ == Flow)
+      continue;
+    ThenEntry = Succ;
+    break;
+  }
+  assert(ThenEntry && "No successor in Then region?");
+
+  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
+  df_iterator_default_set<MachineBasicBlock *, 16> Visited;
+
+  // Step 1: drop the alive bits of the region; they are recomputed below.
+  // NOTE(review): `break` ends the whole traversal once Flow is reached, not
+  // only the current path - confirm every THEN block is visited before that.
+  for (MachineBasicBlock *ThenMBB : depth_first_ext(ThenEntry, Visited)) {
+    if (ThenMBB == Flow)
+      break;
+
+    LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*ThenMBB)
+                      << '\n');
+    OldVarInfo.AliveBlocks.reset(ThenMBB->getNumber());
+  }
+
+  // Step 2: for every PHI in a visited block that reads Reg, remember the
+  // incoming block; Reg has to stay alive through those blocks.
+  SmallPtrSet<MachineBasicBlock *, 16> PHIIncoming;
+  for (auto UseIt = MRI->use_nodbg_begin(Reg), UseEnd = MRI->use_nodbg_end();
+       UseIt != UseEnd; ++UseIt) {
+    MachineInstr *UseMI = UseIt->getParent();
+    if (!UseMI->isPHI() || !UseIt->readsReg())
+      continue;
+    if (!Visited.contains(UseMI->getParent()))
+      continue;
+    // The operand following a PHI value is its incoming block.
+    PHIIncoming.insert(UseMI->getOperand(UseIt.getOperandNo() + 1).getMBB());
+  }
+
+  Visited.clear();
+
+  // Step 3: walk the region again and feed the uses back to LiveVariables.
+  for (MachineBasicBlock *ThenMBB : depth_first_ext(ThenEntry, Visited)) {
+    if (ThenMBB == Flow)
+      break;
+
+    // PHI uses were handled in step 2, so only non-PHI uses are gathered.
+    SmallVector<MachineInstr *> Uses;
+    findNonPHIUsesInBlock(Reg, ThenMBB, Uses);
+
+    if (Uses.size() == 1) {
+      LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
+                        << printMBBReference(*ThenMBB) << '\n');
+      LV->HandleVirtRegUse(Reg, ThenMBB, *Uses.front());
+    } else if (Uses.size() > 1) {
+      // Several uses have to be reported in program order, so scan the block
+      // from top to bottom instead of relying on the order of Uses.
+      LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
+                        << printMBBReference(*ThenMBB) << '\n');
+      for (MachineInstr &MI : *ThenMBB) {
+        if (llvm::is_contained(Uses, &MI))
+          LV->HandleVirtRegUse(Reg, ThenMBB, MI);
+      }
+    }
+
+    // A PHI incoming block keeps Reg alive through the whole block.
+    if (PHIIncoming.contains(ThenMBB))
+      LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
+                                  ThenMBB);
+  }
+
+  // Step 4: set the isKilled flag on the kills that fall into a visited block.
+  for (MachineInstr *KillMI : OldVarInfo.Kills) {
+    if (Visited.contains(KillMI->getParent()))
+      KillMI->addRegisterKilled(Reg, TRI);
+  }
+}
+
+/// Hand the liveness that \p Reg has inside \p ElseBlocks over to \p NewReg:
+/// both the alive-block bits and the kill instructions located in those blocks
+/// move from the old VarInfo to the new one. \p Flow and \p Endif are not used
+/// here.
+void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
+    Register Reg, Register NewReg, MachineBasicBlock *Flow,
+    MachineBasicBlock *Endif,
+    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
+  // NOTE(review): the lookup for NewReg is kept first on purpose; presumably
+  // getVarInfo() may grow its table for a fresh register, which could
+  // invalidate a reference taken earlier - confirm before reordering.
+  LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
+  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
+
+  // Move the alive bits of the ELSE blocks from Reg to NewReg.
+  for (MachineBasicBlock *ElseMBB : ElseBlocks) {
+    const unsigned BBNum = ElseMBB->getNumber();
+    if (!OldVarInfo.AliveBlocks.test(BBNum))
+      continue;
+
+    NewVarInfo.AliveBlocks.set(BBNum);
+    LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*ElseMBB)
+                      << '\n');
+    OldVarInfo.AliveBlocks.reset(BBNum);
+  }
+
+  // Move the kills located in ELSE blocks from Reg to NewReg. The kills that
+  // stay are compacted to the front in their original order and the leftover
+  // tail is erased afterwards.
+  auto &OldKills = OldVarInfo.Kills;
+  auto KeepEnd = OldKills.begin();
+  for (MachineInstr *KillMI : OldKills) {
+    if (ElseBlocks.contains(KillMI->getParent()))
+      NewVarInfo.Kills.push_back(KillMI);
+    else
+      *KeepEnd++ = KillMI;
+  }
+  OldKills.erase(KeepEnd, OldKills.end());
+}
+
+/// Split the live range of \p Reg at the flow block: a new PHI in \p Flow takes
+/// \p Reg on the edge from \p If and an undef value on every other edge, and
+/// all uses in the ELSE region and in \p Endif are switched to the PHI result.
+/// The LiveVariables information of both registers is updated afterwards.
+void SIOptimizeVGPRLiveRange::optimizeLiveRange(
+    Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
+    MachineBasicBlock *Endif,
+    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
+  LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
+
+  // NewReg carries the value into the ELSE region; UndefReg only serves as the
+  // undef input for the edges coming from the THEN region.
+  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+  Register NewReg = MRI->createVirtualRegister(RC);
+  Register UndefReg = MRI->createVirtualRegister(RC);
+
+  MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
+                                    TII->get(TargetOpcode::PHI), NewReg);
+  for (MachineBasicBlock *Pred : Flow->predecessors()) {
+    if (Pred == If)
+      PHI.addReg(Reg);
+    else
+      PHI.addReg(UndefReg, RegState::Undef);
+    PHI.addMBB(Pred);
+  }
+
+  // Redirect the uses in the ELSE region and the PHI uses in the ENDIF block.
+  // setReg() relinks the use list, hence the early-increment range.
+  for (MachineOperand &UseMO : make_early_inc_range(MRI->use_operands(Reg))) {
+    MachineInstr *UseMI = UseMO.getParent();
+    MachineBasicBlock *UseBlock = UseMI->getParent();
+    const bool InEndif = UseBlock == Endif;
+    assert((!InEndif || UseMI->isPHI()) && "Uses should be PHI in Endif block");
+    if (InEndif || ElseBlocks.contains(UseBlock))
+      UseMO.setReg(NewReg);
+  }
+
+  // The optimized Reg is not alive through Flow blocks anymore.
+  LV->getVarInfo(Reg).AliveBlocks.reset(Flow->getNumber());
+
+  updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
+  updateLiveRangeInThenRegion(Reg, If, Flow);
+}
+
+/// Split the live range of \p Reg at the waterfall block \p Loop: the uses
+/// inside \p Loop are switched to a new register defined by a PHI at the top
+/// of the block. The PHI takes an undef value on the back edge and \p Reg on
+/// every other incoming edge.
+void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
+    Register Reg, MachineBasicBlock *Loop) const {
+  LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
+
+  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+  Register NewReg = MRI->createVirtualRegister(RC);
+  Register UndefReg = MRI->createVirtualRegister(RC);
+
+  // Switch the uses inside the LOOP block first, so that the PHI built below
+  // keeps reading the old register. setReg() relinks the use list, hence the
+  // early-increment range.
+  for (MachineOperand &UseMO : make_early_inc_range(MRI->use_operands(Reg))) {
+    if (UseMO.getParent()->getParent() == Loop)
+      UseMO.setReg(NewReg);
+  }
+
+  MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(),
+                                    TII->get(TargetOpcode::PHI), NewReg);
+  for (MachineBasicBlock *Pred : Loop->predecessors()) {
+    if (Pred == Loop)
+      PHI.addReg(UndefReg, RegState::Undef);
+    else
+      PHI.addReg(Reg);
+    PHI.addMBB(Pred);
+  }
+
+  // NOTE(review): the lookup for NewReg is kept ahead of the one for Reg;
+  // presumably getVarInfo() may grow its table for a fresh register - confirm
+  // before reordering.
+  LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
+  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
+
+  // collectWaterfallCandidateRegisters only collects registers that are dead
+  // after the loop, so the old register is no longer live through the block.
+  OldVarInfo.AliveBlocks.reset(Loop->getNumber());
+
+  // The last instruction of the block reading NewReg becomes its kill.
+  for (MachineInstr &MI : reverse(Loop->instrs())) {
+    if (!MI.readsRegister(NewReg, TRI))
+      continue;
+    MI.addRegisterKilled(NewReg, TRI);
+    NewVarInfo.Kills.push_back(&MI);
+    break;
+  }
+  assert(!NewVarInfo.Kills.empty() &&
+         "Failed to find last usage of register in loop");
+}
+
+char SIOptimizeVGPRLiveRange::ID = 0; // Handed to MachineFunctionPass by the constructor.
+// Pass registration under DEBUG_TYPE; the dependencies listed here match the analyses requested in getAnalysisUsage().
+INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
+ "SI Optimize VGPR LiveRange", false, false) // NOTE(review): presumably the CFG-only and is-analysis flags - confirm.
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
+ "SI Optimize VGPR LiveRange", false, false)
+// Reference in the llvm namespace bound to the pass ID defined above.
+char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
+// Factory: returns a newly allocated pass instance; presumably the caller takes ownership - confirm.
+FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
+ return new SIOptimizeVGPRLiveRange();
+}
+
+/// Pass entry point. Caches the target and analysis handles, then scans the
+/// terminators of every block: an SI_IF whose target block carries an SI_ELSE
+/// starts the if-else optimization, an SI_WATERFALL_LOOP starts the waterfall
+/// loop optimization.
+///
+/// \returns true if at least one candidate register was found and optimized.
+bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  TII = ST.getInstrInfo();
+  TRI = &TII->getRegisterInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
+  Loops = &getAnalysis<MachineLoopInfo>();
+  LV = &getAnalysis<LiveVariables>();
+  MRI = &MF.getRegInfo();
+
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  bool MadeChange = false;
+
+  // TODO: we need to think about the order of visiting the blocks to get
+  // optimal result for nesting if-else cases.
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &Term : MBB.terminators()) {
+      switch (Term.getOpcode()) {
+      case AMDGPU::SI_IF: {
+        // Operand 2 of SI_IF names the flow block; only a flow block with an
+        // SI_ELSE terminator forms an if-else structure.
+        MachineBasicBlock *IfTarget = Term.getOperand(2).getMBB();
+        MachineBasicBlock *Endif = getElseTarget(IfTarget);
+        if (!Endif)
+          break;
+
+        LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
+                          << printMBBReference(MBB) << ' '
+                          << printMBBReference(*IfTarget) << ' '
+                          << printMBBReference(*Endif) << '\n');
+
+        // Gather the blocks of the ELSE region, then the registers that can
+        // be optimized within it.
+        SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
+        collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
+
+        SmallVector<Register> CandidateRegs;
+        collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
+                                  CandidateRegs);
+        if (!CandidateRegs.empty())
+          MadeChange = true;
+
+        for (Register Reg : CandidateRegs)
+          optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
+        break;
+      }
+      case AMDGPU::SI_WATERFALL_LOOP: {
+        LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
+                          << printMBBReference(MBB) << '\n');
+
+        SmallSetVector<Register, 16> CandidateRegs;
+        collectWaterfallCandidateRegisters(&MBB, CandidateRegs);
+        if (!CandidateRegs.empty())
+          MadeChange = true;
+
+        for (Register Reg : CandidateRegs)
+          optimizeWaterfallLiveRange(Reg, &MBB);
+        break;
+      }
+      default:
+        break;
+      }
+    }
+  }
+
+  return MadeChange;
+}