aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp63
1 files changed, 55 insertions, 8 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 418296684d76..84d72e1b579f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -20,21 +20,25 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "SIDefines.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -70,17 +74,25 @@ char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
- // TODO: Preserve dominator tree.
+ if (RequireAndPreserveDomTree)
+ AU.addRequired<DominatorTreeWrapperPass>();
+
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<LegacyDivergenceAnalysis>();
+ if (RequireAndPreserveDomTree) {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ // FIXME: preserve PostDominatorTreeWrapperPass
+ }
+
// No divergent values are changed, only blocks and branch edges.
AU.addPreserved<LegacyDivergenceAnalysis>();
@@ -133,7 +145,7 @@ static void removeDoneExport(Function &F) {
}
}
-static BasicBlock *unifyReturnBlockSet(Function &F,
+static BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU,
ArrayRef<BasicBlock *> ReturningBlocks,
bool InsertExport,
const TargetTransformInfo &TTI,
@@ -153,7 +165,7 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
Value *Undef = UndefValue::get(B.getFloatTy());
B.CreateIntrinsic(Intrinsic::amdgcn_exp, { B.getFloatTy() },
{
- B.getInt32(9), // target, SQ_EXP_NULL
+ B.getInt32(AMDGPU::Exp::ET_NULL),
B.getInt32(0), // enabled channels
Undef, Undef, Undef, Undef, // values
B.getTrue(), // done
@@ -174,6 +186,8 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
// Loop over all of the blocks, replacing the return instruction with an
// unconditional branch.
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(ReturningBlocks.size());
for (BasicBlock *BB : ReturningBlocks) {
// Add an incoming element to the PHI node for every return instruction that
// is merging into this new block...
@@ -183,17 +197,27 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
// Remove and delete the return inst.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(NewRetBlock, BB);
+ Updates.push_back({DominatorTree::Insert, BB, NewRetBlock});
}
+ if (RequireAndPreserveDomTree)
+ DTU.applyUpdates(Updates);
+ Updates.clear();
+
for (BasicBlock *BB : ReturningBlocks) {
// Cleanup possible branch to unconditional branch to the return.
- simplifyCFG(BB, TTI, {2});
+ simplifyCFG(BB, TTI, RequireAndPreserveDomTree ? &DTU : nullptr,
+ SimplifyCFGOptions().bonusInstThreshold(2));
}
return NewRetBlock;
}
bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
+ DominatorTree *DT = nullptr;
+ if (RequireAndPreserveDomTree)
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
// If there's only one exit, we don't need to do anything, unless this is a
@@ -216,6 +240,8 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
bool InsertExport = false;
bool Changed = false;
+ std::vector<DominatorTree::UpdateType> Updates;
+
for (BasicBlock *BB : PDT.roots()) {
if (isa<ReturnInst>(BB->getTerminator())) {
if (!isUniformlyReached(DA, *BB))
@@ -272,14 +298,28 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
BI->eraseFromParent(); // Delete the unconditional branch.
// Add a new conditional branch with a dummy edge to the return block.
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
+ Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
} else { // Conditional branch.
+ SmallVector<BasicBlock *, 2> Successors(succ_begin(BB), succ_end(BB));
+
// Create a new transition block to hold the conditional branch.
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
+ Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
+
+ // 'Successors' become successors of TransitionBB instead of BB,
+ // and TransitionBB becomes a single successor of BB.
+ Updates.push_back({DominatorTree::Insert, BB, TransitionBB});
+ for (BasicBlock *Successor : Successors) {
+ Updates.push_back({DominatorTree::Insert, TransitionBB, Successor});
+ Updates.push_back({DominatorTree::Delete, BB, Successor});
+ }
+
// Create a branch that will always branch to the transition block and
// references DummyReturnBB.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
+ Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
}
Changed = true;
}
@@ -295,10 +335,12 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
"UnifiedUnreachableBlock", &F);
new UnreachableInst(F.getContext(), UnreachableBlock);
+ Updates.reserve(Updates.size() + UnreachableBlocks.size());
for (BasicBlock *BB : UnreachableBlocks) {
// Remove and delete the unreachable inst.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(UnreachableBlock, BB);
+ Updates.push_back({DominatorTree::Insert, BB, UnreachableBlock});
}
Changed = true;
}
@@ -328,6 +370,12 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
}
}
+ // FIXME: add PDT here once simplifycfg is ready.
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ if (RequireAndPreserveDomTree)
+ DTU.applyUpdates(Updates);
+ Updates.clear();
+
// Now handle return blocks.
if (ReturningBlocks.empty())
return Changed; // No blocks return
@@ -345,11 +393,10 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
// uniformly reached block with the "done" bit cleared.
auto BlocksToUnify = std::move(ReturningBlocks);
if (InsertExport) {
- BlocksToUnify.insert(BlocksToUnify.end(), UniformlyReachedRetBlocks.begin(),
- UniformlyReachedRetBlocks.end());
+ llvm::append_range(BlocksToUnify, UniformlyReachedRetBlocks);
}
- unifyReturnBlockSet(F, BlocksToUnify, InsertExport, TTI,
+ unifyReturnBlockSet(F, DTU, BlocksToUnify, InsertExport, TTI,
"UnifiedReturnBlock");
return true;
}