path: root/contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
Diffstat (limited to 'contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h')
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h  344
1 file changed, 337 insertions(+), 7 deletions(-)
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index c22787531117..f581b18bff17 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -42,15 +42,20 @@
#include <iterator>
#include <limits>
#include <list>
+#include <queue>
#include <string>
+#include <unordered_set>
#include <utility>
#include <vector>
#define DEBUG_TYPE "block-freq"
+namespace llvm {
extern llvm::cl::opt<bool> CheckBFIUnknownBlockQueries;
-namespace llvm {
+extern llvm::cl::opt<bool> UseIterativeBFIInference;
+extern llvm::cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock;
+extern llvm::cl::opt<double> IterativeBFIPrecision;
class BranchProbabilityInfo;
class Function;
@@ -450,12 +455,6 @@ public:
bool addToDist(Distribution &Dist, const LoopData *OuterLoop,
const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
- LoopData &getLoopPackage(const BlockNode &Head) {
- assert(Head.Index < Working.size());
- assert(Working[Head.Index].isLoopHeader());
- return *Working[Head.Index].Loop;
- }
-
/// Analyze irreducible SCCs.
///
/// Separate irreducible SCCs from \c G, which is an explicit graph of \c
@@ -968,6 +967,45 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
return bfi_detail::getBlockName(getBlock(Node));
}
+ /// The current implementation for computing relative block frequencies does
+ /// not correctly handle control-flow graphs containing irreducible loops. To
+ /// resolve the problem, we apply a post-processing step, which iteratively
+ /// updates block frequencies based on the frequencies of their predecessors.
+ /// This corresponds to finding the stationary point of the Markov chain by
+ /// an iterative method aka "PageRank computation"; a small standalone sketch
+ /// of this iteration follows the declarations below.
+ /// The algorithm runs at most IterativeBFIMaxIterationsPerBlock * |V| update
+ /// steps (each scanning the in-edges of one block) but typically converges
+ /// much faster.
+ ///
+ /// Decide whether we want to apply iterative inference for a given function.
+ bool needIterativeInference() const;
+
+ /// Apply an iterative post-processing to infer correct counts for irr loops.
+ void applyIterativeInference();
+
+ using ProbMatrixType = std::vector<std::vector<std::pair<size_t, Scaled64>>>;
+
+ /// Run iterative inference for a probability matrix and initial frequencies.
+ void iterativeInference(const ProbMatrixType &ProbMatrix,
+ std::vector<Scaled64> &Freq) const;
+
+ /// Find all blocks to apply inference on, that is, reachable from the entry
+ /// and backward reachable from the exits along edges with positive probability.
+ void findReachableBlocks(std::vector<const BlockT *> &Blocks) const;
+
+ /// Build a matrix of probabilities with transitions (edges) between the
+ /// blocks: ProbMatrix[I] holds pairs (J, P), where Pr[J -> I | J] = P
+ void initTransitionProbabilities(
+ const std::vector<const BlockT *> &Blocks,
+ const DenseMap<const BlockT *, size_t> &BlockIndex,
+ ProbMatrixType &ProbMatrix) const;
+
+#ifndef NDEBUG
+ /// Compute the discrepancy between current block frequencies and the
+ /// probability matrix.
+ Scaled64 discrepancy(const ProbMatrixType &ProbMatrix,
+ const std::vector<Scaled64> &Freq) const;
+#endif
+
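As an aside on the "PageRank computation" mentioned in the comment above, the following is a minimal standalone sketch of the underlying idea: finding the stationary point of a Markov chain by repeatedly multiplying the frequency vector by the transition matrix. It uses plain doubles and a made-up 2x2 transition matrix rather than the Scaled64 arithmetic, sparse representation, and active-set scheduling of the in-tree implementation further down in this diff.

// Minimal sketch (illustrative only): stationary point of a 2-state Markov
// chain found by repeated application of the transition probabilities.
#include <cmath>
#include <cstdio>

int main() {
  // Made-up probabilities: Pr[0 -> 1] = 0.3, Pr[1 -> 0] = 0.8;
  // self-loops take the remaining probability mass.
  const double P[2][2] = {{0.7, 0.3}, {0.8, 0.2}};
  double Freq[2] = {0.5, 0.5}; // initial guess, normalized to sum to 1
  for (int It = 0; It < 100; ++It) {
    // NewFreq = Freq * P (row vector times transition matrix).
    double New[2] = {Freq[0] * P[0][0] + Freq[1] * P[1][0],
                     Freq[0] * P[0][1] + Freq[1] * P[1][1]};
    double Change = std::fabs(New[0] - Freq[0]) + std::fabs(New[1] - Freq[1]);
    Freq[0] = New[0];
    Freq[1] = New[1];
    if (Change < 1e-12)
      break;
  }
  // Converges to the stationary distribution, roughly (0.7273, 0.2727).
  std::printf("%f %f\n", Freq[0], Freq[1]);
  return 0;
}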
public:
BlockFrequencyInfoImpl() = default;
@@ -1094,6 +1132,10 @@ void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F,
computeMassInLoops();
computeMassInFunction();
unwrapLoops();
+ // Apply a post-processing step improving computed frequencies for functions
+ // with irreducible loops.
+ if (needIterativeInference())
+ applyIterativeInference();
finalizeMetrics();
if (CheckBFIUnknownBlockQueries) {
@@ -1314,6 +1356,294 @@ template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() {
llvm_unreachable("unhandled irreducible control flow");
}
+template <class BT>
+bool BlockFrequencyInfoImpl<BT>::needIterativeInference() const {
+ if (!UseIterativeBFIInference)
+ return false;
+ if (!F->getFunction().hasProfileData())
+ return false;
+ // Apply iterative inference only if the function contains irreducible loops;
+ // otherwise, computed block frequencies are reasonably correct.
+ for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) {
+ if (L->isIrreducible())
+ return true;
+ }
+ return false;
+}
+
+template <class BT> void BlockFrequencyInfoImpl<BT>::applyIterativeInference() {
+ // Extract blocks for processing: a block is considered for inference iff it
+ // can be reached from the entry and can reach an exit along edges with a
+ // positive probability.
+ // Blocks that are not processed are assigned zero frequency and are ignored
+ // in the computation.
+ std::vector<const BlockT *> ReachableBlocks;
+ findReachableBlocks(ReachableBlocks);
+ if (ReachableBlocks.empty())
+ return;
+
+ // The map is used to index successors/predecessors of reachable blocks in
+ // the ReachableBlocks vector.
+ DenseMap<const BlockT *, size_t> BlockIndex;
+ // Extract initial frequencies for the reachable blocks
+ auto Freq = std::vector<Scaled64>(ReachableBlocks.size());
+ Scaled64 SumFreq;
+ for (size_t I = 0; I < ReachableBlocks.size(); I++) {
+ const BlockT *BB = ReachableBlocks[I];
+ BlockIndex[BB] = I;
+ Freq[I] = getFloatingBlockFreq(BB);
+ SumFreq += Freq[I];
+ }
+ assert(!SumFreq.isZero() && "empty initial block frequencies");
+
+ LLVM_DEBUG(dbgs() << "Applying iterative inference for " << F->getName()
+ << " with " << ReachableBlocks.size() << " blocks\n");
+
+ // Normalizing frequencies so they sum up to 1.0
+ for (auto &Value : Freq) {
+ Value /= SumFreq;
+ }
+
+ // Setting up edge probabilities using sparse matrix representation:
+ // ProbMatrix[I] holds a vector of pairs (J, P) where Pr[J -> I | J] = P
+ ProbMatrixType ProbMatrix;
+ initTransitionProbabilities(ReachableBlocks, BlockIndex, ProbMatrix);
+
+ // Run the propagation
+ iterativeInference(ProbMatrix, Freq);
+
+ // Assign computed frequency values
+ for (const BlockT &BB : *F) {
+ auto Node = getNode(&BB);
+ if (!Node.isValid())
+ continue;
+ if (BlockIndex.count(&BB)) {
+ Freqs[Node.Index].Scaled = Freq[BlockIndex[&BB]];
+ } else {
+ Freqs[Node.Index].Scaled = Scaled64::getZero();
+ }
+ }
+}
+
+template <class BT>
+void BlockFrequencyInfoImpl<BT>::iterativeInference(
+ const ProbMatrixType &ProbMatrix, std::vector<Scaled64> &Freq) const {
+ assert(0.0 < IterativeBFIPrecision && IterativeBFIPrecision < 1.0 &&
+ "incorrectly specified precision");
+ // Convert double precision to Scaled64
+ const auto Precision =
+ Scaled64::getInverse(static_cast<uint64_t>(1.0 / IterativeBFIPrecision));
+ const size_t MaxIterations = IterativeBFIMaxIterationsPerBlock * Freq.size();
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << " Initial discrepancy = "
+ << discrepancy(ProbMatrix, Freq).toString() << "\n");
+#endif
+
+ // Successors[I] holds the unique successors of the I-th block
+ auto Successors = std::vector<std::vector<size_t>>(Freq.size());
+ for (size_t I = 0; I < Freq.size(); I++) {
+ for (auto &Jump : ProbMatrix[I]) {
+ Successors[Jump.first].push_back(I);
+ }
+ }
+
+ // To speed up the computation, we maintain a set of "active" blocks whose
+ // frequencies need to be updated based on the incoming edges.
+ // The set is dynamic and changes after every update. Initially all blocks
+ // with a positive frequency are active.
+ auto IsActive = std::vector<bool>(Freq.size(), false);
+ std::queue<size_t> ActiveSet;
+ for (size_t I = 0; I < Freq.size(); I++) {
+ if (Freq[I] > 0) {
+ ActiveSet.push(I);
+ IsActive[I] = true;
+ }
+ }
+
+ // Iterate over the blocks propagating frequencies
+ size_t It = 0;
+ while (It++ < MaxIterations && !ActiveSet.empty()) {
+ size_t I = ActiveSet.front();
+ ActiveSet.pop();
+ IsActive[I] = false;
+
+ // Compute a new frequency for the block: NewFreq := Freq \times ProbMatrix.
+ // Special care is taken for self-edges: they are excluded from the sum, and
+ // the result is instead divided by (1.0 - SelfProb), where SelfProb is the
+ // total probability of the block's self-edges.
+ Scaled64 NewFreq;
+ Scaled64 OneMinusSelfProb = Scaled64::getOne();
+ for (auto &Jump : ProbMatrix[I]) {
+ if (Jump.first == I) {
+ OneMinusSelfProb -= Jump.second;
+ } else {
+ NewFreq += Freq[Jump.first] * Jump.second;
+ }
+ }
+ if (OneMinusSelfProb != Scaled64::getOne())
+ NewFreq /= OneMinusSelfProb;
+
+ // If the block's frequency has changed enough, then
+ // make sure the block and its successors are in the active set
+ auto Change = Freq[I] >= NewFreq ? Freq[I] - NewFreq : NewFreq - Freq[I];
+ if (Change > Precision) {
+ ActiveSet.push(I);
+ IsActive[I] = true;
+ for (size_t Succ : Successors[I]) {
+ if (!IsActive[Succ]) {
+ ActiveSet.push(Succ);
+ IsActive[Succ] = true;
+ }
+ }
+ }
+
+ // Update the frequency for the block
+ Freq[I] = NewFreq;
+ }
+
+ LLVM_DEBUG(dbgs() << " Completed " << It << " inference iterations"
+ << format(" (%0.0f per block)", double(It) / Freq.size())
+ << "\n");
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << " Final discrepancy = "
+ << discrepancy(ProbMatrix, Freq).toString() << "\n");
+#endif
+}
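For illustration, here is a minimal self-contained sketch of the same propagation scheme on plain doubles with made-up probabilities: each step recomputes one block's frequency from its in-edges, excludes self-edges from the sum and divides by (1 - SelfProb) instead, and re-activates the block's successors only when the value moved by more than the tolerance. The Scaled64 arithmetic, cl::opt handling, and debug output of the in-tree version are omitted.

#include <cmath>
#include <cstdio>
#include <queue>
#include <utility>
#include <vector>

// ProbMatrix[I] holds pairs (J, P) with Pr[J -> I | J] = P, as in the header.
using Matrix = std::vector<std::vector<std::pair<size_t, double>>>;

static void infer(const Matrix &ProbMatrix, std::vector<double> &Freq,
                  double Eps, size_t MaxIterations) {
  // Successors[J] lists the blocks whose frequency depends on block J.
  std::vector<std::vector<size_t>> Successors(Freq.size());
  for (size_t I = 0; I < Freq.size(); ++I)
    for (const auto &Jump : ProbMatrix[I])
      Successors[Jump.first].push_back(I);

  // Initially every block is active.
  std::vector<bool> IsActive(Freq.size(), true);
  std::queue<size_t> ActiveSet;
  for (size_t I = 0; I < Freq.size(); ++I)
    ActiveSet.push(I);

  size_t It = 0;
  while (It++ < MaxIterations && !ActiveSet.empty()) {
    size_t I = ActiveSet.front();
    ActiveSet.pop();
    IsActive[I] = false;

    // New frequency from the incoming edges; self-edges are folded into the
    // denominator (1 - SelfProb) instead of the sum.
    double NewFreq = 0.0, OneMinusSelfProb = 1.0;
    for (const auto &Jump : ProbMatrix[I]) {
      if (Jump.first == I)
        OneMinusSelfProb -= Jump.second;
      else
        NewFreq += Freq[Jump.first] * Jump.second;
    }
    if (OneMinusSelfProb != 1.0)
      NewFreq /= OneMinusSelfProb;

    // Re-activate the block and its successors if the change is significant.
    if (std::fabs(NewFreq - Freq[I]) > Eps) {
      ActiveSet.push(I);
      IsActive[I] = true;
      for (size_t Succ : Successors[I])
        if (!IsActive[Succ]) {
          ActiveSet.push(Succ);
          IsActive[Succ] = true;
        }
    }
    Freq[I] = NewFreq;
  }
}

int main() {
  // Toy cycle 0 -> 1 -> 2 -> 0 with a 0.5 self-loop on block 1.
  Matrix ProbMatrix(3);
  ProbMatrix[0] = {{2, 1.0}};           // Pr[2 -> 0] = 1.0
  ProbMatrix[1] = {{0, 1.0}, {1, 0.5}}; // Pr[0 -> 1] = 1.0, Pr[1 -> 1] = 0.5
  ProbMatrix[2] = {{1, 0.5}};           // Pr[1 -> 2] = 0.5
  std::vector<double> Freq = {1.0 / 3, 1.0 / 3, 1.0 / 3};
  infer(ProbMatrix, Freq, 1e-9, 100 * Freq.size());
  // Settles on frequencies in the ratio 1 : 2 : 1; the overall scale is not
  // renormalized here, mirroring the in-tree code.
  for (double F : Freq)
    std::printf("%f\n", F);
  return 0;
}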
+
+template <class BT>
+void BlockFrequencyInfoImpl<BT>::findReachableBlocks(
+ std::vector<const BlockT *> &Blocks) const {
+ // Find all blocks to apply inference on, that is, reachable from the entry
+ // along edges with non-zero probabilities
+ std::queue<const BlockT *> Queue;
+ std::unordered_set<const BlockT *> Reachable;
+ const BlockT *Entry = &F->front();
+ Queue.push(Entry);
+ Reachable.insert(Entry);
+ while (!Queue.empty()) {
+ const BlockT *SrcBB = Queue.front();
+ Queue.pop();
+ for (const BlockT *DstBB : children<const BlockT *>(SrcBB)) {
+ auto EP = BPI->getEdgeProbability(SrcBB, DstBB);
+ if (EP.isZero())
+ continue;
+ if (Reachable.find(DstBB) == Reachable.end()) {
+ Queue.push(DstBB);
+ Reachable.insert(DstBB);
+ }
+ }
+ }
+
+ // Find all blocks to apply inference on, that is, backward reachable from
+ // the exit blocks along (backward) edges with non-zero probabilities
+ std::unordered_set<const BlockT *> InverseReachable;
+ for (const BlockT &BB : *F) {
+ // An exit block is a block without any successors
+ bool HasSucc = GraphTraits<const BlockT *>::child_begin(&BB) !=
+ GraphTraits<const BlockT *>::child_end(&BB);
+ if (!HasSucc && Reachable.count(&BB)) {
+ Queue.push(&BB);
+ InverseReachable.insert(&BB);
+ }
+ }
+ while (!Queue.empty()) {
+ const BlockT *SrcBB = Queue.front();
+ Queue.pop();
+ for (const BlockT *DstBB : children<Inverse<const BlockT *>>(SrcBB)) {
+ auto EP = BPI->getEdgeProbability(DstBB, SrcBB);
+ if (EP.isZero())
+ continue;
+ if (InverseReachable.find(DstBB) == InverseReachable.end()) {
+ Queue.push(DstBB);
+ InverseReachable.insert(DstBB);
+ }
+ }
+ }
+
+ // Collect the result
+ Blocks.reserve(F->size());
+ for (const BlockT &BB : *F) {
+ if (Reachable.count(&BB) && InverseReachable.count(&BB)) {
+ Blocks.push_back(&BB);
+ }
+ }
+}
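The filter above can be illustrated with a small standalone sketch on a hypothetical adjacency-list graph: blocks kept for inference are those reached from the entry by a forward BFS and reaching some exit (a block without successors) by a backward BFS. Zero-probability edges are assumed to have been dropped from the adjacency lists already, whereas the in-tree code queries BPI for each edge.

#include <cstdio>
#include <queue>
#include <unordered_set>
#include <vector>

// Succs[I] lists the successors of block I (positive-probability edges only).
static std::vector<size_t>
findReachable(const std::vector<std::vector<size_t>> &Succs, size_t Entry) {
  size_t N = Succs.size();
  // Predecessor lists for the backward walk.
  std::vector<std::vector<size_t>> Preds(N);
  for (size_t I = 0; I < N; ++I)
    for (size_t S : Succs[I])
      Preds[S].push_back(I);

  // Breadth-first search over an adjacency list from a set of roots.
  auto BFS = [](const std::vector<std::vector<size_t>> &Adj,
                const std::vector<size_t> &Roots) {
    std::unordered_set<size_t> Seen(Roots.begin(), Roots.end());
    std::queue<size_t> Queue;
    for (size_t R : Roots)
      Queue.push(R);
    while (!Queue.empty()) {
      size_t U = Queue.front();
      Queue.pop();
      for (size_t V : Adj[U])
        if (Seen.insert(V).second)
          Queue.push(V);
    }
    return Seen;
  };

  // Forward reachable from the entry.
  std::unordered_set<size_t> Reachable = BFS(Succs, {Entry});
  // Backward reachable from the exits that are themselves forward reachable.
  std::vector<size_t> Exits;
  for (size_t I = 0; I < N; ++I)
    if (Succs[I].empty() && Reachable.count(I))
      Exits.push_back(I);
  std::unordered_set<size_t> InverseReachable = BFS(Preds, Exits);

  // Keep the intersection of the two sets.
  std::vector<size_t> Blocks;
  for (size_t I = 0; I < N; ++I)
    if (Reachable.count(I) && InverseReachable.count(I))
      Blocks.push_back(I);
  return Blocks;
}

int main() {
  // 0 -> {1, 2}, 1 -> 3 (exit), 2 -> 2 (self-loop only, never reaches an
  // exit, so it is filtered out), 4 -> 3 (unreachable from the entry).
  std::vector<std::vector<size_t>> Succs = {{1, 2}, {3}, {2}, {}, {3}};
  for (size_t B : findReachable(Succs, 0))
    std::printf("%zu\n", B); // prints 0, 1, 3
  return 0;
}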
+
+template <class BT>
+void BlockFrequencyInfoImpl<BT>::initTransitionProbabilities(
+ const std::vector<const BlockT *> &Blocks,
+ const DenseMap<const BlockT *, size_t> &BlockIndex,
+ ProbMatrixType &ProbMatrix) const {
+ const size_t NumBlocks = Blocks.size();
+ auto Succs = std::vector<std::vector<std::pair<size_t, Scaled64>>>(NumBlocks);
+ auto SumProb = std::vector<Scaled64>(NumBlocks);
+
+ // Find unique successors and corresponding probabilities for every block
+ for (size_t Src = 0; Src < NumBlocks; Src++) {
+ const BlockT *BB = Blocks[Src];
+ std::unordered_set<const BlockT *> UniqueSuccs;
+ for (const auto SI : children<const BlockT *>(BB)) {
+ // Ignore cold blocks
+ if (BlockIndex.find(SI) == BlockIndex.end())
+ continue;
+ // Ignore parallel edges between BB and SI blocks
+ if (UniqueSuccs.find(SI) != UniqueSuccs.end())
+ continue;
+ UniqueSuccs.insert(SI);
+ // Ignore jumps with zero probability
+ auto EP = BPI->getEdgeProbability(BB, SI);
+ if (EP.isZero())
+ continue;
+
+ auto EdgeProb =
+ Scaled64::getFraction(EP.getNumerator(), EP.getDenominator());
+ size_t Dst = BlockIndex.find(SI)->second;
+ Succs[Src].push_back(std::make_pair(Dst, EdgeProb));
+ SumProb[Src] += EdgeProb;
+ }
+ }
+
+ // Add transitions for every jump with positive branch probability
+ ProbMatrix = ProbMatrixType(NumBlocks);
+ for (size_t Src = 0; Src < NumBlocks; Src++) {
+ // Ignore blocks w/o successors
+ if (Succs[Src].empty())
+ continue;
+
+ assert(!SumProb[Src].isZero() && "Zero sum probability of non-exit block");
+ for (auto &Jump : Succs[Src]) {
+ size_t Dst = Jump.first;
+ Scaled64 Prob = Jump.second;
+ ProbMatrix[Dst].push_back(std::make_pair(Src, Prob / SumProb[Src]));
+ }
+ }
+
+ // Add transitions from sinks to the source
+ size_t EntryIdx = BlockIndex.find(&F->front())->second;
+ for (size_t Src = 0; Src < NumBlocks; Src++) {
+ if (Succs[Src].empty()) {
+ ProbMatrix[EntryIdx].push_back(std::make_pair(Src, Scaled64::getOne()));
+ }
+ }
+}
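A minimal sketch of the same construction on plain doubles and made-up branch weights: each block's outgoing weights are normalized to probabilities, stored transposed (indexed by destination), and exit blocks are given an artificial jump back to the entry so the resulting chain has no absorbing states.

#include <cstdio>
#include <utility>
#include <vector>

// ProbMatrix[Dst] holds pairs (Src, P) with Pr[Src -> Dst | Src] = P.
using Matrix = std::vector<std::vector<std::pair<size_t, double>>>;

static Matrix buildMatrix(
    const std::vector<std::vector<std::pair<size_t, double>>> &Succs,
    size_t EntryIdx) {
  size_t N = Succs.size();
  Matrix ProbMatrix(N);
  for (size_t Src = 0; Src < N; ++Src) {
    if (Succs[Src].empty()) {
      // Sink block: redirect to the entry with probability 1.
      ProbMatrix[EntryIdx].push_back({Src, 1.0});
      continue;
    }
    // Normalize the outgoing weights of Src so they sum to 1.
    double Sum = 0.0;
    for (const auto &Jump : Succs[Src])
      Sum += Jump.second;
    for (const auto &Jump : Succs[Src])
      ProbMatrix[Jump.first].push_back({Src, Jump.second / Sum});
  }
  return ProbMatrix;
}

int main() {
  // Made-up weights: block 0 branches to 1 and 2 with weights 3 and 1;
  // blocks 1 and 2 fall through to the exit block 3.
  std::vector<std::vector<std::pair<size_t, double>>> Succs = {
      {{1, 3.0}, {2, 1.0}}, {{3, 1.0}}, {{3, 1.0}}, {}};
  Matrix M = buildMatrix(Succs, /*EntryIdx=*/0);
  for (size_t Dst = 0; Dst < M.size(); ++Dst)
    for (const auto &Jump : M[Dst])
      std::printf("Pr[%zu -> %zu] = %.2f\n", Jump.first, Dst, Jump.second);
  return 0;
}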
+
+#ifndef NDEBUG
+template <class BT>
+BlockFrequencyInfoImplBase::Scaled64 BlockFrequencyInfoImpl<BT>::discrepancy(
+ const ProbMatrixType &ProbMatrix, const std::vector<Scaled64> &Freq) const {
+ assert(Freq[0] > 0 && "Incorrectly computed frequency of the entry block");
+ Scaled64 Discrepancy;
+ for (size_t I = 0; I < ProbMatrix.size(); I++) {
+ Scaled64 Sum;
+ for (const auto &Jump : ProbMatrix[I]) {
+ Sum += Freq[Jump.first] * Jump.second;
+ }
+ Discrepancy += Freq[I] >= Sum ? Freq[I] - Sum : Sum - Freq[I];
+ }
+ // Normalizing by the frequency of the entry block
+ return Discrepancy / Freq[0];
+}
+#endif
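The discrepancy metric can likewise be sketched with plain doubles: it is the sum over all blocks of |Freq[I] - (Freq x P)[I]|, normalized by the entry frequency, and it is zero exactly when Freq is a fixed point of the transition matrix.

#include <cmath>
#include <cstdio>
#include <utility>
#include <vector>

// ProbMatrix[I] holds pairs (J, P) with Pr[J -> I | J] = P.
using Matrix = std::vector<std::vector<std::pair<size_t, double>>>;

static double discrepancy(const Matrix &ProbMatrix,
                          const std::vector<double> &Freq) {
  double D = 0.0;
  for (size_t I = 0; I < ProbMatrix.size(); ++I) {
    // Expected frequency of block I given the current frequencies.
    double Sum = 0.0;
    for (const auto &Jump : ProbMatrix[I])
      Sum += Freq[Jump.first] * Jump.second;
    D += std::fabs(Freq[I] - Sum);
  }
  return D / Freq[0]; // Freq[0] is the entry block by construction
}

int main() {
  // A fixed point of the 2-block chain 0 <-> 1 with Pr[0->1] = Pr[1->0] = 1.
  Matrix ProbMatrix = {{{1, 1.0}}, {{0, 1.0}}};
  std::vector<double> Freq = {0.5, 0.5};
  std::printf("%f\n", discrepancy(ProbMatrix, Freq)); // prints 0.000000
  return 0;
}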
+
/// \note This should be a lambda, but that crashes GCC 4.7.
namespace bfi_detail {