Diffstat (limited to 'llvm/lib/Transforms/IPO/Inliner.cpp'):
 llvm/lib/Transforms/IPO/Inliner.cpp | 217
 1 file changed, 190 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index e91b6c9b1d26..59260af88832 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -99,6 +99,10 @@ static cl::opt<std::string> CGSCCInlineReplayFile(
"by inlining from cgscc inline remarks."),
cl::Hidden);
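+// When enabled, call sites are visited in priority order (smaller callees
+// first) instead of the default FIFO order. Usage sketch (assuming the new
+// pass manager driver):
+//   opt -passes='default<O2>' -inline-enable-priority-order -S in.ll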
+static cl::opt<bool> InlineEnablePriorityOrder(
+ "inline-enable-priority-order", cl::Hidden, cl::init(false),
+ cl::desc("Enable the priority inline order for the inliner"));
+
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -670,6 +674,153 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
return *IAA->getAdvisor();
}
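+// InlineOrder is the interface the inliner uses for its worklist of inline
+// candidates: elements are pushed as they are discovered and popped in an
+// order chosen by the concrete implementation.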
+template <typename T> class InlineOrder {
+public:
+ using reference = T &;
+ using const_reference = const T &;
+
+ virtual ~InlineOrder() {}
+
+ virtual size_t size() = 0;
+
+ virtual void push(const T &Elt) = 0;
+
+ virtual T pop() = 0;
+
+ virtual const_reference front() = 0;
+
+ virtual void erase_if(function_ref<bool(T)> Pred) = 0;
+
+ bool empty() { return !size(); }
+};
+
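+// DefaultInlineOrder preserves the inliner's original behaviour: a simple
+// FIFO worklist. pop() advances FirstIndex instead of erasing from the
+// front, so both push() and pop() stay O(1).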
+template <typename T, typename Container = SmallVector<T, 16>>
+class DefaultInlineOrder : public InlineOrder<T> {
+ using reference = T &;
+ using const_reference = const T &;
+
+public:
+ size_t size() override { return Calls.size() - FirstIndex; }
+
+ void push(const T &Elt) override { Calls.push_back(Elt); }
+
+ T pop() override {
+ assert(size() > 0);
+ return Calls[FirstIndex++];
+ }
+
+ const_reference front() override {
+ assert(size() > 0);
+ return Calls[FirstIndex];
+ }
+
+ void erase_if(function_ref<bool(T)> Pred) override {
+ Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred),
+ Calls.end());
+ }
+
+private:
+ Container Calls;
+ size_t FirstIndex = 0;
+};
+
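+// Priority measures how desirable a call site is to inline; here it is simply
+// the callee's current instruction count, and a smaller callee is considered
+// more desirable. evaluate() assumes a direct call to a function with a
+// definition, which is the only kind of call site the inliner enqueues.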
+class Priority {
+public:
+ Priority(int Size) : Size(Size) {}
+
+ static bool isMoreDesirable(const Priority &S1, const Priority &S2) {
+ return S1.Size < S2.Size;
+ }
+
+ static Priority evaluate(CallBase *CB) {
+ Function *Callee = CB->getCalledFunction();
+ return Priority(Callee->getInstructionCount());
+ }
+
+ int Size;
+};
+
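+// PriorityInlineOrder keeps the candidate call sites in a heap ordered by
+// PriorityT, so the most desirable call site is always at the front. Since
+// inlining can grow a callee, the front element's priority is re-evaluated
+// lazily in adjust() before pop() or front() hands it out.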
+template <typename PriorityT>
+class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
+ using T = std::pair<CallBase *, int>;
+ using HeapT = std::pair<CallBase *, PriorityT>;
+ using reference = T &;
+ using const_reference = const T &;
+
+ static bool cmp(const HeapT &P1, const HeapT &P2) {
+ return PriorityT::isMoreDesirable(P2.second, P1.second);
+ }
+
+  // A call site can become less desirable for inlining because of the size
+  // growth from prior inlining into the callee. This method lazily updates
+  // the desirability of the call site at the front of the heap when it has
+  // decreased; it is only called from pop() and front(), not every time the
+  // desirability changes. When the front call site has become less desirable,
+  // its updated entry is pushed back into the heap and the new front is
+  // checked again, until the front entry is up to date. For simplicity, cases
+  // where the desirability of a call site increases are ignored.
+ void adjust() {
+ bool Changed = false;
+ do {
+ CallBase *CB = Heap.front().first;
+ const PriorityT PreviousGoodness = Heap.front().second;
+ const PriorityT CurrentGoodness = PriorityT::evaluate(CB);
+ Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness);
+ if (Changed) {
+ std::pop_heap(Heap.begin(), Heap.end(), cmp);
+ Heap.pop_back();
+ Heap.push_back({CB, CurrentGoodness});
+ std::push_heap(Heap.begin(), Heap.end(), cmp);
+ }
+ } while (Changed);
+ }
+
+public:
+ size_t size() override { return Heap.size(); }
+
+ void push(const T &Elt) override {
+ CallBase *CB = Elt.first;
+ const int InlineHistoryID = Elt.second;
+ const PriorityT Goodness = PriorityT::evaluate(CB);
+
+ Heap.push_back({CB, Goodness});
+ std::push_heap(Heap.begin(), Heap.end(), cmp);
+ InlineHistoryMap[CB] = InlineHistoryID;
+ }
+
+ T pop() override {
+ assert(size() > 0);
+ adjust();
+
+ CallBase *CB = Heap.front().first;
+ T Result = std::make_pair(CB, InlineHistoryMap[CB]);
+ InlineHistoryMap.erase(CB);
+ std::pop_heap(Heap.begin(), Heap.end(), cmp);
+ Heap.pop_back();
+ return Result;
+ }
+
+ const_reference front() override {
+ assert(size() > 0);
+ adjust();
+
+ CallBase *CB = Heap.front().first;
+ return *InlineHistoryMap.find(CB);
+ }
+
+ void erase_if(function_ref<bool(T)> Pred) override {
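+    // Predicates are expected to only inspect the call site, so a dummy
+    // inline history ID of 0 is supplied to satisfy the element type.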
+ auto PredWrapper = [=](HeapT P) -> bool {
+ return Pred(std::make_pair(P.first, 0));
+ };
+ Heap.erase(std::remove_if(Heap.begin(), Heap.end(), PredWrapper),
+ Heap.end());
+ std::make_heap(Heap.begin(), Heap.end(), cmp);
+ }
+
+private:
+ SmallVector<HeapT, 16> Heap;
+ DenseMap<CallBase *, int> InlineHistoryMap;
+};
+
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
@@ -692,7 +843,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// We use a single common worklist for calls across the entire SCC. We
// process these in-order and append new calls introduced during inlining to
- // the end.
+  // the end. Alternatively, when -inline-enable-priority-order is set, a
+  // PriorityInlineOrder is used instead, giving smaller callees higher
+  // priority so they are inlined first.
//
// Note that this particular order of processing is actually critical to
// avoid very bad behaviors. Consider *highly connected* call graphs where
@@ -714,7 +866,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// this model, but it is uniformly spread across all the functions in the SCC
// and eventually they all become too large to inline, rather than
// incrementally making a single function grow in a super linear fashion.
- SmallVector<std::pair<CallBase *, int>, 16> Calls;
+ std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
+ if (InlineEnablePriorityOrder)
+ Calls = std::make_unique<PriorityInlineOrder<Priority>>();
+ else
+ Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
+ assert(Calls != nullptr && "Expected an initialized InlineOrder");
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
@@ -729,7 +886,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (auto *CB = dyn_cast<CallBase>(&I))
if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
- Calls.push_back({CB, -1});
+ Calls->push({CB, -1});
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
setInlineRemark(*CB, "unavailable definition");
@@ -743,7 +900,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
}
- if (Calls.empty())
+ if (Calls->empty())
return PreservedAnalyses::all();
// Capture updatable variable for the current SCC.
@@ -764,19 +921,22 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// defer deleting these to make it easier to handle the call graph updates.
SmallVector<Function *, 4> DeadFunctions;
- // Loop forward over all of the calls. Note that we cannot cache the size as
- // inlining can introduce new calls that need to be processed.
- for (int I = 0; I < (int)Calls.size(); ++I) {
+ // Loop forward over all of the calls.
+ while (!Calls->empty()) {
// We expect the calls to typically be batched with sequences of calls that
// have the same caller, so we first set up some shared infrastructure for
// this caller. We also do any pruning we can at this layer on the caller
// alone.
- Function &F = *Calls[I].first->getCaller();
+ Function &F = *Calls->front().first->getCaller();
LazyCallGraph::Node &N = *CG.lookup(F);
- if (CG.lookupSCC(N) != C)
+ if (CG.lookupSCC(N) != C) {
+ Calls->pop();
continue;
+ }
- LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
+ << " Function size: " << F.getInstructionCount()
+ << "\n");
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
@@ -786,8 +946,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
bool DidInline = false;
- for (; I < (int)Calls.size() && Calls[I].first->getCaller() == &F; ++I) {
- auto &P = Calls[I];
+ while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
+ auto P = Calls->pop();
CallBase *CB = P.first;
const int InlineHistoryID = P.second;
Function &Callee = *CB->getCalledFunction();
@@ -837,6 +997,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
InlinedCallees.insert(&Callee);
++NumInlined;
+ LLVM_DEBUG(dbgs() << " Size after inlining: "
+ << F.getInstructionCount() << "\n");
+
// Add any new callsites to defined functions to the worklist.
if (!IFI.InlinedCallSites.empty()) {
int NewHistoryID = InlineHistory.size();
@@ -844,6 +1007,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
Function *NewCallee = ICB->getCalledFunction();
+ assert(!(NewCallee && NewCallee->isIntrinsic()) &&
+ "Intrinsic calls should not be tracked.");
if (!NewCallee) {
// Try to promote an indirect (virtual) call without waiting for
// the post-inline cleanup and the next DevirtSCCRepeatedPass
@@ -854,7 +1019,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
if (NewCallee)
if (!NewCallee->isDeclaration())
- Calls.push_back({ICB, NewHistoryID});
+ Calls->push({ICB, NewHistoryID});
}
}
@@ -871,12 +1036,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// made dead by this operation on other functions).
Callee.removeDeadConstantUsers();
if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
- Calls.erase(
- std::remove_if(Calls.begin() + I + 1, Calls.end(),
- [&](const std::pair<CallBase *, int> &Call) {
- return Call.first->getCaller() == &Callee;
- }),
- Calls.end());
+ Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
+ return Call.first->getCaller() == &Callee;
+ });
// Clear the body and queue the function itself for deletion when we
// finish inlining and call graph updates.
// Note that after this point, it is an error to do anything other
@@ -894,10 +1056,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Advice->recordInlining();
}
- // Back the call index up by one to put us in a good position to go around
- // the outer loop.
- --I;
-
if (!DidInline)
continue;
Changed = true;
@@ -969,6 +1127,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
UR.InvalidatedSCCs.insert(&DeadC);
UR.InvalidatedRefSCCs.insert(&DeadRC);
+ // If the updated SCC was the one containing the deleted function, clear it.
+ if (&DeadC == UR.UpdatedC)
+ UR.UpdatedC = nullptr;
+
// And delete the actual function from the module.
// The Advisor may use Function pointers to efficiently index various
// internal maps, e.g. for memoization. Function cleanup passes like
@@ -993,12 +1155,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
- bool Debugging,
bool MandatoryFirst,
InliningAdvisorMode Mode,
unsigned MaxDevirtIterations)
: Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations),
- PM(Debugging), MPM(Debugging) {
+ PM(), MPM() {
// Run the inliner first. The theory is that we are walking bottom-up and so
// the callees have already been fully optimized, and we want to inline them
// into the callers so that our optimizations can reflect that.
@@ -1031,8 +1192,10 @@ PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
else
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
- auto Ret = MPM.run(M, MAM);
+ MPM.run(M, MAM);
IAA.clear();
- return Ret;
+
+ // The ModulePassManager has already taken care of invalidating analyses.
+ return PreservedAnalyses::all();
}