diff options
Diffstat (limited to 'llvm/lib/Analysis/InlineCost.cpp')
| -rw-r--r-- | llvm/lib/Analysis/InlineCost.cpp | 175 |
1 files changed, 104 insertions, 71 deletions
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 345e5a019520..8fa150f7d690 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -249,6 +249,9 @@ protected: /// Getter for BlockFrequencyInfo function_ref<BlockFrequencyInfo &(Function &)> GetBFI; + /// Getter for TargetLibraryInfo + function_ref<const TargetLibraryInfo &(Function &)> GetTLI; + /// Profile summary information. ProfileSummaryInfo *PSI; @@ -419,6 +422,14 @@ protected: return It->second; } + /// Use a value in its given form directly if possible, otherwise try looking + /// for it in SimplifiedValues. + template <typename T> T *getDirectOrSimplifiedValue(Value *V) const { + if (auto *Direct = dyn_cast<T>(V)) + return Direct; + return dyn_cast_if_present<T>(SimplifiedValues.lookup(V)); + } + // Custom simplification helper routines. bool isAllocaDerivedArg(Value *V); void disableSROAForArg(AllocaInst *SROAArg); @@ -433,6 +444,7 @@ protected: bool simplifyIntrinsicCallIsConstant(CallBase &CB); bool simplifyIntrinsicCallObjectSize(CallBase &CB); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + bool isLoweredToCall(Function *F, CallBase &Call); /// Return true if the given argument to the function being considered for /// inlining has the given attribute set either at the call site or the @@ -492,20 +504,23 @@ protected: bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, - function_ref<AssumptionCache &(Function &)> GetAssumptionCache, - function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, - ProfileSummaryInfo *PSI = nullptr, - OptimizationRemarkEmitter *ORE = nullptr) + CallAnalyzer( + Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, + function_ref<AssumptionCache &(Function &)> GetAssumptionCache, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr, + ProfileSummaryInfo *PSI = nullptr, + OptimizationRemarkEmitter *ORE = nullptr) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), - PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE), + GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE), CandidateCall(Call) {} InlineResult analyze(); std::optional<Constant *> getSimplifiedValue(Instruction *I) { - if (SimplifiedValues.contains(I)) - return SimplifiedValues[I]; + auto It = SimplifiedValues.find(I); + if (It != SimplifiedValues.end()) + return It->second; return std::nullopt; } @@ -687,7 +702,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { /// FIXME: if InlineCostCallAnalyzer is derived from, this may need /// to instantiate the derived class. InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, - GetAssumptionCache, GetBFI, PSI, ORE, false); + GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, + false); if (CA.analyze().isSuccess()) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. @@ -1105,10 +1121,12 @@ public: const TargetTransformInfo &TTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr, ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true, bool IgnoreThreshold = false) - : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE), + : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI, + ORE), ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE || isCostBenefitAnalysisEnabled()), @@ -1129,8 +1147,9 @@ public: void print(raw_ostream &OS); std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) { - if (InstructionCostDetailMap.contains(I)) - return InstructionCostDetailMap[I]; + auto It = InstructionCostDetailMap.find(I); + if (It != InstructionCostDetailMap.end()) + return It->second; return std::nullopt; } @@ -1226,8 +1245,8 @@ private: InlineConstants::IndirectCallThreshold; InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, - GetAssumptionCache, GetBFI, PSI, ORE, false, - true); + GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, + false, true); if (CA.analyze().isSuccess()) { increment(InlineCostFeatureIndex::nested_inline_cost_estimate, CA.getCost()); @@ -1353,9 +1372,11 @@ public: const TargetTransformInfo &TTI, function_ref<AssumptionCache &(Function &)> &GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, CallBase &Call) - : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {} + : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, + PSI) {} const InlineCostFeatures &features() const { return Cost; } }; @@ -1422,10 +1443,8 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); GTI != GTE; ++GTI) { - ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); - if (!OpC) - if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) - OpC = dyn_cast<ConstantInt>(SimpleOp); + ConstantInt *OpC = + getDirectOrSimplifiedValue<ConstantInt>(GTI.getOperand()); if (!OpC) return false; if (OpC->isZero()) @@ -1539,9 +1558,7 @@ bool CallAnalyzer::visitPHI(PHINode &I) { if (&I == V) continue; - Constant *C = dyn_cast<Constant>(V); - if (!C) - C = SimplifiedValues.lookup(V); + Constant *C = getDirectOrSimplifiedValue<Constant>(V); std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset}; if (!C && CheckSROA) @@ -1626,7 +1643,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { // Lambda to check whether a GEP's indices are all constant. auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { for (const Use &Op : GEP.indices()) - if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op)) + if (!getDirectOrSimplifiedValue<Constant>(Op)) return false; return true; }; @@ -1653,9 +1670,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { bool CallAnalyzer::simplifyInstruction(Instruction &I) { SmallVector<Constant *> COps; for (Value *Op : I.operands()) { - Constant *COp = dyn_cast<Constant>(Op); - if (!COp) - COp = SimplifiedValues.lookup(Op); + Constant *COp = getDirectOrSimplifiedValue<Constant>(Op); if (!COp) return false; COps.push_back(COp); @@ -1678,10 +1693,7 @@ bool CallAnalyzer::simplifyInstruction(Instruction &I) { /// llvm.is.constant would evaluate. bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) { Value *Arg = CB.getArgOperand(0); - auto *C = dyn_cast<Constant>(Arg); - - if (!C) - C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg)); + auto *C = getDirectOrSimplifiedValue<Constant>(Arg); Type *RT = CB.getFunctionType()->getReturnType(); SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0); @@ -1941,7 +1953,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // and the callsite. int SingleBBBonusPercent = 50; int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); - int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus; + int LastCallToStaticBonus = TTI.getInliningLastCallToStaticBonus(); // Lambda to set all the above bonus and bonus percentages to 0. auto DisallowAllBonuses = [&]() { @@ -2113,12 +2125,8 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) { bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - Constant *CLHS = dyn_cast<Constant>(LHS); - if (!CLHS) - CLHS = SimplifiedValues.lookup(LHS); - Constant *CRHS = dyn_cast<Constant>(RHS); - if (!CRHS) - CRHS = SimplifiedValues.lookup(RHS); + Constant *CLHS = getDirectOrSimplifiedValue<Constant>(LHS); + Constant *CRHS = getDirectOrSimplifiedValue<Constant>(RHS); Value *SimpleV = nullptr; if (auto FI = dyn_cast<FPMathOperator>(&I)) @@ -2152,9 +2160,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { bool CallAnalyzer::visitFNeg(UnaryOperator &I) { Value *Op = I.getOperand(0); - Constant *COp = dyn_cast<Constant>(Op); - if (!COp) - COp = SimplifiedValues.lookup(Op); + Constant *COp = getDirectOrSimplifiedValue<Constant>(Op); Value *SimpleV = simplifyFNegInst( COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL); @@ -2242,9 +2248,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) { SmallVector<Constant *, 4> ConstantArgs; ConstantArgs.reserve(Call.arg_size()); for (Value *I : Call.args()) { - Constant *C = dyn_cast<Constant>(I); - if (!C) - C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I)); + Constant *C = getDirectOrSimplifiedValue<Constant>(I); if (!C) return false; // This argument doesn't map to a constant. @@ -2258,6 +2262,39 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) { return false; } +bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) { + const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr; + LibFunc LF; + if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF)) + return TTI.isLoweredToCall(F); + + switch (LF) { + case LibFunc_memcpy_chk: + case LibFunc_memmove_chk: + case LibFunc_mempcpy_chk: + case LibFunc_memset_chk: { + // Calls to __memcpy_chk whose length is known to fit within the object + // size will eventually be replaced by inline stores. Therefore, these + // should not incur a call penalty. This is only really relevant on + // platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as + // other platforms use memcpy intrinsics, which are already exempt from the + // call penalty. + auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(Call.getOperand(2)); + auto *ObjSizeOp = + getDirectOrSimplifiedValue<ConstantInt>(Call.getOperand(3)); + if (LenOp && ObjSizeOp && + LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) { + return false; + } + break; + } + default: + break; + } + + return TTI.isLoweredToCall(F); +} + bool CallAnalyzer::visitCallBase(CallBase &Call) { if (!onCallBaseVisitStart(Call)) return true; @@ -2339,7 +2376,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { return false; } - if (TTI.isLoweredToCall(F)) { + if (isLoweredToCall(F, Call)) { onLoweredCall(F, Call, IsIndirectCall); } @@ -2360,10 +2397,9 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) { // shouldn't exist at all, but handling them makes the behavior of the // inliner more regular and predictable. Interestingly, conditional branches // which will fold away are also free. - return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) || - BI.getMetadata(LLVMContext::MD_make_implicit) || - isa_and_nonnull<ConstantInt>( - SimplifiedValues.lookup(BI.getCondition())); + return BI.isUnconditional() || + getDirectOrSimplifiedValue<ConstantInt>(BI.getCondition()) || + BI.getMetadata(LLVMContext::MD_make_implicit); } bool CallAnalyzer::visitSelectInst(SelectInst &SI) { @@ -2371,12 +2407,8 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) { Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); - Constant *TrueC = dyn_cast<Constant>(TrueVal); - if (!TrueC) - TrueC = SimplifiedValues.lookup(TrueVal); - Constant *FalseC = dyn_cast<Constant>(FalseVal); - if (!FalseC) - FalseC = SimplifiedValues.lookup(FalseVal); + Constant *TrueC = getDirectOrSimplifiedValue<Constant>(TrueVal); + Constant *FalseC = getDirectOrSimplifiedValue<Constant>(FalseVal); Constant *CondC = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition())); @@ -2446,11 +2478,8 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) { bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // We model unconditional switches as free, see the comments on handling // branches. - if (isa<ConstantInt>(SI.getCondition())) + if (getDirectOrSimplifiedValue<ConstantInt>(SI.getCondition())) return true; - if (Value *V = SimplifiedValues.lookup(SI.getCondition())) - if (isa<ConstantInt>(V)) - return true; // Assume the most general case where the switch is lowered into // either a jump table, bit test, or a balanced binary tree consisting of @@ -2669,8 +2698,10 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { // A CFG edge is dead if the predecessor is dead or the predecessor has a // known successor which is not the one under exam. - return (DeadBlocks.count(Pred) || - (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); + if (DeadBlocks.count(Pred)) + return true; + BasicBlock *KnownSucc = KnownSuccessors[Pred]; + return KnownSucc && KnownSucc != Succ; }; auto IsNewlyDead = [&](BasicBlock *BB) { @@ -2943,6 +2974,7 @@ std::optional<int> llvm::getInliningCostEstimate( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { const InlineParams Params = {/* DefaultThreshold*/ 0, /*HintThreshold*/ {}, @@ -2956,7 +2988,7 @@ std::optional<int> llvm::getInliningCostEstimate( /*EnableDeferral*/ true}; InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI, ORE, true, + GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true, /*IgnoreThreshold*/ true); auto R = CA.analyze(); if (!R.isSuccess()) @@ -2968,9 +3000,10 @@ std::optional<InlineCostFeatures> llvm::getInliningCostFeatures( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { - InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, - ORE, *Call.getCalledFunction(), Call); + InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI, + PSI, ORE, *Call.getCalledFunction(), Call); auto R = CFA.analyze(); if (!R.isSuccess()) return std::nullopt; @@ -3070,7 +3103,7 @@ InlineCost llvm::getInlineCost( << ")\n"); InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI, ORE); + GetAssumptionCache, GetBFI, GetTLI, PSI, ORE); InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); @@ -3246,8 +3279,7 @@ InlineCostAnnotationPrinterPass::run(Function &F, }; Module *M = F.getParent(); ProfileSummaryInfo PSI(*M); - DataLayout DL(M); - TargetTransformInfo TTI(DL); + TargetTransformInfo TTI(M->getDataLayout()); // FIXME: Redesign the usage of InlineParams to expand the scope of this pass. // In the current implementation, the type of InlineParams doesn't matter as // the pass serves only for verification of inliner's decisions. @@ -3256,16 +3288,17 @@ InlineCostAnnotationPrinterPass::run(Function &F, const InlineParams Params = llvm::getInlineParams(); for (BasicBlock &BB : F) { for (Instruction &I : BB) { - if (CallInst *CI = dyn_cast<CallInst>(&I)) { - Function *CalledFunction = CI->getCalledFunction(); + if (auto *CB = dyn_cast<CallBase>(&I)) { + Function *CalledFunction = CB->getCalledFunction(); if (!CalledFunction || CalledFunction->isDeclaration()) continue; OptimizationRemarkEmitter ORE(CalledFunction); - InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI, - GetAssumptionCache, nullptr, &PSI, &ORE); + InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI, + GetAssumptionCache, nullptr, nullptr, &PSI, + &ORE); ICCA.analyze(); OS << " Analyzing call of " << CalledFunction->getName() - << "... (caller:" << CI->getCaller()->getName() << ")\n"; + << "... (caller:" << CB->getCaller()->getName() << ")\n"; ICCA.print(OS); OS << "\n"; } |
