aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/IPO
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp1309
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp26
-rw-r--r--lib/Transforms/IPO/CrossDSOCFI.cpp7
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp152
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp150
-rw-r--r--lib/Transforms/IPO/FunctionImport.cpp110
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp152
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp10
-rw-r--r--lib/Transforms/IPO/GlobalSplit.cpp11
-rw-r--r--lib/Transforms/IPO/IPConstantPropagation.cpp8
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp13
-rw-r--r--lib/Transforms/IPO/Inliner.cpp205
-rw-r--r--lib/Transforms/IPO/LowerTypeTests.cpp308
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp299
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp2
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp87
-rw-r--r--lib/Transforms/IPO/SampleProfile.cpp271
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp24
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp261
-rw-r--r--lib/Transforms/IPO/WholeProgramDevirt.cpp764
20 files changed, 2765 insertions, 1404 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 65b7bad3b1ed..a2c8a32dfe86 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,8 +29,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -38,6 +39,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
@@ -51,323 +53,400 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include <set>
using namespace llvm;
#define DEBUG_TYPE "argpromotion"
-STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
-STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
-STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
+STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
-namespace {
- /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
- ///
- struct ArgPromotion : public CallGraphSCCPass {
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- getAAResultsAnalysisUsage(AU);
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
+/// A vector used to hold the indices of a single GEP instruction
+typedef std::vector<uint64_t> IndicesVector;
- bool runOnSCC(CallGraphSCC &SCC) override;
- static char ID; // Pass identification, replacement for typeid
- explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), maxElements(maxElements) {
- initializeArgPromotionPass(*PassRegistry::getPassRegistry());
- }
+/// DoPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function. At this point, we know that it's
+/// safe to do so.
+static Function *
+doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
+ SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
+ Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
+ ReplaceCallSite) {
- private:
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has modified arguments.
+ FunctionType *FTy = F->getFunctionType();
+ std::vector<Type *> Params;
- using llvm::Pass::doInitialization;
- bool doInitialization(CallGraph &CG) override;
- /// The maximum number of elements to expand, or 0 for unlimited.
- unsigned maxElements;
- };
-}
+ typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
-/// A vector used to hold the indices of a single GEP instruction
-typedef std::vector<uint64_t> IndicesVector;
+ // ScalarizedElements - If we are promoting a pointer that has elements
+ // accessed out of it, keep track of which elements are accessed so that we
+ // can add one argument for each.
+ //
+ // Arguments that are directly loaded will have a zero element value here, to
+ // handle cases where there are both a direct load and GEP accesses.
+ //
+ std::map<Argument *, ScalarizeTable> ScalarizedElements;
-static CallGraphNode *
-PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
- function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements);
-static bool isDenselyPacked(Type *type, const DataLayout &DL);
-static bool canPaddingBeAccessed(Argument *Arg);
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
- unsigned MaxElements);
-static CallGraphNode *
-DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG);
+ // OriginalLoads - Keep track of a representative load instruction from the
+ // original function so that we can tell the alias analysis implementation
+ // what the new GEP/Load instructions we are inserting look like.
+ // We need to keep the original loads for each argument and the elements
+ // of the argument that are accessed.
+ std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;
-char ArgPromotion::ID = 0;
-INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false)
+ // Attribute - Keep track of the parameter attributes for the arguments
+ // that we are *not* promoting. For the ones that we do promote, the parameter
+ // attributes are lost
+ SmallVector<AttributeSet, 8> ArgAttrVec;
+ AttributeList PAL = F->getAttributes();
-Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
- return new ArgPromotion(maxElements);
-}
+ // First, determine the new argument list
+ unsigned ArgIndex = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIndex) {
+ if (ByValArgsToTransform.count(&*I)) {
+ // Simple byval argument? Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ Params.insert(Params.end(), STy->element_begin(), STy->element_end());
+ ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
+ AttributeSet());
+ ++NumByValArgsPromoted;
+ } else if (!ArgsToPromote.count(&*I)) {
+ // Unchanged argument
+ Params.push_back(I->getType());
+ ArgAttrVec.push_back(PAL.getParamAttributes(ArgIndex));
+ } else if (I->use_empty()) {
+ // Dead argument (which are always marked as promotable)
+ ++NumArgumentsDead;
+ } else {
+ // Okay, this is being promoted. This means that the only uses are loads
+ // or GEPs which are only used by loads
-static bool runImpl(CallGraphSCC &SCC, CallGraph &CG,
- function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements) {
- bool Changed = false, LocalChange;
+ // In this table, we will track which indices are loaded from the argument
+ // (where direct loads are tracked as no indices).
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ Type *SrcTy;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ SrcTy = L->getType();
+ else
+ SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
+ IndicesVector Indices;
+ Indices.reserve(UI->getNumOperands() - 1);
+ // Since loads will only have a single operand, and GEPs only a single
+ // non-index operand, this will record direct loads without any indices,
+ // and gep+loads with the GEP indices.
+ for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
+ II != IE; ++II)
+ Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Indices.size() == 1 && Indices.front() == 0)
+ Indices.clear();
+ ArgIndices.insert(std::make_pair(SrcTy, Indices));
+ LoadInst *OrigLoad;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ OrigLoad = L;
+ else
+ // Take any load, we will use it only to update Alias Analysis
+ OrigLoad = cast<LoadInst>(UI->user_back());
+ OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
+ }
- do { // Iterate until we stop promoting from this SCC.
- LocalChange = false;
- // Attempt to promote arguments from all functions in this SCC.
- for (CallGraphNode *OldNode : SCC) {
- if (CallGraphNode *NewNode =
- PromoteArguments(OldNode, CG, AARGetter, MaxElements)) {
- LocalChange = true;
- SCC.ReplaceNode(OldNode, NewNode);
+ // Add a parameter to the function for each element passed in.
+ for (const auto &ArgIndex : ArgIndices) {
+ // not allowed to dereference ->begin() if size() is 0
+ Params.push_back(GetElementPtrInst::getIndexedType(
+ cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+ ArgIndex.second));
+ ArgAttrVec.push_back(AttributeSet());
+ assert(Params.back());
}
+
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
+ ++NumArgumentsPromoted;
+ else
+ ++NumAggregatesPromoted;
}
- Changed |= LocalChange; // Remember that we changed something.
- } while (LocalChange);
-
- return Changed;
-}
+ }
-bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
- if (skipSCC(SCC))
- return false;
+ Type *RetTy = FTy->getReturnType();
- // Get the callgraph information that we need to update to reflect our
- // changes.
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ // Construct the new function type using the new arguments.
+ FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
-
- return runImpl(SCC, CG, AARGetter, maxElements);
-}
+ // Create the new function body and insert it into the module.
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ NF->copyAttributesFrom(F);
-/// \brief Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+ // Patch the pointer to LLVM function in debug info descriptor.
+ NF->setSubprogram(F->getSubprogram());
+ F->setSubprogram(nullptr);
- // There is no size information, so be conservative.
- if (!type->isSized())
- return false;
+ DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ << "From: " << *F);
- // If the alloc size is not equal to the storage size, then there are padding
- // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
- if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
- return false;
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
+ PAL.getRetAttributes(), ArgAttrVec));
+ ArgAttrVec.clear();
- if (!isa<CompositeType>(type))
- return true;
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
+ NF->takeName(F);
- // For homogenous sequential types, check for padding within members.
- if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
- return isDenselyPacked(seqTy->getElementType(), DL);
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in the loaded pointers.
+ //
+ SmallVector<Value *, 16> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->user_back());
+ assert(CS.getCalledFunction() == F);
+ Instruction *Call = CS.getInstruction();
+ const AttributeList &CallPAL = CS.getAttributes();
- // Check for padding within and between elements of a struct.
- StructType *StructTy = cast<StructType>(type);
- const StructLayout *Layout = DL.getStructLayout(StructTy);
- uint64_t StartPos = 0;
- for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
- Type *ElTy = StructTy->getElementType(i);
- if (!isDenselyPacked(ElTy, DL))
- return false;
- if (StartPos != Layout->getElementOffsetInBits(i))
- return false;
- StartPos += DL.getTypeAllocSizeInBits(ElTy);
- }
+ // Loop over the operands, inserting GEP and loads in the caller as
+ // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++AI, ++ArgIndex)
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ Args.push_back(*AI); // Unmodified argument
+ ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex));
+ } else if (ByValArgsToTransform.count(&*I)) {
+ // Emit a GEP and load for each element of the struct.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(
+ STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
+ // TODO: Tell AA about the new values?
+ Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
+ ArgAttrVec.push_back(AttributeSet());
+ }
+ } else if (!I->use_empty()) {
+ // Non-dead argument: insert GEPs and loads as appropriate.
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ // Store the Value* version of the indices in here, but declare it now
+ // for reuse.
+ std::vector<Value *> Ops;
+ for (const auto &ArgIndex : ArgIndices) {
+ Value *V = *AI;
+ LoadInst *OrigLoad =
+ OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
+ if (!ArgIndex.second.empty()) {
+ Ops.reserve(ArgIndex.second.size());
+ Type *ElTy = V->getType();
+ for (unsigned long II : ArgIndex.second) {
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
+ // This satisfies GEP constraints.
+ Type *IdxTy =
+ (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
+ : Type::getInt64Ty(F->getContext()));
+ Ops.push_back(ConstantInt::get(IdxTy, II));
+ // Keep track of the type we're currently indexing.
+ if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
+ ElTy = ElPTy->getElementType();
+ else
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
+ }
+ // And create a GEP to extract those indices.
+ V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
+ V->getName() + ".idx", Call);
+ Ops.clear();
+ }
+ // Since we're replacing a load make sure we take the alignment
+ // of the previous load.
+ LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
+ newLoad->setAlignment(OrigLoad->getAlignment());
+ // Transfer the AA info too.
+ AAMDNodes AAInfo;
+ OrigLoad->getAAMetadata(AAInfo);
+ newLoad->setAAMetadata(AAInfo);
- return true;
-}
+ Args.push_back(newLoad);
+ ArgAttrVec.push_back(AttributeSet());
+ }
+ }
-/// \brief Checks if the padding bytes of an argument could be accessed.
-static bool canPaddingBeAccessed(Argument *arg) {
+ // Push any varargs arguments on the list.
+ for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+ Args.push_back(*AI);
+ ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex));
+ }
- assert(arg->hasByValAttr());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
- // Track all the pointers to the argument to make sure they are not captured.
- SmallPtrSet<Value *, 16> PtrValues;
- PtrValues.insert(arg);
+ CallSite NewCS;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", Call);
+ } else {
+ auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call);
+ NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
+ NewCS = NewCall;
+ }
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(
+ AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
+ CallPAL.getRetAttributes(), ArgAttrVec));
+ NewCS->setDebugLoc(Call->getDebugLoc());
+ uint64_t W;
+ if (Call->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
+ Args.clear();
+ ArgAttrVec.clear();
- // Track all of the stores.
- SmallVector<StoreInst *, 16> Stores;
+ // Update the callgraph to know that the callsite has been transformed.
+ if (ReplaceCallSite)
+ (*ReplaceCallSite)(CS, NewCS);
- // Scan through the uses recursively to make sure the pointer is always used
- // sanely.
- SmallVector<Value *, 16> WorkList;
- WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
- while (!WorkList.empty()) {
- Value *V = WorkList.back();
- WorkList.pop_back();
- if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
- if (PtrValues.insert(V).second)
- WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
- } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
- Stores.push_back(Store);
- } else if (!isa<LoadInst>(V)) {
- return true;
+ if (!Call->use_empty()) {
+ Call->replaceAllUsesWith(NewCS.getInstruction());
+ NewCS->takeName(Call);
}
- }
-// Check to make sure the pointers aren't captured
- for (StoreInst *Store : Stores)
- if (PtrValues.count(Store->getValueOperand()))
- return true;
-
- return false;
-}
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
-/// PromoteArguments - This method checks the specified function to see if there
-/// are any promotable arguments and if it is safe to promote the function (for
-/// example, all callers are direct). If safe to promote some arguments, it
-/// calls the DoPromotion method.
-///
-static CallGraphNode *
-PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
- function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements) {
- Function *F = CGN->getFunction();
+ const DataLayout &DL = F->getParent()->getDataLayout();
- // Make sure that it is local to this module.
- if (!F || !F->hasLocalLinkage()) return nullptr;
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
- // Don't promote arguments for variadic functions. Adding, removing, or
- // changing non-pack parameters can change the classification of pack
- // parameters. Frontends encode that classification at the call site in the
- // IR, while in the callee the classification is determined dynamically based
- // on the number of registers consumed so far.
- if (F->isVarArg()) return nullptr;
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, also transferring over the names as well.
+ //
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin();
+ I != E; ++I) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ // If this is an unmodified argument, move the name and users over to the
+ // new version.
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
+ ++I2;
+ continue;
+ }
- // First check: see if there are any pointer arguments! If not, quick exit.
- SmallVector<Argument*, 16> PointerArgs;
- for (Argument &I : F->args())
- if (I.getType()->isPointerTy())
- PointerArgs.push_back(&I);
- if (PointerArgs.empty()) return nullptr;
+ if (ByValArgsToTransform.count(&*I)) {
+ // In the callee, we create an alloca, and store each of the new incoming
+ // arguments into the alloca.
+ Instruction *InsertPt = &NF->begin()->front();
- // Second check: make sure that all callers are direct callers. We can't
- // transform functions that have indirect callers. Also see if the function
- // is self-recursive.
- bool isSelfRecursive = false;
- for (Use &U : F->uses()) {
- CallSite CS(U.getUser());
- // Must be a direct call.
- if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
-
- if (CS.getInstruction()->getParent()->getParent() == F)
- isSelfRecursive = true;
- }
-
- const DataLayout &DL = F->getParent()->getDataLayout();
+ // Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
+ "", InsertPt);
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
+ nullptr};
- AAResults &AAR = AARGetter(*F);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(
+ AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
+ InsertPt);
+ I2->setName(I->getName() + "." + Twine(i));
+ new StoreInst(&*I2++, Idx, InsertPt);
+ }
- // Check to see which arguments are promotable. If an argument is promotable,
- // add it to ArgsToPromote.
- SmallPtrSet<Argument*, 8> ArgsToPromote;
- SmallPtrSet<Argument*, 8> ByValArgsToTransform;
- for (Argument *PtrArg : PointerArgs) {
- Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+ // Anything that used the arg should now use the alloca.
+ I->replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(&*I);
- // Replace sret attribute with noalias. This reduces register pressure by
- // avoiding a register copy.
- if (PtrArg->hasStructRetAttr()) {
- unsigned ArgNo = PtrArg->getArgNo();
- F->setAttributes(
- F->getAttributes()
- .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
- .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
- for (Use &U : F->uses()) {
- CallSite CS(U.getUser());
- CS.setAttributes(
- CS.getAttributes()
- .removeAttribute(F->getContext(), ArgNo + 1,
- Attribute::StructRet)
- .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ // If the alloca is used in a call, we must clear the tail flag since
+ // the callee now uses an alloca from the caller.
+ for (User *U : TheAlloca->users()) {
+ CallInst *Call = dyn_cast<CallInst>(U);
+ if (!Call)
+ continue;
+ Call->setTailCall(false);
}
+ continue;
}
- // If this is a byval argument, and if the aggregate type is small, just
- // pass the elements, which is always safe, if the passed value is densely
- // packed or if we can prove the padding bytes are never accessed. This does
- // not apply to inalloca.
- bool isSafeToPromote =
- PtrArg->hasByValAttr() &&
- (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
- if (isSafeToPromote) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
- DEBUG(dbgs() << "argpromotion disable promoting argument '"
- << PtrArg->getName() << "' because it would require adding more"
- << " than " << MaxElements << " arguments to the function.\n");
- continue;
- }
-
- // If all the elements are single-value types, we can promote it.
- bool AllSimple = true;
- for (const auto *EltTy : STy->elements()) {
- if (!EltTy->isSingleValueType()) {
- AllSimple = false;
- break;
- }
+ if (I->use_empty())
+ continue;
+
+ // Otherwise, if we promoted this argument, then all users are load
+ // instructions (or GEPs with only load users), and all loads should be
+ // using the new argument that we added.
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+
+ while (!I->use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
+ assert(ArgIndices.begin()->second.empty() &&
+ "Load element should sort to front!");
+ I2->setName(I->getName() + ".val");
+ LI->replaceAllUsesWith(&*I2);
+ LI->eraseFromParent();
+ DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n");
+ } else {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+ IndicesVector Operands;
+ Operands.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Operands.size() == 1 && Operands.front() == 0)
+ Operands.clear();
+
+ Function::arg_iterator TheArg = I2;
+ for (ScalarizeTable::iterator It = ArgIndices.begin();
+ It->second != Operands; ++It, ++TheArg) {
+ assert(It != ArgIndices.end() && "GEP not handled??");
}
- // Safe to transform, don't even bother trying to "promote" it.
- // Passing the elements as a scalar will allow sroa to hack on
- // the new alloca we introduce.
- if (AllSimple) {
- ByValArgsToTransform.insert(PtrArg);
- continue;
+ std::string NewName = I->getName();
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ NewName += "." + utostr(Operands[i]);
}
- }
- }
+ NewName += ".val";
+ TheArg->setName(NewName);
- // If the argument is a recursive type and we're in a recursive
- // function, we could end up infinitely peeling the function argument.
- if (isSelfRecursive) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- bool RecursiveType = false;
- for (const auto *EltTy : STy->elements()) {
- if (EltTy == PtrArg->getType()) {
- RecursiveType = true;
- break;
- }
+ DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n");
+
+ // All of the uses must be load instructions. Replace them all with
+ // the argument specified by ArgNo.
+ while (!GEP->use_empty()) {
+ LoadInst *L = cast<LoadInst>(GEP->user_back());
+ L->replaceAllUsesWith(&*TheArg);
+ L->eraseFromParent();
}
- if (RecursiveType)
- continue;
+ GEP->eraseFromParent();
}
}
-
- // Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
- MaxElements))
- ArgsToPromote.insert(PtrArg);
- }
- // No promotable pointer arguments.
- if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
- return nullptr;
+ // Increment I2 past all of the arguments added for this promoted pointer.
+ std::advance(I2, ArgIndices.size());
+ }
- return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG);
+ return NF;
}
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
const DataLayout &DL = Callee->getParent()->getDataLayout();
@@ -390,26 +469,25 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
/// elements in Prefix is the same as the corresponding elements in Longer.
///
/// This means it also returns true when Prefix and Longer are equal!
-static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
+static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
if (Prefix.size() > Longer.size())
return false;
return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}
-
/// Checks if Indices, or a prefix of Indices, is in Set.
-static bool PrefixIn(const IndicesVector &Indices,
+static bool prefixIn(const IndicesVector &Indices,
std::set<IndicesVector> &Set) {
- std::set<IndicesVector>::iterator Low;
- Low = Set.upper_bound(Indices);
- if (Low != Set.begin())
- Low--;
- // Low is now the last element smaller than or equal to Indices. This means
- // it points to a prefix of Indices (possibly Indices itself), if such
- // prefix exists.
- //
- // This load is safe if any prefix of its operands is safe to load.
- return Low != Set.end() && IsPrefix(*Low, Indices);
+ std::set<IndicesVector>::iterator Low;
+ Low = Set.upper_bound(Indices);
+ if (Low != Set.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This means
+ // it points to a prefix of Indices (possibly Indices itself), if such
+ // prefix exists.
+ //
+ // This load is safe if any prefix of its operands is safe to load.
+ return Low != Set.end() && isPrefix(*Low, Indices);
}
/// Mark the given indices (ToMark) as safe in the given set of indices
@@ -417,7 +495,7 @@ static bool PrefixIn(const IndicesVector &Indices,
/// is already a prefix of Indices in Safe, Indices are implicitely marked safe
/// already. Furthermore, any indices that Indices is itself a prefix of, are
/// removed from Safe (since they are implicitely safe because of Indices now).
-static void MarkIndicesSafe(const IndicesVector &ToMark,
+static void markIndicesSafe(const IndicesVector &ToMark,
std::set<IndicesVector> &Safe) {
std::set<IndicesVector>::iterator Low;
Low = Safe.upper_bound(ToMark);
@@ -428,7 +506,7 @@ static void MarkIndicesSafe(const IndicesVector &ToMark,
// means it points to a prefix of Indices (possibly Indices itself), if
// such prefix exists.
if (Low != Safe.end()) {
- if (IsPrefix(*Low, ToMark))
+ if (isPrefix(*Low, ToMark))
// If there is already a prefix of these indices (or exactly these
// indices) marked a safe, don't bother adding these indices
return;
@@ -441,7 +519,7 @@ static void MarkIndicesSafe(const IndicesVector &ToMark,
++Low;
// If there we're a prefix of longer index list(s), remove those
std::set<IndicesVector>::iterator End = Safe.end();
- while (Low != End && IsPrefix(ToMark, *Low)) {
+ while (Low != End && isPrefix(ToMark, *Low)) {
std::set<IndicesVector>::iterator Remove = Low;
++Low;
Safe.erase(Remove);
@@ -486,7 +564,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
+ if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -512,25 +590,26 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
return false;
// Indices checked out, mark them as safe
- MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+ markIndicesSafe(Indices, SafeToUnconditionallyLoad);
Indices.clear();
}
} else if (V == Arg) {
// Direct loads are equivalent to a GEP with a single 0 index.
- MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
}
}
// Now, iterate all uses of the argument to see if there are any uses that are
// not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
- SmallVector<LoadInst*, 16> Loads;
+ SmallVector<LoadInst *, 16> Loads;
IndicesVector Operands;
for (Use &U : Arg->uses()) {
User *UR = U.getUser();
Operands.clear();
if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
// Don't hack volatile/atomic loads
- if (!LI->isSimple()) return false;
+ if (!LI->isSimple())
+ return false;
Loads.push_back(LI);
// Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0);
@@ -547,30 +626,31 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
}
// Ensure that all of the indices are constants.
- for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
- i != e; ++i)
+ for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e;
+ ++i)
if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
Operands.push_back(C->getSExtValue());
else
- return false; // Not a constant operand GEP!
+ return false; // Not a constant operand GEP!
// Ensure that the only users of the GEP are load instructions.
for (User *GEPU : GEP->users())
if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
// Don't hack volatile/atomic loads
- if (!LI->isSimple()) return false;
+ if (!LI->isSimple())
+ return false;
Loads.push_back(LI);
} else {
// Other uses than load?
return false;
}
} else {
- return false; // Not a load or a GEP.
+ return false; // Not a load or a GEP.
}
// Now, see if it is safe to promote this load / loads of this GEP. Loading
// is safe if Operands, or a prefix of Operands, is marked as safe.
- if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+ if (!prefixIn(Operands, SafeToUnconditionallyLoad))
return false;
// See if we are already promoting a load with these indices. If not, check
@@ -579,8 +659,10 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
if (ToPromote.find(Operands) == ToPromote.end()) {
if (MaxElements > 0 && ToPromote.size() == MaxElements) {
DEBUG(dbgs() << "argpromotion not promoting argument '"
- << Arg->getName() << "' because it would require adding more "
- << "than " << MaxElements << " arguments to the function.\n");
+ << Arg->getName()
+ << "' because it would require adding more "
+ << "than " << MaxElements
+ << " arguments to the function.\n");
// We limit aggregate promotion to only promoting up to a fixed number
// of elements of the aggregate.
return false;
@@ -589,7 +671,8 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
}
}
- if (Loads.empty()) return true; // No users, this is a dead argument.
+ if (Loads.empty())
+ return true; // No users, this is a dead argument.
// Okay, now we know that the argument is only used by load instructions and
// it is safe to unconditionally perform all of them. Use alias analysis to
@@ -598,7 +681,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// Because there could be several/many load instructions, remember which
// blocks we know to be transparent to the load.
- df_iterator_default_set<BasicBlock*, 16> TranspBlocks;
+ df_iterator_default_set<BasicBlock *, 16> TranspBlocks;
for (LoadInst *Load : Loads) {
// Check to see if the load is invalidated from the start of the block to
@@ -607,7 +690,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
MemoryLocation Loc = MemoryLocation::get(Load);
if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
- return false; // Pointer is invalidated!
+ return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
// To do this, we perform a depth first search on the inverse CFG from the
@@ -625,416 +708,352 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
return true;
}
-/// DoPromotion - This method actually performs the promotion of the specified
-/// arguments, and returns the new function. At this point, we know that it's
-/// safe to do so.
-static CallGraphNode *
-DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG) {
+/// \brief Checks if a type could have padding bytes.
+static bool isDenselyPacked(Type *type, const DataLayout &DL) {
- // Start by computing a new prototype for the function, which is the same as
- // the old function, but has modified arguments.
- FunctionType *FTy = F->getFunctionType();
- std::vector<Type*> Params;
+ // There is no size information, so be conservative.
+ if (!type->isSized())
+ return false;
- typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
+ // If the alloc size is not equal to the storage size, then there are padding
+ // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
+ if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
+ return false;
- // ScalarizedElements - If we are promoting a pointer that has elements
- // accessed out of it, keep track of which elements are accessed so that we
- // can add one argument for each.
- //
- // Arguments that are directly loaded will have a zero element value here, to
- // handle cases where there are both a direct load and GEP accesses.
- //
- std::map<Argument*, ScalarizeTable> ScalarizedElements;
+ if (!isa<CompositeType>(type))
+ return true;
- // OriginalLoads - Keep track of a representative load instruction from the
- // original function so that we can tell the alias analysis implementation
- // what the new GEP/Load instructions we are inserting look like.
- // We need to keep the original loads for each argument and the elements
- // of the argument that are accessed.
- std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
+ // For homogenous sequential types, check for padding within members.
+ if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+ return isDenselyPacked(seqTy->getElementType(), DL);
- // Attribute - Keep track of the parameter attributes for the arguments
- // that we are *not* promoting. For the ones that we do promote, the parameter
- // attributes are lost
- SmallVector<AttributeSet, 8> AttributesVec;
- const AttributeSet &PAL = F->getAttributes();
+ // Check for padding within and between elements of a struct.
+ StructType *StructTy = cast<StructType>(type);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
+ uint64_t StartPos = 0;
+ for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
+ Type *ElTy = StructTy->getElementType(i);
+ if (!isDenselyPacked(ElTy, DL))
+ return false;
+ if (StartPos != Layout->getElementOffsetInBits(i))
+ return false;
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
+ }
- // Add any return attributes.
- if (PAL.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(F->getContext(),
- PAL.getRetAttributes()));
+ return true;
+}
- // First, determine the new argument list
- unsigned ArgIndex = 1;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
- ++I, ++ArgIndex) {
- if (ByValArgsToTransform.count(&*I)) {
- // Simple byval argument? Just add all the struct element types.
- Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- StructType *STy = cast<StructType>(AgTy);
- Params.insert(Params.end(), STy->element_begin(), STy->element_end());
- ++NumByValArgsPromoted;
- } else if (!ArgsToPromote.count(&*I)) {
- // Unchanged argument
- Params.push_back(I->getType());
- AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
- if (attrs.hasAttributes(ArgIndex)) {
- AttrBuilder B(attrs, ArgIndex);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Params.size(), B));
- }
- } else if (I->use_empty()) {
- // Dead argument (which are always marked as promotable)
- ++NumArgumentsDead;
- } else {
- // Okay, this is being promoted. This means that the only uses are loads
- // or GEPs which are only used by loads
+/// \brief Checks if the padding bytes of an argument could be accessed.
+static bool canPaddingBeAccessed(Argument *arg) {
- // In this table, we will track which indices are loaded from the argument
- // (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- for (User *U : I->users()) {
- Instruction *UI = cast<Instruction>(U);
- Type *SrcTy;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- SrcTy = L->getType();
- else
- SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
- IndicesVector Indices;
- Indices.reserve(UI->getNumOperands() - 1);
- // Since loads will only have a single operand, and GEPs only a single
- // non-index operand, this will record direct loads without any indices,
- // and gep+loads with the GEP indices.
- for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
- II != IE; ++II)
- Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
- // GEPs with a single 0 index can be merged with direct loads
- if (Indices.size() == 1 && Indices.front() == 0)
- Indices.clear();
- ArgIndices.insert(std::make_pair(SrcTy, Indices));
- LoadInst *OrigLoad;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- OrigLoad = L;
- else
- // Take any load, we will use it only to update Alias Analysis
- OrigLoad = cast<LoadInst>(UI->user_back());
- OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
- }
+ assert(arg->hasByValAttr());
- // Add a parameter to the function for each element passed in.
- for (const auto &ArgIndex : ArgIndices) {
- // not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(
- cast<PointerType>(I->getType()->getScalarType())->getElementType(),
- ArgIndex.second));
- assert(Params.back());
- }
+ // Track all the pointers to the argument to make sure they are not captured.
+ SmallPtrSet<Value *, 16> PtrValues;
+ PtrValues.insert(arg);
- if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
- ++NumArgumentsPromoted;
- else
- ++NumAggregatesPromoted;
+ // Track all of the stores.
+ SmallVector<StoreInst *, 16> Stores;
+
+ // Scan through the uses recursively to make sure the pointer is always used
+ // sanely.
+ SmallVector<Value *, 16> WorkList;
+ WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
+ while (!WorkList.empty()) {
+ Value *V = WorkList.back();
+ WorkList.pop_back();
+ if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
+ if (PtrValues.insert(V).second)
+ WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
+ } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
+ Stores.push_back(Store);
+ } else if (!isa<LoadInst>(V)) {
+ return true;
}
}
- // Add any function attributes.
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
- PAL.getFnAttributes()));
+ // Check to make sure the pointers aren't captured
+ for (StoreInst *Store : Stores)
+ if (PtrValues.count(Store->getValueOperand()))
+ return true;
- Type *RetTy = FTy->getReturnType();
+ return false;
+}
- // Construct the new function type using the new arguments.
- FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+static Function *
+promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
+ unsigned MaxElements,
+ Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
+ ReplaceCallSite) {
+ // Make sure that it is local to this module.
+ if (!F->hasLocalLinkage())
+ return nullptr;
- // Create the new function body and insert it into the module.
- Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
- NF->copyAttributesFrom(F);
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg())
+ return nullptr;
- // Patch the pointer to LLVM function in debug info descriptor.
- NF->setSubprogram(F->getSubprogram());
- F->setSubprogram(nullptr);
+ // First check: see if there are any pointer arguments! If not, quick exit.
+ SmallVector<Argument *, 16> PointerArgs;
+ for (Argument &I : F->args())
+ if (I.getType()->isPointerTy())
+ PointerArgs.push_back(&I);
+ if (PointerArgs.empty())
+ return nullptr;
- DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
- << "From: " << *F);
-
- // Recompute the parameter attributes list based on the new arguments for
- // the function.
- NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
- AttributesVec.clear();
+ // Second check: make sure that all callers are direct callers. We can't
+ // transform functions that have indirect callers. Also see if the function
+ // is self-recursive.
+ bool isSelfRecursive = false;
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
+ // Must be a direct call.
+ if (CS.getInstruction() == nullptr || !CS.isCallee(&U))
+ return nullptr;
- F->getParent()->getFunctionList().insert(F->getIterator(), NF);
- NF->takeName(F);
+ if (CS.getInstruction()->getParent()->getParent() == F)
+ isSelfRecursive = true;
+ }
- // Get a new callgraph node for NF.
- CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+ const DataLayout &DL = F->getParent()->getDataLayout();
- // Loop over all of the callers of the function, transforming the call sites
- // to pass in the loaded pointers.
- //
- SmallVector<Value*, 16> Args;
- while (!F->use_empty()) {
- CallSite CS(F->user_back());
- assert(CS.getCalledFunction() == F);
- Instruction *Call = CS.getInstruction();
- const AttributeSet &CallPAL = CS.getAttributes();
+ AAResults &AAR = AARGetter(*F);
- // Add any return attributes.
- if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(F->getContext(),
- CallPAL.getRetAttributes()));
+ // Check to see which arguments are promotable. If an argument is promotable,
+ // add it to ArgsToPromote.
+ SmallPtrSet<Argument *, 8> ArgsToPromote;
+ SmallPtrSet<Argument *, 8> ByValArgsToTransform;
+ for (Argument *PtrArg : PointerArgs) {
+ Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
- // Loop over the operands, inserting GEP and loads in the caller as
- // appropriate.
- CallSite::arg_iterator AI = CS.arg_begin();
- ArgIndex = 1;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++AI, ++ArgIndex)
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
- Args.push_back(*AI); // Unmodified argument
+ // Replace sret attribute with noalias. This reduces register pressure by
+ // avoiding a register copy.
+ if (PtrArg->hasStructRetAttr()) {
+ unsigned ArgNo = PtrArg->getArgNo();
+ F->setAttributes(
+ F->getAttributes()
+ .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
+ .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
+ CS.setAttributes(
+ CS.getAttributes()
+ .removeAttribute(F->getContext(), ArgNo + 1,
+ Attribute::StructRet)
+ .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ }
+ }
- if (CallPAL.hasAttributes(ArgIndex)) {
- AttrBuilder B(CallPAL, ArgIndex);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
- }
- } else if (ByValArgsToTransform.count(&*I)) {
- // Emit a GEP and load for each element of the struct.
- Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
- // TODO: Tell AA about the new values?
- Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
+ // If this is a byval argument, and if the aggregate type is small, just
+ // pass the elements, which is always safe, if the passed value is densely
+ // packed or if we can prove the padding bytes are never accessed. This does
+ // not apply to inalloca.
+ bool isSafeToPromote =
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+ if (isSafeToPromote) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
+ DEBUG(dbgs() << "argpromotion disable promoting argument '"
+ << PtrArg->getName()
+ << "' because it would require adding more"
+ << " than " << MaxElements
+ << " arguments to the function.\n");
+ continue;
}
- } else if (!I->use_empty()) {
- // Non-dead argument: insert GEPs and loads as appropriate.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- // Store the Value* version of the indices in here, but declare it now
- // for reuse.
- std::vector<Value*> Ops;
- for (const auto &ArgIndex : ArgIndices) {
- Value *V = *AI;
- LoadInst *OrigLoad =
- OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
- if (!ArgIndex.second.empty()) {
- Ops.reserve(ArgIndex.second.size());
- Type *ElTy = V->getType();
- for (unsigned long II : ArgIndex.second) {
- // Use i32 to index structs, and i64 for others (pointers/arrays).
- // This satisfies GEP constraints.
- Type *IdxTy = (ElTy->isStructTy() ?
- Type::getInt32Ty(F->getContext()) :
- Type::getInt64Ty(F->getContext()));
- Ops.push_back(ConstantInt::get(IdxTy, II));
- // Keep track of the type we're currently indexing.
- if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
- ElTy = ElPTy->getElementType();
- else
- ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
- }
- // And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
- V->getName() + ".idx", Call);
- Ops.clear();
+
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (const auto *EltTy : STy->elements()) {
+ if (!EltTy->isSingleValueType()) {
+ AllSimple = false;
+ break;
}
- // Since we're replacing a load make sure we take the alignment
- // of the previous load.
- LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
- newLoad->setAlignment(OrigLoad->getAlignment());
- // Transfer the AA info too.
- AAMDNodes AAInfo;
- OrigLoad->getAAMetadata(AAInfo);
- newLoad->setAAMetadata(AAInfo);
+ }
- Args.push_back(newLoad);
+ // Safe to transform, don't even bother trying to "promote" it.
+ // Passing the elements as a scalar will allow sroa to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
}
}
+ }
- // Push any varargs arguments on the list.
- for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
- Args.push_back(*AI);
- if (CallPAL.hasAttributes(ArgIndex)) {
- AttrBuilder B(CallPAL, ArgIndex);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ // If the argument is a recursive type and we're in a recursive
+ // function, we could end up infinitely peeling the function argument.
+ if (isSelfRecursive) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ bool RecursiveType = false;
+ for (const auto *EltTy : STy->elements()) {
+ if (EltTy == PtrArg->getType()) {
+ RecursiveType = true;
+ break;
+ }
+ }
+ if (RecursiveType)
+ continue;
}
}
- // Add any function attributes.
- if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(Call->getContext(),
- CallPAL.getFnAttributes()));
+ // Otherwise, see if we can promote the pointer to its value.
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
+ MaxElements))
+ ArgsToPromote.insert(PtrArg);
+ }
+
+ // No promotable pointer arguments.
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ return nullptr;
- SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
+}
- Instruction *New;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call);
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
- AttributesVec));
- } else {
- New = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
- AttributesVec));
- cast<CallInst>(New)->setTailCallKind(
- cast<CallInst>(Call)->getTailCallKind());
- }
- New->setDebugLoc(Call->getDebugLoc());
- Args.clear();
- AttributesVec.clear();
+PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ bool Changed = false, LocalChange;
- // Update the callgraph to know that the callsite has been transformed.
- CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
- CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
+ // Iterate until we stop promoting from this SCC.
+ do {
+ LocalChange = false;
- if (!Call->use_empty()) {
- Call->replaceAllUsesWith(New);
- New->takeName(Call);
+ for (LazyCallGraph::Node &N : C) {
+ Function &OldF = N.getFunction();
+
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ // FIXME: This lambda must only be used with this function. We should
+ // skip the lambda and just get the AA results directly.
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ assert(&F == &OldF && "Called with an unexpected function!");
+ return FAM.getResult<AAManager>(F);
+ };
+
+ Function *NewF = promoteArguments(&OldF, AARGetter, 3u, None);
+ if (!NewF)
+ continue;
+ LocalChange = true;
+
+ // Directly substitute the functions in the call graph. Note that this
+ // requires the old function to be completely dead and completely
+ // replaced by the new function. It does no call graph updates, it merely
+ // swaps out the particular function mapped to a particular node in the
+ // graph.
+ C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
+ OldF.eraseFromParent();
}
- // Finally, remove the old call from the program, reducing the use-count of
- // F.
- Call->eraseFromParent();
- }
-
- // Since we have now created the new function, splice the body of the old
- // function right into the new function, leaving the old rotting hulk of the
- // function empty.
- NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+ Changed |= LocalChange;
+ } while (LocalChange);
- // Loop over the argument list, transferring uses of the old arguments over to
- // the new arguments, also transferring over the names as well.
- //
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
- I2 = NF->arg_begin(); I != E; ++I) {
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
- // If this is an unmodified argument, move the name and users over to the
- // new version.
- I->replaceAllUsesWith(&*I2);
- I2->takeName(&*I);
- ++I2;
- continue;
- }
+ if (!Changed)
+ return PreservedAnalyses::all();
- if (ByValArgsToTransform.count(&*I)) {
- // In the callee, we create an alloca, and store each of the new incoming
- // arguments into the alloca.
- Instruction *InsertPt = &NF->begin()->front();
+ return PreservedAnalyses::none();
+}
- // Just add all the struct element types.
- Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
+namespace {
+/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+///
+struct ArgPromotion : public CallGraphSCCPass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ getAAResultsAnalysisUsage(AU);
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
- InsertPt);
- I2->setName(I->getName()+"."+Twine(i));
- new StoreInst(&*I2++, Idx, InsertPt);
- }
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ static char ID; // Pass identification, replacement for typeid
+ explicit ArgPromotion(unsigned MaxElements = 3)
+ : CallGraphSCCPass(ID), MaxElements(MaxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
- // Anything that used the arg should now use the alloca.
- I->replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(&*I);
+private:
+ using llvm::Pass::doInitialization;
+ bool doInitialization(CallGraph &CG) override;
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned MaxElements;
+};
+}
- // If the alloca is used in a call, we must clear the tail flag since
- // the callee now uses an alloca from the caller.
- for (User *U : TheAlloca->users()) {
- CallInst *Call = dyn_cast<CallInst>(U);
- if (!Call)
- continue;
- Call->setTailCall(false);
- }
- continue;
- }
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
- if (I->use_empty())
- continue;
+Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
+ return new ArgPromotion(MaxElements);
+}
- // Otherwise, if we promoted this argument, then all users are load
- // instructions (or GEPs with only load users), and all loads should be
- // using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+ if (skipSCC(SCC))
+ return false;
- while (!I->use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
- assert(ArgIndices.begin()->second.empty() &&
- "Load element should sort to front!");
- I2->setName(I->getName()+".val");
- LI->replaceAllUsesWith(&*I2);
- LI->eraseFromParent();
- DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
- << "' in function '" << F->getName() << "'\n");
- } else {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
- IndicesVector Operands;
- Operands.reserve(GEP->getNumIndices());
- for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
- II != IE; ++II)
- Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- // GEPs with a single 0 index can be merged with direct loads
- if (Operands.size() == 1 && Operands.front() == 0)
- Operands.clear();
+ LegacyAARGetter AARGetter(*this);
- Function::arg_iterator TheArg = I2;
- for (ScalarizeTable::iterator It = ArgIndices.begin();
- It->second != Operands; ++It, ++TheArg) {
- assert(It != ArgIndices.end() && "GEP not handled??");
- }
+ bool Changed = false, LocalChange;
- std::string NewName = I->getName();
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
- NewName += "." + utostr(Operands[i]);
- }
- NewName += ".val";
- TheArg->setName(NewName);
+ // Iterate until we stop promoting from this SCC.
+ do {
+ LocalChange = false;
+ // Attempt to promote arguments from all functions in this SCC.
+ for (CallGraphNode *OldNode : SCC) {
+ Function *OldF = OldNode->getFunction();
+ if (!OldF)
+ continue;
+
+ auto ReplaceCallSite = [&](CallSite OldCS, CallSite NewCS) {
+ Function *Caller = OldCS.getInstruction()->getParent()->getParent();
+ CallGraphNode *NewCalleeNode =
+ CG.getOrInsertFunction(NewCS.getCalledFunction());
+ CallGraphNode *CallerNode = CG[Caller];
+ CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
+ };
+
+ if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
+ {ReplaceCallSite})) {
+ LocalChange = true;
- DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
- << "' of function '" << NF->getName() << "'\n");
+ // Update the call graph for the newly promoted function.
+ CallGraphNode *NewNode = CG.getOrInsertFunction(NewF);
+ NewNode->stealCalledFunctionsFrom(OldNode);
+ if (OldNode->getNumReferences() == 0)
+ delete CG.removeFunctionFromModule(OldNode);
+ else
+ OldF->setLinkage(Function::ExternalLinkage);
- // All of the uses must be load instructions. Replace them all with
- // the argument specified by ArgNo.
- while (!GEP->use_empty()) {
- LoadInst *L = cast<LoadInst>(GEP->user_back());
- L->replaceAllUsesWith(&*TheArg);
- L->eraseFromParent();
- }
- GEP->eraseFromParent();
+ // And updat ethe SCC we're iterating as well.
+ SCC.ReplaceNode(OldNode, NewNode);
}
}
+ // Remember that we changed something.
+ Changed |= LocalChange;
+ } while (LocalChange);
- // Increment I2 past all of the arguments added for this promoted pointer.
- std::advance(I2, ArgIndices.size());
- }
-
- NF_CGN->stealCalledFunctionsFrom(CG[F]);
-
- // Now that the old function is dead, delete it. If there is a dangling
- // reference to the CallgraphNode, just leave the dead function around for
- // someone else to nuke.
- CallGraphNode *CGN = CG[F];
- if (CGN->getNumReferences() == 0)
- delete CG.removeFunctionFromModule(CGN);
- else
- F->setLinkage(Function::ExternalLinkage);
-
- return NF_CGN;
+ return Changed;
}
bool ArgPromotion::doInitialization(CallGraph &CG) {
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index d75ed206ad23..62b5a9c9ba26 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -60,6 +60,23 @@ static bool IsBetterCanonical(const GlobalVariable &A,
return A.hasGlobalUnnamedAddr();
}
+static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ GV->getAllMetadata(MDs);
+ for (const auto &V : MDs)
+ if (V.first != LLVMContext::MD_dbg)
+ return true;
+ return false;
+}
+
+static void copyDebugLocMetadata(const GlobalVariable *From,
+ GlobalVariable *To) {
+ SmallVector<DIGlobalVariableExpression *, 1> MDs;
+ From->getDebugInfo(MDs);
+ for (auto MD : MDs)
+ To->addDebugInfo(MD);
+}
+
static unsigned getAlignment(GlobalVariable *GV) {
unsigned Align = GV->getAlignment();
if (Align)
@@ -113,6 +130,10 @@ static bool mergeConstants(Module &M) {
if (GV->isWeakForLinker())
continue;
+ // Don't touch globals with metadata other then !dbg.
+ if (hasMetadataOtherThanDebugLoc(GV))
+ continue;
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
@@ -155,6 +176,9 @@ static bool mergeConstants(Module &M) {
if (!Slot->hasGlobalUnnamedAddr() && !GV->hasGlobalUnnamedAddr())
continue;
+ if (hasMetadataOtherThanDebugLoc(GV))
+ continue;
+
if (!GV->hasGlobalUnnamedAddr())
Slot->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
@@ -178,6 +202,8 @@ static bool mergeConstants(Module &M) {
getAlignment(Replacements[i].second)));
}
+ copyDebugLocMetadata(Replacements[i].first, Replacements[i].second);
+
// Eliminate any uses of the dead global.
Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index ba2e60dee3bc..1b111de06157 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -98,8 +98,11 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
LLVMContext &Ctx = M.getContext();
Constant *C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
- Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx), nullptr);
+ Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
Function *F = dyn_cast<Function>(C);
+ // Take over the existing function. The frontend emits a weak stub so that the
+ // linker knows about the symbol; this pass replaces the function body.
+ F->deleteBody();
F->setAlignment(4096);
auto args = F->arg_begin();
Value &CallSiteTypeId = *(args++);
@@ -117,7 +120,7 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
IRBuilder<> IRBFail(TrapBB);
Constant *CFICheckFailFn = M.getOrInsertFunction(
"__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx),
- Type::getInt8PtrTy(Ctx), nullptr);
+ Type::getInt8PtrTy(Ctx));
IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr});
IRBFail.CreateBr(ExitBB);
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1a5ed4692211..375b74c494d9 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -166,41 +166,43 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
// Drop any attributes that were on the vararg arguments.
- AttributeSet PAL = CS.getAttributes();
+ AttributeList PAL = CS.getAttributes();
if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
- SmallVector<AttributeSet, 8> AttributesVec;
+ SmallVector<AttributeList, 8> AttributesVec;
for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
AttributesVec.push_back(PAL.getSlotAttributes(i));
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
- PAL.getFnAttributes()));
- PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
+ if (PAL.hasAttributes(AttributeList::FunctionIndex))
+ AttributesVec.push_back(AttributeList::get(Fn.getContext(),
+ AttributeList::FunctionIndex,
+ PAL.getFnAttributes()));
+ PAL = AttributeList::get(Fn.getContext(), AttributesVec);
}
SmallVector<OperandBundleDef, 1> OpBundles;
CS.getOperandBundlesAsDefs(OpBundles);
- Instruction *New;
+ CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call);
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(PAL);
+ NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", Call);
} else {
- New = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(PAL);
- cast<CallInst>(New)->setTailCallKind(
- cast<CallInst>(Call)->getTailCallKind());
+ NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
+ cast<CallInst>(NewCS.getInstruction())
+ ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
}
- New->setDebugLoc(Call->getDebugLoc());
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(PAL);
+ NewCS->setDebugLoc(Call->getDebugLoc());
+ uint64_t W;
+ if (Call->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
Args.clear();
if (!Call->use_empty())
- Call->replaceAllUsesWith(New);
+ Call->replaceAllUsesWith(NewCS.getInstruction());
- New->takeName(Call);
+ NewCS->takeName(Call);
// Finally, remove the old call from the program, reducing the use-count of
// F.
@@ -681,8 +683,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
bool HasLiveReturnedArg = false;
// Set up to build a new list of parameter attributes.
- SmallVector<AttributeSet, 8> AttributesVec;
- const AttributeSet &PAL = F->getAttributes();
+ SmallVector<AttributeSet, 8> ArgAttrVec;
+ const AttributeList &PAL = F->getAttributes();
// Remember which arguments are still alive.
SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -696,16 +698,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (LiveValues.erase(Arg)) {
Params.push_back(I->getType());
ArgAlive[i] = true;
-
- // Get the original parameter attributes (skipping the first one, that is
- // for the return value.
- if (PAL.hasAttributes(i + 1)) {
- AttrBuilder B(PAL, i + 1);
- if (B.contains(Attribute::Returned))
- HasLiveReturnedArg = true;
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Params.size(), B));
- }
+ ArgAttrVec.push_back(PAL.getParamAttributes(i));
+ HasLiveReturnedArg |= PAL.hasParamAttribute(i, Attribute::Returned);
} else {
++NumArgumentsEliminated;
DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument " << i
@@ -779,30 +773,24 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
assert(NRetTy && "No new return type found?");
// The existing function return attributes.
- AttributeSet RAttrs = PAL.getRetAttributes();
+ AttrBuilder RAttrs(PAL.getRetAttributes());
// Remove any incompatible attributes, but only if we removed all return
// values. Otherwise, ensure that we don't have any conflicting attributes
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NRetTy));
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy));
else
- assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
+ assert(!RAttrs.overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
"Return attributes no longer compatible?");
- if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
-
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(F->getContext(),
- PAL.getFnAttributes()));
+ AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
// Reconstruct the AttributesList based on the vector we constructed.
- AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
+ assert(ArgAttrVec.size() == Params.size());
+ AttributeList NewPAL = AttributeList::get(
+ F->getContext(), PAL.getFnAttributes(), RetAttrs, ArgAttrVec);
// Create the new function type based on the recomputed parameters.
FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
@@ -829,18 +817,14 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
CallSite CS(F->user_back());
Instruction *Call = CS.getInstruction();
- AttributesVec.clear();
- const AttributeSet &CallPAL = CS.getAttributes();
-
- // The call return attributes.
- AttributeSet RAttrs = CallPAL.getRetAttributes();
+ ArgAttrVec.clear();
+ const AttributeList &CallPAL = CS.getAttributes();
- // Adjust in case the function was changed to return void.
- RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NF->getReturnType()));
- if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
+ // Adjust the call return attributes in case the function was changed to
+ // return void.
+ AttrBuilder RAttrs(CallPAL.getRetAttributes());
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy));
+ AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops the varargs.
@@ -852,57 +836,55 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- if (CallPAL.hasAttributes(i + 1)) {
- AttrBuilder B(CallPAL, i + 1);
+ AttributeSet Attrs = CallPAL.getParamAttributes(i);
+ if (NRetTy != RetTy && Attrs.hasAttribute(Attribute::Returned)) {
// If the return type has changed, then get rid of 'returned' on the
// call site. The alternative is to make all 'returned' attributes on
// call sites keep the return value alive just like 'returned'
- // attributes on function declaration but it's less clearly a win
- // and this is not an expected case anyway
- if (NRetTy != RetTy && B.contains(Attribute::Returned))
- B.removeAttribute(Attribute::Returned);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ // attributes on function declaration but it's less clearly a win and
+ // this is not an expected case anyway
+ ArgAttrVec.push_back(AttributeSet::get(
+ F->getContext(),
+ AttrBuilder(Attrs).removeAttribute(Attribute::Returned)));
+ } else {
+ // Otherwise, use the original attributes.
+ ArgAttrVec.push_back(Attrs);
}
}
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
Args.push_back(*I);
- if (CallPAL.hasAttributes(i + 1)) {
- AttrBuilder B(CallPAL, i + 1);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
- }
+ ArgAttrVec.push_back(CallPAL.getParamAttributes(i));
}
- if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(Call->getContext(),
- CallPAL.getFnAttributes()));
-
// Reconstruct the AttributesList based on the vector we constructed.
- AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
+ assert(ArgAttrVec.size() == Args.size());
+ AttributeList NewCallPAL = AttributeList::get(
+ F->getContext(), CallPAL.getFnAttributes(), RetAttrs, ArgAttrVec);
SmallVector<OperandBundleDef, 1> OpBundles;
CS.getOperandBundlesAsDefs(OpBundles);
- Instruction *New;
+ CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call->getParent());
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(NewCallPAL);
+ NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", Call->getParent());
} else {
- New = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(NewCallPAL);
- cast<CallInst>(New)->setTailCallKind(
- cast<CallInst>(Call)->getTailCallKind());
+ NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
+ cast<CallInst>(NewCS.getInstruction())
+ ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
}
- New->setDebugLoc(Call->getDebugLoc());
-
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(NewCallPAL);
+ NewCS->setDebugLoc(Call->getDebugLoc());
+ uint64_t W;
+ if (Call->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
Args.clear();
+ ArgAttrVec.clear();
+ Instruction *New = NewCS.getInstruction();
if (!Call->use_empty()) {
if (New->getType() == Call->getType()) {
// Return type not changed? Just replace users then.
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 402a66552c24..4d13b3f40688 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -49,31 +49,35 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
-namespace {
-typedef SmallSetVector<Function *, 8> SCCNodeSet;
-}
+// FIXME: This is disabled by default to avoid exposing security vulnerabilities
+// in C/C++ code compiled by clang:
+// http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html
+static cl::opt<bool> EnableNonnullArgPropagation(
+ "enable-nonnull-arg-prop", cl::Hidden,
+ cl::desc("Try to propagate nonnull argument attributes from callsites to "
+ "caller functions."));
namespace {
-/// The three kinds of memory access relevant to 'readonly' and
-/// 'readnone' attributes.
-enum MemoryAccessKind {
- MAK_ReadNone = 0,
- MAK_ReadOnly = 1,
- MAK_MayWrite = 2
-};
+typedef SmallSetVector<Function *, 8> SCCNodeSet;
}
-static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
+/// Returns the memory access attribute for function F using AAR for AA results,
+/// where SCCNodes is the current SCC.
+///
+/// If ThisBody is true, this function may examine the function body and will
+/// return a result pertaining to this copy of the function. If it is false, the
+/// result will be based only on AA results for the function declaration; it
+/// will be assumed that some other (perhaps less optimized) version of the
+/// function may be selected at link time.
+static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
+ AAResults &AAR,
const SCCNodeSet &SCCNodes) {
FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
if (MRB == FMRB_DoesNotAccessMemory)
// Already perfect!
return MAK_ReadNone;
- // Non-exact function definitions may not be selected at link time, and an
- // alternative version that writes to memory may be selected. See the comment
- // on GlobalValue::isDefinitionExact for more details.
- if (!F.hasExactDefinition()) {
+ if (!ThisBody) {
if (AliasAnalysis::onlyReadsMemory(MRB))
return MAK_ReadOnly;
@@ -172,9 +176,14 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}
+MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
+ AAResults &AAR) {
+ return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
+}
+
/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
-static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
+static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
@@ -182,7 +191,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
- switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) {
+ // Non-exact function definitions may not be selected at link time, and an
+ // alternative version that writes to memory may be selected. See the
+ // comment on GlobalValue::isDefinitionExact for more details.
+ switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
+ AAR, SCCNodes)) {
case MAK_MayWrite:
return false;
case MAK_ReadOnly:
@@ -212,11 +225,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
F->removeAttributes(
- AttributeSet::FunctionIndex,
- AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B));
+ AttributeList::FunctionIndex,
+ AttributeList::get(F->getContext(), AttributeList::FunctionIndex, B));
// Add in the new attribute.
- F->addAttribute(AttributeSet::FunctionIndex,
+ F->addAttribute(AttributeList::FunctionIndex,
ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
if (ReadsMemory)
@@ -522,7 +535,7 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
if (Value *RetArg = FindRetArg()) {
auto *A = cast<Argument>(RetArg);
- A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
++NumReturned;
Changed = true;
}
@@ -531,6 +544,49 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
return Changed;
}
+/// If a callsite has arguments that are also arguments to the parent function,
+/// try to propagate attributes from the callsite's arguments to the parent's
+/// arguments. This may be important because inlining can cause information loss
+/// when attribute knowledge disappears with the inlined call.
+static bool addArgumentAttrsFromCallsites(Function &F) {
+ if (!EnableNonnullArgPropagation)
+ return false;
+
+ bool Changed = false;
+
+ // For an argument attribute to transfer from a callsite to the parent, the
+ // call must be guaranteed to execute every time the parent is called.
+ // Conservatively, just check for calls in the entry block that are guaranteed
+ // to execute.
+ // TODO: This could be enhanced by testing if the callsite post-dominates the
+ // entry block or by doing simple forward walks or backward walks to the
+ // callsite.
+ BasicBlock &Entry = F.getEntryBlock();
+ for (Instruction &I : Entry) {
+ if (auto CS = CallSite(&I)) {
+ if (auto *CalledFunc = CS.getCalledFunction()) {
+ for (auto &CSArg : CalledFunc->args()) {
+ if (!CSArg.hasNonNullAttr())
+ continue;
+
+ // If the non-null callsite argument operand is an argument to 'F'
+ // (the caller) and the call is guaranteed to execute, then the value
+ // must be non-null throughout 'F'.
+ auto *FArg = dyn_cast<Argument>(CS.getArgOperand(CSArg.getArgNo()));
+ if (FArg && !FArg->hasNonNullAttr()) {
+ FArg->addAttr(Attribute::NonNull);
+ Changed = true;
+ }
+ }
+ }
+ }
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ break;
+ }
+
+ return Changed;
+}
+
/// Deduce nocapture attributes for the SCC.
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
@@ -549,6 +605,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (!F->hasExactDefinition())
continue;
+ Changed |= addArgumentAttrsFromCallsites(*F);
+
// Functions that are readonly (or readnone) and nounwind and don't return
// a value can't capture arguments. Don't analyze them.
if (F->onlyReadsMemory() && F->doesNotThrow() &&
@@ -556,7 +614,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -576,7 +634,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
A->addAttr(
- AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
} else {
@@ -604,7 +662,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (R != Attribute::None) {
AttrBuilder B;
B.addAttribute(R);
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
Changed = true;
R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
}
@@ -629,7 +687,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
Argument *A = ArgumentSCC[0]->Definition;
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -671,7 +729,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -708,8 +766,9 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
// Clear out existing readonly/readnone attributes
- A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R));
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->removeAttr(
+ AttributeList::get(A->getContext(), A->getArgNo() + 1, R));
+ A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
Changed = true;
}
@@ -769,7 +828,7 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(RVI);
- if (CS.paramHasAttr(0, Attribute::NoAlias))
+ if (CS.hasRetAttr(Attribute::NoAlias))
break;
if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
break;
@@ -905,7 +964,7 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
// pointers.
for (Function *F : SCCNodes) {
// Already nonnull.
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::NonNull))
continue;
@@ -926,7 +985,7 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
// Mark the function eagerly since we may discover a function
// which prevents us from speculating about the entire SCC
DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n");
- F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
++NumNonNullReturn;
MadeChange = true;
}
@@ -939,13 +998,13 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
if (SCCReturnsNonNull) {
for (Function *F : SCCNodes) {
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::NonNull) ||
!F->getReturnType()->isPointerTy())
continue;
DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
- F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
++NumNonNullReturn;
MadeChange = true;
}
@@ -1163,19 +1222,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
if (skipSCC(SCC))
return false;
-
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
-
- return runImpl(SCC, AARGetter);
+ return runImpl(SCC, LegacyAARGetter(*this));
}
namespace {
@@ -1275,16 +1322,9 @@ PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &CG = AM.getResult<CallGraphAnalysis>(M);
- bool Changed = deduceFunctionAttributeInRPO(M, CG);
-
- // CallGraphAnalysis holds AssertingVH and must be invalidated eagerly so
- // that other passes don't delete stuff from under it.
- // FIXME: We need to invalidate this to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<CallGraphAnalysis>(M);
-
- if (!Changed)
+ if (!deduceFunctionAttributeInRPO(M, CG))
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
PA.preserve<CallGraphAnalysis>();
return PA;
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 6b32f6c31f72..d66411f04cc4 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -75,12 +75,6 @@ static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
cl::desc("Compute dead symbols"));
-// Temporary allows the function import pass to disable always linking
-// referenced discardable symbols.
-static cl::opt<bool>
- DontForceImportReferencedDiscardableSymbols("disable-force-link-odr",
- cl::init(false), cl::Hidden);
-
static cl::opt<bool> EnableImportMetadata(
"enable-import-metadata", cl::init(
#if !defined(NDEBUG)
@@ -124,7 +118,7 @@ namespace {
static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex &Index,
const GlobalValueSummaryList &CalleeSummaryList,
- unsigned Threshold) {
+ unsigned Threshold, StringRef CallerModulePath) {
auto It = llvm::find_if(
CalleeSummaryList,
[&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
@@ -145,6 +139,21 @@ selectCallee(const ModuleSummaryIndex &Index,
auto *Summary = cast<FunctionSummary>(GVSummary);
+ // If this is a local function, make sure we import the copy
+ // in the caller's module. The only time a local function can
+ // share an entry in the index is if there is a local with the same name
+ // in another module that had the same source file name (in a different
+ // directory), where each was compiled in their own directory so there
+ // was not distinguishing path.
+ // However, do the import from another module if there is only one
+ // entry in the list - in that case this must be a reference due
+ // to indirect call profile data, since a function pointer can point to
+ // a local in another module.
+ if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
+ CalleeSummaryList.size() > 1 &&
+ Summary->modulePath() != CallerModulePath)
+ return false;
+
if (Summary->instCount() > Threshold)
return false;
@@ -163,11 +172,13 @@ selectCallee(const ModuleSummaryIndex &Index,
/// null if there's no match.
static const GlobalValueSummary *selectCallee(GlobalValue::GUID GUID,
unsigned Threshold,
- const ModuleSummaryIndex &Index) {
+ const ModuleSummaryIndex &Index,
+ StringRef CallerModulePath) {
auto CalleeSummaryList = Index.findGlobalValueSummaryList(GUID);
if (CalleeSummaryList == Index.end())
return nullptr; // This function does not have a summary
- return selectCallee(Index, CalleeSummaryList->second, Threshold);
+ return selectCallee(Index, CalleeSummaryList->second, Threshold,
+ CallerModulePath);
}
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */,
@@ -186,6 +197,15 @@ static void computeImportForFunction(
auto GUID = Edge.first.getGUID();
DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n");
+ if (Index.findGlobalValueSummaryList(GUID) == Index.end()) {
+ // For SamplePGO, the indirect call targets for local functions will
+ // have its original name annotated in profile. We try to find the
+ // corresponding PGOFuncName as the GUID.
+ GUID = Index.getGUIDFromOriginalID(GUID);
+ if (GUID == 0)
+ continue;
+ }
+
if (DefinedGVSummaries.count(GUID)) {
DEBUG(dbgs() << "ignored! Target already in destination module.\n");
continue;
@@ -202,7 +222,8 @@ static void computeImportForFunction(
const auto NewThreshold =
Threshold * GetBonusMultiplier(Edge.second.Hotness);
- auto *CalleeSummary = selectCallee(GUID, NewThreshold, Index);
+ auto *CalleeSummary =
+ selectCallee(GUID, NewThreshold, Index, Summary.modulePath());
if (!CalleeSummary) {
DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n");
continue;
@@ -522,6 +543,23 @@ llvm::EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename,
/// Fixup WeakForLinker linkages in \p TheModule based on summary analysis.
void llvm::thinLTOResolveWeakForLinkerModule(
Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
+ auto ConvertToDeclaration = [](GlobalValue &GV) {
+ DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName() << "\n");
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ F->deleteBody();
+ F->clearMetadata();
+ } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
+ V->setInitializer(nullptr);
+ V->setLinkage(GlobalValue::ExternalLinkage);
+ V->clearMetadata();
+ } else
+ // For now we don't resolve or drop aliases. Once we do we'll
+ // need to add support here for creating either a function or
+ // variable declaration, and return the new GlobalValue* for
+ // the caller to use.
+ llvm_unreachable("Expected function or variable");
+ };
+
auto updateLinkage = [&](GlobalValue &GV) {
if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
return;
@@ -532,18 +570,25 @@ void llvm::thinLTOResolveWeakForLinkerModule(
auto NewLinkage = GS->second->linkage();
if (NewLinkage == GV.getLinkage())
return;
- DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
- << GV.getLinkage() << " to " << NewLinkage << "\n");
- GV.setLinkage(NewLinkage);
- // Remove functions converted to available_externally from comdats,
+ // Check for a non-prevailing def that has interposable linkage
+ // (e.g. non-odr weak or linkonce). In that case we can't simply
+ // convert to available_externally, since it would lose the
+ // interposable property and possibly get inlined. Simply drop
+ // the definition in that case.
+ if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
+ GlobalValue::isInterposableLinkage(GV.getLinkage()))
+ ConvertToDeclaration(GV);
+ else {
+ DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
+ << GV.getLinkage() << " to " << NewLinkage << "\n");
+ GV.setLinkage(NewLinkage);
+ }
+ // Remove declarations from comdats, including available_externally
// as this is a declaration for the linker, and will be dropped eventually.
// It is illegal for comdats to contain declarations.
auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
- if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
- assert(GO->hasAvailableExternallyLinkage() &&
- "Expected comdat on definition (possibly available external)");
+ if (GO && GO->isDeclarationForLinker() && GO->hasComdat())
GO->setComdat(nullptr);
- }
};
// Process functions and global now
@@ -562,7 +607,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// the current module.
StringSet<> AsmUndefinedRefs;
ModuleSymbolTable::CollectAsmSymbols(
- Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
+ TheModule,
[&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
if (Flags & object::BasicSymbolRef::SF_Undefined)
AsmUndefinedRefs.insert(Name);
@@ -617,14 +662,12 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// index.
//
Expected<bool> FunctionImporter::importFunctions(
- Module &DestModule, const FunctionImporter::ImportMapTy &ImportList,
- bool ForceImportReferencedDiscardableSymbols) {
+ Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
DEBUG(dbgs() << "Starting import for Module "
<< DestModule.getModuleIdentifier() << "\n");
unsigned ImportedCount = 0;
- // Linker that will be used for importing function
- Linker TheLinker(DestModule);
+ IRMover Mover(DestModule);
// Do the actual import of functions now, one Module at a time
std::set<StringRef> ModuleNameOrderedList;
for (auto &FunctionsToImportPerModule : ImportList) {
@@ -648,7 +691,7 @@ Expected<bool> FunctionImporter::importFunctions(
auto &ImportGUIDs = FunctionsToImportPerModule->second;
// Find the globals to import
- DenseSet<const GlobalValue *> GlobalsToImport;
+ SetVector<GlobalValue *> GlobalsToImport;
for (Function &F : *SrcModule) {
if (!F.hasName())
continue;
@@ -687,6 +730,13 @@ Expected<bool> FunctionImporter::importFunctions(
}
}
for (GlobalAlias &GA : SrcModule->aliases()) {
+ // FIXME: This should eventually be controlled entirely by the summary.
+ if (FunctionImportGlobalProcessing::doImportAsDefinition(
+ &GA, &GlobalsToImport)) {
+ GlobalsToImport.insert(&GA);
+ continue;
+ }
+
if (!GA.hasName())
continue;
auto GUID = GA.getGUID();
@@ -731,12 +781,9 @@ Expected<bool> FunctionImporter::importFunctions(
<< " from " << SrcModule->getSourceFileName() << "\n";
}
- // Instruct the linker that the client will take care of linkonce resolution
- unsigned Flags = Linker::Flags::None;
- if (!ForceImportReferencedDiscardableSymbols)
- Flags |= Linker::Flags::DontForceLinkLinkonceODR;
-
- if (TheLinker.linkInModule(std::move(SrcModule), Flags, &GlobalsToImport))
+ if (Mover.move(std::move(SrcModule), GlobalsToImport.getArrayRef(),
+ [](GlobalValue &, IRMover::ValueAdder) {},
+ /*IsPerformingImport=*/true))
report_fatal_error("Function Import: link error");
ImportedCount += GlobalsToImport.size();
@@ -796,8 +843,7 @@ static bool doImportingForModule(Module &M) {
return loadFile(Identifier, M.getContext());
};
FunctionImporter Importer(*Index, ModuleLoader);
- Expected<bool> Result = Importer.importFunctions(
- M, ImportList, !DontForceImportReferencedDiscardableSymbols);
+ Expected<bool> Result = Importer.importFunctions(M, ImportList);
// FIXME: Probably need to propagate Errors through the pass manager.
if (!Result) {
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 7a04de3d12db..c91e8b454927 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -25,7 +25,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include <unordered_map>
+
using namespace llvm;
#define DEBUG_TYPE "globaldce"
@@ -50,7 +50,14 @@ namespace {
if (skipModule(M))
return false;
+ // We need a minimally functional dummy module analysis manager. It needs
+ // to at least know about the possibility of proxying a function analysis
+ // manager.
+ FunctionAnalysisManager DummyFAM;
ModuleAnalysisManager DummyMAM;
+ DummyMAM.registerPass(
+ [&] { return FunctionAnalysisManagerModuleProxy(DummyFAM); });
+
auto PA = Impl.run(M, DummyMAM);
return !PA.areAllPreserved();
}
@@ -78,9 +85,67 @@ static bool isEmptyFunction(Function *F) {
return RI.getReturnValue() == nullptr;
}
-PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
+/// Compute the set of GlobalValue that depends from V.
+/// The recursion stops as soon as a GlobalValue is met.
+void GlobalDCEPass::ComputeDependencies(Value *V,
+ SmallPtrSetImpl<GlobalValue *> &Deps) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Function *Parent = I->getParent()->getParent();
+ Deps.insert(Parent);
+ } else if (auto *GV = dyn_cast<GlobalValue>(V)) {
+ Deps.insert(GV);
+ } else if (auto *CE = dyn_cast<Constant>(V)) {
+ // Avoid walking the whole tree of a big ConstantExprs multiple times.
+ auto Where = ConstantDependenciesCache.find(CE);
+ if (Where != ConstantDependenciesCache.end()) {
+ auto const &K = Where->second;
+ Deps.insert(K.begin(), K.end());
+ } else {
+ SmallPtrSetImpl<GlobalValue *> &LocalDeps = ConstantDependenciesCache[CE];
+ for (User *CEUser : CE->users())
+ ComputeDependencies(CEUser, LocalDeps);
+ Deps.insert(LocalDeps.begin(), LocalDeps.end());
+ }
+ }
+}
+
+void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) {
+ SmallPtrSet<GlobalValue *, 8> Deps;
+ for (User *User : GV.users())
+ ComputeDependencies(User, Deps);
+ Deps.erase(&GV); // Remove self-reference.
+ for (GlobalValue *GVU : Deps) {
+ GVDependencies.insert(std::make_pair(GVU, &GV));
+ }
+}
+
+/// Mark Global value as Live
+void GlobalDCEPass::MarkLive(GlobalValue &GV,
+ SmallVectorImpl<GlobalValue *> *Updates) {
+ auto const Ret = AliveGlobals.insert(&GV);
+ if (!Ret.second)
+ return;
+
+ if (Updates)
+ Updates->push_back(&GV);
+ if (Comdat *C = GV.getComdat()) {
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ MarkLive(*CM.second, Updates); // Recursion depth is only two because only
+ // globals in the same comdat are visited.
+ }
+}
+
+PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
bool Changed = false;
+ // The algorithm first computes the set L of global variables that are
+ // trivially live. Then it walks the initialization of these variables to
+ // compute the globals used to initialize them, which effectively builds a
+ // directed graph where nodes are global variables, and an edge from A to B
+ // means B is used to initialize A. Finally, it propagates the liveness
+ // information through the graph starting from the nodes in L. Nodes note
+ // marked as alive are discarded.
+
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
@@ -103,21 +168,39 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
// initializer.
if (!GO.isDeclaration() && !GO.hasAvailableExternallyLinkage())
if (!GO.isDiscardableIfUnused())
- GlobalIsNeeded(&GO);
+ MarkLive(GO);
+
+ UpdateGVDependencies(GO);
}
+ // Compute direct dependencies of aliases.
for (GlobalAlias &GA : M.aliases()) {
Changed |= RemoveUnusedGlobalValue(GA);
// Externally visible aliases are needed.
if (!GA.isDiscardableIfUnused())
- GlobalIsNeeded(&GA);
+ MarkLive(GA);
+
+ UpdateGVDependencies(GA);
}
+ // Compute direct dependencies of ifuncs.
for (GlobalIFunc &GIF : M.ifuncs()) {
Changed |= RemoveUnusedGlobalValue(GIF);
// Externally visible ifuncs are needed.
if (!GIF.isDiscardableIfUnused())
- GlobalIsNeeded(&GIF);
+ MarkLive(GIF);
+
+ UpdateGVDependencies(GIF);
+ }
+
+ // Propagate liveness from collected Global Values through the computed
+ // dependencies.
+ SmallVector<GlobalValue *, 8> NewLiveGVs{AliveGlobals.begin(),
+ AliveGlobals.end()};
+ while (!NewLiveGVs.empty()) {
+ GlobalValue *LGV = NewLiveGVs.pop_back_val();
+ for (auto &&GVD : make_range(GVDependencies.equal_range(LGV)))
+ MarkLive(*GVD.second, &NewLiveGVs);
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
@@ -154,7 +237,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
GA.setAliasee(nullptr);
}
- // The third pass drops targets of ifuncs which are dead...
+ // The fourth pass drops targets of ifuncs which are dead...
std::vector<GlobalIFunc*> DeadIFuncs;
for (GlobalIFunc &GIF : M.ifuncs())
if (!AliveGlobals.count(&GIF)) {
@@ -188,7 +271,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
// Make sure that all memory is released
AliveGlobals.clear();
- SeenConstants.clear();
+ ConstantDependenciesCache.clear();
+ GVDependencies.clear();
ComdatMembers.clear();
if (Changed)
@@ -196,60 +280,6 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
-/// GlobalIsNeeded - the specific global value as needed, and
-/// recursively mark anything that it uses as also needed.
-void GlobalDCEPass::GlobalIsNeeded(GlobalValue *G) {
- // If the global is already in the set, no need to reprocess it.
- if (!AliveGlobals.insert(G).second)
- return;
-
- if (Comdat *C = G->getComdat()) {
- for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
- GlobalIsNeeded(CM.second);
- }
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
- // If this is a global variable, we must make sure to add any global values
- // referenced by the initializer to the alive set.
- if (GV->hasInitializer())
- MarkUsedGlobalsAsNeeded(GV->getInitializer());
- } else if (GlobalIndirectSymbol *GIS = dyn_cast<GlobalIndirectSymbol>(G)) {
- // The target of a global alias or ifunc is needed.
- MarkUsedGlobalsAsNeeded(GIS->getIndirectSymbol());
- } else {
- // Otherwise this must be a function object. We have to scan the body of
- // the function looking for constants and global values which are used as
- // operands. Any operands of these types must be processed to ensure that
- // any globals used will be marked as needed.
- Function *F = cast<Function>(G);
-
- for (Use &U : F->operands())
- MarkUsedGlobalsAsNeeded(cast<Constant>(U.get()));
-
- for (BasicBlock &BB : *F)
- for (Instruction &I : BB)
- for (Use &U : I.operands())
- if (GlobalValue *GV = dyn_cast<GlobalValue>(U))
- GlobalIsNeeded(GV);
- else if (Constant *C = dyn_cast<Constant>(U))
- MarkUsedGlobalsAsNeeded(C);
- }
-}
-
-void GlobalDCEPass::MarkUsedGlobalsAsNeeded(Constant *C) {
- if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return GlobalIsNeeded(GV);
-
- // Loop over all of the operands of the constant, adding any globals they
- // use to the list of needed globals.
- for (Use &U : C->operands()) {
- // If we've already processed this constant there's no need to do it again.
- Constant *Op = dyn_cast<Constant>(U);
- if (Op && SeenConstants.insert(Op).second)
- MarkUsedGlobalsAsNeeded(Op);
- }
-}
-
// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
// GlobalValue, looking for the constant pointer ref that may be pointing to it.
// If found, check to see if the constant pointer ref is safe to destroy, and if
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 5b0d5e3bc01e..ade4f21ceb52 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1819,12 +1819,14 @@ static bool processInternalGlobal(
GS.AccessingFunction->doesNotRecurse() &&
isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
LookupDomTree)) {
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+
DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
Type *ElemTy = GV->getValueType();
// FIXME: Pass Global's alignment when globals have alignment
- AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
+ AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
@@ -1977,7 +1979,7 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
+static AttributeList StripNest(LLVMContext &C, const AttributeList &Attrs) {
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
unsigned Index = Attrs.getSlotIndex(i);
if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
@@ -2387,7 +2389,7 @@ OptimizeGlobalAliases(Module &M,
}
static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
- LibFunc::Func F = LibFunc::cxa_atexit;
+ LibFunc F = LibFunc_cxa_atexit;
if (!TLI->has(F))
return nullptr;
@@ -2396,7 +2398,7 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
return nullptr;
// Make sure that the function has the correct prototype.
- if (!TLI->getLibFunc(*Fn, F) || F != LibFunc::cxa_atexit)
+ if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
return nullptr;
return Fn;
diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp
index bbbd096e89c0..4705ebe265ae 100644
--- a/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/lib/Transforms/IPO/GlobalSplit.cpp
@@ -85,7 +85,16 @@ bool splitGlobal(GlobalVariable &GV) {
uint64_t ByteOffset = cast<ConstantInt>(
cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
->getZExtValue();
- if (ByteOffset < SplitBegin || ByteOffset >= SplitEnd)
+ // Type metadata may be attached one byte after the end of the vtable, for
+ // classes without virtual methods in Itanium ABI. AFAIK, it is never
+ // attached to the first byte of a vtable. Subtract one to get the right
+ // slice.
+ // This is making an assumption that vtable groups are the only kinds of
+ // global variables that !type metadata can be attached to, and that they
+ // are either Itanium ABI vtable groups or contain a single vtable (i.e.
+ // Microsoft ABI vtables).
+ uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1;
+ if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd)
continue;
SplitGV->addMetadata(
LLVMContext::MD_type,
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 916135e33cd5..349807496dc2 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -136,7 +136,13 @@ static bool PropagateConstantReturn(Function &F) {
// For more details, see GlobalValue::mayBeDerefined.
if (!F.isDefinitionExact())
return false;
-
+
+ // Don't touch naked functions. The may contain asm returning
+ // value we don't see, so we may end up interprocedurally propagating
+ // the return value incorrectly.
+ if (F.hasFnAttribute(Attribute::Naked))
+ return false;
+
// Check to see if this function returns a constant.
SmallVector<Value *,4> RetVals;
StructType *STy = dyn_cast<StructType>(F.getReturnType());
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 1770445b413f..50e7cc89a3b3 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -48,7 +48,7 @@ public:
}
explicit SimpleInliner(InlineParams Params)
- : LegacyInlinerBase(ID), Params(Params) {
+ : LegacyInlinerBase(ID), Params(std::move(Params)) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -61,7 +61,8 @@ public:
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache, PSI);
+ return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
+ /*GetBFI=*/None, PSI);
}
bool runOnSCC(CallGraphSCC &SCC) override;
@@ -92,8 +93,12 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
}
Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
- unsigned SizeOptLevel) {
- return new SimpleInliner(llvm::getInlineParams(OptLevel, SizeOptLevel));
+ unsigned SizeOptLevel,
+ bool DisableInlineHotCallSite) {
+ auto Param = llvm::getInlineParams(OptLevel, SizeOptLevel);
+ if (DisableInlineHotCallSite)
+ Param.HotCallSiteThreshold = 0;
+ return new SimpleInliner(Param);
}
Pass *llvm::createFunctionInliningPass(InlineParams &Params) {
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 3f4731c937d1..6c83c99ae3be 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
@@ -260,8 +261,8 @@ static bool InlineCallIfPossible(
/// Return true if inlining of CS can block the caller from being
/// inlined which is proved to be more beneficial. \p IC is the
/// estimated inline cost associated with callsite \p CS.
-/// \p TotalAltCost will be set to the estimated cost of inlining the caller
-/// if \p CS is suppressed for inlining.
+/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
+/// caller if \p CS is suppressed for inlining.
static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
int &TotalSecondaryCost,
@@ -288,7 +289,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// treating them as truly abstract units etc.
TotalSecondaryCost = 0;
// The candidate cost to be imposed upon the current function.
- int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
+ int CandidateCost = IC.getCost() - 1;
// This bool tracks what happens if we do NOT inline C into B.
bool callerWillBeRemoved = Caller->hasLocalLinkage();
// This bool tracks what happens if we DO inline C into B.
@@ -325,7 +326,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (callerWillBeRemoved && !Caller->use_empty())
+ if (callerWillBeRemoved && !Caller->hasOneUse())
TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
@@ -342,6 +343,7 @@ static bool shouldInline(CallSite CS,
InlineCost IC = GetInlineCost(CS);
Instruction *Call = CS.getInstruction();
Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
@@ -355,19 +357,20 @@ static bool shouldInline(CallSite CS,
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", Callee)
- << " should never be inlined (cost=never)");
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ << NV("Callee", Callee) << " not inlined into "
+ << NV("Caller", Caller)
+ << " because it should never be inlined (cost=never)");
return false;
}
- Function *Caller = CS.getCaller();
if (!IC) {
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", Callee) << " too costly to inline (cost="
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
+ << NV("Callee", Callee) << " not inlined into "
+ << NV("Caller", Caller) << " because too costly to inline (cost="
<< NV("Cost", IC.getCost()) << ", threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return false;
@@ -378,8 +381,8 @@ static bool shouldInline(CallSite CS,
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
<< " Cost = " << IC.getCost()
<< ", outer Cost = " << TotalSecondaryCost << '\n');
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
- "IncreaseCostInOtherContexts", Call)
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
+ Call)
<< "Not inlining. Cost of inlining " << NV("Callee", Callee)
<< " increases the cost of inlining " << NV("Caller", Caller)
<< " in other contexts");
@@ -552,16 +555,11 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// If the policy determines that we should inline this function,
// try to do so.
- using namespace ore;
- if (!shouldInline(CS, GetInlineCost, ORE)) {
- ORE.emit(
- OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller));
+ if (!shouldInline(CS, GetInlineCost, ORE))
continue;
- }
// Attempt to inline the function.
+ using namespace ore;
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime, AARGetter,
ImportedFunctionsStats)) {
@@ -638,22 +636,12 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
ACT = &getAnalysis<AssumptionCacheTracker>();
PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
[this](CallSite CS) { return getInlineCost(CS); },
- AARGetter, ImportedFunctionsStats);
+ LegacyAARGetter(*this), ImportedFunctionsStats);
}
/// Remove now-dead linkonce functions at the end of
@@ -750,9 +738,6 @@ bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
- .getManager();
const ModuleAnalysisManager &MAM =
AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
bool Changed = false;
@@ -761,35 +746,52 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Module &M = *InitialC.begin()->getFunction().getParent();
ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&](Function &F) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(F);
- };
-
- // Setup the data structure used to plumb customization into the
- // `InlineFunction` routine.
- InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+ // We use a single common worklist for calls across the entire SCC. We
+ // process these in-order and append new calls introduced during inlining to
+ // the end.
+ //
+ // Note that this particular order of processing is actually critical to
+ // avoid very bad behaviors. Consider *highly connected* call graphs where
+ // each function contains a small amonut of code and a couple of calls to
+ // other functions. Because the LLVM inliner is fundamentally a bottom-up
+ // inliner, it can handle gracefully the fact that these all appear to be
+ // reasonable inlining candidates as it will flatten things until they become
+ // too big to inline, and then move on and flatten another batch.
+ //
+ // However, when processing call edges *within* an SCC we cannot rely on this
+ // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
+ // functions we can end up incrementally inlining N calls into each of
+ // N functions because each incremental inlining decision looks good and we
+ // don't have a topological ordering to prevent explosions.
+ //
+ // To compensate for this, we don't process transitive edges made immediate
+ // by inlining until we've done one pass of inlining across the entire SCC.
+ // Large, highly connected SCCs still lead to some amount of code bloat in
+ // this model, but it is uniformly spread across all the functions in the SCC
+ // and eventually they all become too large to inline, rather than
+ // incrementally maknig a single function grow in a super linear fashion.
+ SmallVector<std::pair<CallSite, int>, 16> Calls;
- auto GetInlineCost = [&](CallSite CS) {
- Function &Callee = *CS.getCalledFunction();
- auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
- return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
- };
+ // Populate the initial list of calls in this SCC.
+ for (auto &N : InitialC) {
+ // We want to generally process call sites top-down in order for
+ // simplifications stemming from replacing the call with the returned value
+ // after inlining to be visible to subsequent inlining decisions.
+ // FIXME: Using instructions sequence is a really bad way to do this.
+ // Instead we should do an actual RPO walk of the function body.
+ for (Instruction &I : instructions(N.getFunction()))
+ if (auto CS = CallSite(&I))
+ if (Function *Callee = CS.getCalledFunction())
+ if (!Callee->isDeclaration())
+ Calls.push_back({CS, -1});
+ }
+ if (Calls.empty())
+ return PreservedAnalyses::all();
- // We use a worklist of nodes to process so that we can handle if the SCC
- // structure changes and some nodes are no longer part of the current SCC. We
- // also need to use an updatable pointer for the SCC as a consequence.
- SmallVector<LazyCallGraph::Node *, 16> Nodes;
- for (auto &N : InitialC)
- Nodes.push_back(&N);
+ // Capture updatable variables for the current SCC and RefSCC.
auto *C = &InitialC;
auto *RC = &C->getOuterRefSCC();
- // We also use a secondary worklist of call sites within a particular node to
- // allow quickly continuing to inline through newly inlined call sites where
- // possible.
- SmallVector<std::pair<CallSite, int>, 16> Calls;
-
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
// infinite inlining in some obscure cases. To represent this, we use an
@@ -805,34 +807,58 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// defer deleting these to make it easier to handle the call graph updates.
SmallVector<Function *, 4> DeadFunctions;
- do {
- auto &N = *Nodes.pop_back_val();
+ // Loop forward over all of the calls. Note that we cannot cache the size as
+ // inlining can introduce new calls that need to be processed.
+ for (int i = 0; i < (int)Calls.size(); ++i) {
+ // We expect the calls to typically be batched with sequences of calls that
+ // have the same caller, so we first set up some shared infrastructure for
+ // this caller. We also do any pruning we can at this layer on the caller
+ // alone.
+ Function &F = *Calls[i].first.getCaller();
+ LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- Function &F = N.getFunction();
if (F.hasFnAttribute(Attribute::OptimizeNone))
continue;
+ DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
+
+ // Get a FunctionAnalysisManager via a proxy for this particular node. We
+ // do this each time we visit a node as the SCC may have changed and as
+ // we're going to mutate this particular function we want to make sure the
+ // proxy is in place to forward any invalidation events. We can use the
+ // manager we get here for looking up results for functions other than this
+ // node however because those functions aren't going to be mutated by this
+ // pass.
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
+ .getManager();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ auto GetInlineCost = [&](CallSite CS) {
+ Function &Callee = *CS.getCalledFunction();
+ auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
+ return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
+ PSI);
+ };
+
// Get the remarks emission analysis for the caller.
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- // We want to generally process call sites top-down in order for
- // simplifications stemming from replacing the call with the returned value
- // after inlining to be visible to subsequent inlining decisions. So we
- // walk the function backwards and then process the back of the vector.
- // FIXME: Using reverse is a really bad way to do this. Instead we should
- // do an actual PO walk of the function body.
- for (Instruction &I : reverse(instructions(F)))
- if (auto CS = CallSite(&I))
- if (Function *Callee = CS.getCalledFunction())
- if (!Callee->isDeclaration())
- Calls.push_back({CS, -1});
-
+ // Now process as many calls as we have within this caller in the sequnece.
+ // We bail out as soon as the caller has to change so we can update the
+ // call graph and prepare the context of that new caller.
bool DidInline = false;
- while (!Calls.empty()) {
+ for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
int InlineHistoryID;
CallSite CS;
- std::tie(CS, InlineHistoryID) = Calls.pop_back_val();
+ std::tie(CS, InlineHistoryID) = Calls[i];
Function &Callee = *CS.getCalledFunction();
if (InlineHistoryID != -1 &&
@@ -843,6 +869,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (!shouldInline(CS, GetInlineCost, ORE))
continue;
+ // Setup the data structure used to plumb customization into the
+ // `InlineFunction` routine.
+ InlineFunctionInfo IFI(
+ /*cg=*/nullptr, &GetAssumptionCache,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(Callee));
+
if (!InlineFunction(CS, IFI))
continue;
DidInline = true;
@@ -870,6 +903,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// made dead by this operation on other functions).
Callee.removeDeadConstantUsers();
if (Callee.use_empty()) {
+ Calls.erase(
+ std::remove_if(Calls.begin() + i + 1, Calls.end(),
+ [&Callee](const std::pair<CallSite, int> &Call) {
+ return Call.first.getCaller() == &Callee;
+ }),
+ Calls.end());
// Clear the body and queue the function itself for deletion when we
// finish inlining and call graph updates.
// Note that after this point, it is an error to do anything other
@@ -882,6 +921,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
+ // Back the call index up by one to put us in a good position to go around
+ // the outer loop.
+ --i;
+
if (!DidInline)
continue;
Changed = true;
@@ -896,8 +939,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// below.
for (Function *InlinedCallee : InlinedCallees) {
LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
- for (LazyCallGraph::Edge &E : CalleeN)
- RC->insertTrivialRefEdge(N, *E.getNode());
+ for (LazyCallGraph::Edge &E : *CalleeN)
+ RC->insertTrivialRefEdge(N, E.getNode());
}
InlinedCallees.clear();
@@ -908,8 +951,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// re-use the exact same logic for updating the call graph to reflect the
// change..
C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+ DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
RC = &C->getOuterRefSCC();
- } while (!Nodes.empty());
+ }
// Now that we've finished inlining all of the calls across this SCC, delete
// all of the trivially dead functions, updating the call graph and the CGSCC
@@ -920,8 +964,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// sets.
for (Function *DeadF : DeadFunctions) {
// Get the necessary information out of the call graph and nuke the
- // function there.
+ // function there. Also, cclear out any cached analyses.
auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
+ .getManager();
+ FAM.clear(*DeadF);
+ AM.clear(DeadC);
auto &DeadRC = DeadC.getOuterRefSCC();
CG.removeDeadFunction(*DeadF);
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index deb7e819480b..785207efbe5c 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -42,8 +42,6 @@
using namespace llvm;
using namespace lowertypetests;
-using SummaryAction = LowerTypeTestsSummaryAction;
-
#define DEBUG_TYPE "lowertypetests"
STATISTIC(ByteArraySizeBits, "Byte array size in bits");
@@ -57,13 +55,13 @@ static cl::opt<bool> AvoidReuse(
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));
-static cl::opt<SummaryAction> ClSummaryAction(
+static cl::opt<PassSummaryAction> ClSummaryAction(
"lowertypetests-summary-action",
cl::desc("What to do with the summary when running this pass"),
- cl::values(clEnumValN(SummaryAction::None, "none", "Do nothing"),
- clEnumValN(SummaryAction::Import, "import",
+ cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
+ clEnumValN(PassSummaryAction::Import, "import",
"Import typeid resolutions from summary and globals"),
- clEnumValN(SummaryAction::Export, "export",
+ clEnumValN(PassSummaryAction::Export, "export",
"Export typeid resolutions to summary and globals")),
cl::Hidden);
@@ -234,8 +232,8 @@ public:
class LowerTypeTestsModule {
Module &M;
- SummaryAction Action;
- ModuleSummaryIndex *Summary;
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
@@ -253,15 +251,21 @@ class LowerTypeTestsModule {
// Indirect function call index assignment counter for WebAssembly
uint64_t IndirectIndex = 1;
- // Mapping from type identifiers to the call sites that test them.
- DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
+ // Mapping from type identifiers to the call sites that test them, as well as
+ // whether the type identifier needs to be exported to ThinLTO backends as
+ // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId).
+ struct TypeIdUserInfo {
+ std::vector<CallInst *> CallSites;
+ bool IsExported = false;
+ };
+ DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers;
/// This structure describes how to lower type tests for a particular type
/// identifier. It is either built directly from the global analysis (during
/// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
/// identifier summaries and external symbol references (in ThinLTO backends).
struct TypeIdLowering {
- TypeTestResolution::Kind TheKind;
+ TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;
/// All except Unsat: the start address within the combined global.
Constant *OffsetedGlobal;
@@ -274,9 +278,6 @@ class LowerTypeTestsModule {
/// covering members of this type identifier as a multiple of 2^AlignLog2.
Constant *SizeM1;
- /// ByteArray, Inline, AllOnes: range of SizeM1 expressed as a bit width.
- unsigned SizeM1BitWidth;
-
/// ByteArray: the byte array to test the address against.
Constant *TheByteArray;
@@ -291,6 +292,10 @@ class LowerTypeTestsModule {
Function *WeakInitializerFn = nullptr;
+ void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
+ TypeIdLowering importTypeId(StringRef TypeId);
+ void importTypeTest(CallInst *CI);
+
BitSetInfo
buildBitSet(Metadata *TypeId,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
@@ -327,8 +332,8 @@ class LowerTypeTestsModule {
void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);
public:
- LowerTypeTestsModule(Module &M, SummaryAction Action,
- ModuleSummaryIndex *Summary);
+ LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary);
bool lower();
// Lower the module using the action and summary passed as command line
@@ -341,15 +346,17 @@ struct LowerTypeTests : public ModulePass {
bool UseCommandLine = false;
- SummaryAction Action;
- ModuleSummaryIndex *Summary;
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
- LowerTypeTests(SummaryAction Action, ModuleSummaryIndex *Summary)
- : ModulePass(ID), Action(Action), Summary(Summary) {
+ LowerTypeTests(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : ModulePass(ID), ExportSummary(ExportSummary),
+ ImportSummary(ImportSummary) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
@@ -358,7 +365,7 @@ struct LowerTypeTests : public ModulePass {
return false;
if (UseCommandLine)
return LowerTypeTestsModule::runForTesting(M);
- return LowerTypeTestsModule(M, Action, Summary).lower();
+ return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
}
};
@@ -368,9 +375,10 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
false)
char LowerTypeTests::ID = 0;
-ModulePass *llvm::createLowerTypeTestsPass(SummaryAction Action,
- ModuleSummaryIndex *Summary) {
- return new LowerTypeTests(Action, Summary);
+ModulePass *
+llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary) {
+ return new LowerTypeTests(ExportSummary, ImportSummary);
}
/// Build a bit set for TypeId using the object layouts in
@@ -494,10 +502,11 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
} else {
Constant *ByteArray = TIL.TheByteArray;
- if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
+ if (!LinkerSubsectionsViaSymbols && AvoidReuse && !ImportSummary) {
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
+ // This won't work when importing because TheByteArray is external.
ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
"bits_use", ByteArray, &M);
}
@@ -593,8 +602,7 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
IntPtrTy));
Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
- Constant *BitSizeConst = ConstantExpr::getZExt(TIL.SizeM1, IntPtrTy);
- Value *OffsetInRange = B.CreateICmpULE(BitOffset, BitSizeConst);
+ Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1);
// If the bit set is all ones, testing against it is unnecessary.
if (TIL.TheKind == TypeTestResolution::AllOnes)
@@ -687,6 +695,123 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
}
}
+/// Export the given type identifier so that ThinLTO backends may import it.
+/// Type identifiers are exported by adding coarse-grained information about how
+/// to test the type identifier to the summary, and creating symbols in the
+/// object file (aliases and absolute symbols) containing fine-grained
+/// information about the type identifier.
+void LowerTypeTestsModule::exportTypeId(StringRef TypeId,
+ const TypeIdLowering &TIL) {
+ TypeTestResolution &TTRes =
+ ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes;
+ TTRes.TheKind = TIL.TheKind;
+
+ auto ExportGlobal = [&](StringRef Name, Constant *C) {
+ GlobalAlias *GA =
+ GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
+ "__typeid_" + TypeId + "_" + Name, C, &M);
+ GA->setVisibility(GlobalValue::HiddenVisibility);
+ };
+
+ if (TIL.TheKind != TypeTestResolution::Unsat)
+ ExportGlobal("global_addr", TIL.OffsetedGlobal);
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray ||
+ TIL.TheKind == TypeTestResolution::Inline ||
+ TIL.TheKind == TypeTestResolution::AllOnes) {
+ ExportGlobal("align", ConstantExpr::getIntToPtr(TIL.AlignLog2, Int8PtrTy));
+ ExportGlobal("size_m1", ConstantExpr::getIntToPtr(TIL.SizeM1, Int8PtrTy));
+
+ uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1;
+ if (TIL.TheKind == TypeTestResolution::Inline)
+ TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6;
+ else
+ TTRes.SizeM1BitWidth = (BitSize <= 128) ? 7 : 32;
+ }
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray) {
+ ExportGlobal("byte_array", TIL.TheByteArray);
+ ExportGlobal("bit_mask", TIL.BitMask);
+ }
+
+ if (TIL.TheKind == TypeTestResolution::Inline)
+ ExportGlobal("inline_bits",
+ ConstantExpr::getIntToPtr(TIL.InlineBits, Int8PtrTy));
+}
+
+LowerTypeTestsModule::TypeIdLowering
+LowerTypeTestsModule::importTypeId(StringRef TypeId) {
+ const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId);
+ if (!TidSummary)
+ return {}; // Unsat: no globals match this type id.
+ const TypeTestResolution &TTRes = TidSummary->TTRes;
+
+ TypeIdLowering TIL;
+ TIL.TheKind = TTRes.TheKind;
+
+ auto ImportGlobal = [&](StringRef Name, unsigned AbsWidth) {
+ Constant *C =
+ M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(), Int8Ty);
+ auto *GV = dyn_cast<GlobalVariable>(C);
+ // We only need to set metadata if the global is newly created, in which
+ // case it would not have hidden visibility.
+ if (!GV || GV->getVisibility() == GlobalValue::HiddenVisibility)
+ return C;
+
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
+ auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
+ auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
+ GV->setMetadata(LLVMContext::MD_absolute_symbol,
+ MDNode::get(M.getContext(), {MinC, MaxC}));
+ };
+ if (AbsWidth == IntPtrTy->getBitWidth())
+ SetAbsRange(~0ull, ~0ull); // Full set.
+ else if (AbsWidth)
+ SetAbsRange(0, 1ull << AbsWidth);
+ return C;
+ };
+
+ if (TIL.TheKind != TypeTestResolution::Unsat)
+ TIL.OffsetedGlobal = ImportGlobal("global_addr", 0);
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray ||
+ TIL.TheKind == TypeTestResolution::Inline ||
+ TIL.TheKind == TypeTestResolution::AllOnes) {
+ TIL.AlignLog2 = ConstantExpr::getPtrToInt(ImportGlobal("align", 8), Int8Ty);
+ TIL.SizeM1 = ConstantExpr::getPtrToInt(
+ ImportGlobal("size_m1", TTRes.SizeM1BitWidth), IntPtrTy);
+ }
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray) {
+ TIL.TheByteArray = ImportGlobal("byte_array", 0);
+ TIL.BitMask = ImportGlobal("bit_mask", 8);
+ }
+
+ if (TIL.TheKind == TypeTestResolution::Inline)
+ TIL.InlineBits = ConstantExpr::getPtrToInt(
+ ImportGlobal("inline_bits", 1 << TTRes.SizeM1BitWidth),
+ TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty);
+
+ return TIL;
+}
+
+void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
+ auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeIdMDVal)
+ report_fatal_error("Second argument of llvm.type.test must be metadata");
+
+ auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata());
+ if (!TypeIdStr)
+ report_fatal_error(
+ "Second argument of llvm.type.test must be a metadata string");
+
+ TypeIdLowering TIL = importTypeId(TypeIdStr->getString());
+ Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+}
+
void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
@@ -708,16 +833,12 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
+ TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1);
if (BSI.isAllOnes()) {
TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
: TypeTestResolution::AllOnes;
- TIL.SizeM1BitWidth = (BSI.BitSize <= 128) ? 7 : 32;
- TIL.SizeM1 = ConstantInt::get((BSI.BitSize <= 128) ? Int8Ty : Int32Ty,
- BSI.BitSize - 1);
} else if (BSI.BitSize <= 64) {
TIL.TheKind = TypeTestResolution::Inline;
- TIL.SizeM1BitWidth = (BSI.BitSize <= 32) ? 5 : 6;
- TIL.SizeM1 = ConstantInt::get(Int8Ty, BSI.BitSize - 1);
uint64_t InlineBits = 0;
for (auto Bit : BSI.Bits)
InlineBits |= uint64_t(1) << Bit;
@@ -728,17 +849,19 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
(BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
} else {
TIL.TheKind = TypeTestResolution::ByteArray;
- TIL.SizeM1BitWidth = (BSI.BitSize <= 128) ? 7 : 32;
- TIL.SizeM1 = ConstantInt::get((BSI.BitSize <= 128) ? Int8Ty : Int32Ty,
- BSI.BitSize - 1);
++NumByteArraysCreated;
ByteArrayInfo *BAI = createByteArray(BSI);
TIL.TheByteArray = BAI->ByteArray;
TIL.BitMask = BAI->MaskGlobal;
}
+ TypeIdUserInfo &TIUI = TypeIdUsers[TypeId];
+
+ if (TIUI.IsExported)
+ exportTypeId(cast<MDString>(TypeId)->getString(), TIL);
+
// Lower each call to llvm.type.test for this type identifier.
- for (CallInst *CI : TypeTestCallSites[TypeId]) {
+ for (CallInst *CI : TIUI.CallSites) {
++NumTypeTestCallsLowered;
Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
CI->replaceAllUsesWith(Lowered);
@@ -757,9 +880,9 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
report_fatal_error(
"A member of a type identifier may not have an explicit section");
- if (isa<GlobalVariable>(GO) && GO->isDeclarationForLinker())
- report_fatal_error(
- "A global var member of a type identifier must be a definition");
+ // FIXME: We previously checked that global var member of a type identifier
+ // must be a definition, but the IR linker may leave type metadata on
+ // declarations. We should restore this check after fixing PR31759.
auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0));
if (!OffsetConstMD)
@@ -1173,13 +1296,11 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
}
/// Lower all type tests in this module.
-LowerTypeTestsModule::LowerTypeTestsModule(Module &M, SummaryAction Action,
- ModuleSummaryIndex *Summary)
- : M(M), Action(Action), Summary(Summary) {
- // FIXME: Use these fields.
- (void)this->Action;
- (void)this->Summary;
-
+LowerTypeTestsModule::LowerTypeTestsModule(
+ Module &M, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+ assert(!(ExportSummary && ImportSummary));
Triple TargetTriple(M.getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
Arch = TargetTriple.getArch();
@@ -1203,7 +1324,11 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
ExitOnErr(errorCodeToError(In.error()));
}
- bool Changed = LowerTypeTestsModule(M, ClSummaryAction, &Summary).lower();
+ bool Changed =
+ LowerTypeTestsModule(
+ M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
+ ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
+ .lower();
if (!ClWriteSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
@@ -1222,9 +1347,18 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
- if (!TypeTestFunc || TypeTestFunc->use_empty())
+ if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary)
return false;
+ if (ImportSummary) {
+ for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
+ UI != UE;) {
+ auto *CI = cast<CallInst>((*UI++).getUser());
+ importTypeTest(CI);
+ }
+ return true;
+ }
+
// Equivalence class set containing type identifiers and the globals that
// reference them. This is used to partition the set of type identifiers in
// the module into disjoint sets.
@@ -1248,6 +1382,9 @@ bool LowerTypeTestsModule::lower() {
unsigned I = 0;
SmallVector<MDNode *, 2> Types;
for (GlobalObject &GO : M.global_objects()) {
+ if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
+ continue;
+
Types.clear();
GO.getMetadata(LLVMContext::MD_type, Types);
if (Types.empty())
@@ -1262,33 +1399,57 @@ bool LowerTypeTestsModule::lower() {
}
}
- for (const Use &U : TypeTestFunc->uses()) {
- auto CI = cast<CallInst>(U.getUser());
+ auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & {
+ // Add the call site to the list of call sites for this type identifier. We
+ // also use TypeIdUsers to keep track of whether we have seen this type
+ // identifier before. If we have, we don't need to re-add the referenced
+ // globals to the equivalence class.
+ auto Ins = TypeIdUsers.insert({TypeId, {}});
+ if (Ins.second) {
+ // Add the type identifier to the equivalence class.
+ GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId);
+ GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+ // Add the referenced globals to the type identifier's equivalence class.
+ for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals)
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
+ }
+
+ return Ins.first->second;
+ };
- auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
- if (!BitSetMDVal)
- report_fatal_error("Second argument of llvm.type.test must be metadata");
- auto BitSet = BitSetMDVal->getMetadata();
+ if (TypeTestFunc) {
+ for (const Use &U : TypeTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
- // Add the call site to the list of call sites for this type identifier. We
- // also use TypeTestCallSites to keep track of whether we have seen this
- // type identifier before. If we have, we don't need to re-add the
- // referenced globals to the equivalence class.
- std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator, bool>
- Ins = TypeTestCallSites.insert(
- std::make_pair(BitSet, std::vector<CallInst *>()));
- Ins.first->second.push_back(CI);
- if (!Ins.second)
- continue;
+ auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeIdMDVal)
+ report_fatal_error("Second argument of llvm.type.test must be metadata");
+ auto TypeId = TypeIdMDVal->getMetadata();
+ AddTypeIdUse(TypeId).CallSites.push_back(CI);
+ }
+ }
- // Add the type identifier to the equivalence class.
- GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
- GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+ if (ExportSummary) {
+ DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
+ for (auto &P : TypeIdInfo) {
+ if (auto *TypeId = dyn_cast<MDString>(P.first))
+ MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
+ TypeId);
+ }
- // Add the referenced globals to the type identifier's equivalence class.
- for (GlobalTypeMember *GTM : TypeIdInfo[BitSet].RefGlobals)
- CurSet = GlobalClasses.unionSets(
- CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
+ for (auto &P : *ExportSummary) {
+ for (auto &S : P.second) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ // FIXME: Only add live functions.
+ for (GlobalValue::GUID G : FS->type_tests())
+ for (Metadata *MD : MetadataByGUID[G])
+ AddTypeIdUse(MD).IsExported = true;
+ }
+ }
}
if (GlobalClasses.empty())
@@ -1349,8 +1510,9 @@ bool LowerTypeTestsModule::lower() {
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
- bool Changed =
- LowerTypeTestsModule(M, SummaryAction::None, /*Summary=*/nullptr).lower();
+ bool Changed = LowerTypeTestsModule(M, /*ExportSummary=*/nullptr,
+ /*ImportSummary=*/nullptr)
+ .lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index e0bb0eb42b59..771770ddc060 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -96,8 +96,10 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
@@ -127,6 +129,26 @@ static cl::opt<unsigned> NumFunctionsForSanityCheck(
"'0' disables this check. Works only with '-debug' key."),
cl::init(0), cl::Hidden);
+// Under option -mergefunc-preserve-debug-info we:
+// - Do not create a new function for a thunk.
+// - Retain the debug info for a thunk's parameters (and associated
+// instructions for the debug info) from the entry block.
+// Note: -debug will display the algorithm at work.
+// - Create debug-info for the call (to the shared implementation) made by
+// a thunk and its return value.
+// - Erase the rest of the function, retaining the (minimally sized) entry
+// block to create a thunk.
+// - Preserve a thunk's call site to point to the thunk even when both occur
+// within the same translation unit, to aid debugability. Note that this
+// behaviour differs from the underlying -mergefunc implementation which
+// modifies the thunk's call site to point to the shared implementation
+// when both occur within the same translation unit.
+static cl::opt<bool>
+ MergeFunctionsPDI("mergefunc-preserve-debug-info", cl::Hidden,
+ cl::init(false),
+ cl::desc("Preserve debug info in thunk when mergefunc "
+ "transformations are made."));
+
namespace {
class FunctionNode {
@@ -215,8 +237,21 @@ private:
/// Replace G with a thunk or an alias to F. Deletes G.
void writeThunkOrAlias(Function *F, Function *G);
- /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
- /// of G with bitcast(F). Deletes G.
+ /// Fill PDIUnrelatedWL with instructions from the entry block that are
+ /// unrelated to parameter related debug info.
+ void filterInstsUnrelatedToPDI(BasicBlock *GEntryBlock,
+ std::vector<Instruction *> &PDIUnrelatedWL);
+
+ /// Erase the rest of the CFG (i.e. barring the entry block).
+ void eraseTail(Function *G);
+
+ /// Erase the instructions in PDIUnrelatedWL as they are unrelated to the
+ /// parameter debug info, from the entry block.
+ void eraseInstsUnrelatedToPDI(std::vector<Instruction *> &PDIUnrelatedWL);
+
+ /// Replace G with a simple tail call to bitcast(F). Also (unless
+ /// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
+ /// delete G.
void writeThunk(Function *F, Function *G);
/// Replace G with an alias to F. Deletes G.
@@ -269,8 +304,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
if (Res1 != -Res2) {
dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
<< "\n";
- F1->dump();
- F2->dump();
+ dbgs() << *F1 << '\n' << *F2 << '\n';
Valid = false;
}
@@ -305,9 +339,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
<< TripleNumber << "\n";
dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
<< Res4 << "\n";
- F1->dump();
- F2->dump();
- F3->dump();
+ dbgs() << *F1 << '\n' << *F2 << '\n' << *F3 << '\n';
Valid = false;
}
}
@@ -400,19 +432,15 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
// Transferring other attributes may help other optimizations, but that
// should be done uniformly and not in this ad-hoc way.
auto &Context = New->getContext();
- auto NewFuncAttrs = New->getAttributes();
- auto CallSiteAttrs = CS.getAttributes();
-
- CallSiteAttrs = CallSiteAttrs.addAttributes(
- Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
-
- for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
- AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
- if (Attrs.getNumSlots())
- CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
- }
-
- CS.setAttributes(CallSiteAttrs);
+ auto NewPAL = New->getAttributes();
+ SmallVector<AttributeSet, 4> NewArgAttrs;
+ for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++)
+ NewArgAttrs.push_back(NewPAL.getParamAttributes(argIdx));
+ // Don't transfer attributes from the function to the callee. Function
+ // attributes typically aren't relevant to the calling convention or ABI.
+ CS.setAttributes(AttributeList::get(Context, /*FnAttrs=*/AttributeSet(),
+ NewPAL.getRetAttributes(),
+ NewArgAttrs));
remove(CS.getInstruction()->getParent()->getParent());
U->set(BitcastNew);
@@ -461,51 +489,242 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
return Builder.CreateBitCast(V, DestTy);
}
-// Replace G with a simple tail call to bitcast(F). Also replace direct uses
-// of G with bitcast(F). Deletes G.
+// Erase the instructions in PDIUnrelatedWL as they are unrelated to the
+// parameter debug info, from the entry block.
+void MergeFunctions::eraseInstsUnrelatedToPDI(
+ std::vector<Instruction *> &PDIUnrelatedWL) {
+
+ DEBUG(dbgs() << " Erasing instructions (in reverse order of appearance in "
+ "entry block) unrelated to parameter debug info from entry "
+ "block: {\n");
+ while (!PDIUnrelatedWL.empty()) {
+ Instruction *I = PDIUnrelatedWL.back();
+ DEBUG(dbgs() << " Deleting Instruction: ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ I->eraseFromParent();
+ PDIUnrelatedWL.pop_back();
+ }
+ DEBUG(dbgs() << " } // Done erasing instructions unrelated to parameter "
+ "debug info from entry block. \n");
+}
+
+// Reduce G to its entry block.
+void MergeFunctions::eraseTail(Function *G) {
+
+ std::vector<BasicBlock *> WorklistBB;
+ for (Function::iterator BBI = std::next(G->begin()), BBE = G->end();
+ BBI != BBE; ++BBI) {
+ BBI->dropAllReferences();
+ WorklistBB.push_back(&*BBI);
+ }
+ while (!WorklistBB.empty()) {
+ BasicBlock *BB = WorklistBB.back();
+ BB->eraseFromParent();
+ WorklistBB.pop_back();
+ }
+}
+
+// We are interested in the following instructions from the entry block as being
+// related to parameter debug info:
+// - @llvm.dbg.declare
+// - stores from the incoming parameters to locations on the stack-frame
+// - allocas that create these locations on the stack-frame
+// - @llvm.dbg.value
+// - the entry block's terminator
+// The rest are unrelated to debug info for the parameters; fill up
+// PDIUnrelatedWL with such instructions.
+void MergeFunctions::filterInstsUnrelatedToPDI(
+ BasicBlock *GEntryBlock, std::vector<Instruction *> &PDIUnrelatedWL) {
+
+ std::set<Instruction *> PDIRelated;
+ for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end();
+ BI != BIE; ++BI) {
+ if (auto *DVI = dyn_cast<DbgValueInst>(&*BI)) {
+ DEBUG(dbgs() << " Deciding: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ DILocalVariable *DILocVar = DVI->getVariable();
+ if (DILocVar->isParameter()) {
+ DEBUG(dbgs() << " Include (parameter): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(&*BI);
+ } else {
+ DEBUG(dbgs() << " Delete (!parameter): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ } else if (auto *DDI = dyn_cast<DbgDeclareInst>(&*BI)) {
+ DEBUG(dbgs() << " Deciding: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ DILocalVariable *DILocVar = DDI->getVariable();
+ if (DILocVar->isParameter()) {
+ DEBUG(dbgs() << " Parameter: ");
+ DEBUG(DILocVar->print(dbgs()));
+ AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ if (AI) {
+ DEBUG(dbgs() << " Processing alloca users: ");
+ DEBUG(dbgs() << "\n");
+ for (User *U : AI->users()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (Value *Arg = SI->getValueOperand()) {
+ if (dyn_cast<Argument>(Arg)) {
+ DEBUG(dbgs() << " Include: ");
+ DEBUG(AI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(AI);
+ DEBUG(dbgs() << " Include (parameter): ");
+ DEBUG(SI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(SI);
+ DEBUG(dbgs() << " Include: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(&*BI);
+ } else {
+ DEBUG(dbgs() << " Delete (!parameter): ");
+ DEBUG(SI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ } else {
+ DEBUG(dbgs() << " Defer: ");
+ DEBUG(U->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ } else {
+ DEBUG(dbgs() << " Delete (alloca NULL): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ } else {
+ DEBUG(dbgs() << " Delete (!parameter): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ } else if (dyn_cast<TerminatorInst>(BI) == GEntryBlock->getTerminator()) {
+ DEBUG(dbgs() << " Will Include Terminator: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(&*BI);
+ } else {
+ DEBUG(dbgs() << " Defer: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ DEBUG(dbgs()
+ << " Report parameter debug info related/related instructions: {\n");
+ for (BasicBlock::iterator BI = GEntryBlock->begin(), BE = GEntryBlock->end();
+ BI != BE; ++BI) {
+
+ Instruction *I = &*BI;
+ if (PDIRelated.find(I) == PDIRelated.end()) {
+ DEBUG(dbgs() << " !PDIRelated: ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIUnrelatedWL.push_back(I);
+ } else {
+ DEBUG(dbgs() << " PDIRelated: ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ DEBUG(dbgs() << " }\n");
+}
+
+// Replace G with a simple tail call to bitcast(F). Also (unless
+// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
+// delete G. Under MergeFunctionsPDI, we use G itself for creating
+// the thunk as we preserve the debug info (and associated instructions)
+// from G's entry block pertaining to G's incoming arguments which are
+// passed on as corresponding arguments in the call that G makes to F.
+// For better debugability, under MergeFunctionsPDI, we do not modify G's
+// call sites to point to F even when within the same translation unit.
void MergeFunctions::writeThunk(Function *F, Function *G) {
- if (!G->isInterposable()) {
- // Redirect direct callers of G to F.
+ if (!G->isInterposable() && !MergeFunctionsPDI) {
+ // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
+ // above).
replaceDirectCallers(G, F);
}
// If G was internal then we may have replaced all uses of G with F. If so,
- // stop here and delete G. There's no need for a thunk.
- if (G->hasLocalLinkage() && G->use_empty()) {
+ // stop here and delete G. There's no need for a thunk. (See note on
+ // MergeFunctionsPDI above).
+ if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) {
G->eraseFromParent();
return;
}
- Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
- G->getParent());
- BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
- IRBuilder<> Builder(BB);
+ BasicBlock *GEntryBlock = nullptr;
+ std::vector<Instruction *> PDIUnrelatedWL;
+ BasicBlock *BB = nullptr;
+ Function *NewG = nullptr;
+ if (MergeFunctionsPDI) {
+ DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) Do not create a new "
+ "function as thunk; retain original: "
+ << G->getName() << "()\n");
+ GEntryBlock = &G->getEntryBlock();
+ DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) filter parameter related "
+ "debug info for "
+ << G->getName() << "() {\n");
+ filterInstsUnrelatedToPDI(GEntryBlock, PDIUnrelatedWL);
+ GEntryBlock->getTerminator()->eraseFromParent();
+ BB = GEntryBlock;
+ } else {
+ NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+ G->getParent());
+ BB = BasicBlock::Create(F->getContext(), "", NewG);
+ }
+ IRBuilder<> Builder(BB);
+ Function *H = MergeFunctionsPDI ? G : NewG;
SmallVector<Value *, 16> Args;
unsigned i = 0;
FunctionType *FFTy = F->getFunctionType();
- for (Argument & AI : NewG->args()) {
+ for (Argument & AI : H->args()) {
Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
++i;
}
CallInst *CI = Builder.CreateCall(F, Args);
+ ReturnInst *RI = nullptr;
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
CI->setAttributes(F->getAttributes());
- if (NewG->getReturnType()->isVoidTy()) {
- Builder.CreateRetVoid();
+ if (H->getReturnType()->isVoidTy()) {
+ RI = Builder.CreateRetVoid();
} else {
- Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
+ RI = Builder.CreateRet(createCast(Builder, CI, H->getReturnType()));
}
- NewG->copyAttributesFrom(G);
- NewG->takeName(G);
- removeUsers(G);
- G->replaceAllUsesWith(NewG);
- G->eraseFromParent();
+ if (MergeFunctionsPDI) {
+ DISubprogram *DIS = G->getSubprogram();
+ if (DIS) {
+ DebugLoc CIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS);
+ DebugLoc RIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS);
+ CI->setDebugLoc(CIDbgLoc);
+ RI->setDebugLoc(RIDbgLoc);
+ } else {
+ DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) No DISubprogram for "
+ << G->getName() << "()\n");
+ }
+ eraseTail(G);
+ eraseInstsUnrelatedToPDI(PDIUnrelatedWL);
+ DEBUG(dbgs() << "} // End of parameter related debug info filtering for: "
+ << G->getName() << "()\n");
+ } else {
+ NewG->copyAttributesFrom(G);
+ NewG->takeName(G);
+ removeUsers(G);
+ G->replaceAllUsesWith(NewG);
+ G->eraseFromParent();
+ }
- DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+ DEBUG(dbgs() << "writeThunk: " << H->getName() << '\n');
++NumThunksWritten;
}
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 7ef3fc1fc2a7..a2f6e5639d9d 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -33,7 +33,7 @@ STATISTIC(NumPartialInlined, "Number of functions partially inlined");
namespace {
struct PartialInlinerImpl {
- PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(IFI) {}
+ PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {}
bool run(Module &M);
Function *unswitchFunction(Function *F);
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 941efb210d1c..f11b58d1adc4 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -93,10 +93,6 @@ static cl::opt<CFLAAType>
clEnumValN(CFLAAType::Both, "both",
"Enable both variants of CFL-AA")));
-static cl::opt<bool>
-EnableMLSM("mlsm", cl::init(true), cl::Hidden,
- cl::desc("Enable motion of merged load and store"));
-
static cl::opt<bool> EnableLoopInterchange(
"enable-loopinterchange", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopInterchange Pass"));
@@ -141,8 +137,8 @@ static cl::opt<int> PreInlineThreshold(
"(default = 75)"));
static cl::opt<bool> EnableGVNHoist(
- "enable-gvn-hoist", cl::init(false), cl::Hidden,
- cl::desc("Enable the GVN hoisting pass"));
+ "enable-gvn-hoist", cl::init(true), cl::Hidden,
+ cl::desc("Enable the GVN hoisting pass (default = on)"));
static cl::opt<bool>
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
@@ -172,6 +168,7 @@ PassManagerBuilder::PassManagerBuilder() {
PGOInstrUse = RunPGOInstrUse;
PrepareForThinLTO = EnablePrepareForThinLTO;
PerformThinLTO = false;
+ DivergentTarget = false;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -248,8 +245,6 @@ void PassManagerBuilder::populateFunctionPassManager(
FPM.add(createCFGSimplificationPass());
FPM.add(createSROAPass());
FPM.add(createEarlyCSEPass());
- if(EnableGVNHoist)
- FPM.add(createGVNHoistPass());
FPM.add(createLowerExpectIntrinsicPass());
}
@@ -294,6 +289,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Break up aggregate allocas, using SSAUpdater.
MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ if (EnableGVNHoist)
+ MPM.add(createGVNHoistPass());
// Speculative execution if the target has divergent branches; otherwise nop.
MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
MPM.add(createJumpThreadingPass()); // Thread jumps.
@@ -305,29 +302,34 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLibCallsShrinkWrapPass());
addExtensionsToPM(EP_Peephole, MPM);
+ // Optimize memory intrinsic calls based on the profiled size information.
+ if (SizeLevel == 0)
+ MPM.add(createPGOMemOPSizeOptLegacyPass());
+
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops
+
if (EnableLoopInterchange) {
MPM.add(createLoopInterchangePass()); // Interchange loops
MPM.add(createCFGSimplificationPass());
}
if (!DisableUnrollLoops)
- MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+ MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
if (OptLevel > 1) {
- if (EnableMLSM)
- MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
+ MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
MPM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
}
@@ -369,7 +371,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
+ MPM.add(createLoopUnrollPass(OptLevel));
}
}
@@ -434,7 +436,16 @@ void PassManagerBuilder::populateModulePassManager(
// earlier in the pass pipeline, here before globalopt. Otherwise imported
// available_externally functions look unreferenced and are removed.
if (PerformThinLTO)
- MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true));
+ MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
+ !PGOSampleUse.empty()));
+
+ // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops
+ // as it will change the CFG too much to make the 2nd profile annotation
+ // in backend more difficult.
+ bool PrepareForThinLTOUsingPGOSampleProfile =
+ PrepareForThinLTO && !PGOSampleUse.empty();
+ if (PrepareForThinLTOUsingPGOSampleProfile)
+ DisableUnrollLoops = true;
if (!DisableUnitAtATime) {
// Infer attributes about declarations if possible.
@@ -454,14 +465,18 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
}
- if (!PerformThinLTO) {
+ // For SamplePGO in ThinLTO compile phase, we do not want to do indirect
+ // call promotion as it will change the CFG too much to make the 2nd
+ // profile annotation in backend more difficult.
+ if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) {
/// PGO instrumentation is added during the compile phase for ThinLTO, do
/// not run it a second time
addPGOInstrPasses(MPM);
// Indirect call promotion that promotes intra-module targets only.
// For ThinLTO this is done earlier due to interactions with globalopt
// for imported functions.
- MPM.add(createPGOIndirectCallPromotionLegacyPass());
+ MPM.add(
+ createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
}
if (EnableNonLTOGlobalsModRef)
@@ -589,7 +604,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCorrelatedValuePropagationPass());
addInstructionCombiningPass(MPM);
MPM.add(createLICMPass());
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
}
@@ -615,16 +630,16 @@ void PassManagerBuilder::populateModulePassManager(
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
+ MPM.add(createLoopUnrollPass(OptLevel));
}
}
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createLateCFGSimplificationPass()); // Switches to lookup tables
addInstructionCombiningPass(MPM);
if (!DisableUnrollLoops) {
- MPM.add(createLoopUnrollPass()); // Unroll small loops
+ MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
// LoopUnroll may generate some redundency to cleanup.
addInstructionCombiningPass(MPM);
@@ -684,7 +699,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// left by the earlier promotion pass that promotes intra-module targets.
// This two-step promotion is to save the compile time. For LTO, it should
// produce the same result as if we only do promotion here.
- PM.add(createPGOIndirectCallPromotionLegacyPass(true));
+ PM.add(
+ createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
@@ -703,7 +719,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalSplitPass());
// Apply whole-program devirtualization and virtual constant propagation.
- PM.add(createWholeProgramDevirtPass());
+ PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
// That's all we need at opt level 1.
if (OptLevel == 1)
@@ -759,8 +775,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
- if (EnableMLSM)
- PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
+ PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
@@ -775,11 +790,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createLoopInterchangePass());
if (!DisableUnrollLoops)
- PM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+ PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
PM.add(createLoopVectorizePass(true, LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- PM.add(createLoopUnrollPass());
+ PM.add(createLoopUnrollPass(OptLevel));
// Now that we've optimized loops (in particular loop induction variables),
// we may have exposed more scalar opportunities. Run parts of the scalar
@@ -833,6 +848,23 @@ void PassManagerBuilder::populateThinLTOPassManager(
if (VerifyInput)
PM.add(createVerifierPass());
+ if (ImportSummary) {
+ // These passes import type identifier resolutions for whole-program
+ // devirtualization and CFI. They must run early because other passes may
+ // disturb the specific instruction patterns that these passes look for,
+ // creating dependencies on resolutions that may not appear in the summary.
+ //
+ // For example, GVN may transform the pattern assume(type.test) appearing in
+ // two basic blocks into assume(phi(type.test, type.test)), which would
+ // transform a dependency on a WPD resolution into a dependency on a type
+ // identifier resolution for CFI.
+ //
+ // Also, WPD has access to more precise information than ICP and can
+ // devirtualize more effectively, so it should operate on the IR first.
+ PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary));
+ PM.add(createLowerTypeTestsPass(nullptr, ImportSummary));
+ }
+
populateModulePassManager(PM);
if (VerifyOutput)
@@ -857,8 +889,7 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
// Lower type metadata and the type.test intrinsic. This pass supports Clang's
// control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
// link time if CFI is enabled. The pass does nothing if CFI is disabled.
- PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None,
- /*Summary=*/nullptr));
+ PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 6a43f8dbac48..3371de6e3d14 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -43,6 +44,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -50,6 +52,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <cctype>
@@ -159,8 +162,11 @@ protected:
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
+ std::vector<const FunctionSamples *>
+ findIndirectCallFunctionSamples(const Instruction &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- bool inlineHotFunctions(Function &F);
+ bool inlineHotFunctions(Function &F,
+ DenseSet<GlobalValue::GUID> &ImportGUIDs);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -173,7 +179,7 @@ protected:
void buildEdges(Function &F);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
- unsigned getOffset(unsigned L, unsigned H) const;
+ unsigned getOffset(const DILocation *DIL) const;
void clearFunctionData();
/// \brief Map basic blocks to their computed weights.
@@ -326,11 +332,12 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
// If there are inlined callsites in this function, count the samples found
// in the respective bodies. However, do not bother counting callees with 0
// total samples, these are callees that were never invoked at runtime.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Count += countUsedRecords(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countUsedRecords(CalleeSamples);
+ }
return Count;
}
@@ -343,11 +350,12 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
unsigned Count = FS->getBodySamples().size();
// Only count records in hot callsites.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Count += countBodyRecords(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countBodyRecords(CalleeSamples);
+ }
return Count;
}
@@ -362,11 +370,12 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
Total += I.second.getSamples();
// Only count samples in hot callsites.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Total += countBodySamples(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Total += countBodySamples(CalleeSamples);
+ }
return Total;
}
@@ -398,15 +407,11 @@ void SampleProfileLoader::clearFunctionData() {
CoverageTracker.clear();
}
-/// \brief Returns the offset of lineno \p L to head_lineno \p H
-///
-/// \param L Lineno
-/// \param H Header lineno of the function
-///
-/// \returns offset to the header lineno. 16 bits are used to represent offset.
+/// Returns the line offset to the start line of the subprogram.
/// We assume that a single function will not exceed 65535 LOC.
-unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
- return (L - H) & 0xffff;
+unsigned SampleProfileLoader::getOffset(const DILocation *DIL) const {
+ return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+ 0xffff;
}
/// \brief Print the weight of edge \p E on stream \p OS.
@@ -451,8 +456,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
/// \param Inst Instruction to query.
///
/// \returns the weight of \p Inst.
-ErrorOr<uint64_t>
-SampleProfileLoader::getInstWeight(const Instruction &Inst) {
+ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
const DebugLoc &DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
@@ -470,19 +474,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// If a call/invoke instruction is inlined in profile, but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
- bool IsCall = isa<CallInst>(Inst) || isa<InvokeInst>(Inst);
- if (IsCall && findCalleeFunctionSamples(Inst))
+ if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
+ findCalleeFunctionSamples(Inst))
return 0;
const DILocation *DIL = DLoc;
- unsigned Lineno = DLoc.getLine();
- unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
-
- uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
- uint32_t Discriminator = DIL->getDiscriminator();
- ErrorOr<uint64_t> R = IsCall
- ? FS->findCallSamplesAt(LineOffset, Discriminator)
- : FS->findSamplesAt(LineOffset, Discriminator);
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+ ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
if (R) {
bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
@@ -491,13 +490,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
LLVMContext &Ctx = F->getContext();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, *F, DLoc,
- Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
- Twine(LineOffset) +
+ Twine("Applied ") + Twine(*R) +
+ " samples from profile (offset: " + Twine(LineOffset) +
((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
}
- DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":"
- << Inst << " (line offset: " << Lineno - HeaderLineno << "."
- << DIL->getDiscriminator() << " - weight: " << R.get()
+ DEBUG(dbgs() << " " << DLoc.getLine() << "."
+ << DIL->getBaseDiscriminator() << ":" << Inst
+ << " (line offset: " << LineOffset << "."
+ << DIL->getBaseDiscriminator() << " - weight: " << R.get()
<< ")\n");
}
return R;
@@ -511,8 +511,7 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
/// \param BB The basic block to query.
///
/// \returns the weight for \p BB.
-ErrorOr<uint64_t>
-SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
+ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
uint64_t Max = 0;
bool HasWeight = false;
for (auto &I : BB->getInstList()) {
@@ -565,16 +564,49 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
if (!DIL) {
return nullptr;
}
- DISubprogram *SP = DIL->getScope()->getSubprogram();
- if (!SP)
- return nullptr;
+
+ StringRef CalleeName;
+ if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
+ if (Function *Callee = CI->getCalledFunction())
+ CalleeName = Callee->getName();
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return nullptr;
- return FS->findFunctionSamplesAt(LineLocation(
- getOffset(DIL->getLine(), SP->getLine()), DIL->getDiscriminator()));
+ return FS->findFunctionSamplesAt(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName);
+}
+
+/// Returns a vector of FunctionSamples that are the indirect call targets
+/// of \p Inst. The vector is sorted by the total number of samples.
+std::vector<const FunctionSamples *>
+SampleProfileLoader::findIndirectCallFunctionSamples(
+ const Instruction &Inst) const {
+ const DILocation *DIL = Inst.getDebugLoc();
+ std::vector<const FunctionSamples *> R;
+
+ if (!DIL) {
+ return R;
+ }
+
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (FS == nullptr)
+ return R;
+
+ if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
+ if (M->size() == 0)
+ return R;
+ for (const auto &NameFS : *M) {
+ R.push_back(&NameFS.second);
+ }
+ std::sort(R.begin(), R.end(),
+ [](const FunctionSamples *L, const FunctionSamples *R) {
+ return L->getTotalSamples() > R->getTotalSamples();
+ });
+ }
+ return R;
}
/// \brief Get the FunctionSamples for an instruction.
@@ -588,23 +620,23 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
- SmallVector<LineLocation, 10> S;
+ SmallVector<std::pair<LineLocation, StringRef>, 10> S;
const DILocation *DIL = Inst.getDebugLoc();
- if (!DIL) {
+ if (!DIL)
return Samples;
- }
+
+ const DILocation *PrevDIL = DIL;
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
- DISubprogram *SP = DIL->getScope()->getSubprogram();
- if (!SP)
- return nullptr;
- S.push_back(LineLocation(getOffset(DIL->getLine(), SP->getLine()),
- DIL->getDiscriminator()));
+ S.push_back(std::make_pair(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()),
+ PrevDIL->getScope()->getSubprogram()->getLinkageName()));
+ PrevDIL = DIL;
}
if (S.size() == 0)
return Samples;
const FunctionSamples *FS = Samples;
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
- FS = FS->findFunctionSamplesAt(S[i]);
+ FS = FS->findFunctionSamplesAt(S[i].first, S[i].second);
}
return FS;
}
@@ -614,14 +646,17 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
/// Iteratively traverse all callsites of the function \p F, and find if
/// the corresponding inlined instance exists and is hot in profile. If
/// it is hot enough, inline the callsites and adds new callsites of the
-/// callee into the caller.
-///
-/// TODO: investigate the possibility of not invoking InlineFunction directly.
+/// callee into the caller. If the call is an indirect call, first promote
+/// it to direct call. Each indirect call is limited with a single target.
///
/// \param F function to perform iterative inlining.
+/// \param ImportGUIDs a set to be updated to include all GUIDs that come
+/// from a different module but inlined in the profiled binary.
///
/// \returns True if there is any inline happened.
-bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+bool SampleProfileLoader::inlineHotFunctions(
+ Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
+ DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
LLVMContext &Ctx = F.getContext();
std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
@@ -647,18 +682,42 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
}
for (auto I : CIS) {
InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
- CallSite CS(I);
- Function *CalledFunction = CS.getCalledFunction();
- if (!CalledFunction || !CalledFunction->getSubprogram())
+ Function *CalledFunction = CallSite(I).getCalledFunction();
+ Instruction *DI = I;
+ if (!CalledFunction && !PromotedInsns.count(I) &&
+ CallSite(I).isIndirectCall())
+ for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
+ auto CalleeFunctionName = FS->getName();
+ const char *Reason = "Callee function not available";
+ CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
+ if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
+ // The indirect target was promoted and inlined in the profile, as a
+ // result, we do not have profile info for the branch probability.
+ // We set the probability to 80% taken to indicate that the static
+ // call is likely taken.
+ DI = dyn_cast<Instruction>(
+ promoteIndirectCall(I, CalledFunction, 80, 100, false)
+ ->stripPointerCasts());
+ PromotedInsns.insert(I);
+ } else {
+ DEBUG(dbgs() << "\nFailed to promote indirect call to "
+ << CalleeFunctionName << " because " << Reason
+ << "\n");
+ continue;
+ }
+ }
+ if (!CalledFunction || !CalledFunction->getSubprogram()) {
+ findCalleeFunctionSamples(*I)->findImportedFunctions(
+ ImportGUIDs, F.getParent(),
+ Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
continue;
+ }
DebugLoc DLoc = I->getDebugLoc();
- uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
- if (InlineFunction(CS, IFI)) {
+ if (InlineFunction(CallSite(DI), IFI)) {
LocalChanged = true;
emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
Twine("inlined hot callee '") +
- CalledFunction->getName() + "' with " +
- Twine(NumSamples) + " samples into '" +
+ CalledFunction->getName() + "' into '" +
F.getName() + "'");
}
}
@@ -994,6 +1053,26 @@ void SampleProfileLoader::buildEdges(Function &F) {
}
}
+/// Sorts the CallTargetMap \p M by count in descending order and stores the
+/// sorted result in \p Sorted. Returns the total counts.
+static uint64_t SortCallTargets(SmallVector<InstrProfValueData, 2> &Sorted,
+ const SampleRecord::CallTargetMap &M) {
+ Sorted.clear();
+ uint64_t Sum = 0;
+ for (auto I = M.begin(); I != M.end(); ++I) {
+ Sum += I->getValue();
+ Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()});
+ }
+ std::sort(Sorted.begin(), Sorted.end(),
+ [](const InstrProfValueData &L, const InstrProfValueData &R) {
+ if (L.Count == R.Count)
+ return L.Value > R.Value;
+ else
+ return L.Count > R.Count;
+ });
+ return Sum;
+}
+
/// \brief Propagate weights into edges
///
/// The following rules are applied to every block BB in the CFG:
@@ -1015,10 +1094,6 @@ void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
unsigned I = 0;
- // Add an entry count to the function using the samples gathered
- // at the function entry.
- F.setEntryCount(Samples->getHeadSamples() + 1);
-
// If BB weight is larger than its corresponding loop's header BB weight,
// use the BB weight to replace the loop header BB weight.
for (auto &BI : F) {
@@ -1071,13 +1146,32 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (BlockWeights[BB]) {
for (auto &I : BB->getInstList()) {
- if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- if (!dyn_cast<IntrinsicInst>(&I)) {
- SmallVector<uint32_t, 1> Weights;
- Weights.push_back(BlockWeights[BB]);
- CI->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(Weights));
- }
+ if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
+ continue;
+ CallSite CS(&I);
+ if (!CS.getCalledFunction()) {
+ const DebugLoc &DLoc = I.getDebugLoc();
+ if (!DLoc)
+ continue;
+ const DILocation *DIL = DLoc;
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+
+ const FunctionSamples *FS = findFunctionSamples(I);
+ if (!FS)
+ continue;
+ auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
+ if (!T || T.get().size() == 0)
+ continue;
+ SmallVector<InstrProfValueData, 2> SortedCallTargets;
+ uint64_t Sum = SortCallTargets(SortedCallTargets, T.get());
+ annotateValueSite(*I.getParent()->getParent()->getParent(), I,
+ SortedCallTargets, Sum, IPVK_IndirectCallTarget,
+ SortedCallTargets.size());
+ } else if (!dyn_cast<IntrinsicInst>(&I)) {
+ SmallVector<uint32_t, 1> Weights;
+ Weights.push_back(BlockWeights[BB]);
+ I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
}
}
}
@@ -1115,9 +1209,13 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
+ uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
- if (MaxWeight > 0) {
+ // Do not set weights if the weights are present. In ThinLTO, the profile
+ // annotation is done twice. If the first annotation already set the
+ // weights, the second pass does not need to set it.
+ if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
@@ -1228,12 +1326,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
- Changed |= inlineHotFunctions(F);
+ DenseSet<GlobalValue::GUID> ImportGUIDs;
+ Changed |= inlineHotFunctions(F, ImportGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
+ // Add an entry count to the function using the samples gathered at the
+ // function entry. Also sets the GUIDs that comes from a different
+ // module but inlined in the profiled binary. This is aiming at making
+ // the IR match the profiled binary before annotation.
+ F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
+
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
@@ -1329,7 +1434,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
bool SampleProfileLoader::runOnFunction(Function &F) {
F.setEntryCount(0);
Samples = Reader->getSamplesFor(F);
- if (!Samples->empty())
+ if (Samples && !Samples->empty())
return emitAnnotations(F);
return false;
}
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 8f6f161428e8..fb64367eef91 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -323,6 +323,14 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
LiveGVs.insert(GVE);
}
+ std::set<DICompileUnit *> LiveCUs;
+ // Any CU referenced from a subprogram is live.
+ for (DISubprogram *SP : F.subprograms()) {
+ if (SP->getUnit())
+ LiveCUs.insert(SP->getUnit());
+ }
+
+ bool HasDeadCUs = false;
for (DICompileUnit *DIC : F.compile_units()) {
// Create our live global variable list.
bool GlobalVariableChange = false;
@@ -341,6 +349,11 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
GlobalVariableChange = true;
}
+ if (!LiveGlobalVariables.empty())
+ LiveCUs.insert(DIC);
+ else if (!LiveCUs.count(DIC))
+ HasDeadCUs = true;
+
// If we found dead global variables, replace the current global
// variable list with our new live global variable list.
if (GlobalVariableChange) {
@@ -352,5 +365,16 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
LiveGlobalVariables.clear();
}
+ if (HasDeadCUs) {
+ // Delete the old node and replace it with a new one
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+ NMD->clearOperands();
+ if (!LiveCUs.empty()) {
+ for (DICompileUnit *CU : LiveCUs)
+ NMD->addOperand(CU);
+ }
+ Changed = true;
+ }
+
return Changed;
}
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 3680cfc813a1..65deb82cd2a5 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -14,16 +14,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
@@ -41,23 +46,14 @@ namespace {
std::string getModuleId(Module *M) {
MD5 Md5;
bool ExportsSymbols = false;
- auto AddGlobal = [&](GlobalValue &GV) {
+ for (auto &GV : M->global_values()) {
if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
!GV.hasExternalLinkage())
- return;
+ continue;
ExportsSymbols = true;
Md5.update(GV.getName());
Md5.update(ArrayRef<uint8_t>{0});
- };
-
- for (auto &F : *M)
- AddGlobal(F);
- for (auto &GV : M->globals())
- AddGlobal(GV);
- for (auto &GA : M->aliases())
- AddGlobal(GA);
- for (auto &IF : M->ifuncs())
- AddGlobal(IF);
+ }
if (!ExportsSymbols)
return "";
@@ -73,15 +69,21 @@ std::string getModuleId(Module *M) {
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
- auto PromoteInternal = [&](GlobalValue &ExportGV) {
+ DenseMap<const Comdat *, Comdat *> RenamedComdats;
+ for (auto &ExportGV : ExportM.global_values()) {
if (!ExportGV.hasLocalLinkage())
- return;
+ continue;
- GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
+ auto Name = ExportGV.getName();
+ GlobalValue *ImportGV = ImportM.getNamedValue(Name);
if (!ImportGV || ImportGV->use_empty())
- return;
+ continue;
+
+ std::string NewName = (Name + ModuleId).str();
- std::string NewName = (ExportGV.getName() + ModuleId).str();
+ if (const auto *C = ExportGV.getComdat())
+ if (C->getName() == Name)
+ RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
ExportGV.setName(NewName);
ExportGV.setLinkage(GlobalValue::ExternalLinkage);
@@ -89,16 +91,15 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
ImportGV->setName(NewName);
ImportGV->setVisibility(GlobalValue::HiddenVisibility);
- };
+ }
- for (auto &F : ExportM)
- PromoteInternal(F);
- for (auto &GV : ExportM.globals())
- PromoteInternal(GV);
- for (auto &GA : ExportM.aliases())
- PromoteInternal(GA);
- for (auto &IF : ExportM.ifuncs())
- PromoteInternal(IF);
+ if (!RenamedComdats.empty())
+ for (auto &GO : ExportM.global_objects())
+ if (auto *C = GO.getComdat()) {
+ auto Replacement = RenamedComdats.find(C);
+ if (Replacement != RenamedComdats.end())
+ GO.setComdat(Replacement->second);
+ }
}
// Promote all internal (i.e. distinct) type ids used by the module by replacing
@@ -194,24 +195,7 @@ void simplifyExternals(Module &M) {
}
void filterModule(
- Module *M, std::function<bool(const GlobalValue *)> ShouldKeepDefinition) {
- for (Function &F : *M) {
- if (ShouldKeepDefinition(&F))
- continue;
-
- F.deleteBody();
- F.clearMetadata();
- }
-
- for (GlobalVariable &GV : M->globals()) {
- if (ShouldKeepDefinition(&GV))
- continue;
-
- GV.setInitializer(nullptr);
- GV.setLinkage(GlobalValue::ExternalLinkage);
- GV.clearMetadata();
- }
-
+ Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E;) {
GlobalAlias *GA = &*I++;
@@ -219,7 +203,7 @@ void filterModule(
continue;
GlobalObject *GO;
- if (I->getValueType()->isFunctionTy())
+ if (GA->getValueType()->isFunctionTy())
GO = Function::Create(cast<FunctionType>(GA->getValueType()),
GlobalValue::ExternalLinkage, "", M);
else
@@ -231,53 +215,168 @@ void filterModule(
GA->replaceAllUsesWith(GO);
GA->eraseFromParent();
}
+
+ for (Function &F : *M) {
+ if (ShouldKeepDefinition(&F))
+ continue;
+
+ F.deleteBody();
+ F.setComdat(nullptr);
+ F.clearMetadata();
+ }
+
+ for (GlobalVariable &GV : M->globals()) {
+ if (ShouldKeepDefinition(&GV))
+ continue;
+
+ GV.setInitializer(nullptr);
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ GV.setComdat(nullptr);
+ GV.clearMetadata();
+ }
+}
+
+void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
+ if (auto *F = dyn_cast<Function>(C))
+ return Fn(F);
+ if (isa<GlobalValue>(C))
+ return;
+ for (Value *Op : C->operands())
+ forEachVirtualFunction(cast<Constant>(Op), Fn);
}
// If it's possible to split M into regular and thin LTO parts, do so and write
// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
// regular LTO bitcode file to OS.
-void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
+void splitAndWriteThinLTOBitcode(
+ raw_ostream &OS, raw_ostream *ThinLinkOS,
+ function_ref<AAResults &(Function &)> AARGetter, Module &M) {
std::string ModuleId = getModuleId(&M);
if (ModuleId.empty()) {
// We couldn't generate a module ID for this module, just write it out as a
// regular LTO module.
WriteBitcodeToFile(&M, OS);
+ if (ThinLinkOS)
+ // We don't have a ThinLTO part, but still write the module to the
+ // ThinLinkOS if requested so that the expected output file is produced.
+ WriteBitcodeToFile(&M, *ThinLinkOS);
return;
}
promoteTypeIds(M, ModuleId);
- auto IsInMergedM = [&](const GlobalValue *GV) {
- auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
- if (!GVar)
- return false;
-
+ // Returns whether a global has attached type metadata. Such globals may
+ // participate in CFI or whole-program devirtualization, so they need to
+ // appear in the merged module instead of the thin LTO module.
+ auto HasTypeMetadata = [&](const GlobalObject *GO) {
SmallVector<MDNode *, 1> MDs;
- GVar->getMetadata(LLVMContext::MD_type, MDs);
+ GO->getMetadata(LLVMContext::MD_type, MDs);
return !MDs.empty();
};
+ // Collect the set of virtual functions that are eligible for virtual constant
+ // propagation. Each eligible function must not access memory, must return
+ // an integer of width <=64 bits, must take at least one argument, must not
+ // use its first argument (assumed to be "this") and all arguments other than
+ // the first one must be of <=64 bit integer type.
+ //
+ // Note that we test whether this copy of the function is readnone, rather
+ // than testing function attributes, which must hold for any copy of the
+ // function, even a less optimized version substituted at link time. This is
+ // sound because the virtual constant propagation optimizations effectively
+ // inline all implementations of the virtual function into each call site,
+ // rather than using function attributes to perform local optimization.
+ std::set<const Function *> EligibleVirtualFns;
+ // If any member of a comdat lives in MergedM, put all members of that
+ // comdat in MergedM to keep the comdat together.
+ DenseSet<const Comdat *> MergedMComdats;
+ for (GlobalVariable &GV : M.globals())
+ if (HasTypeMetadata(&GV)) {
+ if (const auto *C = GV.getComdat())
+ MergedMComdats.insert(C);
+ forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
+ auto *RT = dyn_cast<IntegerType>(F->getReturnType());
+ if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
+ !F->arg_begin()->use_empty())
+ return;
+ for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
+ auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
+ if (!ArgT || ArgT->getBitWidth() > 64)
+ return;
+ }
+ if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
+ EligibleVirtualFns.insert(F);
+ });
+ }
+
ValueToValueMapTy VMap;
- std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
+ std::unique_ptr<Module> MergedM(
+ CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
+ if (const auto *C = GV->getComdat())
+ if (MergedMComdats.count(C))
+ return true;
+ if (auto *F = dyn_cast<Function>(GV))
+ return EligibleVirtualFns.count(F);
+ if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ return HasTypeMetadata(GVar);
+ return false;
+ }));
+ StripDebugInfo(*MergedM);
+
+ for (Function &F : *MergedM)
+ if (!F.isDeclaration()) {
+ // Reset the linkage of all functions eligible for virtual constant
+ // propagation. The canonical definitions live in the thin LTO module so
+ // that they can be imported.
+ F.setLinkage(GlobalValue::AvailableExternallyLinkage);
+ F.setComdat(nullptr);
+ }
- filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
+ // Remove all globals with type metadata, globals with comdats that live in
+ // MergedM, and aliases pointing to such globals from the thin LTO module.
+ filterModule(&M, [&](const GlobalValue *GV) {
+ if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ if (HasTypeMetadata(GVar))
+ return false;
+ if (const auto *C = GV->getComdat())
+ if (MergedMComdats.count(C))
+ return false;
+ return true;
+ });
promoteInternals(*MergedM, M, ModuleId);
promoteInternals(M, *MergedM, ModuleId);
simplifyExternals(*MergedM);
- SmallVector<char, 0> Buffer;
- BitcodeWriter W(Buffer);
// FIXME: Try to re-use BSI and PFI from the original module here.
ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
- W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
- /*GenerateHash=*/true);
- W.writeModule(MergedM.get());
+ SmallVector<char, 0> Buffer;
+ BitcodeWriter W(Buffer);
+ // Save the module hash produced for the full bitcode, which will
+ // be used in the backends, and use that in the minimized bitcode
+ // produced for the full link.
+ ModuleHash ModHash = {{0}};
+ W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
+ /*GenerateHash=*/true, &ModHash);
+ W.writeModule(MergedM.get());
OS << Buffer;
+
+ // If a minimized bitcode module was requested for the thin link,
+ // strip the debug info (the merged module was already stripped above)
+ // and write it to the given OS.
+ if (ThinLinkOS) {
+ Buffer.clear();
+ BitcodeWriter W2(Buffer);
+ StripDebugInfo(M);
+ W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
+ /*GenerateHash=*/false, &ModHash);
+ W2.writeModule(MergedM.get());
+ *ThinLinkOS << Buffer;
+ }
}
// Returns whether this module needs to be split because it uses type metadata.
@@ -292,28 +391,45 @@ bool requiresSplit(Module &M) {
return false;
}
-void writeThinLTOBitcode(raw_ostream &OS, Module &M,
- const ModuleSummaryIndex *Index) {
+void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
+ function_ref<AAResults &(Function &)> AARGetter,
+ Module &M, const ModuleSummaryIndex *Index) {
// See if this module has any type metadata. If so, we need to split it.
if (requiresSplit(M))
- return splitAndWriteThinLTOBitcode(OS, M);
+ return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
// Otherwise we can just write it out as a regular module.
+
+ // Save the module hash produced for the full bitcode, which will
+ // be used in the backends, and use that in the minimized bitcode
+ // produced for the full link.
+ ModuleHash ModHash = {{0}};
WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
- /*GenerateHash=*/true);
+ /*GenerateHash=*/true, &ModHash);
+ // If a minimized bitcode module was requested for the thin link,
+ // strip the debug info and write it to the given OS.
+ if (ThinLinkOS) {
+ StripDebugInfo(M);
+ WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
+ Index,
+ /*GenerateHash=*/false, &ModHash);
+ }
}
class WriteThinLTOBitcode : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
+ // The output stream on which to emit a minimized module for use
+ // just in the thin link, if requested.
+ raw_ostream *ThinLinkOS;
public:
static char ID; // Pass identification, replacement for typeid
- WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
+ WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
- explicit WriteThinLTOBitcode(raw_ostream &o)
- : ModulePass(ID), OS(o) {
+ explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
+ : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
@@ -322,12 +438,14 @@ public:
bool runOnModule(Module &M) override {
const ModuleSummaryIndex *Index =
&(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
- writeThinLTOBitcode(OS, M, Index);
+ writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
return true;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ModuleSummaryIndexWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // anonymous namespace
@@ -335,10 +453,13 @@ public:
char WriteThinLTOBitcode::ID = 0;
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
-ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
- return new WriteThinLTOBitcode(Str);
+ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
+ raw_ostream *ThinLinkOS) {
+ return new WriteThinLTOBitcode(Str, ThinLinkOS);
}
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 844cc0f70eed..cb7d487b68b0 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -25,6 +25,20 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
+// This pass is intended to be used during the regular and thin LTO pipelines.
+// During regular LTO, the pass determines the best optimization for each
+// virtual call and applies the resolutions directly to virtual calls that are
+// eligible for virtual call optimization (i.e. calls that use either of the
+// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
+// ThinLTO, the pass operates in two phases:
+// - Export phase: this is run during the thin link over a single merged module
+// that contains all vtables with !type metadata that participate in the link.
+// The pass computes a resolution for each virtual call and stores it in the
+// type identifier summary.
+// - Import phase: this is run during the thin backends over the individual
+// modules. The pass applies the resolutions previously computed during the
+// import phase to each eligible virtual call.
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -35,6 +49,8 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -54,12 +70,16 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
@@ -72,6 +92,26 @@ using namespace wholeprogramdevirt;
#define DEBUG_TYPE "wholeprogramdevirt"
+static cl::opt<PassSummaryAction> ClSummaryAction(
+ "wholeprogramdevirt-summary-action",
+ cl::desc("What to do with the summary when running this pass"),
+ cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
+ clEnumValN(PassSummaryAction::Import, "import",
+ "Import typeid resolutions from summary and globals"),
+ clEnumValN(PassSummaryAction::Export, "export",
+ "Export typeid resolutions to summary and globals")),
+ cl::Hidden);
+
+static cl::opt<std::string> ClReadSummary(
+ "wholeprogramdevirt-read-summary",
+ cl::desc("Read summary from given YAML file before running pass"),
+ cl::Hidden);
+
+static cl::opt<std::string> ClWriteSummary(
+ "wholeprogramdevirt-write-summary",
+ cl::desc("Write summary to given YAML file after running pass"),
+ cl::Hidden);
+
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
@@ -259,15 +299,92 @@ struct VirtualCallSite {
}
};
+// Call site information collected for a specific VTableSlot and possibly a list
+// of constant integer arguments. The grouping by arguments is handled by the
+// VTableSlotInfo class.
+struct CallSiteInfo {
+ /// The set of call sites for this slot. Used during regular LTO and the
+ /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
+ /// call sites that appear in the merged module itself); in each of these
+ /// cases we are directly operating on the call sites at the IR level.
+ std::vector<VirtualCallSite> CallSites;
+
+ // These fields are used during the export phase of ThinLTO and reflect
+ // information collected from function summaries.
+
+ /// Whether any function summary contains an llvm.assume(llvm.type.test) for
+ /// this slot.
+ bool SummaryHasTypeTestAssumeUsers;
+
+ /// CFI-specific: a vector containing the list of function summaries that use
+ /// the llvm.type.checked.load intrinsic and therefore will require
+ /// resolutions for llvm.type.test in order to implement CFI checks if
+ /// devirtualization was unsuccessful. If devirtualization was successful, the
+ /// pass will clear this vector by calling markDevirt(). If at the end of the
+ /// pass the vector is non-empty, we will need to add a use of llvm.type.test
+ /// to each of the function summaries in the vector.
+ std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+
+ bool isExported() const {
+ return SummaryHasTypeTestAssumeUsers ||
+ !SummaryTypeCheckedLoadUsers.empty();
+ }
+
+ /// As explained in the comment for SummaryTypeCheckedLoadUsers.
+ void markDevirt() { SummaryTypeCheckedLoadUsers.clear(); }
+};
+
+// Call site information collected for a specific VTableSlot.
+struct VTableSlotInfo {
+ // The set of call sites which do not have all constant integer arguments
+ // (excluding "this").
+ CallSiteInfo CSInfo;
+
+ // The set of call sites with all constant integer arguments (excluding
+ // "this"), grouped by argument list.
+ std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
+
+ void addCallSite(Value *VTable, CallSite CS, unsigned *NumUnsafeUses);
+
+private:
+ CallSiteInfo &findCallSiteInfo(CallSite CS);
+};
+
+CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) {
+ std::vector<uint64_t> Args;
+ auto *CI = dyn_cast<IntegerType>(CS.getType());
+ if (!CI || CI->getBitWidth() > 64 || CS.arg_empty())
+ return CSInfo;
+ for (auto &&Arg : make_range(CS.arg_begin() + 1, CS.arg_end())) {
+ auto *CI = dyn_cast<ConstantInt>(Arg);
+ if (!CI || CI->getBitWidth() > 64)
+ return CSInfo;
+ Args.push_back(CI->getZExtValue());
+ }
+ return ConstCSInfo[Args];
+}
+
+void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
+ unsigned *NumUnsafeUses) {
+ findCallSiteInfo(CS).CallSites.push_back({VTable, CS, NumUnsafeUses});
+}
+
struct DevirtModule {
Module &M;
+ function_ref<AAResults &(Function &)> AARGetter;
+
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
+
IntegerType *Int8Ty;
PointerType *Int8PtrTy;
IntegerType *Int32Ty;
+ IntegerType *Int64Ty;
+ IntegerType *IntPtrTy;
bool RemarksEnabled;
- MapVector<VTableSlot, std::vector<VirtualCallSite>> CallSlots;
+ MapVector<VTableSlot, VTableSlotInfo> CallSlots;
// This map keeps track of the number of "unsafe" uses of a loaded function
// pointer. The key is the associated llvm.type.test intrinsic call generated
@@ -279,11 +396,18 @@ struct DevirtModule {
// true.
std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
- DevirtModule(Module &M)
- : M(M), Int8Ty(Type::getInt8Ty(M.getContext())),
+ DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
+ ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : M(M), AARGetter(AARGetter), ExportSummary(ExportSummary),
+ ImportSummary(ImportSummary), Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
- RemarksEnabled(areRemarksEnabled()) {}
+ Int64Ty(Type::getInt64Ty(M.getContext())),
+ IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
+ RemarksEnabled(areRemarksEnabled()) {
+ assert(!(ExportSummary && ImportSummary));
+ }
bool areRemarksEnabled();
@@ -298,57 +422,169 @@ struct DevirtModule {
tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos,
uint64_t ByteOffset);
+
+ void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
+ bool &IsExported);
bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites);
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res);
+
bool tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<ConstantInt *> Args);
- bool tryUniformRetValOpt(IntegerType *RetType,
- MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites);
+ ArrayRef<uint64_t> Args);
+
+ void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
+ uint64_t TheRetVal);
+ bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ CallSiteInfo &CSInfo,
+ WholeProgramDevirtResolution::ByArg *Res);
+
+ // Returns the global symbol name that is used to export information about the
+ // given vtable slot and list of arguments.
+ std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name);
+
+ // This function is called during the export phase to create a symbol
+ // definition containing information about the given vtable slot and list of
+ // arguments.
+ void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
+ Constant *C);
+
+ // This function is called during the import phase to create a reference to
+ // the symbol definition created during the export phase.
+ Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, unsigned AbsWidth = 0);
+
+ void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
+ Constant *UniqueMemberAddr);
bool tryUniqueRetValOpt(unsigned BitWidth,
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites);
+ CallSiteInfo &CSInfo,
+ WholeProgramDevirtResolution::ByArg *Res,
+ VTableSlot Slot, ArrayRef<uint64_t> Args);
+
+ void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
+ Constant *Byte, Constant *Bit);
bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<VirtualCallSite> CallSites);
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res, VTableSlot Slot);
void rebuildGlobal(VTableBits &B);
+ // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
+ void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
+
+ // If we were able to eliminate all unsafe uses for a type checked load,
+ // eliminate the associated type tests by replacing them with true.
+ void removeRedundantTypeTests();
+
bool run();
+
+ // Lower the module using the action and summary passed as command line
+ // arguments. For testing purposes only.
+ static bool runForTesting(Module &M,
+ function_ref<AAResults &(Function &)> AARGetter);
};
struct WholeProgramDevirt : public ModulePass {
static char ID;
- WholeProgramDevirt() : ModulePass(ID) {
+ bool UseCommandLine = false;
+
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
+
+ WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) {
+ initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
+ }
+
+ WholeProgramDevirt(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : ModulePass(ID), ExportSummary(ExportSummary),
+ ImportSummary(ImportSummary) {
initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
+ if (UseCommandLine)
+ return DevirtModule::runForTesting(M, LegacyAARGetter(*this));
+ return DevirtModule(M, LegacyAARGetter(*this), ExportSummary, ImportSummary)
+ .run();
+ }
- return DevirtModule(M).run();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // end anonymous namespace
-INITIALIZE_PASS(WholeProgramDevirt, "wholeprogramdevirt",
- "Whole program devirtualization", false, false)
+INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",
+ "Whole program devirtualization", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",
+ "Whole program devirtualization", false, false)
char WholeProgramDevirt::ID = 0;
-ModulePass *llvm::createWholeProgramDevirtPass() {
- return new WholeProgramDevirt;
+ModulePass *
+llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary) {
+ return new WholeProgramDevirt(ExportSummary, ImportSummary);
}
PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
- ModuleAnalysisManager &) {
- if (!DevirtModule(M).run())
+ ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ return FAM.getResult<AAManager>(F);
+ };
+ if (!DevirtModule(M, AARGetter, nullptr, nullptr).run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
+bool DevirtModule::runForTesting(
+ Module &M, function_ref<AAResults &(Function &)> AARGetter) {
+ ModuleSummaryIndex Summary;
+
+ // Handle the command-line summary arguments. This code is for testing
+ // purposes only, so we handle errors directly.
+ if (!ClReadSummary.empty()) {
+ ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
+ ": ");
+ auto ReadSummaryFile =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
+
+ yaml::Input In(ReadSummaryFile->getBuffer());
+ In >> Summary;
+ ExitOnErr(errorCodeToError(In.error()));
+ }
+
+ bool Changed =
+ DevirtModule(
+ M, AARGetter,
+ ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
+ ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
+ .run();
+
+ if (!ClWriteSummary.empty()) {
+ ExitOnError ExitOnErr(
+ "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
+ std::error_code EC;
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ ExitOnErr(errorCodeToError(EC));
+
+ yaml::Output Out(OS);
+ Out << Summary;
+ }
+
+ return Changed;
+}
+
void DevirtModule::buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
@@ -443,9 +679,31 @@ bool DevirtModule::tryFindVirtualCallTargets(
return !TargetsForSlot.empty();
}
+void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
+ Constant *TheFn, bool &IsExported) {
+ auto Apply = [&](CallSiteInfo &CSInfo) {
+ for (auto &&VCallSite : CSInfo.CallSites) {
+ if (RemarksEnabled)
+ VCallSite.emitRemark("single-impl", TheFn->getName());
+ VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
+ TheFn, VCallSite.CS.getCalledValue()->getType()));
+ // This use is no longer unsafe.
+ if (VCallSite.NumUnsafeUses)
+ --*VCallSite.NumUnsafeUses;
+ }
+ if (CSInfo.isExported()) {
+ IsExported = true;
+ CSInfo.markDevirt();
+ }
+ };
+ Apply(SlotInfo.CSInfo);
+ for (auto &P : SlotInfo.ConstCSInfo)
+ Apply(P.second);
+}
+
bool DevirtModule::trySingleImplDevirt(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites) {
+ VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) {
// See if the program contains a single implementation of this virtual
// function.
Function *TheFn = TargetsForSlot[0].Fn;
@@ -453,39 +711,51 @@ bool DevirtModule::trySingleImplDevirt(
if (TheFn != Target.Fn)
return false;
+ // If so, update each call site to call that implementation directly.
if (RemarksEnabled)
TargetsForSlot[0].WasDevirt = true;
- // If so, update each call site to call that implementation directly.
- for (auto &&VCallSite : CallSites) {
- if (RemarksEnabled)
- VCallSite.emitRemark("single-impl", TheFn->getName());
- VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
- TheFn, VCallSite.CS.getCalledValue()->getType()));
- // This use is no longer unsafe.
- if (VCallSite.NumUnsafeUses)
- --*VCallSite.NumUnsafeUses;
+
+ bool IsExported = false;
+ applySingleImplDevirt(SlotInfo, TheFn, IsExported);
+ if (!IsExported)
+ return false;
+
+ // If the only implementation has local linkage, we must promote to external
+ // to make it visible to thin LTO objects. We can only get here during the
+ // ThinLTO export phase.
+ if (TheFn->hasLocalLinkage()) {
+ TheFn->setLinkage(GlobalValue::ExternalLinkage);
+ TheFn->setVisibility(GlobalValue::HiddenVisibility);
+ TheFn->setName(TheFn->getName() + "$merged");
}
+
+ Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
+ Res->SingleImplName = TheFn->getName();
+
return true;
}
bool DevirtModule::tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<ConstantInt *> Args) {
+ ArrayRef<uint64_t> Args) {
// Evaluate each function and store the result in each target's RetVal
// field.
for (VirtualCallTarget &Target : TargetsForSlot) {
if (Target.Fn->arg_size() != Args.size() + 1)
return false;
- for (unsigned I = 0; I != Args.size(); ++I)
- if (Target.Fn->getFunctionType()->getParamType(I + 1) !=
- Args[I]->getType())
- return false;
Evaluator Eval(M.getDataLayout(), nullptr);
SmallVector<Constant *, 2> EvalArgs;
EvalArgs.push_back(
Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0)));
- EvalArgs.insert(EvalArgs.end(), Args.begin(), Args.end());
+ for (unsigned I = 0; I != Args.size(); ++I) {
+ auto *ArgTy = dyn_cast<IntegerType>(
+ Target.Fn->getFunctionType()->getParamType(I + 1));
+ if (!ArgTy)
+ return false;
+ EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
+ }
+
Constant *RetVal;
if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) ||
!isa<ConstantInt>(RetVal))
@@ -495,9 +765,18 @@ bool DevirtModule::tryEvaluateFunctionsWithArgs(
return true;
}
+void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
+ uint64_t TheRetVal) {
+ for (auto Call : CSInfo.CallSites)
+ Call.replaceAndErase(
+ "uniform-ret-val", FnName, RemarksEnabled,
+ ConstantInt::get(cast<IntegerType>(Call.CS.getType()), TheRetVal));
+ CSInfo.markDevirt();
+}
+
bool DevirtModule::tryUniformRetValOpt(
- IntegerType *RetType, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites) {
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
+ WholeProgramDevirtResolution::ByArg *Res) {
// Uniform return value optimization. If all functions return the same
// constant, replace all calls with that constant.
uint64_t TheRetVal = TargetsForSlot[0].RetVal;
@@ -505,19 +784,77 @@ bool DevirtModule::tryUniformRetValOpt(
if (Target.RetVal != TheRetVal)
return false;
- auto TheRetValConst = ConstantInt::get(RetType, TheRetVal);
- for (auto Call : CallSites)
- Call.replaceAndErase("uniform-ret-val", TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, TheRetValConst);
+ if (CSInfo.isExported()) {
+ Res->TheKind = WholeProgramDevirtResolution::ByArg::UniformRetVal;
+ Res->Info = TheRetVal;
+ }
+
+ applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
if (RemarksEnabled)
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
return true;
}
+std::string DevirtModule::getGlobalName(VTableSlot Slot,
+ ArrayRef<uint64_t> Args,
+ StringRef Name) {
+ std::string FullName = "__typeid_";
+ raw_string_ostream OS(FullName);
+ OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
+ for (uint64_t Arg : Args)
+ OS << '_' << Arg;
+ OS << '_' << Name;
+ return OS.str();
+}
+
+void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, Constant *C) {
+ GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
+ getGlobalName(Slot, Args, Name), C, &M);
+ GA->setVisibility(GlobalValue::HiddenVisibility);
+}
+
+Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, unsigned AbsWidth) {
+ Constant *C = M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Ty);
+ auto *GV = dyn_cast<GlobalVariable>(C);
+ // We only need to set metadata if the global is newly created, in which
+ // case it would not have hidden visibility.
+ if (!GV || GV->getVisibility() == GlobalValue::HiddenVisibility)
+ return C;
+
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
+ auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
+ auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
+ GV->setMetadata(LLVMContext::MD_absolute_symbol,
+ MDNode::get(M.getContext(), {MinC, MaxC}));
+ };
+ if (AbsWidth == IntPtrTy->getBitWidth())
+ SetAbsRange(~0ull, ~0ull); // Full set.
+ else if (AbsWidth)
+ SetAbsRange(0, 1ull << AbsWidth);
+ return GV;
+}
+
+void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
+ bool IsOne,
+ Constant *UniqueMemberAddr) {
+ for (auto &&Call : CSInfo.CallSites) {
+ IRBuilder<> B(Call.CS.getInstruction());
+ Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
+ Call.VTable, UniqueMemberAddr);
+ Cmp = B.CreateZExt(Cmp, Call.CS->getType());
+ Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, Cmp);
+ }
+ CSInfo.markDevirt();
+}
+
bool DevirtModule::tryUniqueRetValOpt(
unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites) {
+ CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
+ VTableSlot Slot, ArrayRef<uint64_t> Args) {
// IsOne controls whether we look for a 0 or a 1.
auto tryUniqueRetValOptFor = [&](bool IsOne) {
const TypeMemberInfo *UniqueMember = nullptr;
@@ -533,16 +870,23 @@ bool DevirtModule::tryUniqueRetValOpt(
// checked for a uniform return value in tryUniformRetValOpt.
assert(UniqueMember);
- // Replace each call with the comparison.
- for (auto &&Call : CallSites) {
- IRBuilder<> B(Call.CS.getInstruction());
- Value *OneAddr = B.CreateBitCast(UniqueMember->Bits->GV, Int8PtrTy);
- OneAddr = B.CreateConstGEP1_64(OneAddr, UniqueMember->Offset);
- Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
- Call.VTable, OneAddr);
- Call.replaceAndErase("unique-ret-val", TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, Cmp);
+ Constant *UniqueMemberAddr =
+ ConstantExpr::getBitCast(UniqueMember->Bits->GV, Int8PtrTy);
+ UniqueMemberAddr = ConstantExpr::getGetElementPtr(
+ Int8Ty, UniqueMemberAddr,
+ ConstantInt::get(Int64Ty, UniqueMember->Offset));
+
+ if (CSInfo.isExported()) {
+ Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
+ Res->Info = IsOne;
+
+ exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
}
+
+ // Replace each call with the comparison.
+ applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
+ UniqueMemberAddr);
+
// Update devirtualization statistics for targets.
if (RemarksEnabled)
for (auto &&Target : TargetsForSlot)
@@ -560,9 +904,30 @@ bool DevirtModule::tryUniqueRetValOpt(
return false;
}
+void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
+ Constant *Byte, Constant *Bit) {
+ for (auto Call : CSInfo.CallSites) {
+ auto *RetType = cast<IntegerType>(Call.CS.getType());
+ IRBuilder<> B(Call.CS.getInstruction());
+ Value *Addr = B.CreateGEP(Int8Ty, Call.VTable, Byte);
+ if (RetType->getBitWidth() == 1) {
+ Value *Bits = B.CreateLoad(Addr);
+ Value *BitsAndBit = B.CreateAnd(Bits, Bit);
+ auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
+ Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
+ IsBitSet);
+ } else {
+ Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
+ Value *Val = B.CreateLoad(RetType, ValAddr);
+ Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled, Val);
+ }
+ }
+ CSInfo.markDevirt();
+}
+
bool DevirtModule::tryVirtualConstProp(
- MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<VirtualCallSite> CallSites) {
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res, VTableSlot Slot) {
// This only works if the function returns an integer.
auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType());
if (!RetType)
@@ -571,55 +936,38 @@ bool DevirtModule::tryVirtualConstProp(
if (BitWidth > 64)
return false;
- // Make sure that each function does not access memory, takes at least one
- // argument, does not use its first argument (which we assume is 'this'),
- // and has the same return type.
+ // Make sure that each function is defined, does not access memory, takes at
+ // least one argument, does not use its first argument (which we assume is
+ // 'this'), and has the same return type.
+ //
+ // Note that we test whether this copy of the function is readnone, rather
+ // than testing function attributes, which must hold for any copy of the
+ // function, even a less optimized version substituted at link time. This is
+ // sound because the virtual constant propagation optimizations effectively
+ // inline all implementations of the virtual function into each call site,
+ // rather than using function attributes to perform local optimization.
for (VirtualCallTarget &Target : TargetsForSlot) {
- if (!Target.Fn->doesNotAccessMemory() || Target.Fn->arg_empty() ||
- !Target.Fn->arg_begin()->use_empty() ||
+ if (Target.Fn->isDeclaration() ||
+ computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) !=
+ MAK_ReadNone ||
+ Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() ||
Target.Fn->getReturnType() != RetType)
return false;
}
- // Group call sites by the list of constant arguments they pass.
- // The comparator ensures deterministic ordering.
- struct ByAPIntValue {
- bool operator()(const std::vector<ConstantInt *> &A,
- const std::vector<ConstantInt *> &B) const {
- return std::lexicographical_compare(
- A.begin(), A.end(), B.begin(), B.end(),
- [](ConstantInt *AI, ConstantInt *BI) {
- return AI->getValue().ult(BI->getValue());
- });
- }
- };
- std::map<std::vector<ConstantInt *>, std::vector<VirtualCallSite>,
- ByAPIntValue>
- VCallSitesByConstantArg;
- for (auto &&VCallSite : CallSites) {
- std::vector<ConstantInt *> Args;
- if (VCallSite.CS.getType() != RetType)
- continue;
- for (auto &&Arg :
- make_range(VCallSite.CS.arg_begin() + 1, VCallSite.CS.arg_end())) {
- if (!isa<ConstantInt>(Arg))
- break;
- Args.push_back(cast<ConstantInt>(&Arg));
- }
- if (Args.size() + 1 != VCallSite.CS.arg_size())
- continue;
-
- VCallSitesByConstantArg[Args].push_back(VCallSite);
- }
-
- for (auto &&CSByConstantArg : VCallSitesByConstantArg) {
+ for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
continue;
- if (tryUniformRetValOpt(RetType, TargetsForSlot, CSByConstantArg.second))
+ WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
+ if (Res)
+ ResByArg = &Res->ResByArg[CSByConstantArg.first];
+
+ if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
continue;
- if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second))
+ if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
+ ResByArg, Slot, CSByConstantArg.first))
continue;
// Find an allocation offset in bits in all vtables associated with the
@@ -659,26 +1007,20 @@ bool DevirtModule::tryVirtualConstProp(
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
- // Rewrite each call to a load from OffsetByte/OffsetBit.
- for (auto Call : CSByConstantArg.second) {
- IRBuilder<> B(Call.CS.getInstruction());
- Value *Addr = B.CreateConstGEP1_64(Call.VTable, OffsetByte);
- if (BitWidth == 1) {
- Value *Bits = B.CreateLoad(Addr);
- Value *Bit = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
- Value *BitsAndBit = B.CreateAnd(Bits, Bit);
- auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
- Call.replaceAndErase("virtual-const-prop-1-bit",
- TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, IsBitSet);
- } else {
- Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
- Value *Val = B.CreateLoad(RetType, ValAddr);
- Call.replaceAndErase("virtual-const-prop",
- TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, Val);
- }
+ Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
+ Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
+
+ if (CSByConstantArg.second.isExported()) {
+ ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
+ exportGlobal(Slot, CSByConstantArg.first, "byte",
+ ConstantExpr::getIntToPtr(ByteConst, Int8PtrTy));
+ exportGlobal(Slot, CSByConstantArg.first, "bit",
+ ConstantExpr::getIntToPtr(BitConst, Int8PtrTy));
}
+
+ // Rewrite each call to a load from OffsetByte/OffsetBit.
+ applyVirtualConstProp(CSByConstantArg.second,
+ TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
}
return true;
}
@@ -733,7 +1075,11 @@ bool DevirtModule::areRemarksEnabled() {
if (FL.empty())
return false;
const Function &Fn = FL.front();
- auto DI = OptimizationRemark(DEBUG_TYPE, Fn, DebugLoc(), "");
+
+ const auto &BBL = Fn.getBasicBlockList();
+ if (BBL.empty())
+ return false;
+ auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
return DI.isEnabled();
}
@@ -766,8 +1112,8 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
if (SeenPtrs.insert(Ptr).second) {
for (DevirtCallSite Call : DevirtCalls) {
- CallSlots[{TypeId, Call.Offset}].push_back(
- {CI->getArgOperand(0), Call.CS, nullptr});
+ CallSlots[{TypeId, Call.Offset}].addCallSite(CI->getArgOperand(0),
+ Call.CS, nullptr);
}
}
}
@@ -853,14 +1199,79 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
if (HasNonCallUses)
++NumUnsafeUses;
for (DevirtCallSite Call : DevirtCalls) {
- CallSlots[{TypeId, Call.Offset}].push_back(
- {Ptr, Call.CS, &NumUnsafeUses});
+ CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS,
+ &NumUnsafeUses);
}
CI->eraseFromParent();
}
}
+void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
+ const TypeIdSummary *TidSummary =
+ ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
+ if (!TidSummary)
+ return;
+ auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
+ if (ResI == TidSummary->WPDRes.end())
+ return;
+ const WholeProgramDevirtResolution &Res = ResI->second;
+
+ if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
+ // The type of the function in the declaration is irrelevant because every
+ // call site will cast it to the correct type.
+ auto *SingleImpl = M.getOrInsertFunction(
+ Res.SingleImplName, Type::getVoidTy(M.getContext()));
+
+ // This is the import phase so we should not be exporting anything.
+ bool IsExported = false;
+ applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
+ assert(!IsExported);
+ }
+
+ for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
+ auto I = Res.ResByArg.find(CSByConstantArg.first);
+ if (I == Res.ResByArg.end())
+ continue;
+ auto &ResByArg = I->second;
+ // FIXME: We should figure out what to do about the "function name" argument
+ // to the apply* functions, as the function names are unavailable during the
+ // importing phase. For now we just pass the empty string. This does not
+ // impact correctness because the function names are just used for remarks.
+ switch (ResByArg.TheKind) {
+ case WholeProgramDevirtResolution::ByArg::UniformRetVal:
+ applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
+ break;
+ case WholeProgramDevirtResolution::ByArg::UniqueRetVal: {
+ Constant *UniqueMemberAddr =
+ importGlobal(Slot, CSByConstantArg.first, "unique_member");
+ applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
+ UniqueMemberAddr);
+ break;
+ }
+ case WholeProgramDevirtResolution::ByArg::VirtualConstProp: {
+ Constant *Byte = importGlobal(Slot, CSByConstantArg.first, "byte", 32);
+ Byte = ConstantExpr::getPtrToInt(Byte, Int32Ty);
+ Constant *Bit = importGlobal(Slot, CSByConstantArg.first, "bit", 8);
+ Bit = ConstantExpr::getPtrToInt(Bit, Int8Ty);
+ applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
+ }
+ default:
+ break;
+ }
+ }
+}
+
+void DevirtModule::removeRedundantTypeTests() {
+ auto True = ConstantInt::getTrue(M.getContext());
+ for (auto &&U : NumUnsafeUsesForTypeTest) {
+ if (U.second == 0) {
+ U.first->replaceAllUsesWith(True);
+ U.first->eraseFromParent();
+ }
+ }
+}
+
bool DevirtModule::run() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
@@ -868,7 +1279,11 @@ bool DevirtModule::run() {
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
- if ((!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
+ // Normally if there are no users of the devirtualization intrinsics in the
+ // module, this pass has nothing to do. But if we are exporting, we also need
+ // to handle any users that appear only in the function summaries.
+ if (!ExportSummary &&
+ (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
AssumeFunc->use_empty()) &&
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
return false;
@@ -879,6 +1294,17 @@ bool DevirtModule::run() {
if (TypeCheckedLoadFunc)
scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
+ if (ImportSummary) {
+ for (auto &S : CallSlots)
+ importResolution(S.first, S.second);
+
+ removeRedundantTypeTests();
+
+ // The rest of the code is only necessary when exporting or during regular
+ // LTO, so we are done.
+ return true;
+ }
+
// Rebuild type metadata into a map for easy lookup.
std::vector<VTableBits> Bits;
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
@@ -886,6 +1312,53 @@ bool DevirtModule::run() {
if (TypeIdMap.empty())
return true;
+ // Collect information from summary about which calls to try to devirtualize.
+ if (ExportSummary) {
+ DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
+ for (auto &P : TypeIdMap) {
+ if (auto *TypeId = dyn_cast<MDString>(P.first))
+ MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
+ TypeId);
+ }
+
+ for (auto &P : *ExportSummary) {
+ for (auto &S : P.second) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ // FIXME: Only add live functions.
+ for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VF.GUID]) {
+ CallSlots[{MD, VF.Offset}].CSInfo.SummaryHasTypeTestAssumeUsers =
+ true;
+ }
+ }
+ for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VF.GUID]) {
+ CallSlots[{MD, VF.Offset}]
+ .CSInfo.SummaryTypeCheckedLoadUsers.push_back(FS);
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_test_assume_const_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
+ CallSlots[{MD, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .SummaryHasTypeTestAssumeUsers = true;
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_checked_load_const_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
+ CallSlots[{MD, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .SummaryTypeCheckedLoadUsers.push_back(FS);
+ }
+ }
+ }
+ }
+ }
+
// For each (type, offset) pair:
bool DidVirtualConstProp = false;
std::map<std::string, Function*> DevirtTargets;
@@ -894,19 +1367,39 @@ bool DevirtModule::run() {
// function implementation at offset S.first.ByteOffset, and add to
// TargetsForSlot.
std::vector<VirtualCallTarget> TargetsForSlot;
- if (!tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
- S.first.ByteOffset))
- continue;
-
- if (!trySingleImplDevirt(TargetsForSlot, S.second) &&
- tryVirtualConstProp(TargetsForSlot, S.second))
+ if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
+ S.first.ByteOffset)) {
+ WholeProgramDevirtResolution *Res = nullptr;
+ if (ExportSummary && isa<MDString>(S.first.TypeID))
+ Res = &ExportSummary
+ ->getOrInsertTypeIdSummary(
+ cast<MDString>(S.first.TypeID)->getString())
+ .WPDRes[S.first.ByteOffset];
+
+ if (!trySingleImplDevirt(TargetsForSlot, S.second, Res) &&
+ tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first))
DidVirtualConstProp = true;
- // Collect functions devirtualized at least for one call site for stats.
- if (RemarksEnabled)
- for (const auto &T : TargetsForSlot)
- if (T.WasDevirt)
- DevirtTargets[T.Fn->getName()] = T.Fn;
+ // Collect functions devirtualized at least for one call site for stats.
+ if (RemarksEnabled)
+ for (const auto &T : TargetsForSlot)
+ if (T.WasDevirt)
+ DevirtTargets[T.Fn->getName()] = T.Fn;
+ }
+
+ // CFI-specific: if we are exporting and any llvm.type.checked.load
+ // intrinsics were *not* devirtualized, we need to add the resulting
+ // llvm.type.test intrinsics to the function summaries so that the
+ // LowerTypeTests pass will export them.
+ if (ExportSummary && isa<MDString>(S.first.TypeID)) {
+ auto GUID =
+ GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
+ for (auto FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
+ FS->addTypeTest(GUID);
+ for (auto &CCS : S.second.ConstCSInfo)
+ for (auto FS : CCS.second.SummaryTypeCheckedLoadUsers)
+ FS->addTypeTest(GUID);
+ }
}
if (RemarksEnabled) {
@@ -914,23 +1407,12 @@ bool DevirtModule::run() {
for (const auto &DT : DevirtTargets) {
Function *F = DT.second;
DISubprogram *SP = F->getSubprogram();
- DebugLoc DL = SP ? DebugLoc::get(SP->getScopeLine(), 0, SP) : DebugLoc();
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, DL,
+ emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, SP,
Twine("devirtualized ") + F->getName());
}
}
- // If we were able to eliminate all unsafe uses for a type checked load,
- // eliminate the type test by replacing it with true.
- if (TypeCheckedLoadFunc) {
- auto True = ConstantInt::getTrue(M.getContext());
- for (auto &&U : NumUnsafeUsesForTypeTest) {
- if (U.second == 0) {
- U.first->replaceAllUsesWith(True);
- U.first->eraseFromParent();
- }
- }
- }
+ removeRedundantTypeTests();
// Rebuild each global we touched as part of virtual constant propagation to
// include the before and after bytes.