path: root/lib/Transforms/IPO
author     Dimitry Andric <dim@FreeBSD.org>    2015-05-27 18:44:32 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2015-05-27 18:44:32 +0000
commit     5a5ac124e1efaf208671f01c46edb15f29ed2a0b (patch)
tree       a6140557876943cdd800ee997c9317283394b22c /lib/Transforms/IPO
parent     f03b5bed27d0d2eafd68562ce14f8b5e3f1f0801 (diff)
download   src-5a5ac124e1efaf208671f01c46edb15f29ed2a0b.tar.gz
           src-5a5ac124e1efaf208671f01c46edb15f29ed2a0b.zip
Vendor import of llvm trunk r238337 (vendor/llvm/llvm-trunk-r238337)
Notes:
    svn path=/vendor/llvm/dist/; revision=283625
    svn path=/vendor/llvm/llvm-trunk-r238337/; revision=283626; tag=vendor/llvm/llvm-trunk-r238337
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp        108
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt                 5
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp             25
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp  198
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp             18
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp                 26
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                125
-rw-r--r--  lib/Transforms/IPO/IPO.cpp                        3
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp                  143
-rw-r--r--  lib/Transforms/IPO/LLVMBuild.txt                  2
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp              2
-rw-r--r--  lib/Transforms/IPO/LowerBitSets.cpp             732
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp            66
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp           10
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp       110
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp                    4
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp              24
17 files changed, 1197 insertions, 404 deletions
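
A recurring pattern in the diffs below: DataLayout is now mandatory and is fetched by reference from the Module, so the passes drop their nullable const DataLayout * members and the DataLayoutPass lookups. A minimal standalone sketch of the new access pattern (illustrative only, not part of the patch):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Mandatory DataLayout: obtained from the enclosing Module, never null.
static bool typeHasPadding(const Function &F, Type *Ty) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (!Ty->isSized())
    return true; // no size information, be conservative
  // Example: x86_fp80 has an 80-bit store size but a 128-bit alloc size.
  return DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty);
}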
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 328202293867..7b7672d0edfe 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -36,6 +36,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -69,16 +70,15 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override;
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) {
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
}
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
- const DataLayout *DL;
private:
- bool isDenselyPacked(Type *type);
+ bool isDenselyPacked(Type *type, const DataLayout &DL);
bool canPaddingBeAccessed(Argument *Arg);
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
@@ -90,7 +90,7 @@ namespace {
bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
- DenseMap<const Function *, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
};
}
@@ -109,9 +109,6 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false, LocalChange;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
@@ -128,7 +125,7 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
}
/// \brief Checks if a type could have padding bytes.
-bool ArgPromotion::isDenselyPacked(Type *type) {
+bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
// There is no size information, so be conservative.
if (!type->isSized())
@@ -136,7 +133,7 @@ bool ArgPromotion::isDenselyPacked(Type *type) {
// If the alloc size is not equal to the storage size, then there are padding
// bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
- if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type))
+ if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
return false;
if (!isa<CompositeType>(type))
@@ -144,19 +141,20 @@ bool ArgPromotion::isDenselyPacked(Type *type) {
// For homogenous sequential types, check for padding within members.
if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
- return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType());
+ return isa<PointerType>(seqTy) ||
+ isDenselyPacked(seqTy->getElementType(), DL);
// Check for padding within and between elements of a struct.
StructType *StructTy = cast<StructType>(type);
- const StructLayout *Layout = DL->getStructLayout(StructTy);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
uint64_t StartPos = 0;
for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
Type *ElTy = StructTy->getElementType(i);
- if (!isDenselyPacked(ElTy))
+ if (!isDenselyPacked(ElTy, DL))
return false;
if (StartPos != Layout->getElementOffsetInBits(i))
return false;
- StartPos += DL->getTypeAllocSizeInBits(ElTy);
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
}
return true;
@@ -210,6 +208,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// Make sure that it is local to this module.
if (!F || !F->hasLocalLinkage()) return nullptr;
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg()) return nullptr;
+
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument*, 16> PointerArgs;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
@@ -230,12 +235,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
isSelfRecursive = true;
}
- // Don't promote arguments for variadic functions. Adding, removing, or
- // changing non-pack parameters can change the classification of pack
- // parameters. Frontends encode that classification at the call site in the
- // IR, while in the callee the classification is determined dynamically based
- // on the number of registers consumed so far.
- if (F->isVarArg()) return nullptr;
+ const DataLayout &DL = F->getParent()->getDataLayout();
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
@@ -250,8 +250,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// packed or if we can prove the padding bytes are never accessed. This does
// not apply to inalloca.
bool isSafeToPromote =
- PtrArg->hasByValAttr() &&
- (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg));
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
if (isSafeToPromote) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
@@ -310,9 +310,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
- const DataLayout *DL) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
+ const DataLayout &DL = Callee->getParent()->getDataLayout();
unsigned ArgNo = Arg->getArgNo();
@@ -322,7 +322,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
CallSite CS(U);
assert(CS && "Should only have direct calls!");
- if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL))
+ if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
return false;
}
return true;
@@ -430,7 +430,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL))
+ if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -561,8 +561,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// Now check every path from the entry block to the load for transparency.
// To do this, we perform a depth first search on the inverse CFG from the
// loading block.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
if (AA.canBasicBlockModify(*TranspBB, Loc))
return false;
@@ -587,7 +586,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
FunctionType *FTy = F->getFunctionType();
std::vector<Type*> Params;
- typedef std::set<IndicesVector> ScalarizeTable;
+ typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
// ScalarizedElements - If we are promoting a pointer that has elements
// accessed out of it, keep track of which elements are accessed so that we
@@ -624,8 +623,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- Params.push_back(STy->getElementType(i));
+ Params.insert(Params.end(), STy->element_begin(), STy->element_end());
++NumByValArgsPromoted;
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
@@ -648,7 +646,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ScalarizeTable &ArgIndices = ScalarizedElements[I];
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
- assert(isa<LoadInst>(UI) || isa<GetElementPtrInst>(UI));
+ Type *SrcTy;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ SrcTy = L->getType();
+ else
+ SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
IndicesVector Indices;
Indices.reserve(UI->getNumOperands() - 1);
// Since loads will only have a single operand, and GEPs only a single
@@ -660,7 +662,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// GEPs with a single 0 index can be merged with direct loads
if (Indices.size() == 1 && Indices.front() == 0)
Indices.clear();
- ArgIndices.insert(Indices);
+ ArgIndices.insert(std::make_pair(SrcTy, Indices));
LoadInst *OrigLoad;
if (LoadInst *L = dyn_cast<LoadInst>(UI))
OrigLoad = L;
@@ -674,11 +676,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
// not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
+ Params.push_back(GetElementPtrInst::getIndexedType(
+ cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+ SI->second));
assert(Params.back());
}
- if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
++NumArgumentsPromoted;
else
++NumAggregatesPromoted;
@@ -702,8 +706,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(F);
if (DI != FunctionDIs.end()) {
- DISubprogram SP = DI->second;
- SP.replaceFunction(NF);
+ DISubprogram *SP = DI->second;
+ SP->replaceFunction(NF);
// Ensure the map is updated so it can be reused on subsequent argument
// promotions of the same function.
FunctionDIs.erase(DI);
@@ -769,9 +773,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
- (*AI)->getName()+"."+utostr(i),
- Call);
+ Value *Idx = GetElementPtrInst::Create(
+ STy, *AI, Idxs, (*AI)->getName() + "." + utostr(i), Call);
// TODO: Tell AA about the new values?
Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
}
@@ -784,12 +787,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)];
- if (!SI->empty()) {
- Ops.reserve(SI->size());
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)];
+ if (!SI->second.empty()) {
+ Ops.reserve(SI->second.size());
Type *ElTy = V->getType();
- for (IndicesVector::const_iterator II = SI->begin(),
- IE = SI->end(); II != IE; ++II) {
+ for (IndicesVector::const_iterator II = SI->second.begin(),
+ IE = SI->second.end();
+ II != IE; ++II) {
// Use i32 to index structs, and i64 for others (pointers/arrays).
// This satisfies GEP constraints.
Type *IdxTy = (ElTy->isStructTy() ?
@@ -800,7 +804,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
}
// And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
+ V = GetElementPtrInst::Create(SI->first, V, Ops,
+ V->getName() + ".idx", Call);
Ops.clear();
AA.copyValue(OrigLoad->getOperand(0), V);
}
@@ -858,7 +863,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Update the callgraph to know that the callsite has been transformed.
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
- CalleeNode->replaceCallEdge(Call, New, NF_CGN);
+ CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
if (!Call->use_empty()) {
Call->replaceAllUsesWith(New);
@@ -904,10 +909,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx =
- GetElementPtrInst::Create(TheAlloca, Idxs,
- TheAlloca->getName()+"."+Twine(i),
- InsertPt);
+ Value *Idx = GetElementPtrInst::Create(
+ AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
+ InsertPt);
I2->setName(I->getName()+"."+Twine(i));
new StoreInst(I2++, Idx, InsertPt);
}
@@ -940,7 +944,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
while (!I->use_empty()) {
if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
- assert(ArgIndices.begin()->empty() &&
+ assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
I2->setName(I->getName()+".val");
LI->replaceAllUsesWith(I2);
@@ -962,7 +966,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Function::arg_iterator TheArg = I2;
for (ScalarizeTable::iterator It = ArgIndices.begin();
- *It != Operands; ++It, ++TheArg) {
+ It->second != Operands; ++It, ++TheArg) {
assert(It != ArgIndices.end() && "GEP not handled??");
}
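
Besides the DataLayout change, the ArgumentPromotion.cpp hunks above adopt the other cross-cutting API change in this import: GetElementPtrInst::Create now takes the source element type explicitly instead of deriving it from the pointer operand. A hedged standalone sketch of the new call shape (names are illustrative):

#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Build &Base[0].Field for a struct-typed byval argument the way the
// promoted call sites above now do: the pointee type STy is passed
// explicitly as the first argument of Create().
static Value *gepToField(StructType *STy, Value *Base, unsigned Field,
                         Instruction *InsertBefore) {
  LLVMContext &Ctx = STy->getContext();
  Value *Idxs[] = {ConstantInt::get(Type::getInt32Ty(Ctx), 0),
                   ConstantInt::get(Type::getInt32Ty(Ctx), Field)};
  return GetElementPtrInst::Create(STy, Base, Idxs,
                                   Base->getName() + "." + Twine(Field),
                                   InsertBefore);
}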
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 90c1c33e6dca..3df17b920a95 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -14,12 +14,17 @@ add_llvm_library(LLVMipo
Inliner.cpp
Internalize.cpp
LoopExtractor.cpp
+ LowerBitSets.cpp
MergeFunctions.cpp
PartialInlining.cpp
PassManagerBuilder.cpp
PruneEH.cpp
StripDeadPrototypes.cpp
StripSymbols.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/IPO
)
add_dependencies(LLVMipo intrinsics_gen)
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 0b6ade9eb536..8ce7646621ff 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -52,7 +52,6 @@ namespace {
// alignment to a concrete value.
unsigned getAlignment(GlobalVariable *GV) const;
- const DataLayout *DL;
};
}
@@ -89,32 +88,22 @@ static bool IsBetterCanonical(const GlobalVariable &A,
return A.hasUnnamedAddr();
}
-bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
- return DL || GV->getAlignment() != 0;
-}
-
unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
unsigned Align = GV->getAlignment();
if (Align)
return Align;
- if (DL)
- return DL->getPreferredAlignment(GV);
- return 0;
+ return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
bool ConstantMerge::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
-
- // Map unique <constants, has-unknown-alignment> pairs to globals. We don't
- // want to merge globals of unknown alignment with those of explicit
- // alignment. If we have DataLayout, we always know the alignment.
- DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
+
+ // Map unique constants to globals.
+ DenseMap<Constant *, GlobalVariable *> CMap;
// Replacements - This vector contains a list of replacements to perform.
SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
@@ -156,8 +145,7 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
- GlobalVariable *&Slot = CMap[Pair];
+ GlobalVariable *&Slot = CMap[Init];
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
@@ -188,8 +176,7 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
- GlobalVariable *Slot = CMap[Pair];
+ GlobalVariable *Slot = CMap[Init];
if (!Slot || Slot == GV)
continue;
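
With DataLayout always available, ConstantMerge.cpp above drops the <constant, has-known-alignment> pair key and keys the map on the constant alone, since a preferred alignment can always be computed. A small illustrative sketch of that fallback, mirroring the getAlignment hunk (not part of the patch):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// An explicit alignment on the global wins; otherwise ask the module's
// DataLayout, which is now guaranteed to exist.
static unsigned preferredAlignmentOf(const GlobalVariable *GV) {
  if (unsigned Align = GV->getAlignment())
    return Align;
  return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}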
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4045c09aaa2b..76898f275058 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -73,8 +73,8 @@ namespace {
}
std::string getDescription() const {
- return std::string((IsArg ? "Argument #" : "Return value #"))
- + utostr(Idx) + " of function " + F->getName().str();
+ return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) +
+ " of function " + F->getName()).str();
}
};
@@ -127,7 +127,7 @@ namespace {
// As the code generation for module is finished (and DIBuilder is
// finalized) we assume that subprogram descriptors won't be changed, and
// they are stored in map for short duration anyway.
- DenseMap<const Function *, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
protected:
// DAH uses this to specify a different ID.
@@ -146,7 +146,7 @@ namespace {
private:
Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
- unsigned RetValNum = 0);
+ unsigned RetValNum = -1U);
Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
void SurveyFunction(const Function &F);
@@ -303,8 +303,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(&Fn);
if (DI != FunctionDIs.end()) {
- DISubprogram SP = DI->second;
- SP.replaceFunction(NF);
+ DISubprogram *SP = DI->second;
+ SP->replaceFunction(NF);
// Ensure the map is updated so it can be reused on non-varargs argument
// eliminations of the same function.
FunctionDIs.erase(DI);
@@ -387,14 +387,32 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
static unsigned NumRetVals(const Function *F) {
- if (F->getReturnType()->isVoidTy())
+ Type *RetTy = F->getReturnType();
+ if (RetTy->isVoidTy())
return 0;
- else if (StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ else if (StructType *STy = dyn_cast<StructType>(RetTy))
return STy->getNumElements();
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getNumElements();
else
return 1;
}
+/// Returns the sub-type a function will return at a given Idx. Should
+/// correspond to the result type of an ExtractValue instruction executed with
+/// just that one Idx (i.e. only top-level structure is considered).
+static Type *getRetComponentType(const Function *F, unsigned Idx) {
+ Type *RetTy = F->getReturnType();
+ assert(!RetTy->isVoidTy() && "void type has no subtype");
+
+ if (StructType *STy = dyn_cast<StructType>(RetTy))
+ return STy->getElementType(Idx);
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getElementType();
+ else
+ return RetTy;
+}
+
/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
/// liveness of Use.
@@ -425,9 +443,24 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
// function's return value is live. We use RetValNum here, for the case
// that U is really a use of an insertvalue instruction that uses the
// original Use.
- RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
- // We might be live, depending on the liveness of Use.
- return MarkIfNotLive(Use, MaybeLiveUses);
+ const Function *F = RI->getParent()->getParent();
+ if (RetValNum != -1U) {
+ RetOrArg Use = CreateRet(F, RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ } else {
+ DAE::Liveness Result = MaybeLive;
+ for (unsigned i = 0; i < NumRetVals(F); ++i) {
+ RetOrArg Use = CreateRet(F, i);
+ // We might be live, depending on the liveness of Use. If any
+ // sub-value is live, then the entire value is considered live. This
+ // is a conservative choice, and better tracking is possible.
+ DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses);
+ if (Result != Live)
+ Result = SubResult;
+ }
+ return Result;
+ }
}
if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
@@ -449,7 +482,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
return Result;
}
- if (ImmutableCallSite CS = V) {
+ if (auto CS = ImmutableCallSite(V)) {
const Function *F = CS.getCalledFunction();
if (F) {
// Used in a direct call.
@@ -541,7 +574,6 @@ void DAE::SurveyFunction(const Function &F) {
// Keep track of the number of live retvals, so we can skip checks once all
// of them turn out to be live.
unsigned NumLiveRetVals = 0;
- Type *STy = dyn_cast<StructType>(F.getReturnType());
// Loop all uses of the function.
for (const Use &U : F.uses()) {
// If the function is PASSED IN as an argument, its address has been
@@ -563,34 +595,35 @@ void DAE::SurveyFunction(const Function &F) {
// Now, check how our return value(s) is/are used in this caller. Don't
// bother checking return values if all of them are live already.
- if (NumLiveRetVals != RetCount) {
- if (STy) {
- // Check all uses of the return value.
- for (const User *U : TheCall->users()) {
- const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U);
- if (Ext && Ext->hasIndices()) {
- // This use uses a part of our return value, survey the uses of
- // that part and store the results for this index only.
- unsigned Idx = *Ext->idx_begin();
- if (RetValLiveness[Idx] != Live) {
- RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
- if (RetValLiveness[Idx] == Live)
- NumLiveRetVals++;
- }
- } else {
- // Used by something else than extractvalue. Mark all return
- // values as live.
- for (unsigned i = 0; i != RetCount; ++i )
- RetValLiveness[i] = Live;
- NumLiveRetVals = RetCount;
- break;
- }
+ if (NumLiveRetVals == RetCount)
+ continue;
+
+ // Check all uses of the return value.
+ for (const Use &U : TheCall->uses()) {
+ if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
}
} else {
- // Single return value
- RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
- if (RetValLiveness[0] == Live)
+ // Used by something else than extractvalue. Survey, but assume that the
+ // result applies to all sub-values.
+ UseVector MaybeLiveAggregateUses;
+ if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
NumLiveRetVals = RetCount;
+ RetValLiveness.assign(RetCount, Live);
+ break;
+ } else {
+ for (unsigned i = 0; i != RetCount; ++i) {
+ if (RetValLiveness[i] != Live)
+ MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(),
+ MaybeLiveAggregateUses.end());
+ }
+ }
}
}
}
@@ -775,39 +808,29 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (RetTy->isVoidTy() || HasLiveReturnedArg) {
NRetTy = RetTy;
} else {
- StructType *STy = dyn_cast<StructType>(RetTy);
- if (STy)
- // Look at each of the original return values individually.
- for (unsigned i = 0; i != RetCount; ++i) {
- RetOrArg Ret = CreateRet(F, i);
- if (LiveValues.erase(Ret)) {
- RetTypes.push_back(STy->getElementType(i));
- NewRetIdxs[i] = RetTypes.size() - 1;
- } else {
- ++NumRetValsEliminated;
- DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
- << F->getName() << "\n");
- }
- }
- else
- // We used to return a single value.
- if (LiveValues.erase(CreateRet(F, 0))) {
- RetTypes.push_back(RetTy);
- NewRetIdxs[0] = 0;
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(getRetComponentType(F, i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
} else {
- DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
- << "\n");
++NumRetValsEliminated;
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
+ }
+ }
+ if (RetTypes.size() > 1) {
+ // More than one return type? Reduce it down to size.
+ if (StructType *STy = dyn_cast<StructType>(RetTy)) {
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+ } else {
+ assert(isa<ArrayType>(RetTy) && "unexpected multi-value return");
+ NRetTy = ArrayType::get(RetTypes[0], RetTypes.size());
}
- if (RetTypes.size() > 1)
- // More than one return type? Return a struct with them. Also, if we used
- // to return a struct and didn't change the number of return values,
- // return a struct again. This prevents changing {something} into
- // something and {} into void.
- // Make the new struct packed if we used to return a packed struct
- // already.
- NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
- else if (RetTypes.size() == 1)
+ } else if (RetTypes.size() == 1)
// One return type? Just a simple value then, but only if we didn't use to
// return a struct with that simple value before.
NRetTy = RetTypes.front();
@@ -826,17 +849,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs =
- AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex,
- AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- removeAttributes(AttributeFuncs::
- typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex));
+ RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NRetTy));
else
assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- hasAttributes(AttributeFuncs::
- typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex) &&
+ overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
"Return attributes no longer compatible?");
if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
@@ -880,13 +898,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
AttributeSet RAttrs = CallPAL.getRetAttributes();
// Adjust in case the function was changed to return void.
- RAttrs =
- AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex,
- AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- removeAttributes(AttributeFuncs::
- typeIncompatible(NF->getReturnType(),
- AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex));
+ RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NF->getReturnType()));
if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
@@ -959,9 +973,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (!Call->getType()->isX86_MMXTy())
Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
} else {
- assert(RetTy->isStructTy() &&
+ assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
"Return type changed, but not into a void. The old return type"
- " must have been a struct!");
+ " must have been a struct or an array!");
Instruction *InsertPt = Call;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
BasicBlock::iterator IP = II->getNormalDest()->begin();
@@ -969,9 +983,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
InsertPt = IP;
}
- // We used to return a struct. Instead of doing smart stuff with all the
- // uses of this struct, we will just rebuild it using
- // extract/insertvalue chaining and let instcombine clean that up.
+ // We used to return a struct or array. Instead of doing smart stuff
+ // with all the uses, we will just rebuild it using extract/insertvalue
+ // chaining and let instcombine clean that up.
//
// Start out building up our return value from undef
Value *RetVal = UndefValue::get(RetTy);
@@ -1034,8 +1048,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (NFTy->getReturnType()->isVoidTy()) {
RetVal = nullptr;
} else {
- assert (RetTy->isStructTy());
- // The original return value was a struct, insert
+ assert(RetTy->isStructTy() || RetTy->isArrayTy());
+ // The original return value was a struct or array, insert
// extractvalue/insertvalue chains to extract only the values we need
// to return and insert them into our new result.
// This does generate messy code, but we'll let it to instcombine to
@@ -1069,7 +1083,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(F);
if (DI != FunctionDIs.end())
- DI->second.replaceFunction(NF);
+ DI->second->replaceFunction(NF);
// Now that the old function is dead, delete it.
F->eraseFromParent();
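
The DeadArgumentElimination.cpp changes above extend dead-return-value splitting from struct returns to array returns. A hedged sketch of how the two new helpers classify a return type (function names here are illustrative stand-ins for NumRetVals and getRetComponentType):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Number of independently tracked return slots: 0 for void, one per field
// for a struct return, one per element for an array return, else 1.
static unsigned numRetSlots(const Function *F) {
  Type *RetTy = F->getReturnType();
  if (RetTy->isVoidTy())
    return 0;
  if (auto *STy = dyn_cast<StructType>(RetTy))
    return STy->getNumElements();
  if (auto *ATy = dyn_cast<ArrayType>(RetTy))
    return ATy->getNumElements(); // e.g. [4 x i32] -> 4 slots
  return 1;
}

// Type of slot Idx, as a single-index extractvalue would see it.
static Type *retSlotType(const Function *F, unsigned Idx) {
  Type *RetTy = F->getReturnType();
  if (auto *STy = dyn_cast<StructType>(RetTy))
    return STy->getElementType(Idx);
  if (auto *ATy = dyn_cast<ArrayType>(RetTy))
    return ATy->getElementType(); // arrays are homogeneous
  return RetTy;
}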
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 823ae53f1e25..92e384a340a7 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -31,7 +31,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
#define DEBUG_TYPE "functionattrs"
@@ -124,7 +124,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -139,7 +139,7 @@ INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -703,10 +703,14 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
}
if (ReadAttr != Attribute::None) {
- AttrBuilder B;
+ AttrBuilder B, R;
B.addAttribute(ReadAttr);
+ R.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
+ // Clear out existing readonly/readnone attributes
+ A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R));
A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
Changed = true;
@@ -755,8 +759,8 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
}
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(RVI);
- for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- FlowsToReturn.insert(PN->getIncomingValue(i));
+ for (Value *IncValue : PN->incoming_values())
+ FlowsToReturn.insert(IncValue);
continue;
}
@@ -1702,7 +1706,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
AA = &getAnalysis<AliasAnalysis>();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = annotateLibraryCalls(SCC);
Changed |= AddReadAttrs(SCC);
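
TargetLibraryInfo moved from Target to Analysis, and FunctionAttrs.cpp above now reaches it through TargetLibraryInfoWrapperPass; GlobalOpt.cpp and Inliner.cpp below follow the same pattern. A minimal sketch of the plumbing for a legacy pass (the pass name is illustrative and registration is omitted):

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
// Skeleton legacy pass showing only the wrapper-pass plumbing.
struct DemoTLIUser : public ModulePass {
  static char ID;
  DemoTLIUser() : ModulePass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Depend on the wrapper pass rather than on TargetLibraryInfo itself.
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool runOnModule(Module &M) override {
    // Unwrap the analysis result with getTLI().
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    (void)M;
    (void)TLI;
    return false;
  }
};
char DemoTLIUser::ID = 0;
}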
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 0c844fe70650..ba04c80508c4 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
+#include <unordered_map>
using namespace llvm;
#define DEBUG_TYPE "globaldce"
@@ -47,6 +48,7 @@ namespace {
private:
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
SmallPtrSet<Constant *, 8> SeenConstants;
+ std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
@@ -78,6 +80,17 @@ bool GlobalDCE::runOnModule(Module &M) {
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+ // Collect the set of members for each comdat.
+ for (Function &F : M)
+ if (Comdat *C = F.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &F));
+ for (GlobalVariable &GV : M.globals())
+ if (Comdat *C = GV.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GV));
+ for (GlobalAlias &GA : M.aliases())
+ if (Comdat *C = GA.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GA));
+
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
@@ -177,6 +190,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
SeenConstants.clear();
+ ComdatMembers.clear();
return Changed;
}
@@ -188,17 +202,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
if (!AliveGlobals.insert(G).second)
return;
- Module *M = G->getParent();
if (Comdat *C = G->getComdat()) {
- for (Function &F : *M)
- if (F.getComdat() == C)
- GlobalIsNeeded(&F);
- for (GlobalVariable &GV : M->globals())
- if (GV.getComdat() == C)
- GlobalIsNeeded(&GV);
- for (GlobalAlias &GA : M->aliases())
- if (GA.getComdat() == C)
- GlobalIsNeeded(&GA);
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ GlobalIsNeeded(CM.second);
}
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
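
GlobalDCE.cpp above replaces the per-global rescan of the whole module with a comdat membership multimap built once up front, so marking a live comdat member only walks that comdat's members. An illustrative sketch of the lookup, assuming the same std::unordered_multimap layout as the hunk:

#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/GlobalValue.h"
#include <unordered_map>
using namespace llvm;

typedef std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembersTy;

// Every member of a comdat becomes needed as soon as any one member is.
static void markComdatMembers(const ComdatMembersTy &ComdatMembers, Comdat *C,
                              void (*MarkNeeded)(GlobalValue *)) {
  for (auto &Entry : make_range(ComdatMembers.equal_range(C)))
    MarkNeeded(Entry.second);
}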
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 6e0ae8347bc0..cc4a79fa67de 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -38,7 +39,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -68,7 +68,7 @@ STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
namespace {
struct GlobalOpt : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -86,7 +86,6 @@ namespace {
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
- const DataLayout *DL;
TargetLibraryInfo *TLI;
SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
@@ -95,7 +94,7 @@ namespace {
char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
@@ -269,7 +268,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
bool Changed = false;
// Note that we need to use a weak value handle for the worklist items. When
@@ -318,8 +317,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// and will invalidate our notion of what Init is.
Constant *SubInit = nullptr;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
- ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
+ ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
+ ConstantFoldInstruction(GEP, DL, TLI));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -565,6 +564,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
Value *NewPtr = NewGlobals[Val];
+ Type *NewTy = NewGlobals[Val]->getValueType();
// Form a shorter GEP if needed.
if (GEP->getNumOperands() > 3) {
@@ -573,15 +573,16 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
Idxs.push_back(NullInt);
for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
Idxs.push_back(CE->getOperand(i));
- NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
+ NewPtr =
+ ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
} else {
GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
SmallVector<Value*, 8> Idxs;
Idxs.push_back(NullInt);
for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
Idxs.push_back(GEPI->getOperand(i));
- NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
- GEPI->getName()+"."+Twine(Val),GEPI);
+ NewPtr = GetElementPtrInst::Create(
+ NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI);
}
}
GEP->replaceAllUsesWith(NewPtr);
@@ -721,8 +722,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
else
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
- Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
- ConstantExpr::getGetElementPtr(NewV, Idxs));
+ Changed |= OptimizeAwayTrappingUsesOfValue(
+ GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -739,7 +740,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
bool Changed = false;
@@ -802,7 +803,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
+static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
TargetLibraryInfo *TLI) {
for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
@@ -822,12 +823,10 @@ static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
/// the specified malloc. Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc. Instead, turn the
/// malloc into a global, and any loads of GV as uses of the new global.
-static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
- CallInst *CI,
- Type *AllocTy,
- ConstantInt *NElements,
- const DataLayout *DL,
- TargetLibraryInfo *TLI) {
+static GlobalVariable *
+OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
+ ConstantInt *NElements, const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
Type *GlobalType;
@@ -1167,7 +1166,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
InsertedScalarizedValues,
PHIsToRewrite),
LI->getName()+".f"+Twine(FieldNo), LI);
- } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ } else {
+ PHINode *PN = cast<PHINode>(V);
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
@@ -1181,8 +1181,6 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
PN->getName()+".f"+Twine(FieldNo), PN);
Result = NewPN;
PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
- } else {
- llvm_unreachable("Unknown usable value");
}
return FieldVals[FieldNo] = Result;
@@ -1224,7 +1222,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
- Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
+ Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NGEPI);
GEPI->eraseFromParent();
@@ -1271,7 +1269,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value *NElems, const DataLayout *DL,
+ Value *NElems, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
Type *MAT = getMallocAllocatedType(CI, TLI);
@@ -1301,10 +1299,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
GV->getThreadLocalMode());
FieldGlobals.push_back(NGV);
- unsigned TypeSize = DL->getTypeAllocSize(FieldTy);
+ unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
- TypeSize = DL->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = DL->getIntPtrType(CI->getType());
+ TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, nullptr,
@@ -1459,16 +1457,12 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
/// pointer global variable with a single value stored it that is a malloc or
/// cast of malloc.
-static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
- CallInst *CI,
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
Type *AllocTy,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
- if (!DL)
- return false;
-
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1504,7 +1498,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
- if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) {
+ if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
return true;
}
@@ -1534,8 +1528,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = DL->getIntPtrType(CI->getType());
- unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes();
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
+ unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
@@ -1563,7 +1557,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1733,6 +1727,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
const GlobalStatus &GS) {
+ auto &DL = GV->getParent()->getDataLayout();
// If this is a first class global and has only one accessing function
// and this function is main (which we know is not recursive), we replace
// the global with a local alloca in this function.
@@ -1804,12 +1799,10 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
++NumMarked;
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
- if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) {
- const DataLayout &DL = DLP->getDataLayout();
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
- return true;
- }
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
}
} else if (GS.StoredType == GlobalStatus::StoredOnce) {
// If the initial value for the global was an undef value, and if only
@@ -1954,6 +1947,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
// Simplify the initializer.
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ auto &DL = M.getDataLayout();
Constant *New = ConstantFoldConstantExpression(CE, DL, TLI);
if (New && New != CE)
GV->setInitializer(New);
@@ -1971,9 +1965,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL);
-
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL);
/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
/// handled by the code generator. We don't want to generate something like:
@@ -1983,9 +1976,10 @@ isSimpleEnoughValueToCommit(Constant *C,
/// This function should be called if C was not found (but just got inserted)
/// in SimpleConstants to avoid having to rescan the same constants all the
/// time.
-static bool isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL) {
+static bool
+isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
// Simple global addresses are supported, do not allow dllimport or
// thread-local globals.
if (auto *GV = dyn_cast<GlobalValue>(C))
@@ -2019,8 +2013,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
case Instruction::PtrToInt:
// int <=> ptr is fine if the int type is the same size as the
// pointer type.
- if (!DL || DL->getTypeSizeInBits(CE->getType()) !=
- DL->getTypeSizeInBits(CE->getOperand(0)->getType()))
+ if (DL.getTypeSizeInBits(CE->getType()) !=
+ DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
return false;
return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
@@ -2042,8 +2036,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL) {
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
// If we already checked this constant, we win.
if (!SimpleConstants.insert(C).second)
return true;
@@ -2174,8 +2168,8 @@ namespace {
/// Once an evaluation call fails, the evaluation object should not be reused.
class Evaluator {
public:
- Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
- : DL(DL), TLI(TLI) {
+ Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI)
+ : DL(DL), TLI(TLI) {
ValueStack.emplace_back();
}
@@ -2249,7 +2243,7 @@ private:
/// simple enough to live in a static initializer of a global.
SmallPtrSet<Constant*, 8> SimpleConstants;
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
};
@@ -2345,7 +2339,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
Constant * const IdxList[] = {IdxZero, IdxZero};
- Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
+ Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
@@ -2409,8 +2403,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
i != e; ++i)
GEPOps.push_back(getVal(*i));
InstResult =
- ConstantExpr::getGetElementPtr(P, GEPOps,
- cast<GEPOperator>(GEP)->isInBounds());
+ ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
<< "\n");
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
@@ -2498,9 +2492,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Value *Ptr = PtrArg->stripPointerCasts();
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
- if (DL && !Size->isAllOnesValue() &&
+ if (!Size->isAllOnesValue() &&
Size->getValue().getLimitedValue() >=
- DL->getTypeStoreSize(ElemTy)) {
+ DL.getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
<< "\n");
@@ -2689,7 +2683,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
+static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Call the function.
Evaluator Eval(DL, TLI);
@@ -3040,9 +3034,8 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ auto &DL = M.getDataLayout();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool LocalChange = true;
while (LocalChange) {
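
GlobalOpt.cpp above also picks up the explicit-source-element-type form of ConstantExpr::getGetElementPtr; several hunks pass nullptr to keep deriving the type from the pointer operand. A short illustrative sketch of the new signature (not part of the patch):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Constant GEP to &GV[0][0] using the new signature: the source element
// type comes first; nullptr would ask LLVM to derive it from GV's type.
static Constant *constGEPToFirstElement(Type *ValueTy, Constant *GV) {
  LLVMContext &Ctx = GV->getContext();
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
  Constant *Idxs[] = {Zero, Zero};
  return ConstantExpr::getGetElementPtr(ValueTy, GV, Idxs);
}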
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index b4d31d8d6fc2..fcacec3286fa 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -16,7 +16,7 @@
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/IPO.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
@@ -36,6 +36,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
+ initializeLowerBitSetsPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
initializePruneEHPass(Registry);
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 66867437e1b7..8f65a983a813 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -29,7 +30,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -72,8 +72,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
InlineLimit : Threshold),
InsertLifetime(InsertLifetime) {}
-/// getAnalysisUsage - For this class, we declare that we require and preserve
-/// the call graph. If the derived class implements this method, it should
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
@@ -97,40 +97,31 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
AttributeSet::FunctionIndex,
B);
- AttributeSet CallerAttr = Caller->getAttributes(),
- CalleeAttr = Callee->getAttributes();
- if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ if (Callee->hasFnAttribute(Attribute::StackProtectReq)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectReq);
- } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectReq)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectStrong);
- } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtect) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong))
+ } else if (Callee->hasFnAttribute(Attribute::StackProtect) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectReq) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectStrong))
Caller->addFnAttr(Attribute::StackProtect);
}
-/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
-/// available from other functions inlined into the caller. If we are able to
+/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
- int InlineHistory, bool InsertLifetime,
- const DataLayout *DL) {
+ int InlineHistory, bool InsertLifetime) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
@@ -206,11 +197,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
unsigned Align1 = AI->getAlignment(),
Align2 = AvailableAlloca->getAlignment();
- // If we don't have data layout information, and only one alloca is using
- // the target default, then we can't safely merge them because we can't
- // pick the greater alignment.
- if (!DL && (!Align1 || !Align2) && Align1 != Align2)
- continue;
// The available alloca has to be in the right function, not in some other
// function in this SCC.
@@ -231,8 +217,8 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
if (Align1 != Align2) {
if (!Align1 || !Align2) {
- assert(DL && "DataLayout required to compare default alignments");
- unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType());
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+ unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
Align1 = Align1 ? Align1 : TypeAlign;
Align2 = Align2 ? Align2 : TypeAlign;
@@ -273,8 +259,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
- Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
+ Caller->hasFnAttribute(Attribute::OptimizeForSize);
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
OptSizeThreshold < thres)
thres = OptSizeThreshold;
@@ -283,17 +268,14 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::InlineHint);
- if (InlineHint && HintThreshold > thres
- && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize))
+ Callee->hasFnAttribute(Attribute::InlineHint);
+ if (InlineHint && HintThreshold > thres &&
+ !Caller->hasFnAttribute(Attribute::MinSize))
thres = HintThreshold;
// Listen to the cold attribute when it would decrease the threshold.
bool ColdCallee = Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Cold);
+ Callee->hasFnAttribute(Attribute::Cold);
// Command line argument for InlineLimit will override the default
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
// do not use the default cold threshold even if it is smaller.
@@ -312,8 +294,7 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) {
emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
}
-/// shouldInline - Return true if the inliner should attempt to inline
-/// at the given CallSite.
+/// Return true if the inliner should attempt to inline at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
InlineCost IC = getInlineCost(CS);
@@ -427,7 +408,7 @@ bool Inliner::shouldInline(CallSite CS) {
return true;
}
-/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
@@ -444,9 +425,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
SmallPtrSet<Function*, 8> SCCFunctions;
@@ -506,7 +486,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, DL, AA, ACT);
+ InlineFunctionInfo InlineInfo(&CG, AA, ACT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -564,7 +544,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime, DL)) {
+ InlineHistoryID, InsertLifetime)) {
emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
Twine(Callee->getName() +
" will not be inlined into " +
@@ -636,16 +616,30 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}
-// doFinalization - Remove now-dead linkonce functions at the end of
-// processing to avoid breaking the SCC traversal.
+/// Remove now-dead linkonce functions at the end of
+/// processing to avoid breaking the SCC traversal.
bool Inliner::doFinalization(CallGraph &CG) {
return removeDeadFunctions(CG);
}
-/// removeDeadFunctions - Remove dead functions that are not included in
-/// DNR (Do Not Remove) list.
+/// Remove dead functions that are not included in DNR (Do Not Remove) list.
bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
SmallVector<CallGraphNode*, 16> FunctionsToRemove;
+ SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
+ SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
+
+ auto RemoveCGN = [&](CallGraphNode *CGN) {
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+ // Remove any edges from the external node to the function's call graph
+    // node.  These edges might have been made irrelevant due to
+ // optimization of the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+    // Remove the node for the callee from the call graph and delete it.
+ FunctionsToRemove.push_back(CGN);
+ };
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
@@ -658,9 +652,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Handle the case when this function is called and we only want to care
// about always-inline functions. This is a bit of a hack to share code
// between here and the InlineAlways pass.
- if (AlwaysInlineOnly &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline))
+ if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
continue;
// If the only remaining users of the function are dead constants, remove
@@ -674,20 +666,45 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// without also dropping the other members of the COMDAT.
// The inliner doesn't visit non-function entities which are in COMDAT
// groups so it is unsafe to do so *unless* the linkage is local.
- if (!F->hasLocalLinkage() && F->hasComdat())
- continue;
-
- // Remove any call graph edges from the function to its callees.
- CGN->removeAllCalledFunctions();
+ if (!F->hasLocalLinkage()) {
+ if (const Comdat *C = F->getComdat()) {
+ --ComdatEntriesAlive[C];
+ DeadFunctionsInComdats.push_back(CGN);
+ continue;
+ }
+ }
- // Remove any edges from the external node to the function's call graph
- // node. These edges might have been made irrelegant due to
- // optimization of the program.
- CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+ RemoveCGN(CGN);
+ }
+ if (!DeadFunctionsInComdats.empty()) {
+ // Count up all the entities in COMDAT groups
+ auto ComdatGroupReferenced = [&](const Comdat *C) {
+ auto I = ComdatEntriesAlive.find(C);
+ if (I != ComdatEntriesAlive.end())
+ ++(I->getSecond());
+ };
+ for (const Function &F : CG.getModule())
+ if (const Comdat *C = F.getComdat())
+ ComdatGroupReferenced(C);
+ for (const GlobalVariable &GV : CG.getModule().globals())
+ if (const Comdat *C = GV.getComdat())
+ ComdatGroupReferenced(C);
+ for (const GlobalAlias &GA : CG.getModule().aliases())
+ if (const Comdat *C = GA.getComdat())
+ ComdatGroupReferenced(C);
+ for (CallGraphNode *CGN : DeadFunctionsInComdats) {
+ Function *F = CGN->getFunction();
+ const Comdat *C = F->getComdat();
+ int NumAlive = ComdatEntriesAlive[C];
+ // We can remove functions in a COMDAT group if the entire group is dead.
+ assert(NumAlive >= 0);
+ if (NumAlive > 0)
+ continue;
- // Removing the node for callee from the call graph and delete it.
- FunctionsToRemove.push_back(CGN);
+ RemoveCGN(CGN);
+ }
}
+
if (FunctionsToRemove.empty())
return false;
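
The COMDAT bookkeeping added to removeDeadFunctions above can be read as a small reference-counting scheme: each group starts at zero, loses one count per dead function, and regains one count per member still present in the module, so a group whose count is not positive has no live members left. A minimal standalone sketch of that idea, using plain STL containers and illustrative names rather than the LLVM call graph (not part of the patch):

    #include <map>
    #include <string>
    #include <vector>

    // Illustrative stand-in for a global entity; "Comdat" is empty when the
    // entity is not in any COMDAT group (assumed model, not the pass's types).
    struct Member { std::string Comdat; bool Dead; };

    std::vector<const Member *>
    removableMembers(const std::vector<Member> &Members) {
      std::map<std::string, int> Alive;
      // One decrement per dead member of a group...
      for (const Member &M : Members)
        if (M.Dead && !M.Comdat.empty())
          --Alive[M.Comdat];
      // ...one increment per member of any group we are tracking at all.
      for (const Member &M : Members)
        if (!M.Comdat.empty() && Alive.count(M.Comdat))
          ++Alive[M.Comdat];

      std::vector<const Member *> Removable;
      for (const Member &M : Members)
        // A dead member is removable if it has no group or its whole group died.
        if (M.Dead && (M.Comdat.empty() || Alive[M.Comdat] <= 0))
          Removable.push_back(&M);
      return Removable;
    }
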
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index 77e0b22086fd..575dce4b33df 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils Vectorize
+required_libraries = Analysis Core IPA InstCombine Scalar Support TransformUtils Vectorize
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 20414aa05b4d..41334ca5b429 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -242,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
if (!Split) continue;
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs);
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
}
}
diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp
new file mode 100644
index 000000000000..bffeebb6e2ed
--- /dev/null
+++ b/lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,732 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
+// See http://llvm.org/docs/LangRef.html#bitsets for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/LowerBitSets.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lowerbitsets"
+
+STATISTIC(ByteArraySizeBits, "Byte array size in bits");
+STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
+STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
+STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
+STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");
+
+static cl::opt<bool> AvoidReuse(
+ "lowerbitsets-avoid-reuse",
+ cl::desc("Try to avoid reuse of byte array addresses using aliases"),
+ cl::Hidden, cl::init(true));
+
+bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
+ if (Offset < ByteOffset)
+ return false;
+
+ if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
+ return false;
+
+ uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
+ if (BitOffset >= BitSize)
+ return false;
+
+ return Bits.count(BitOffset);
+}
+
+bool BitSetInfo::containsValue(
+ const DataLayout &DL,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
+ uint64_t COffset) const {
+ if (auto GV = dyn_cast<GlobalVariable>(V)) {
+ auto I = GlobalLayout.find(GV);
+ if (I == GlobalLayout.end())
+ return false;
+ return containsGlobalOffset(I->second + COffset);
+ }
+
+ if (auto GEP = dyn_cast<GEPOperator>(V)) {
+ APInt APOffset(DL.getPointerSizeInBits(0), 0);
+ bool Result = GEP->accumulateConstantOffset(DL, APOffset);
+ if (!Result)
+ return false;
+ COffset += APOffset.getZExtValue();
+ return containsValue(DL, GlobalLayout, GEP->getPointerOperand(),
+ COffset);
+ }
+
+ if (auto Op = dyn_cast<Operator>(V)) {
+ if (Op->getOpcode() == Instruction::BitCast)
+ return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
+
+ if (Op->getOpcode() == Instruction::Select)
+ return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
+ containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
+ }
+
+ return false;
+}
+
+BitSetInfo BitSetBuilder::build() {
+ if (Min > Max)
+ Min = 0;
+
+ // Normalize each offset against the minimum observed offset, and compute
+ // the bitwise OR of each of the offsets. The number of trailing zeros
+ // in the mask gives us the log2 of the alignment of all offsets, which
+ // allows us to compress the bitset by only storing one bit per aligned
+ // address.
+ uint64_t Mask = 0;
+ for (uint64_t &Offset : Offsets) {
+ Offset -= Min;
+ Mask |= Offset;
+ }
+
+ BitSetInfo BSI;
+ BSI.ByteOffset = Min;
+
+ BSI.AlignLog2 = 0;
+ if (Mask != 0)
+ BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+
+ // Build the compressed bitset while normalizing the offsets against the
+ // computed alignment.
+ BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
+ for (uint64_t Offset : Offsets) {
+ Offset >>= BSI.AlignLog2;
+ BSI.Bits.insert(Offset);
+ }
+
+ return BSI;
+}
+
+void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
+ // Create a new fragment to hold the layout for F.
+ Fragments.emplace_back();
+ std::vector<uint64_t> &Fragment = Fragments.back();
+ uint64_t FragmentIndex = Fragments.size() - 1;
+
+ for (auto ObjIndex : F) {
+ uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
+ if (OldFragmentIndex == 0) {
+ // We haven't seen this object index before, so just add it to the current
+ // fragment.
+ Fragment.push_back(ObjIndex);
+ } else {
+ // This index belongs to an existing fragment. Copy the elements of the
+ // old fragment into this one and clear the old fragment. We don't update
+      // the fragment map just yet; this ensures that any further references to
+ // indices from the old fragment in this fragment do not insert any more
+ // indices.
+ std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
+ Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
+ OldFragment.clear();
+ }
+ }
+
+ // Update the fragment map to point our object indices to this fragment.
+ for (uint64_t ObjIndex : Fragment)
+ FragmentMap[ObjIndex] = FragmentIndex;
+}
+
+void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
+ uint64_t BitSize, uint64_t &AllocByteOffset,
+ uint8_t &AllocMask) {
+ // Find the smallest current allocation.
+ unsigned Bit = 0;
+ for (unsigned I = 1; I != BitsPerByte; ++I)
+ if (BitAllocs[I] < BitAllocs[Bit])
+ Bit = I;
+
+ AllocByteOffset = BitAllocs[Bit];
+
+ // Add our size to it.
+ unsigned ReqSize = AllocByteOffset + BitSize;
+ BitAllocs[Bit] = ReqSize;
+ if (Bytes.size() < ReqSize)
+ Bytes.resize(ReqSize);
+
+ // Set our bits.
+ AllocMask = 1 << Bit;
+ for (uint64_t B : Bits)
+ Bytes[AllocByteOffset + B] |= AllocMask;
+}
+
+namespace {
+
+struct ByteArrayInfo {
+ std::set<uint64_t> Bits;
+ uint64_t BitSize;
+ GlobalVariable *ByteArray;
+ Constant *Mask;
+};
+
+struct LowerBitSets : public ModulePass {
+ static char ID;
+ LowerBitSets() : ModulePass(ID) {
+ initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ Module *M;
+
+ bool LinkerSubsectionsViaSymbols;
+ IntegerType *Int1Ty;
+ IntegerType *Int8Ty;
+ IntegerType *Int32Ty;
+ Type *Int32PtrTy;
+ IntegerType *Int64Ty;
+ Type *IntPtrTy;
+
+ // The llvm.bitsets named metadata.
+ NamedMDNode *BitSetNM;
+
+ // Mapping from bitset mdstrings to the call sites that test them.
+ DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+
+ std::vector<ByteArrayInfo> ByteArrayInfos;
+
+ BitSetInfo
+ buildBitSet(MDString *BitSet,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ ByteArrayInfo *createByteArray(BitSetInfo &BSI);
+ void allocateByteArrays();
+ Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Value *BitOffset);
+ Value *
+ lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ GlobalVariable *CombinedGlobal,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
+ const std::vector<GlobalVariable *> &Globals);
+ bool buildBitSets();
+ bool eraseBitSetMetadata();
+
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
+};
+
+} // namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::doInitialization(Module &Mod) {
+ M = &Mod;
+ const DataLayout &DL = Mod.getDataLayout();
+
+ Triple TargetTriple(M->getTargetTriple());
+ LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+
+ Int1Ty = Type::getInt1Ty(M->getContext());
+ Int8Ty = Type::getInt8Ty(M->getContext());
+ Int32Ty = Type::getInt32Ty(M->getContext());
+ Int32PtrTy = PointerType::getUnqual(Int32Ty);
+ Int64Ty = Type::getInt64Ty(M->getContext());
+ IntPtrTy = DL.getIntPtrType(M->getContext(), 0);
+
+ BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+ BitSetTestCallSites.clear();
+
+ return false;
+}
+
+/// Build a bit set for BitSet using the object layouts in
+/// GlobalLayout.
+BitSetInfo LowerBitSets::buildBitSet(
+ MDString *BitSet,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ BitSetBuilder BSB;
+
+ // Compute the byte offset of each element of this bitset.
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+ auto OpGlobal = cast<GlobalVariable>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ uint64_t Offset =
+ cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+ ->getValue())->getZExtValue();
+
+ Offset += GlobalLayout.find(OpGlobal)->second;
+
+ BSB.addOffset(Offset);
+ }
+ }
+
+ return BSB.build();
+}
+
+/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
+/// Bits. This pattern matches to the bt instruction on x86.
+static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
+ Value *BitOffset) {
+ auto BitsType = cast<IntegerType>(Bits->getType());
+ unsigned BitWidth = BitsType->getBitWidth();
+
+ BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
+ Value *BitIndex =
+ B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
+ Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
+ Value *MaskedBits = B.CreateAnd(Bits, BitMask);
+ return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
+}
+
+ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
+ // Create globals to stand in for byte arrays and masks. These never actually
+  // get initialized; we RAUW and erase them later in allocateByteArrays() once
+ // we know the offset and mask to use.
+ auto ByteArrayGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+ auto MaskGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+
+ ByteArrayInfos.emplace_back();
+ ByteArrayInfo *BAI = &ByteArrayInfos.back();
+
+ BAI->Bits = BSI.Bits;
+ BAI->BitSize = BSI.BitSize;
+ BAI->ByteArray = ByteArrayGlobal;
+ BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+ return BAI;
+}
+
+void LowerBitSets::allocateByteArrays() {
+ std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
+ [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
+ return BAI1.BitSize > BAI2.BitSize;
+ });
+
+ std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
+
+ ByteArrayBuilder BAB;
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ uint8_t Mask;
+ BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
+
+ BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
+ cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+ }
+
+ Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes);
+ auto ByteArray =
+ new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, ByteArrayConst);
+
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
+ ByteArrayConst->getType(), ByteArray, Idxs);
+
+ // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
+ // that the pc-relative displacement is folded into the lea instead of the
+ // test instruction getting another displacement.
+ if (LinkerSubsectionsViaSymbols) {
+ BAI->ByteArray->replaceAllUsesWith(GEP);
+ } else {
+ GlobalAlias *Alias =
+ GlobalAlias::create(PointerType::getUnqual(Int8Ty),
+ GlobalValue::PrivateLinkage, "bits", GEP, M);
+ BAI->ByteArray->replaceAllUsesWith(Alias);
+ }
+ BAI->ByteArray->eraseFromParent();
+ }
+
+ ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
+ BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
+ BAB.BitAllocs[6] + BAB.BitAllocs[7];
+ ByteArraySizeBytes = BAB.Bytes.size();
+}
+
+/// Build a test that bit BitOffset is set in the bit set BSI, creating and
+/// loading from the byte array BAI when the set is too large to test inline.
+Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
+ ByteArrayInfo *&BAI, Value *BitOffset) {
+ if (BSI.BitSize <= 64) {
+ // If the bit set is sufficiently small, we can avoid a load by bit testing
+ // a constant.
+ IntegerType *BitsTy;
+ if (BSI.BitSize <= 32)
+ BitsTy = Int32Ty;
+ else
+ BitsTy = Int64Ty;
+
+ uint64_t Bits = 0;
+ for (auto Bit : BSI.Bits)
+ Bits |= uint64_t(1) << Bit;
+ Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
+ return createMaskedBitTest(B, BitsConst, BitOffset);
+ } else {
+ if (!BAI) {
+ ++NumByteArraysCreated;
+ BAI = createByteArray(BSI);
+ }
+
+ Constant *ByteArray = BAI->ByteArray;
+ Type *Ty = BAI->ByteArray->getValueType();
+ if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
+ // Each use of the byte array uses a different alias. This makes the
+ // backend less likely to reuse previously computed byte array addresses,
+ // improving the security of the CFI mechanism based on this pass.
+ ByteArray = GlobalAlias::create(BAI->ByteArray->getType(),
+ GlobalValue::PrivateLinkage, "bits_use",
+ ByteArray, M);
+ }
+
+ Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
+ Value *Byte = B.CreateLoad(ByteAddr);
+
+ Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+ return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
+ }
+}
+
+/// Lower a llvm.bitset.test call to its implementation. Returns the value to
+/// replace the call with.
+Value *LowerBitSets::lowerBitSetCall(
+ CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ GlobalVariable *CombinedGlobal,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Value *Ptr = CI->getArgOperand(0);
+ const DataLayout &DL = M->getDataLayout();
+
+ if (BSI.containsValue(DL, GlobalLayout, Ptr))
+ return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());
+
+ Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
+ Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
+ GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+
+ BasicBlock *InitialBB = CI->getParent();
+
+ IRBuilder<> B(CI);
+
+ Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
+
+ if (BSI.isSingleOffset())
+ return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *BitOffset;
+ if (BSI.AlignLog2 == 0) {
+ BitOffset = PtrOffset;
+ } else {
+ // We need to check that the offset both falls within our range and is
+ // suitably aligned. We can check both properties at the same time by
+ // performing a right rotate by log2(alignment) followed by an integer
+ // comparison against the bitset size. The rotate will move the lower
+ // order bits that need to be zero into the higher order bits of the
+ // result, causing the comparison to fail if they are nonzero. The rotate
+ // also conveniently gives us a bit offset to use during the load from
+ // the bitset.
+ Value *OffsetSHR =
+ B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
+ Value *OffsetSHL = B.CreateShl(
+ PtrOffset,
+ ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
+ BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+ }
+
+ Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+ Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+
+ // If the bit set is all ones, testing against it is unnecessary.
+ if (BSI.isAllOnes())
+ return OffsetInRange;
+
+ TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
+ IRBuilder<> ThenB(Term);
+
+ // Now that we know that the offset is in range and aligned, load the
+ // appropriate bit from the bitset.
+ Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+
+ // The value we want is 0 if we came directly from the initial block
+ // (having failed the range or alignment checks), or the loaded bit if
+ // we came from the block in which we loaded it.
+ B.SetInsertPoint(CI);
+ PHINode *P = B.CreatePHI(Int1Ty, 2);
+ P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+ P->addIncoming(Bit, ThenB.GetInsertBlock());
+ return P;
+}
+
+/// Given a disjoint set of bitsets and globals, lay out the globals, build the
+/// bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromGlobals(
+ const std::vector<MDString *> &BitSets,
+ const std::vector<GlobalVariable *> &Globals) {
+ // Build a new global with the combined contents of the referenced globals.
+ std::vector<Constant *> GlobalInits;
+ const DataLayout &DL = M->getDataLayout();
+ for (GlobalVariable *G : Globals) {
+ GlobalInits.push_back(G->getInitializer());
+ uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType());
+
+ // Compute the amount of padding required to align the next element to the
+ // next power of 2.
+ uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+    // A cap of 128 was found experimentally to have a good data/instruction
+ // overhead tradeoff.
+ if (Padding > 128)
+ Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+ GlobalInits.push_back(
+ ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ }
+ if (!GlobalInits.empty())
+ GlobalInits.pop_back();
+ Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
+ auto CombinedGlobal =
+ new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, NewInit);
+
+ const StructLayout *CombinedGlobalLayout =
+ DL.getStructLayout(cast<StructType>(NewInit->getType()));
+
+ // Compute the offsets of the original globals within the new global.
+ DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+ for (unsigned I = 0; I != Globals.size(); ++I)
+ // Multiply by 2 to account for padding elements.
+ GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
+
+ // For each bitset in this disjoint set...
+ for (MDString *BS : BitSets) {
+ // Build the bitset.
+ BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+
+ ByteArrayInfo *BAI = 0;
+
+ // Lower each call to llvm.bitset.test for this bitset.
+ for (CallInst *CI : BitSetTestCallSites[BS]) {
+ ++NumBitSetCallsLowered;
+ Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
+ }
+
+ // Build aliases pointing to offsets into the combined global for each
+ // global from which we built the combined global, and replace references
+ // to the original globals with references to the aliases.
+ for (unsigned I = 0; I != Globals.size(); ++I) {
+ // Multiply by 2 to account for padding elements.
+ Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, I * 2)};
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
+ NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
+ if (LinkerSubsectionsViaSymbols) {
+ Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ } else {
+ GlobalAlias *GAlias =
+ GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(),
+ "", CombinedGlobalElemPtr, M);
+ GAlias->takeName(Globals[I]);
+ Globals[I]->replaceAllUsesWith(GAlias);
+ }
+ Globals[I]->eraseFromParent();
+ }
+}
+
+/// Lower all bit sets in this module.
+bool LowerBitSets::buildBitSets() {
+ Function *BitSetTestFunc =
+ M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+ if (!BitSetTestFunc)
+ return false;
+
+ // Equivalence class set containing bitsets and the globals they reference.
+ // This is used to partition the set of bitsets in the module into disjoint
+ // sets.
+ typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+ GlobalClassesTy;
+ GlobalClassesTy GlobalClasses;
+
+ for (const Use &U : BitSetTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+
+ auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+ report_fatal_error(
+ "Second argument of llvm.bitset.test must be metadata string");
+ auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+
+ // Add the call site to the list of call sites for this bit set. We also use
+ // BitSetTestCallSites to keep track of whether we have seen this bit set
+ // before. If we have, we don't need to re-add the referenced globals to the
+ // equivalence class.
+ std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+ bool> Ins =
+ BitSetTestCallSites.insert(
+ std::make_pair(BitSet, std::vector<CallInst *>()));
+ Ins.first->second.push_back(CI);
+ if (!Ins.second)
+ continue;
+
+ // Add the bitset to the equivalence class.
+ GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
+ GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+ if (!BitSetNM)
+ continue;
+
+ // Verify the bitset metadata and add the referenced globals to the bitset's
+ // equivalence class.
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getNumOperands() != 3)
+ report_fatal_error(
+ "All operands of llvm.bitsets metadata must have 3 elements");
+
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+
+ auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+ if (!OpConstMD)
+ report_fatal_error("Bit set element must be a constant");
+ auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+ if (!OpGlobal)
+ report_fatal_error("Bit set element must refer to global");
+
+ auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+ if (!OffsetConstMD)
+ report_fatal_error("Bit set element offset must be a constant");
+ auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+ if (!OffsetInt)
+ report_fatal_error(
+ "Bit set element offset must be an integer constant");
+
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+ }
+ }
+
+ if (GlobalClasses.empty())
+ return false;
+
+ // For each disjoint set we found...
+ for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
+ E = GlobalClasses.end();
+ I != E; ++I) {
+ if (!I->isLeader()) continue;
+
+ ++NumBitSetDisjointSets;
+
+ // Build the list of bitsets and referenced globals in this disjoint set.
+ std::vector<MDString *> BitSets;
+ std::vector<GlobalVariable *> Globals;
+ llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
+ llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
+ for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
+ MI != GlobalClasses.member_end(); ++MI) {
+ if ((*MI).is<MDString *>()) {
+ BitSetIndices[MI->get<MDString *>()] = BitSets.size();
+ BitSets.push_back(MI->get<MDString *>());
+ } else {
+ GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
+ Globals.push_back(MI->get<GlobalVariable *>());
+ }
+ }
+
+ // For each bitset, build a set of indices that refer to globals referenced
+ // by the bitset.
+ std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ // Op = { bitset name, global, offset }
+ if (!Op->getOperand(1))
+ continue;
+ auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
+ if (I == BitSetIndices.end())
+ continue;
+
+ auto OpGlobal = cast<GlobalVariable>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+ }
+ }
+
+ // Order the sets of indices by size. The GlobalLayoutBuilder works best
+ // when given small index sets first.
+ std::stable_sort(
+ BitSetMembers.begin(), BitSetMembers.end(),
+ [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
+
+ // Create a GlobalLayoutBuilder and provide it with index sets as layout
+ // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
+ // as close together as possible.
+ GlobalLayoutBuilder GLB(Globals.size());
+ for (auto &&MemSet : BitSetMembers)
+ GLB.addFragment(MemSet);
+
+ // Build a vector of globals with the computed layout.
+ std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
+ auto OGI = OrderedGlobals.begin();
+ for (auto &&F : GLB.Fragments)
+ for (auto &&Offset : F)
+ *OGI++ = Globals[Offset];
+
+ // Order bitsets by name for determinism.
+ std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
+ return S1->getString() < S2->getString();
+ });
+
+ // Build the bitsets from this disjoint set.
+ buildBitSetsFromGlobals(BitSets, OrderedGlobals);
+ }
+
+ allocateByteArrays();
+
+ return true;
+}
+
+bool LowerBitSets::eraseBitSetMetadata() {
+ if (!BitSetNM)
+ return false;
+
+ M->eraseNamedMetadata(BitSetNM);
+ return true;
+}
+
+bool LowerBitSets::runOnModule(Module &M) {
+ bool Changed = buildBitSets();
+ Changed |= eraseBitSetMetadata();
+ return Changed;
+}
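
The check built in lowerBitSetCall relies on the rotate trick described in its comment: rotating the byte offset right by log2(alignment) pushes any misaligned low bits into the high bits, so a single unsigned comparison against the bit-set size rejects both misaligned and out-of-range pointers and leaves the bit index to load. A minimal sketch on plain integers (the 64-bit pointer width and the names are illustrative assumptions, not the pass's API):

    #include <cstdint>

    // Returns true if PtrOffset is a multiple of 1 << AlignLog2 and the
    // resulting bit index is below BitSize; on success BitOffset holds it.
    bool offsetMayBeInBitSet(uint64_t PtrOffset, unsigned AlignLog2,
                             uint64_t BitSize, uint64_t &BitOffset) {
      const unsigned PtrBits = 64;  // assumed pointer width
      if (AlignLog2 == 0)
        BitOffset = PtrOffset;
      else
        // Right-rotate by AlignLog2: misaligned low bits move to the top,
        // which makes the range comparison below fail.
        BitOffset = (PtrOffset >> AlignLog2) |
                    (PtrOffset << (PtrBits - AlignLog2));
      return BitOffset < BitSize;
    }
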
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index b91ebf2b96b0..91a5eefca17d 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -127,9 +127,8 @@ namespace {
/// side of claiming that two functions are different).
class FunctionComparator {
public:
- FunctionComparator(const DataLayout *DL, const Function *F1,
- const Function *F2)
- : FnL(F1), FnR(F2), DL(DL) {}
+ FunctionComparator(const Function *F1, const Function *F2)
+ : FnL(F1), FnR(F2) {}
/// Test whether the two functions have equivalent behaviour.
int compare();
@@ -292,8 +291,7 @@ private:
/// Parts to be compared for each comparison stage,
/// most significant stage first:
/// 1. Address space. As numbers.
- /// 2. Constant offset, (if "DataLayout *DL" field is not NULL,
- /// using GEPOperator::accumulateConstantOffset method).
+ /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
/// 3. Pointer operand type (using cmpType method).
/// 4. Number of operands.
/// 5. Compare operands, using cmpValues method.
@@ -354,8 +352,6 @@ private:
// The two functions undergoing comparison.
const Function *FnL, *FnR;
- const DataLayout *DL;
-
/// Assign serial numbers to values from left function, and values from
/// right function.
/// Explanation:
@@ -394,14 +390,13 @@ private:
class FunctionNode {
AssertingVH<Function> F;
- const DataLayout *DL;
public:
- FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
+ FunctionNode(Function *F) : F(F) {}
Function *getFunc() const { return F; }
void release() { F = 0; }
bool operator<(const FunctionNode &RHS) const {
- return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1;
+ return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
}
};
}
@@ -620,10 +615,11 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
- if (DL) {
- if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL);
- if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR);
- }
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ if (PTyL && PTyL->getAddressSpace() == 0)
+ TyL = DL.getIntPtrType(TyL);
+ if (PTyR && PTyR->getAddressSpace() == 0)
+ TyR = DL.getIntPtrType(TyR);
if (TyL == TyR)
return 0;
@@ -723,6 +719,15 @@ int FunctionComparator::cmpOperations(const Instruction *L,
R->getRawSubclassOptionalData()))
return Res;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+ if (int Res = cmpTypes(AI->getAllocatedType(),
+ cast<AllocaInst>(R)->getAllocatedType()))
+ return Res;
+ if (int Res =
+ cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()))
+ return Res;
+ }
+
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same type
for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
@@ -855,13 +860,12 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
// When we have target data, we can reduce the GEP down to the value in bytes
// added to the address.
- if (DL) {
- unsigned BitWidth = DL->getPointerSizeInBits(ASL);
- APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
- if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
- GEPR->accumulateConstantOffset(*DL, OffsetR))
- return cmpAPInts(OffsetL, OffsetR);
- }
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ unsigned BitWidth = DL.getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(DL, OffsetR))
+ return cmpAPInts(OffsetL, OffsetR);
if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
(uint64_t)GEPR->getPointerOperand()->getType()))
@@ -1122,9 +1126,6 @@ private:
/// to modify it.
FnTreeType FnTree;
- /// DataLayout for more accurate GEP comparisons. May be NULL.
- const DataLayout *DL;
-
/// Whether or not the target supports global aliases.
bool HasGlobalAliases;
};
@@ -1152,8 +1153,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
Function *F1 = cast<Function>(*I);
Function *F2 = cast<Function>(*J);
- int Res1 = FunctionComparator(DL, F1, F2).compare();
- int Res2 = FunctionComparator(DL, F2, F1).compare();
+ int Res1 = FunctionComparator(F1, F2).compare();
+ int Res2 = FunctionComparator(F2, F1).compare();
// If F1 <= F2, then F2 >= F1, otherwise report failure.
if (Res1 != -Res2) {
@@ -1174,8 +1175,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
continue;
Function *F3 = cast<Function>(*K);
- int Res3 = FunctionComparator(DL, F1, F3).compare();
- int Res4 = FunctionComparator(DL, F2, F3).compare();
+ int Res3 = FunctionComparator(F1, F3).compare();
+ int Res4 = FunctionComparator(F2, F3).compare();
bool Transitive = true;
@@ -1212,8 +1213,6 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -1368,8 +1367,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
// Replace G with an alias to F and delete G.
void MergeFunctions::writeAlias(Function *F, Function *G) {
PointerType *PTy = G->getType();
- auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- G->getLinkage(), "", F);
+ auto *GA = GlobalAlias::create(PTy, G->getLinkage(), "", F);
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
@@ -1420,7 +1418,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// that was already inserted.
bool MergeFunctions::insert(Function *NewFunction) {
std::pair<FnTreeType::iterator, bool> Result =
- FnTree.insert(FunctionNode(NewFunction, DL));
+ FnTree.insert(FunctionNode(NewFunction));
if (Result.second) {
DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
@@ -1457,7 +1455,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
void MergeFunctions::remove(Function *F) {
// We need to make sure we remove F, not a function "equal" to F per the
// function equality comparator.
- FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL));
+ FnTreeType::iterator found = FnTree.find(FunctionNode(F));
size_t Erased = 0;
if (found != FnTree.end() && found->getFunc() == F) {
Erased = 1;
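
With DataLayout now always available from the module, cmpGEPs can always try to fold each GEP's constant indices into a single byte offset and order the two GEPs by that number. A small sketch of that reduction on a simplified model (the GEPStep struct and its fields are illustrative assumptions, not LLVM's GEP representation):

    #include <cstdint>
    #include <vector>

    // One constant GEP index over an element of a known size (assumed model;
    // real struct fields take their offsets from StructLayout instead).
    struct GEPStep { uint64_t ElementSize; uint64_t Index; };

    // Collapse a chain of constant indices into a single byte offset, in the
    // spirit of GEPOperator::accumulateConstantOffset with a DataLayout.
    uint64_t accumulateConstantOffset(const std::vector<GEPStep> &Steps) {
      uint64_t Offset = 0;
      for (const GEPStep &S : Steps)
        Offset += S.ElementSize * S.Index;
      return Offset;
    }

    // Two GEPs over equivalent pointer operands can then be ordered as plain
    // integers, matching the -1/0/1 convention of cmpNumbers and cmpAPInts.
    int cmpGEPOffsets(uint64_t L, uint64_t R) {
      return L < R ? -1 : (L > R ? 1 : 0);
    }
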
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 76d6dfa8e881..4a7cb7ba7d12 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -58,13 +58,13 @@ Function* PartialInliner::unswitchFunction(Function* F) {
BasicBlock* returnBlock = nullptr;
BasicBlock* nonReturnBlock = nullptr;
unsigned returnCount = 0;
- for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
- SI != SE; ++SI)
- if (isa<ReturnInst>((*SI)->getTerminator())) {
- returnBlock = *SI;
+ for (BasicBlock *BB : successors(entryBlock)) {
+ if (isa<ReturnInst>(BB->getTerminator())) {
+ returnBlock = BB;
returnCount++;
} else
- nonReturnBlock = *SI;
+ nonReturnBlock = BB;
+ }
if (returnCount != 1)
return nullptr;
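
The rewritten loop above uses the range-based successors() helper instead of explicit succ_begin/succ_end iterators. A small usage sketch under the assumption of an llvm::BasicBlock *Entry (illustrative only, not part of the patch):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Instructions.h"

    // Return the unique successor of Entry that ends in a return, or null if
    // there is not exactly one such block (mirrors the unswitchFunction scan).
    static llvm::BasicBlock *findSingleReturnSuccessor(llvm::BasicBlock *Entry) {
      llvm::BasicBlock *Ret = nullptr;
      unsigned Count = 0;
      for (llvm::BasicBlock *Succ : llvm::successors(Entry))
        if (llvm::isa<llvm::ReturnInst>(Succ->getTerminator())) {
          Ret = Succ;
          ++Count;
        }
      return Count == 1 ? Ret : nullptr;
    }
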
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 0414caa61fca..3496a663f53b 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -19,12 +19,11 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
@@ -60,6 +59,10 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
+static cl::opt<bool>
+RunFloat2Int("float-to-int", cl::Hidden, cl::init(true),
+ cl::desc("Run the float2int (float demotion) pass"));
+
static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
cl::Hidden,
cl::desc("Run the load combining pass"));
@@ -78,6 +81,14 @@ static cl::opt<bool>
EnableMLSM("mlsm", cl::init(true), cl::Hidden,
cl::desc("Enable motion of merged load and store"));
+static cl::opt<bool> EnableLoopInterchange(
+ "enable-loopinterchange", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopInterchange Pass"));
+
+static cl::opt<bool> EnableLoopDistribute(
+ "enable-loop-distribute", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopDistribution Pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -94,7 +105,6 @@ PassManagerBuilder::PassManagerBuilder() {
DisableGVNLoadPRE = false;
VerifyInput = false;
VerifyOutput = false;
- StripDebug = false;
MergeFunctions = false;
}
@@ -118,7 +128,7 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
}
void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
- PassManagerBase &PM) const {
+ legacy::PassManagerBase &PM) const {
for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
if ((*GlobalExtensions)[i].first == ETy)
(*GlobalExtensions)[i].second(*this, PM);
@@ -127,8 +137,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
Extensions[i].second(*this, PM);
}
-void
-PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
+void PassManagerBuilder::addInitialAliasAnalysisPasses(
+ legacy::PassManagerBase &PM) const {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
@@ -139,11 +149,13 @@ PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
PM.add(createBasicAliasAnalysisPass());
}
-void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
+void PassManagerBuilder::populateFunctionPassManager(
+ legacy::FunctionPassManager &FPM) {
addExtensionsToPM(EP_EarlyAsPossible, FPM);
// Add LibraryInfo if we have some.
- if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo));
+ if (LibraryInfo)
+ FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
if (OptLevel == 0) return;
@@ -158,7 +170,8 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
FPM.add(createLowerExpectIntrinsicPass());
}
-void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
+void PassManagerBuilder::populateModulePassManager(
+ legacy::PassManagerBase &MPM) {
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
if (OptLevel == 0) {
@@ -182,7 +195,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
}
// Add LibraryInfo if we have some.
- if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo));
+ if (LibraryInfo)
+ MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
addInitialAliasAnalysisPasses(MPM);
@@ -236,7 +250,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
-
+ if (EnableLoopInterchange) {
+ MPM.add(createLoopInterchangePass()); // Interchange loops
+ MPM.add(createCFGSimplificationPass());
+ }
if (!DisableUnrollLoops)
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
@@ -249,6 +266,11 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+  // opportunities that this creates).
+ MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
+
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
MPM.add(createInstructionCombiningPass());
@@ -256,6 +278,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+ MPM.add(createLICMPass());
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -293,11 +316,18 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (RunFloat2Int)
+ MPM.add(createFloat2IntPass());
+
// Re-rotate loops in all our loop nests. These may have fallout out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form.
- if (ExtraVectorizerPasses)
- MPM.add(createLoopRotatePass());
+ MPM.add(createLoopRotatePass());
+
+  // Distribute loops to allow partial vectorization, i.e. isolate dependences
+  // into a separate loop that would otherwise inhibit vectorization.
+ if (EnableLoopDistribute)
+ MPM.add(createLoopDistributePass());
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
// FIXME: Because of #pragma vectorize enable, the passes below are always
@@ -349,9 +379,19 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
- if (!DisableUnrollLoops)
+ if (!DisableUnrollLoops) {
MPM.add(createLoopUnrollPass()); // Unroll small loops
+    // LoopUnroll may generate some redundancy to clean up.
+ MPM.add(createInstructionCombiningPass());
+
+    // Runtime unrolling will introduce a runtime check in the loop prologue.
+    // If the unrolled loop is an inner loop, then the prologue will be inside
+    // the outer loop. The LICM pass can help to promote the runtime check out
+    // if the checked value is loop invariant.
+ MPM.add(createLICMPass());
+ }
+
// After vectorization and unrolling, assume intrinsics may tell us more
// about pointer alignments.
MPM.add(createAlignmentFromAssumptionsPass());
@@ -374,7 +414,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_OptimizerLast, MPM);
}
-void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
+void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
@@ -445,6 +485,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
// More loops are countable; try to optimize them.
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
+ if (EnableLoopInterchange)
+ PM.add(createLoopInterchangePass());
+
PM.add(createLoopVectorizePass(true, LoopVectorize));
// More scalar chains could be vectorized due to more alias information
@@ -464,7 +507,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
+}
+void PassManagerBuilder::addLateLTOOptimizationPasses(
+ legacy::PassManagerBase &PM) {
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
@@ -477,32 +523,26 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
PM.add(createMergeFunctionsPass());
}
-void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
- TargetMachine *TM) {
- if (TM) {
- PM.add(new DataLayoutPass());
- TM->addAnalysisPasses(PM);
- }
-
+void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (LibraryInfo)
- PM.add(new TargetLibraryInfo(*LibraryInfo));
+ PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
if (VerifyInput)
PM.add(createVerifierPass());
- if (StripDebug)
- PM.add(createStripSymbolsPass(true));
+ if (OptLevel > 1)
+ addLTOOptimizationPasses(PM);
- if (VerifyInput)
- PM.add(createDebugInfoVerifierPass());
+ // Lower bit sets to globals. This pass supports Clang's control flow
+ // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
+ // is enabled. The pass does nothing if CFI is disabled.
+ PM.add(createLowerBitSetsPass());
if (OptLevel != 0)
- addLTOOptimizationPasses(PM);
+ addLateLTOOptimizationPasses(PM);
- if (VerifyOutput) {
+ if (VerifyOutput)
PM.add(createVerifierPass());
- PM.add(createDebugInfoVerifierPass());
- }
}
inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
@@ -568,7 +608,7 @@ void
LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM) {
PassManagerBuilder *Builder = unwrap(PMB);
- FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM);
+ legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
Builder->populateFunctionPassManager(*FPM);
}
@@ -576,7 +616,7 @@ void
LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM) {
PassManagerBuilder *Builder = unwrap(PMB);
- PassManagerBase *MPM = unwrap(PM);
+ legacy::PassManagerBase *MPM = unwrap(PM);
Builder->populateModulePassManager(*MPM);
}
@@ -585,7 +625,7 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMBool Internalize,
LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
- PassManagerBase *LPM = unwrap(PM);
+ legacy::PassManagerBase *LPM = unwrap(PM);
// A small backwards compatibility hack. populateLTOPassManager used to take
   // a RunInliner option.
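
Since populateFunctionPassManager, populateModulePassManager and populateLTOPassManager now take the legacy:: pass manager types directly, callers outside the C API follow the same pattern. A minimal usage sketch, assuming a Module M and default -O2-style settings (not part of this patch):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    void optimizeModule(llvm::Module &M) {
      llvm::PassManagerBuilder Builder;
      Builder.OptLevel = 2;   // roughly -O2
      Builder.SizeLevel = 0;

      llvm::legacy::FunctionPassManager FPM(&M);
      llvm::legacy::PassManager MPM;
      Builder.populateFunctionPassManager(FPM);
      Builder.populateModulePassManager(MPM);

      // Run the per-function pipeline first, then the module pipeline.
      FPM.doInitialization();
      for (llvm::Function &F : M)
        FPM.run(F);
      FPM.doFinalization();
      MPM.run(M);
    }
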
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 7bd4ce12860d..1943b930cbf9 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -18,8 +18,10 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -175,7 +177,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- if (II->doesNotThrow()) {
+ if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 816978ea9ce6..60c957347621 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -305,41 +305,31 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
SmallVector<Metadata *, 64> LiveSubprograms;
DenseSet<const MDNode *> VisitedSet;
- for (DICompileUnit DIC : F.compile_units()) {
- assert(DIC.Verify() && "DIC must verify as a DICompileUnit.");
-
+ for (DICompileUnit *DIC : F.compile_units()) {
// Create our live subprogram list.
- DIArray SPs = DIC.getSubprograms();
bool SubprogramChange = false;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram DISP(SPs.getElement(i));
- assert(DISP.Verify() && "DISP must verify as a DISubprogram.");
-
+ for (DISubprogram *DISP : DIC->getSubprograms()) {
// Make sure we visit each subprogram only once.
if (!VisitedSet.insert(DISP).second)
continue;
// If the function referenced by DISP is not null, the function is live.
- if (DISP.getFunction())
+ if (DISP->getFunction())
LiveSubprograms.push_back(DISP);
else
SubprogramChange = true;
}
// Create our live global variable list.
- DIArray GVs = DIC.getGlobalVariables();
bool GlobalVariableChange = false;
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
- DIGlobalVariable DIG(GVs.getElement(i));
- assert(DIG.Verify() && "DIG must verify as DIGlobalVariable.");
-
+ for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
       // Make sure we visit each global variable only once.
if (!VisitedSet.insert(DIG).second)
continue;
// If the global variable referenced by DIG is not null, the global
// variable is live.
- if (DIG.getGlobal())
+ if (DIG->getVariable())
LiveGlobalVariables.push_back(DIG);
else
GlobalVariableChange = true;
@@ -349,12 +339,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// subprogram list/global variable list with our new live subprogram/global
// variable list.
if (SubprogramChange) {
- DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms)));
+ DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
Changed = true;
}
if (GlobalVariableChange) {
- DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables)));
+ DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
Changed = true;
}