author     Roman Divacky <rdivacky@FreeBSD.org>   2009-10-14 17:57:32 +0000
committer  Roman Divacky <rdivacky@FreeBSD.org>   2009-10-14 17:57:32 +0000
commit     59850d0874429601812bc13408cb1f776649027c
tree       b21f6de4e08b89bb7931806bab798fc2a5e3a686 /lib/Analysis
parent     18f153bdb9db52e7089a2d5293b96c45a3124a26
Update llvm to r84119.

Tag: vendor/llvm/llvm-r84119
Notes:
svn path=/vendor/llvm/dist/; revision=198090
svn path=/vendor/llvm/llvm-84119/; revision=198091; tag=vendor/llvm/llvm-r84119
Diffstat (limited to 'lib/Analysis')
43 files changed, 6507 insertions, 2459 deletions
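
Two changes recur throughout the diff below. First, iostream-style output (cerr, std::ostream) is replaced with LLVM's raw_ostream (errs()). Second, TargetData stops being a hard requirement: passes now fetch it with getAnalysisIfAvailable<TargetData>() and must tolerate a null TD, which is why the diff adds AliasAnalysis::getTypeStoreSize. What follows is a minimal self-contained sketch of that fallback pattern; Type and TargetData here are simplified stand-ins for illustration, not the real LLVM classes.

#include <cstdio>

// Simplified stand-ins for llvm::Type and llvm::TargetData (illustration only).
struct Type { unsigned StoreBytes; };

struct TargetData {
  unsigned getTypeStoreSize(const Type *Ty) const { return Ty->StoreBytes; }
};

struct AliasAnalysisSketch {
  const TargetData *TD;  // now optional: P->getAnalysisIfAvailable<TargetData>()

  // Same shape as the getTypeStoreSize the diff adds to AliasAnalysis: when
  // no TargetData is registered, answer the conservative "unknown" size ~0u.
  unsigned getTypeStoreSize(const Type *Ty) const {
    return TD ? TD->getTypeStoreSize(Ty) : ~0u;
  }
};

int main() {
  Type I64 = {8};
  TargetData TD;
  AliasAnalysisSketch WithTD = {&TD}, WithoutTD = {nullptr};
  std::printf("%u %u\n", WithTD.getTypeStoreSize(&I64),
              WithoutTD.getTypeStoreSize(&I64));  // prints: 8 4294967295
}

Callers updated in this diff (AliasSetTracker::add/remove, AAEval::runOnFunction) treat ~0u as "size unknown", a case every alias query already had to handle conservatively.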
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index c5523ec4634d..c456990d8ae2 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -88,7 +88,7 @@ AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) { AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(LoadInst *L, Value *P, unsigned Size) { - return alias(L->getOperand(0), TD->getTypeStoreSize(L->getType()), + return alias(L->getOperand(0), getTypeStoreSize(L->getType()), P, Size) ? Ref : NoModRef; } @@ -97,7 +97,7 @@ AliasAnalysis::getModRefInfo(StoreInst *S, Value *P, unsigned Size) { // If the stored address cannot alias the pointer in question, then the // pointer cannot be modified by the store. if (!alias(S->getOperand(1), - TD->getTypeStoreSize(S->getOperand(0)->getType()), P, Size)) + getTypeStoreSize(S->getOperand(0)->getType()), P, Size)) return NoModRef; // If the pointer is a pointer to constant memory, then it could not have been @@ -177,18 +177,23 @@ AliasAnalysis::~AliasAnalysis() {} /// AliasAnalysis interface before any other methods are called. /// void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { - TD = &P->getAnalysis<TargetData>(); + TD = P->getAnalysisIfAvailable<TargetData>(); AA = &P->getAnalysis<AliasAnalysis>(); } // getAnalysisUsage - All alias analysis implementations should invoke this -// directly (using AliasAnalysis::getAnalysisUsage(AU)) to make sure that -// TargetData is required by the pass. +// directly (using AliasAnalysis::getAnalysisUsage(AU)). void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); // All AA's need TargetData. AU.addRequired<AliasAnalysis>(); // All AA's chain } +/// getTypeStoreSize - Return the TargetData store size for the given type, +/// if known, or a conservative value otherwise. +/// +unsigned AliasAnalysis::getTypeStoreSize(const Type *Ty) { + return TD ? TD->getTypeStoreSize(Ty) : ~0u; +} + /// canBasicBlockModify - Return true if it is possible for execution of the /// specified basic block to modify the value pointed to by Ptr. /// @@ -228,13 +233,15 @@ bool llvm::isNoAliasCall(const Value *V) { /// isIdentifiedObject - Return true if this pointer refers to a distinct and /// identifiable object. 
This returns true for: -/// Global Variables and Functions +/// Global Variables and Functions (but not Global Aliases) /// Allocas and Mallocs /// ByVal and NoAlias Arguments /// NoAlias returns /// bool llvm::isIdentifiedObject(const Value *V) { - if (isa<GlobalValue>(V) || isa<AllocationInst>(V) || isNoAliasCall(V)) + if (isa<AllocationInst>(V) || isNoAliasCall(V)) + return true; + if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V)) return true; if (const Argument *A = dyn_cast<Argument>(V)) return A->hasNoAliasAttr() || A->hasByValAttr(); diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 4362d7d301a8..272c871ce239 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -18,11 +18,12 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt<bool> -PrintAll("count-aa-print-all-queries", cl::ReallyHidden); +PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true)); static cl::opt<bool> PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden); @@ -41,33 +42,33 @@ namespace { } void printLine(const char *Desc, unsigned Val, unsigned Sum) { - cerr << " " << Val << " " << Desc << " responses (" - << Val*100/Sum << "%)\n"; + errs() << " " << Val << " " << Desc << " responses (" + << Val*100/Sum << "%)\n"; } ~AliasAnalysisCounter() { unsigned AASum = No+May+Must; unsigned MRSum = NoMR+JustRef+JustMod+MR; if (AASum + MRSum) { // Print a report if any counted queries occurred... - cerr << "\n===== Alias Analysis Counter Report =====\n" - << " Analysis counted: " << Name << "\n" - << " " << AASum << " Total Alias Queries Performed\n"; + errs() << "\n===== Alias Analysis Counter Report =====\n" + << " Analysis counted: " << Name << "\n" + << " " << AASum << " Total Alias Queries Performed\n"; if (AASum) { printLine("no alias", No, AASum); printLine("may alias", May, AASum); printLine("must alias", Must, AASum); - cerr << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" - << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n"; + errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" + << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n"; } - cerr << " " << MRSum << " Total Mod/Ref Queries Performed\n"; + errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n"; if (MRSum) { printLine("no mod/ref", NoMR, MRSum); printLine("ref", JustRef, MRSum); printLine("mod", JustMod, MRSum); printLine("mod/ref", MR, MRSum); - cerr << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum<< "%/" - << JustRef*100/MRSum << "%/" << JustMod*100/MRSum << "%/" - << MR*100/MRSum <<"%\n\n"; + errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum + << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum + << "%/" << MR*100/MRSum <<"%\n\n"; } } } @@ -89,19 +90,6 @@ namespace { bool pointsToConstantMemory(const Value *P) { return getAnalysis<AliasAnalysis>().pointsToConstantMemory(P); } - bool doesNotAccessMemory(CallSite CS) { - return getAnalysis<AliasAnalysis>().doesNotAccessMemory(CS); - } - bool doesNotAccessMemory(Function *F) { - return getAnalysis<AliasAnalysis>().doesNotAccessMemory(F); - } - bool onlyReadsMemory(CallSite CS) { - return getAnalysis<AliasAnalysis>().onlyReadsMemory(CS); - } - bool onlyReadsMemory(Function *F) { - return 
getAnalysis<AliasAnalysis>().onlyReadsMemory(F); - } - // Forwarding functions: just delegate to a real AA implementation, counting // the number of responses... @@ -131,20 +119,20 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size, const char *AliasString; switch (R) { - default: assert(0 && "Unknown alias type!"); + default: llvm_unreachable("Unknown alias type!"); case NoAlias: No++; AliasString = "No alias"; break; case MayAlias: May++; AliasString = "May alias"; break; case MustAlias: Must++; AliasString = "Must alias"; break; } if (PrintAll || (PrintAllFailures && R == MayAlias)) { - cerr << AliasString << ":\t"; - cerr << "[" << V1Size << "B] "; - WriteAsOperand(*cerr.stream(), V1, true, M); - cerr << ", "; - cerr << "[" << V2Size << "B] "; - WriteAsOperand(*cerr.stream(), V2, true, M); - cerr << "\n"; + errs() << AliasString << ":\t"; + errs() << "[" << V1Size << "B] "; + WriteAsOperand(errs(), V1, true, M); + errs() << ", "; + errs() << "[" << V2Size << "B] "; + WriteAsOperand(errs(), V2, true, M); + errs() << "\n"; } return R; @@ -156,7 +144,7 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) { const char *MRString; switch (R) { - default: assert(0 && "Unknown mod/ref type!"); + default: llvm_unreachable("Unknown mod/ref type!"); case NoModRef: NoMR++; MRString = "NoModRef"; break; case Ref: JustRef++; MRString = "JustRef"; break; case Mod: JustMod++; MRString = "JustMod"; break; @@ -164,10 +152,10 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) { } if (PrintAll || (PrintAllFailures && R == ModRef)) { - cerr << MRString << ": Ptr: "; - cerr << "[" << Size << "B] "; - WriteAsOperand(*cerr.stream(), P, true, M); - cerr << "\t<->" << *CS.getInstruction(); + errs() << MRString << ": Ptr: "; + errs() << "[" << Size << "B] "; + WriteAsOperand(errs(), P, true, M); + errs() << "\t<->" << *CS.getInstruction(); } return R; } diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 07820e350681..bb95c01e2ea9 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -29,9 +29,8 @@ #include "llvm/Support/InstIterator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" -#include <set> -#include <sstream> +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" using namespace llvm; static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden); @@ -81,18 +80,21 @@ X("aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true); FunctionPass *llvm::createAAEvalPass() { return new AAEval(); } -static void PrintResults(const char *Msg, bool P, const Value *V1, const Value *V2, - const Module *M) { +static void PrintResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { if (P) { - std::stringstream s1, s2; - WriteAsOperand(s1, V1, true, M); - WriteAsOperand(s2, V2, true, M); - std::string o1(s1.str()), o2(s2.str()); + std::string o1, o2; + { + raw_string_ostream os1(o1), os2(o2); + WriteAsOperand(os1, V1, true, M); + WriteAsOperand(os2, V2, true, M); + } + if (o2 < o1) - std::swap(o1, o2); - cerr << " " << Msg << ":\t" - << o1 << ", " - << o2 << "\n"; + std::swap(o1, o2); + errs() << " " << Msg << ":\t" + << o1 << ", " + << o2 << "\n"; } } @@ -100,19 +102,17 @@ static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, Module *M) { if (P) { - cerr << " " << Msg << ": Ptr: "; - 
WriteAsOperand(*cerr.stream(), Ptr, true, M); - cerr << "\t<->" << *I; + errs() << " " << Msg << ": Ptr: "; + WriteAsOperand(errs(), Ptr, true, M); + errs() << "\t<->" << *I << '\n'; } } bool AAEval::runOnFunction(Function &F) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - const TargetData &TD = AA.getTargetData(); - - std::set<Value *> Pointers; - std::set<CallSite> CallSites; + SetVector<Value *> Pointers; + SetVector<CallSite> CallSites; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) if (isa<PointerType>(I->getType())) // Add all pointer arguments @@ -136,20 +136,20 @@ bool AAEval::runOnFunction(Function &F) { if (PrintNoAlias || PrintMayAlias || PrintMustAlias || PrintNoModRef || PrintMod || PrintRef || PrintModRef) - cerr << "Function: " << F.getName() << ": " << Pointers.size() - << " pointers, " << CallSites.size() << " call sites\n"; + errs() << "Function: " << F.getName() << ": " << Pointers.size() + << " pointers, " << CallSites.size() << " call sites\n"; // iterate over the worklist, and run the full (n^2)/2 disambiguations - for (std::set<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end(); + for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end(); I1 != E; ++I1) { - unsigned I1Size = 0; + unsigned I1Size = ~0u; const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); - if (I1ElTy->isSized()) I1Size = TD.getTypeStoreSize(I1ElTy); + if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy); - for (std::set<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { - unsigned I2Size = 0; + for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { + unsigned I2Size = ~0u; const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); - if (I2ElTy->isSized()) I2Size = TD.getTypeStoreSize(I2ElTy); + if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy); switch (AA.alias(*I1, I1Size, *I2, I2Size)) { case AliasAnalysis::NoAlias: @@ -162,21 +162,21 @@ bool AAEval::runOnFunction(Function &F) { PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); ++MustAlias; break; default: - cerr << "Unknown alias query result!\n"; + errs() << "Unknown alias query result!\n"; } } } // Mod/ref alias analysis: compare all pairs of calls and values - for (std::set<CallSite>::iterator C = CallSites.begin(), + for (SetVector<CallSite>::iterator C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { Instruction *I = C->getInstruction(); - for (std::set<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end(); + for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end(); V != Ve; ++V) { - unsigned Size = 0; + unsigned Size = ~0u; const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType(); - if (ElTy->isSized()) Size = TD.getTypeStoreSize(ElTy); + if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy); switch (AA.getModRefInfo(*C, *V, Size)) { case AliasAnalysis::NoModRef: @@ -192,7 +192,7 @@ bool AAEval::runOnFunction(Function &F) { PrintModRefResults(" ModRef", PrintModRef, I, *V, F.getParent()); ++ModRef; break; default: - cerr << "Unknown alias query result!\n"; + errs() << "Unknown alias query result!\n"; } } } @@ -201,45 +201,45 @@ bool AAEval::runOnFunction(Function &F) { } static void PrintPercent(unsigned Num, unsigned Sum) { - cerr << "(" << Num*100ULL/Sum << "." - << ((Num*1000ULL/Sum) % 10) << "%)\n"; + errs() << "(" << Num*100ULL/Sum << "." 
+ << ((Num*1000ULL/Sum) % 10) << "%)\n"; } bool AAEval::doFinalization(Module &M) { unsigned AliasSum = NoAlias + MayAlias + MustAlias; - cerr << "===== Alias Analysis Evaluator Report =====\n"; + errs() << "===== Alias Analysis Evaluator Report =====\n"; if (AliasSum == 0) { - cerr << " Alias Analysis Evaluator Summary: No pointers!\n"; + errs() << " Alias Analysis Evaluator Summary: No pointers!\n"; } else { - cerr << " " << AliasSum << " Total Alias Queries Performed\n"; - cerr << " " << NoAlias << " no alias responses "; + errs() << " " << AliasSum << " Total Alias Queries Performed\n"; + errs() << " " << NoAlias << " no alias responses "; PrintPercent(NoAlias, AliasSum); - cerr << " " << MayAlias << " may alias responses "; + errs() << " " << MayAlias << " may alias responses "; PrintPercent(MayAlias, AliasSum); - cerr << " " << MustAlias << " must alias responses "; + errs() << " " << MustAlias << " must alias responses "; PrintPercent(MustAlias, AliasSum); - cerr << " Alias Analysis Evaluator Pointer Alias Summary: " - << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/" - << MustAlias*100/AliasSum << "%\n"; + errs() << " Alias Analysis Evaluator Pointer Alias Summary: " + << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/" + << MustAlias*100/AliasSum << "%\n"; } // Display the summary for mod/ref analysis unsigned ModRefSum = NoModRef + Mod + Ref + ModRef; if (ModRefSum == 0) { - cerr << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n"; + errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n"; } else { - cerr << " " << ModRefSum << " Total ModRef Queries Performed\n"; - cerr << " " << NoModRef << " no mod/ref responses "; + errs() << " " << ModRefSum << " Total ModRef Queries Performed\n"; + errs() << " " << NoModRef << " no mod/ref responses "; PrintPercent(NoModRef, ModRefSum); - cerr << " " << Mod << " mod responses "; + errs() << " " << Mod << " mod responses "; PrintPercent(Mod, ModRefSum); - cerr << " " << Ref << " ref responses "; + errs() << " " << Ref << " ref responses "; PrintPercent(Ref, ModRefSum); - cerr << " " << ModRef << " mod & ref responses "; + errs() << " " << ModRef << " mod & ref responses "; PrintPercent(ModRef, ModRefSum); - cerr << " Alias Analysis Evaluator Mod/Ref Summary: " - << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/" - << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n"; + errs() << " Alias Analysis Evaluator Mod/Ref Summary: " + << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/" + << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n"; } return false; diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 18c2b66505f6..b056d0091a09 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -20,8 +20,10 @@ #include "llvm/Target/TargetData.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// mergeSetIn - Merge the specified alias set into this alias set. @@ -186,8 +188,8 @@ bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const { void AliasSetTracker::clear() { // Delete all the PointerRec entries. 
- for (DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.begin(), - E = PointerMap.end(); I != E; ++I) + for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end(); + I != E; ++I) I->second->eraseFromList(); PointerMap.clear(); @@ -279,7 +281,7 @@ bool AliasSetTracker::add(Value *Ptr, unsigned Size) { bool AliasSetTracker::add(LoadInst *LI) { bool NewPtr; AliasSet &AS = addPointer(LI->getOperand(0), - AA.getTargetData().getTypeStoreSize(LI->getType()), + AA.getTypeStoreSize(LI->getType()), AliasSet::Refs, NewPtr); if (LI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -289,7 +291,7 @@ bool AliasSetTracker::add(StoreInst *SI) { bool NewPtr; Value *Val = SI->getOperand(0); AliasSet &AS = addPointer(SI->getOperand(1), - AA.getTargetData().getTypeStoreSize(Val->getType()), + AA.getTypeStoreSize(Val->getType()), AliasSet::Mods, NewPtr); if (SI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -411,7 +413,7 @@ bool AliasSetTracker::remove(Value *Ptr, unsigned Size) { } bool AliasSetTracker::remove(LoadInst *LI) { - unsigned Size = AA.getTargetData().getTypeStoreSize(LI->getType()); + unsigned Size = AA.getTypeStoreSize(LI->getType()); AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size); if (!AS) return false; remove(*AS); @@ -419,8 +421,7 @@ bool AliasSetTracker::remove(LoadInst *LI) { } bool AliasSetTracker::remove(StoreInst *SI) { - unsigned Size = - AA.getTargetData().getTypeStoreSize(SI->getOperand(0)->getType()); + unsigned Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size); if (!AS) return false; remove(*AS); @@ -485,7 +486,7 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { AS->removeCallSite(CS); // First, look up the PointerRec for this pointer. - DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(PtrVal); + PointerMapType::iterator I = PointerMap.find(PtrVal); if (I == PointerMap.end()) return; // Noop // If we found one, remove the pointer from the alias set it is in. @@ -511,7 +512,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { AA.copyValue(From, To); // First, look up the PointerRec for this pointer. - DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(From); + PointerMapType::iterator I = PointerMap.find(From); if (I == PointerMap.end()) return; // Noop assert(I->second->hasAliasSet() && "Dead entry?"); @@ -531,15 +532,15 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { // AliasSet/AliasSetTracker Printing Support //===----------------------------------------------------------------------===// -void AliasSet::print(std::ostream &OS) const { - OS << " AliasSet[" << (void*)this << "," << RefCount << "] "; +void AliasSet::print(raw_ostream &OS) const { + OS << " AliasSet[" << format("0x%p", (void*)this) << "," << RefCount << "] "; OS << (AliasTy == MustAlias ? 
"must" : "may") << " alias, "; switch (AccessTy) { case NoModRef: OS << "No access "; break; case Refs : OS << "Ref "; break; case Mods : OS << "Mod "; break; case ModRef : OS << "Mod/Ref "; break; - default: assert(0 && "Bad value for AccessTy!"); + default: llvm_unreachable("Bad value for AccessTy!"); } if (isVolatile()) OS << "[volatile] "; if (Forward) @@ -564,7 +565,7 @@ void AliasSet::print(std::ostream &OS) const { OS << "\n"; } -void AliasSetTracker::print(std::ostream &OS) const { +void AliasSetTracker::print(raw_ostream &OS) const { OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for " << PointerMap.size() << " pointer values.\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) @@ -572,8 +573,26 @@ void AliasSetTracker::print(std::ostream &OS) const { OS << "\n"; } -void AliasSet::dump() const { print (cerr); } -void AliasSetTracker::dump() const { print(cerr); } +void AliasSet::dump() const { print(errs()); } +void AliasSetTracker::dump() const { print(errs()); } + +//===----------------------------------------------------------------------===// +// ASTCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void AliasSetTracker::ASTCallbackVH::deleted() { + assert(AST && "ASTCallbackVH called with a null AliasSetTracker!"); + AST->deleteValue(getValPtr()); + // this now dangles! +} + +AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast) + : CallbackVH(V), AST(ast) {} + +AliasSetTracker::ASTCallbackVH & +AliasSetTracker::ASTCallbackVH::operator=(Value *V) { + return *this = ASTCallbackVH(V, AST); +} //===----------------------------------------------------------------------===// // AliasSetPrinter Pass @@ -596,7 +615,7 @@ namespace { for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) Tracker->add(&*I); - Tracker->print(cerr); + Tracker->print(errs()); delete Tracker; return false; } diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 493c6e88b3f8..f8cb32321b00 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -9,7 +9,6 @@ #include "llvm-c/Analysis.h" #include "llvm/Analysis/Verifier.h" -#include <fstream> #include <cstring> using namespace llvm; diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index f689dcac305a..2c4efc4985b3 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Analysis/Passes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -22,11 +23,15 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include <algorithm> using namespace llvm; @@ -35,12 +40,8 @@ using namespace llvm; // Useful predicates //===----------------------------------------------------------------------===// -static const User *isGEP(const Value *V) { - if (isa<GetElementPtrInst>(V) || - (isa<ConstantExpr>(V) && - cast<ConstantExpr>(V)->getOpcode() == Instruction::GetElementPtr)) - return 
cast<User>(V); - return 0; +static const GEPOperator *isGEP(const Value *V) { + return dyn_cast<GEPOperator>(V); } static const Value *GetGEPOperands(const Value *V, @@ -103,7 +104,7 @@ static bool isNonEscapingLocalObject(const Value *V) { /// isObjectSmallerThan - Return true if we can prove that the object specified /// by V is smaller than Size. static bool isObjectSmallerThan(const Value *V, unsigned Size, - const TargetData &TD) { + LLVMContext &Context, const TargetData &TD) { const Type *AccessTy; if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { AccessTy = GV->getType()->getElementType(); @@ -112,6 +113,12 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size, AccessTy = AI->getType()->getElementType(); else return false; + } else if (const CallInst* CI = extractMallocCall(V)) { + if (!isArrayMalloc(V, Context, &TD)) + // The size is the argument to the malloc call. + if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1))) + return (C->getZExtValue() < Size); + return false; } else if (const Argument *A = dyn_cast<Argument>(V)) { if (A->hasByValAttr()) AccessTy = cast<PointerType>(A->getType())->getElementType(); @@ -142,11 +149,10 @@ namespace { explicit NoAA(void *PID) : ImmutablePass(PID) { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); } virtual void initializePass() { - TD = &getAnalysis<TargetData>(); + TD = getAnalysisIfAvailable<TargetData>(); } virtual AliasResult alias(const Value *V1, unsigned V1Size, @@ -156,7 +162,7 @@ namespace { virtual void getArgumentAccesses(Function *F, CallSite CS, std::vector<PointerAccessInfo> &Info) { - assert(0 && "This method may not be called on this function!"); + llvm_unreachable("This method may not be called on this function!"); } virtual void getMustAliases(Value *P, std::vector<Value*> &RetVals) { } @@ -196,7 +202,12 @@ namespace { static char ID; // Class identification, replacement for typeinfo BasicAliasAnalysis() : NoAA(&ID) {} AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size); + const Value *V2, unsigned V2Size) { + assert(VisitedPHIs.empty() && "VisitedPHIs must be cleared after use!"); + AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size); + VisitedPHIs.clear(); + return Alias; + } ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); ModRefResult getModRefInfo(CallSite CS1, CallSite CS2); @@ -210,6 +221,22 @@ namespace { bool pointsToConstantMemory(const Value *P); private: + // VisitedPHIs - Track PHI nodes visited by a aliasCheck() call. + SmallSet<const PHINode*, 16> VisitedPHIs; + + // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction + // against another. + AliasResult aliasGEP(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + + // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction + // against another. + AliasResult aliasPHI(const PHINode *PN, unsigned PNSize, + const Value *V2, unsigned V2Size); + + AliasResult aliasCheck(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + // CheckGEPInstructions - Check two GEP instructions with known // must-aliasing base pointers. This checks to see if the index expressions // preclude the pointers from aliasing... 
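
The VisitedPHIs set declared in the hunk above is what keeps the new aliasCheck/aliasPHI recursion from looping when PHI nodes form cycles. Below is a toy model of just that guard, with Value/PHINode collapsed into one small struct; it illustrates the shape of the code, not the LLVM API.

#include <cstddef>
#include <cstdio>
#include <set>
#include <vector>

enum AliasResult { NoAlias, MayAlias, MustAlias };

struct Value {
  int Id;                              // leaves with equal Id must-alias
  std::vector<const Value*> Incoming;  // non-empty => this is a PHI
};

struct Checker {
  std::set<const Value*> VisitedPHIs;  // SmallSet<const PHINode*, 16> in the diff

  AliasResult aliasCheck(const Value *V1, const Value *V2) {
    if (V1 == V2) return MustAlias;
    if (!V1->Incoming.empty()) return aliasPHI(V1, V2);
    if (!V2->Incoming.empty()) return aliasPHI(V2, V1);
    return V1->Id == V2->Id ? MustAlias : NoAlias;
  }

  AliasResult aliasPHI(const Value *PN, const Value *V2) {
    // A PHI reached twice within one query means a cycle: answer MayAlias
    // rather than recursing forever (mirrors !VisitedPHIs.insert(PN)).
    if (!VisitedPHIs.insert(PN).second)
      return MayAlias;
    AliasResult Common = aliasCheck(PN->Incoming[0], V2);
    for (std::size_t i = 1; i < PN->Incoming.size() && Common != MayAlias; ++i)
      if (aliasCheck(PN->Incoming[i], V2) != Common)
        Common = MayAlias;             // sources disagree: be conservative
    return Common;
  }

  AliasResult alias(const Value *V1, const Value *V2) {
    AliasResult R = aliasCheck(V1, V2);
    VisitedPHIs.clear();               // the public entry point resets the guard
    return R;
  }
};

int main() {
  Value A{1, {}}, B{2, {}};
  Value Phi{3, {&A, &B}};
  Phi.Incoming.push_back(&Phi);        // self-loop, as a loop-header PHI has
  Checker C;
  std::printf("%d\n", C.alias(&Phi, &A));  // 1 == MayAlias, and it terminates
}

Clearing the set only in the public alias() wrapper matches the assert the diff adds there: every query starts with an empty VisitedPHIs.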
@@ -279,6 +306,27 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { if (!passedAsArg) return NoModRef; } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::atomic_cmp_swap: + case Intrinsic::atomic_swap: + case Intrinsic::atomic_load_add: + case Intrinsic::atomic_load_sub: + case Intrinsic::atomic_load_and: + case Intrinsic::atomic_load_nand: + case Intrinsic::atomic_load_or: + case Intrinsic::atomic_load_xor: + case Intrinsic::atomic_load_max: + case Intrinsic::atomic_load_min: + case Intrinsic::atomic_load_umax: + case Intrinsic::atomic_load_umin: + if (alias(II->getOperand(1), Size, P, Size) == NoAlias) + return NoModRef; + break; + } + } } // The AliasAnalysis base class has some smarts, lets use them. @@ -303,71 +351,12 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) { return NoAA::getModRefInfo(CS1, CS2); } - -// alias - Provide a bunch of ad-hoc rules to disambiguate in common cases, such -// as array references. +// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction +// against another. // AliasAnalysis::AliasResult -BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { - // Strip off any constant expression casts if they exist - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V1)) - if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType())) - V1 = CE->getOperand(0); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V2)) - if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType())) - V2 = CE->getOperand(0); - - // Are we checking for alias of the same value? - if (V1 == V2) return MustAlias; - - if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType())) - return NoAlias; // Scalars cannot alias each other - - // Strip off cast instructions. Since V1 and V2 are pointers, they must be - // pointer<->pointer bitcasts. - if (const BitCastInst *I = dyn_cast<BitCastInst>(V1)) - return alias(I->getOperand(0), V1Size, V2, V2Size); - if (const BitCastInst *I = dyn_cast<BitCastInst>(V2)) - return alias(V1, V1Size, I->getOperand(0), V2Size); - - // Figure out what objects these things are pointing to if we can. - const Value *O1 = V1->getUnderlyingObject(); - const Value *O2 = V2->getUnderlyingObject(); - - if (O1 != O2) { - // If V1/V2 point to two different objects we know that we have no alias. - if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) - return NoAlias; - - // Arguments can't alias with local allocations or noalias calls. - if ((isa<Argument>(O1) && (isa<AllocationInst>(O2) || isNoAliasCall(O2))) || - (isa<Argument>(O2) && (isa<AllocationInst>(O1) || isNoAliasCall(O1)))) - return NoAlias; - - // Most objects can't alias null. - if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) || - (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2))) - return NoAlias; - } - - // If the size of one access is larger than the entire object on the other - // side, then we know such behavior is undefined and can assume no alias. - const TargetData &TD = getTargetData(); - if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, TD)) || - (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, TD))) - return NoAlias; - - // If one pointer is the result of a call/invoke and the other is a - // non-escaping local object, then we know the object couldn't escape to a - // point where the call could return it. 
- if ((isa<CallInst>(O1) || isa<InvokeInst>(O1)) && - isNonEscapingLocalObject(O2) && O1 != O2) - return NoAlias; - if ((isa<CallInst>(O2) || isa<InvokeInst>(O2)) && - isNonEscapingLocalObject(O1) && O1 != O2) - return NoAlias; - +BasicAliasAnalysis::aliasGEP(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { // If we have two gep instructions with must-alias'ing base pointers, figure // out if the indexes to the GEP tell us anything about the derived pointer. // Note that we also handle chains of getelementptr instructions as well as @@ -387,8 +376,8 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size, GEP1->getOperand(0)->getType() == GEP2->getOperand(0)->getType() && // All operands are the same, ignoring the base. std::equal(GEP1->op_begin()+1, GEP1->op_end(), GEP2->op_begin()+1)) - return alias(GEP1->getOperand(0), V1Size, GEP2->getOperand(0), V2Size); - + return aliasCheck(GEP1->getOperand(0), V1Size, + GEP2->getOperand(0), V2Size); // Drill down into the first non-gep value, to test for must-aliasing of // the base pointers. @@ -405,7 +394,7 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size, const Value *BasePtr2 = GEP2->getOperand(0); // Do the base pointers alias? - AliasResult BaseAlias = alias(BasePtr1, ~0U, BasePtr2, ~0U); + AliasResult BaseAlias = aliasCheck(BasePtr1, ~0U, BasePtr2, ~0U); if (BaseAlias == NoAlias) return NoAlias; if (BaseAlias == MustAlias) { // If the base pointers alias each other exactly, check to see if we can @@ -435,79 +424,190 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size, // instruction. If one pointer is a GEP with a non-zero index of the other // pointer, we know they cannot alias. // - if (isGEP(V2)) { - std::swap(V1, V2); - std::swap(V1Size, V2Size); - } + if (V1Size == ~0U || V2Size == ~0U) + return MayAlias; - if (V1Size != ~0U && V2Size != ~0U) - if (isGEP(V1)) { - SmallVector<Value*, 16> GEPOperands; - const Value *BasePtr = GetGEPOperands(V1, GEPOperands); - - AliasResult R = alias(BasePtr, V1Size, V2, V2Size); - if (R == MustAlias) { - // If there is at least one non-zero constant index, we know they cannot - // alias. - bool ConstantFound = false; - bool AllZerosFound = true; - for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i) - if (const Constant *C = dyn_cast<Constant>(GEPOperands[i])) { - if (!C->isNullValue()) { - ConstantFound = true; - AllZerosFound = false; - break; - } - } else { - AllZerosFound = false; - } + SmallVector<Value*, 16> GEPOperands; + const Value *BasePtr = GetGEPOperands(V1, GEPOperands); + + AliasResult R = aliasCheck(BasePtr, ~0U, V2, V2Size); + if (R != MustAlias) + // If V2 may alias GEP base pointer, conservatively returns MayAlias. + // If V2 is known not to alias GEP base pointer, then the two values + // cannot alias per GEP semantics: "A pointer value formed from a + // getelementptr instruction is associated with the addresses associated + // with the first operand of the getelementptr". + return R; + + // If there is at least one non-zero constant index, we know they cannot + // alias. + bool ConstantFound = false; + bool AllZerosFound = true; + for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i) + if (const Constant *C = dyn_cast<Constant>(GEPOperands[i])) { + if (!C->isNullValue()) { + ConstantFound = true; + AllZerosFound = false; + break; + } + } else { + AllZerosFound = false; + } - // If we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 must aliases - // the ptr, the end result is a must alias also. 
- if (AllZerosFound) - return MustAlias; + // If we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 must aliases + // the ptr, the end result is a must alias also. + if (AllZerosFound) + return MustAlias; - if (ConstantFound) { - if (V2Size <= 1 && V1Size <= 1) // Just pointer check? - return NoAlias; + if (ConstantFound) { + if (V2Size <= 1 && V1Size <= 1) // Just pointer check? + return NoAlias; - // Otherwise we have to check to see that the distance is more than - // the size of the argument... build an index vector that is equal to - // the arguments provided, except substitute 0's for any variable - // indexes we find... - if (cast<PointerType>( - BasePtr->getType())->getElementType()->isSized()) { - for (unsigned i = 0; i != GEPOperands.size(); ++i) - if (!isa<ConstantInt>(GEPOperands[i])) - GEPOperands[i] = - Constant::getNullValue(GEPOperands[i]->getType()); - int64_t Offset = - getTargetData().getIndexedOffset(BasePtr->getType(), - &GEPOperands[0], - GEPOperands.size()); - - if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size) - return NoAlias; - } - } - } + // Otherwise we have to check to see that the distance is more than + // the size of the argument... build an index vector that is equal to + // the arguments provided, except substitute 0's for any variable + // indexes we find... + if (TD && + cast<PointerType>(BasePtr->getType())->getElementType()->isSized()) { + for (unsigned i = 0; i != GEPOperands.size(); ++i) + if (!isa<ConstantInt>(GEPOperands[i])) + GEPOperands[i] = Constant::getNullValue(GEPOperands[i]->getType()); + int64_t Offset = TD->getIndexedOffset(BasePtr->getType(), + &GEPOperands[0], + GEPOperands.size()); + + if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size) + return NoAlias; } + } + + return MayAlias; +} + +// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction +// against another. +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, + const Value *V2, unsigned V2Size) { + // The PHI node has already been visited, avoid recursion any further. + if (!VisitedPHIs.insert(PN)) + return MayAlias; + + SmallSet<Value*, 4> UniqueSrc; + SmallVector<Value*, 4> V1Srcs; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *PV1 = PN->getIncomingValue(i); + if (isa<PHINode>(PV1)) + // If any of the source itself is a PHI, return MayAlias conservatively + // to avoid compile time explosion. The worst possible case is if both + // sides are PHI nodes. In which case, this is O(m x n) time where 'm' + // and 'n' are the number of PHI sources. + return MayAlias; + if (UniqueSrc.insert(PV1)) + V1Srcs.push_back(PV1); + } + + AliasResult Alias = aliasCheck(V1Srcs[0], PNSize, V2, V2Size); + // Early exit if the check of the first PHI source against V2 is MayAlias. + // Other results are not possible. + if (Alias == MayAlias) + return MayAlias; + + // If all sources of the PHI node NoAlias or MustAlias V2, then returns + // NoAlias / MustAlias. Otherwise, returns MayAlias. + for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { + Value *V = V1Srcs[i]; + AliasResult ThisAlias = aliasCheck(V, PNSize, V2, V2Size); + if (ThisAlias != Alias || ThisAlias == MayAlias) + return MayAlias; + } + + return Alias; +} + +// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases, +// such as array references. 
+// +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + // Strip off any casts if they exist. + V1 = V1->stripPointerCasts(); + V2 = V2->stripPointerCasts(); + + // Are we checking for alias of the same value? + if (V1 == V2) return MustAlias; + + if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType())) + return NoAlias; // Scalars cannot alias each other + + // Figure out what objects these things are pointing to if we can. + const Value *O1 = V1->getUnderlyingObject(); + const Value *O2 = V2->getUnderlyingObject(); + + if (O1 != O2) { + // If V1/V2 point to two different objects we know that we have no alias. + if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) + return NoAlias; + + // Arguments can't alias with local allocations or noalias calls. + if ((isa<Argument>(O1) && (isa<AllocationInst>(O2) || isNoAliasCall(O2))) || + (isa<Argument>(O2) && (isa<AllocationInst>(O1) || isNoAliasCall(O1)))) + return NoAlias; + + // Most objects can't alias null. + if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) || + (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2))) + return NoAlias; + } + + // If the size of one access is larger than the entire object on the other + // side, then we know such behavior is undefined and can assume no alias. + LLVMContext &Context = V1->getContext(); + if (TD) + if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, Context, *TD)) || + (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, Context, *TD))) + return NoAlias; + + // If one pointer is the result of a call/invoke and the other is a + // non-escaping local object, then we know the object couldn't escape to a + // point where the call could return it. + if ((isa<CallInst>(O1) || isa<InvokeInst>(O1)) && + isNonEscapingLocalObject(O2) && O1 != O2) + return NoAlias; + if ((isa<CallInst>(O2) || isa<InvokeInst>(O2)) && + isNonEscapingLocalObject(O1) && O1 != O2) + return NoAlias; + + if (!isGEP(V1) && isGEP(V2)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + } + if (isGEP(V1)) + return aliasGEP(V1, V1Size, V2, V2Size); + + if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + } + if (const PHINode *PN = dyn_cast<PHINode>(V1)) + return aliasPHI(PN, V1Size, V2, V2Size); return MayAlias; } // This function is used to determine if the indices of two GEP instructions are // equal. V1 and V2 are the indices. -static bool IndexOperandsEqual(Value *V1, Value *V2) { +static bool IndexOperandsEqual(Value *V1, Value *V2, LLVMContext &Context) { if (V1->getType() == V2->getType()) return V1 == V2; if (Constant *C1 = dyn_cast<Constant>(V1)) if (Constant *C2 = dyn_cast<Constant>(V2)) { // Sign extend the constants to long types, if necessary - if (C1->getType() != Type::Int64Ty) - C1 = ConstantExpr::getSExt(C1, Type::Int64Ty); - if (C2->getType() != Type::Int64Ty) - C2 = ConstantExpr::getSExt(C2, Type::Int64Ty); + if (C1->getType() != Type::getInt64Ty(Context)) + C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context)); + if (C2->getType() != Type::getInt64Ty(Context)) + C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context)); return C1 == C2; } return false; @@ -528,6 +628,8 @@ BasicAliasAnalysis::CheckGEPInstructions( const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty); + LLVMContext &Context = GEPPointerTy->getContext(); + // Find the (possibly empty) initial sequence of equal values... which are not // necessarily constants. 
unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops; @@ -535,7 +637,8 @@ BasicAliasAnalysis::CheckGEPInstructions( unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands); unsigned UnequalOper = 0; while (UnequalOper != MinOperands && - IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) { + IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper], + Context)) { // Advance through the type as we go... ++UnequalOper; if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty)) @@ -599,10 +702,10 @@ BasicAliasAnalysis::CheckGEPInstructions( if (Constant *G2OC = dyn_cast<ConstantInt>(const_cast<Value*>(G2Oper))){ if (G1OC->getType() != G2OC->getType()) { // Sign extend both operands to long. - if (G1OC->getType() != Type::Int64Ty) - G1OC = ConstantExpr::getSExt(G1OC, Type::Int64Ty); - if (G2OC->getType() != Type::Int64Ty) - G2OC = ConstantExpr::getSExt(G2OC, Type::Int64Ty); + if (G1OC->getType() != Type::getInt64Ty(Context)) + G1OC = ConstantExpr::getSExt(G1OC, Type::getInt64Ty(Context)); + if (G2OC->getType() != Type::getInt64Ty(Context)) + G2OC = ConstantExpr::getSExt(G2OC, Type::getInt64Ty(Context)); GEP1Ops[FirstConstantOper] = G1OC; GEP2Ops[FirstConstantOper] = G2OC; } @@ -673,6 +776,10 @@ BasicAliasAnalysis::CheckGEPInstructions( // However, one GEP may have more operands than the other. If this is the // case, there may still be hope. Check this now. if (FirstConstantOper == MinOperands) { + // Without TargetData, we won't know what the offsets are. + if (!TD) + return MayAlias; + // Make GEP1Ops be the longer one if there is a longer one. if (NumGEP1Ops < NumGEP2Ops) { std::swap(GEP1Ops, GEP2Ops); @@ -692,13 +799,12 @@ BasicAliasAnalysis::CheckGEPInstructions( GEP1Ops[i] = Constant::getNullValue(GEP1Ops[i]->getType()); // Okay, now get the offset. This is the relative offset for the full // instruction. - const TargetData &TD = getTargetData(); - int64_t Offset1 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops, - NumGEP1Ops); + int64_t Offset1 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops, + NumGEP1Ops); // Now check without any constants at the end. - int64_t Offset2 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops, - MinOperands); + int64_t Offset2 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops, + MinOperands); // Make sure we compare the absolute difference. if (Offset1 > Offset2) @@ -734,7 +840,8 @@ BasicAliasAnalysis::CheckGEPInstructions( const Type *ZeroIdxTy = GEPPointerTy; for (unsigned i = 0; i != FirstConstantOper; ++i) { if (!isa<StructType>(ZeroIdxTy)) - GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Type::Int32Ty); + GEP1Ops[i] = GEP2Ops[i] = + Constant::getNullValue(Type::getInt32Ty(Context)); if (const CompositeType *CT = dyn_cast<CompositeType>(ZeroIdxTy)) ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]); @@ -775,9 +882,13 @@ BasicAliasAnalysis::CheckGEPInstructions( // value possible. 
// if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty)) - GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,AT->getNumElements()-1); + GEP1Ops[i] = + ConstantInt::get(Type::getInt64Ty(Context), + AT->getNumElements()-1); else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty)) - GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,VT->getNumElements()-1); + GEP1Ops[i] = + ConstantInt::get(Type::getInt64Ty(Context), + VT->getNumElements()-1); } } @@ -812,11 +923,11 @@ BasicAliasAnalysis::CheckGEPInstructions( } } - if (GEPPointerTy->getElementType()->isSized()) { + if (TD && GEPPointerTy->getElementType()->isSized()) { int64_t Offset1 = - getTargetData().getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops); + TD->getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops); int64_t Offset2 = - getTargetData().getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops); + TD->getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops); assert(Offset1 != Offset2 && "There is at least one different constant here!"); diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 8ada5a3f74cd..6fed4005d193 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -25,38 +25,36 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/GraphWriter.h" -#include "llvm/Config/config.h" -#include <iosfwd> -#include <sstream> -#include <fstream> using namespace llvm; namespace llvm { template<> struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits { static std::string getGraphName(const Function *F) { - return "CFG for '" + F->getName() + "' function"; + return "CFG for '" + F->getNameStr() + "' function"; } static std::string getNodeLabel(const BasicBlock *Node, const Function *Graph, bool ShortNames) { if (ShortNames && !Node->getName().empty()) - return Node->getName() + ":"; + return Node->getNameStr() + ":"; + + std::string Str; + raw_string_ostream OS(Str); - std::ostringstream Out; if (ShortNames) { - WriteAsOperand(Out, Node, false); - return Out.str(); + WriteAsOperand(OS, Node, false); + return OS.str(); } if (Node->getName().empty()) { - WriteAsOperand(Out, Node, false); - Out << ":"; + WriteAsOperand(OS, Node, false); + OS << ":"; } - - Out << *Node; - std::string OutStr = Out.str(); + + OS << *Node; + std::string OutStr = OS.str(); if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); // Process string output to make it nicer... @@ -94,7 +92,7 @@ namespace { return false; } - void print(std::ostream &OS, const Module* = 0) const {} + void print(raw_ostream &OS, const Module* = 0) const {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -112,11 +110,11 @@ namespace { CFGOnlyViewer() : FunctionPass(&ID) {} virtual bool runOnFunction(Function &F) { - F.viewCFG(); + F.viewCFGOnly(); return false; } - void print(std::ostream &OS, const Module* = 0) const {} + void print(raw_ostream &OS, const Module* = 0) const {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -136,19 +134,21 @@ namespace { explicit CFGPrinter(void *pid) : FunctionPass(pid) {} virtual bool runOnFunction(Function &F) { - std::string Filename = "cfg." + F.getName() + ".dot"; - cerr << "Writing '" << Filename << "'..."; - std::ofstream File(Filename.c_str()); + std::string Filename = "cfg." 
+ F.getNameStr() + ".dot"; + errs() << "Writing '" << Filename << "'..."; + + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); - if (File.good()) + if (ErrorInfo.empty()) WriteGraph(File, (const Function*)&F); else - cerr << " error opening file for writing!"; - cerr << "\n"; + errs() << " error opening file for writing!"; + errs() << "\n"; return false; } - void print(std::ostream &OS, const Module* = 0) const {} + void print(raw_ostream &OS, const Module* = 0) const {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -166,18 +166,20 @@ namespace { CFGOnlyPrinter() : FunctionPass(&ID) {} explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {} virtual bool runOnFunction(Function &F) { - std::string Filename = "cfg." + F.getName() + ".dot"; - cerr << "Writing '" << Filename << "'..."; - std::ofstream File(Filename.c_str()); + std::string Filename = "cfg." + F.getNameStr() + ".dot"; + errs() << "Writing '" << Filename << "'..."; - if (File.good()) + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) WriteGraph(File, (const Function*)&F, true); else - cerr << " error opening file for writing!"; - cerr << "\n"; + errs() << " error opening file for writing!"; + errs() << "\n"; return false; } - void print(std::ostream &OS, const Module* = 0) const {} + void print(raw_ostream &OS, const Module* = 0) const {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -196,7 +198,7 @@ P2("dot-cfg-only", /// being a 'dot' and 'gv' program in your path. /// void Function::viewCFG() const { - ViewGraph(this, "cfg" + getName()); + ViewGraph(this, "cfg" + getNameStr()); } /// viewCFGOnly - This function is meant for use from the debugger. It works @@ -205,7 +207,7 @@ void Function::viewCFG() const { /// his can make the graph smaller. /// void Function::viewCFGOnly() const { - ViewGraph(this, "cfg" + getName(), true); + ViewGraph(this, "cfg" + getNameStr(), true); } FunctionPass *llvm::createCFGPrinterPass () { diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 6f2a06c7ac8f..1d2f118bb446 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -6,28 +6,33 @@ add_llvm_library(LLVMAnalysis AliasSetTracker.cpp Analysis.cpp BasicAliasAnalysis.cpp - CaptureTracking.cpp CFGPrinter.cpp + CaptureTracking.cpp ConstantFolding.cpp DbgInfoPrinter.cpp DebugInfo.cpp + IVUsers.cpp + InlineCost.cpp InstCount.cpp Interval.cpp IntervalPartition.cpp - IVUsers.cpp LibCallAliasAnalysis.cpp LibCallSemantics.cpp LiveValues.cpp LoopDependenceAnalysis.cpp LoopInfo.cpp LoopPass.cpp - LoopVR.cpp + MallocHelper.cpp MemoryDependenceAnalysis.cpp + PointerTracking.cpp PostDominators.cpp + ProfileEstimatorPass.cpp ProfileInfo.cpp ProfileInfoLoader.cpp ProfileInfoLoaderPass.cpp + ProfileVerifierPass.cpp ScalarEvolution.cpp + ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionExpander.cpp SparsePropagation.cpp Trace.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index a19b8e4f94db..b30ac719ae0e 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -54,7 +54,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) { // its return value and doesn't unwind (a readonly function can leak bits // by throwing an exception or not depending on the input value). 
if (CS.onlyReadsMemory() && CS.doesNotThrow() && - I->getType() == Type::VoidTy) + I->getType() == Type::getVoidTy(V->getContext())) break; // Not captured if only passed via 'nocapture' arguments. Note that diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 5aa4d56c4e67..0ce1c24bed67 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1,4 +1,4 @@ -//===-- ConstantFolding.cpp - Analyze constant folding possibilities ------===// +//===-- ConstantFolding.cpp - Fold instructions into constants ------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This family of functions determines the possibility of performing constant -// folding. +// This file defines routines for folding instructions into constants. +// +// Also, to supplement the basic VMCore ConstantExpr simplifications, +// this file defines some additional folding routines that can make use of +// TargetData information. These functions cannot go in VMCore due to library +// dependency issues. // //===----------------------------------------------------------------------===// @@ -19,9 +23,11 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include <cerrno> @@ -92,7 +98,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, /// these together. If target data info is available, it is provided as TD, /// otherwise TD is null. static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const TargetData *TD){ + Constant *Op1, const TargetData *TD, + LLVMContext &Context){ // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. @@ -121,40 +128,103 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// constant expression, do so. static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps, const Type *ResultTy, + LLVMContext &Context, const TargetData *TD) { Constant *Ptr = Ops[0]; if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized()) return 0; - - uint64_t BasePtr = 0; + + unsigned BitWidth = TD->getTypeSizeInBits(TD->getIntPtrType(Context)); + APInt BasePtr(BitWidth, 0); + bool BaseIsInt = true; if (!Ptr->isNullValue()) { // If this is a inttoptr from a constant int, we can fold this as the base, // otherwise we can't. 
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) if (CE->getOpcode() == Instruction::IntToPtr) - if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) - BasePtr = Base->getZExtValue(); + if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) { + BasePtr = Base->getValue(); + BasePtr.zextOrTrunc(BitWidth); + } if (BasePtr == 0) - return 0; + BaseIsInt = false; } // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' for (unsigned i = 1; i != NumOps; ++i) if (!isa<ConstantInt>(Ops[i])) - return false; + return 0; - uint64_t Offset = TD->getIndexedOffset(Ptr->getType(), - (Value**)Ops+1, NumOps-1); - Constant *C = ConstantInt::get(TD->getIntPtrType(), Offset+BasePtr); - return ConstantExpr::getIntToPtr(C, ResultTy); + APInt Offset = APInt(BitWidth, + TD->getIndexedOffset(Ptr->getType(), + (Value**)Ops+1, NumOps-1)); + // If the base value for this address is a literal integer value, fold the + // getelementptr to the resulting integer value casted to the pointer type. + if (BaseIsInt) { + Constant *C = ConstantInt::get(Context, Offset+BasePtr); + return ConstantExpr::getIntToPtr(C, ResultTy); + } + + // Otherwise form a regular getelementptr. Recompute the indices so that + // we eliminate over-indexing of the notional static type array bounds. + // This makes it easy to determine if the getelementptr is "inbounds". + // Also, this helps GlobalOpt do SROA on GlobalVariables. + const Type *Ty = Ptr->getType(); + SmallVector<Constant*, 32> NewIdxs; + do { + if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { + // The only pointer indexing we'll do is on the first index of the GEP. + if (isa<PointerType>(ATy) && !NewIdxs.empty()) + break; + // Determine which element of the array the offset points into. + APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); + if (ElemSize == 0) + return 0; + APInt NewIdx = Offset.udiv(ElemSize); + Offset -= NewIdx * ElemSize; + NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Context), NewIdx)); + Ty = ATy->getElementType(); + } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { + // Determine which field of the struct the offset points into. The + // getZExtValue is at least as safe as the StructLayout API because we + // know the offset is within the struct at this point. + const StructLayout &SL = *TD->getStructLayout(STy); + unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue()); + NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Context), ElIdx)); + Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx)); + Ty = STy->getTypeAtIndex(ElIdx); + } else { + // We've reached some non-indexable type. + break; + } + } while (Ty != cast<PointerType>(ResultTy)->getElementType()); + + // If we haven't used up the entire offset by descending the static + // type, then the offset is pointing into the middle of an indivisible + // member, so we can't simplify it. + if (Offset != 0) + return 0; + + // Create a GEP. + Constant *C = + ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size()); + assert(cast<PointerType>(C->getType())->getElementType() == Ty && + "Computed GetElementPtr has unexpected type!"); + + // If we ended up indexing a member with a type that doesn't match + // the type of what the original indices indexed, add a cast. 
+ if (Ty != cast<PointerType>(ResultTy)->getElementType()) + C = ConstantExpr::getBitCast(C, ResultTy); + + return C; } /// FoldBitCast - Constant fold bitcast, symbolically evaluating it with /// targetdata. Return 0 if unfoldable. static Constant *FoldBitCast(Constant *C, const Type *DestTy, - const TargetData &TD) { + const TargetData &TD, LLVMContext &Context) { // If this is a bitcast from constant vector -> vector, fold it. if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) { if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { @@ -180,10 +250,10 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, if (DstEltTy->isFloatingPoint()) { // Fold to an vector of integers with same size as our FP type. unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); - const Type *DestIVTy = VectorType::get(IntegerType::get(FPWidth), - NumDstElt); + const Type *DestIVTy = VectorType::get( + IntegerType::get(Context, FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. - C = FoldBitCast(C, DestIVTy, TD); + C = FoldBitCast(C, DestIVTy, TD, Context); if (!C) return 0; // Finally, VMCore can handle this now that #elts line up. @@ -194,8 +264,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // it to integer first. if (SrcEltTy->isFloatingPoint()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); - const Type *SrcIVTy = VectorType::get(IntegerType::get(FPWidth), - NumSrcElt); + const Type *SrcIVTy = VectorType::get( + IntegerType::get(Context, FPWidth), NumSrcElt); // Ask VMCore to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); CV = dyn_cast<ConstantVector>(C); @@ -228,7 +298,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // Shift it to the right place, depending on endianness. Src = ConstantExpr::getShl(Src, - ConstantInt::get(Src->getType(), ShiftAmt)); + ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; // Mix it in. @@ -251,7 +321,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // Shift the piece of the value into the right place, depending on // endianness. Constant *Elt = ConstantExpr::getLShr(Src, - ConstantInt::get(Src->getType(), ShiftAmt)); + ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; // Truncate and remember this piece. @@ -278,7 +348,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, /// is returned. Note that this function can only fail when attempting to fold /// instructions like loads and stores, which have no constant expression form. 
/// -Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { +Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context, + const TargetData *TD) { if (PHINode *PN = dyn_cast<PHINode>(I)) { if (PN->getNumIncomingValues() == 0) return UndefValue::get(PN->getType()); @@ -306,16 +377,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { if (const CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), - Ops.data(), Ops.size(), TD); - else - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Ops.data(), Ops.size(), TD); + Ops.data(), Ops.size(), + Context, TD); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), + Ops.data(), Ops.size(), Context, TD); } /// ConstantFoldConstantExpression - Attempt to fold the constant expression /// using the specified TargetData. If successful, the constant result is /// result is returned, if not, null is returned. Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, + LLVMContext &Context, const TargetData *TD) { SmallVector<Constant*, 8> Ops; for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) @@ -323,10 +396,10 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, if (CE->isCompare()) return ConstantFoldCompareInstOperands(CE->getPredicate(), - Ops.data(), Ops.size(), TD); - else - return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), - Ops.data(), Ops.size(), TD); + Ops.data(), Ops.size(), + Context, TD); + return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), + Ops.data(), Ops.size(), Context, TD); } /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the @@ -337,11 +410,13 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, /// Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, Constant* const* Ops, unsigned NumOps, + LLVMContext &Context, const TargetData *TD) { // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) { if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) - if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) + if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD, + Context)) return C; return ConstantExpr::get(Opcode, Ops[0], Ops[1]); @@ -356,9 +431,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, return 0; case Instruction::ICmp: case Instruction::FCmp: - case Instruction::VICmp: - case Instruction::VFCmp: - assert(0 &&"This function is invalid for compares: no predicate specified"); + llvm_unreachable("This function is invalid for compares: no predicate specified"); case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing // the width of a pointer, so it can't be done in ConstantExpr::getCast. 
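ConstantFoldInstruction above follows a simple gate-then-dispatch shape: bail out as soon as any operand is not a constant, otherwise hand the constant operands to the compare or generic folder. The same shape over a toy instruction, a sketch with hypothetical types:

#include <iostream>
#include <optional>
#include <vector>

// Toy "instruction": an opcode over operands that may or may not be
// compile-time constants, mirroring ConstantFoldInstruction's gate.
struct Inst {
  char Op;
  std::vector<std::optional<long>> Operands;
};

static std::optional<long> constantFold(const Inst &I) {
  std::vector<long> Ops;
  for (const auto &O : I.Operands) {
    if (!O)
      return std::nullopt;   // one non-constant operand blocks the fold
    Ops.push_back(*O);
  }
  switch (I.Op) {            // dispatch once all operands are constant
  case '+': return Ops[0] + Ops[1];
  case '<': return Ops[0] < Ops[1];   // the "compare" path
  default:  return std::nullopt;
  }
}

int main() {
  std::cout << *constantFold({'+', {2, 3}}) << "\n";               // 5
  std::cout << constantFold({'+', {2, std::nullopt}}).has_value()  // 0
            << "\n";
}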
@@ -368,7 +441,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, unsigned InWidth = Input->getType()->getScalarSizeInBits(); if (TD->getPointerSizeInBits() < InWidth) { Constant *Mask = - ConstantInt::get(APInt::getLowBitsSet(InWidth, + ConstantInt::get(Context, APInt::getLowBitsSet(InWidth, TD->getPointerSizeInBits())); Input = ConstantExpr::getAnd(Input, Mask); } @@ -387,7 +460,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, CE->getType()->getScalarSizeInBits()) { if (CE->getOpcode() == Instruction::PtrToInt) { Constant *Input = CE->getOperand(0); - Constant *C = FoldBitCast(Input, DestTy, *TD); + Constant *C = FoldBitCast(Input, DestTy, *TD, Context); return C ? C : ConstantExpr::getBitCast(Input, DestTy); } // If there's a constant offset added to the integer value before @@ -412,9 +485,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, AT->getNumElements()))) { Constant *Index[] = { Constant::getNullValue(CE->getType()), - ConstantInt::get(ElemIdx) + ConstantInt::get(Context, ElemIdx) }; - return ConstantExpr::getGetElementPtr(GV, &Index[0], 2); + return + ConstantExpr::getGetElementPtr(GV, &Index[0], 2); } } } @@ -434,7 +508,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::BitCast: if (TD) - if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD)) + if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD, Context)) return C; return ConstantExpr::getBitCast(Ops[0], DestTy); case Instruction::Select: @@ -446,7 +520,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: - if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD)) + if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, Context, TD)) return C; return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1); @@ -460,6 +534,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant*const * Ops, unsigned NumOps, + LLVMContext &Context, const TargetData *TD) { // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp (ptrtoint x), 0 -> icmp x, null @@ -470,14 +545,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // around to know if bit truncation is happening. if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops[0])) { if (TD && Ops[1]->isNullValue()) { - const Type *IntPtrTy = TD->getIntPtrType(); + const Type *IntPtrTy = TD->getIntPtrType(Context); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), IntPtrTy, false); Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) }; - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD); + return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, + Context, TD); } // Only do this transformation if the int is intptrty in size, otherwise @@ -487,13 +563,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *C = CE0->getOperand(0); Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) }; // FIXME! 
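The PtrToInt case above has to mask the input down to the pointer width by hand, since ConstantExpr::getCast has no TargetData to consult. The same low-bits mask in standalone form, assuming a hypothetical 32-bit pointer target:

#include <cstdint>
#include <iostream>

int main() {
  const unsigned PtrBits = 32, InWidth = 64;   // assumed target/input widths
  uint64_t Input = 0x1122334455667788ULL;

  if (PtrBits < InWidth) {
    // APInt::getLowBitsSet(InWidth, PtrBits) as plain arithmetic:
    // keep only the bits a pointer of this target can hold.
    uint64_t Mask = (PtrBits >= 64) ? ~0ULL : ((1ULL << PtrBits) - 1);
    Input &= Mask;
  }
  std::cout << std::hex << Input << "\n";      // 55667788
}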
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD); + return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, + Context, TD); } } if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops[1])) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { - const Type *IntPtrTy = TD->getIntPtrType(); + const Type *IntPtrTy = TD->getIntPtrType(Context); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the @@ -503,7 +580,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), IntPtrTy, false); Constant *NewOps[] = { C0, C1 }; - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD); + return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, + Context, TD); } // Only do this transformation if the int is intptrty in size, otherwise @@ -514,7 +592,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *NewOps[] = { CE0->getOperand(0), CE1->getOperand(0) }; - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD); + return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, + Context, TD); } } } @@ -597,74 +676,47 @@ llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: + case Intrinsic::uadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: return true; - default: break; + default: + return false; + case 0: break; } if (!F->hasName()) return false; - const char *Str = F->getNameStart(); - unsigned Len = F->getNameLen(); + StringRef Name = F->getName(); // In these cases, the check of the length is required. We don't want to // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. 
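The "cos\0blah" comment above is the whole argument for the StringRef switch-over: strcmp stops at the first NUL, while a length-aware comparison does not. A tiny demonstration, with std::string standing in for StringRef:

#include <cstring>
#include <iostream>
#include <string>

int main() {
  // A name with an embedded NUL, length 8, legal for an LLVM Value name.
  std::string Name("cos\0blah", 8);

  // strcmp sees only "cos" and reports a match...
  std::cout << (std::strcmp(Name.c_str(), "cos") == 0) << "\n";  // 1

  // ...but a length-aware comparison (what StringRef's == does) does not.
  std::cout << (Name == "cos") << "\n";                          // 0
}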
- switch (Str[0]) { + switch (Name[0]) { default: return false; case 'a': - if (Len == 4) - return !strcmp(Str, "acos") || !strcmp(Str, "asin") || - !strcmp(Str, "atan"); - else if (Len == 5) - return !strcmp(Str, "atan2"); - return false; + return Name == "acos" || Name == "asin" || + Name == "atan" || Name == "atan2"; case 'c': - if (Len == 3) - return !strcmp(Str, "cos"); - else if (Len == 4) - return !strcmp(Str, "ceil") || !strcmp(Str, "cosf") || - !strcmp(Str, "cosh"); - return false; + return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': - if (Len == 3) - return !strcmp(Str, "exp"); - return false; + return Name == "exp"; case 'f': - if (Len == 4) - return !strcmp(Str, "fabs") || !strcmp(Str, "fmod"); - else if (Len == 5) - return !strcmp(Str, "floor"); - return false; - break; + return Name == "fabs" || Name == "fmod" || Name == "floor"; case 'l': - if (Len == 3 && !strcmp(Str, "log")) - return true; - if (Len == 5 && !strcmp(Str, "log10")) - return true; - return false; + return Name == "log" || Name == "log10"; case 'p': - if (Len == 3 && !strcmp(Str, "pow")) - return true; - return false; + return Name == "pow"; case 's': - if (Len == 3) - return !strcmp(Str, "sin"); - if (Len == 4) - return !strcmp(Str, "sinh") || !strcmp(Str, "sqrt") || - !strcmp(Str, "sinf"); - if (Len == 5) - return !strcmp(Str, "sqrtf"); - return false; + return Name == "sin" || Name == "sinh" || Name == "sqrt" || + Name == "sinf" || Name == "sqrtf"; case 't': - if (Len == 3 && !strcmp(Str, "tan")) - return true; - else if (Len == 4 && !strcmp(Str, "tanh")) - return true; - return false; + return Name == "tan" || Name == "tanh"; } } static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, - const Type *Ty) { + const Type *Ty, LLVMContext &Context) { errno = 0; V = NativeFP(V); if (errno != 0) { @@ -672,17 +724,18 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, return 0; } - if (Ty == Type::FloatTy) - return ConstantFP::get(APFloat((float)V)); - if (Ty == Type::DoubleTy) - return ConstantFP::get(APFloat(V)); - assert(0 && "Can only constant fold float/double"); + if (Ty->isFloatTy()) + return ConstantFP::get(Context, APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Context, APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); return 0; // dummy return to suppress warning } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V, double W, - const Type *Ty) { + const Type *Ty, + LLVMContext &Context) { errno = 0; V = NativeFP(V, W); if (errno != 0) { @@ -690,137 +743,195 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), return 0; } - if (Ty == Type::FloatTy) - return ConstantFP::get(APFloat((float)V)); - if (Ty == Type::DoubleTy) - return ConstantFP::get(APFloat(V)); - assert(0 && "Can only constant fold float/double"); + if (Ty->isFloatTy()) + return ConstantFP::get(Context, APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Context, APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); return 0; // dummy return to suppress warning } /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. 
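ConstantFoldFP above evaluates the call on the host libm and declines to fold when errno signals a domain or range error. The same pattern standalone, assuming a host where libm reports errors through errno, with std::optional standing in for the null Constant:

#include <cerrno>
#include <cmath>
#include <iostream>
#include <optional>

// Evaluate NativeFP(V) on the host, mirroring ConstantFoldFP: if the call
// sets errno, report failure so the caller leaves the expression unfolded.
static std::optional<double> foldFP(double (*NativeFP)(double), double V) {
  errno = 0;
  double R = NativeFP(V);
  if (errno != 0)
    return std::nullopt;
  return R;
}

int main() {
  if (auto R = foldFP(std::sqrt, 2.0))
    std::cout << "folded: " << *R << "\n";
  if (!foldFP(std::log, -1.0))
    std::cout << "log(-1) not folded (EDOM)\n";
}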
- Constant * llvm::ConstantFoldCall(Function *F, - Constant* const* Operands, unsigned NumOperands) { + Constant *const *Operands, unsigned NumOperands) { if (!F->hasName()) return 0; - const char *Str = F->getNameStart(); - unsigned Len = F->getNameLen(); - + LLVMContext &Context = F->getContext(); + StringRef Name = F->getName(); + const Type *Ty = F->getReturnType(); if (NumOperands == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { - if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy) + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// Currently APFloat versions of these functions do not exist, so we use /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. - double V = Ty==Type::FloatTy ? (double)Op->getValueAPF().convertToFloat(): + double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() : Op->getValueAPF().convertToDouble(); - switch (Str[0]) { + switch (Name[0]) { case 'a': - if (Len == 4 && !strcmp(Str, "acos")) - return ConstantFoldFP(acos, V, Ty); - else if (Len == 4 && !strcmp(Str, "asin")) - return ConstantFoldFP(asin, V, Ty); - else if (Len == 4 && !strcmp(Str, "atan")) - return ConstantFoldFP(atan, V, Ty); + if (Name == "acos") + return ConstantFoldFP(acos, V, Ty, Context); + else if (Name == "asin") + return ConstantFoldFP(asin, V, Ty, Context); + else if (Name == "atan") + return ConstantFoldFP(atan, V, Ty, Context); break; case 'c': - if (Len == 4 && !strcmp(Str, "ceil")) - return ConstantFoldFP(ceil, V, Ty); - else if (Len == 3 && !strcmp(Str, "cos")) - return ConstantFoldFP(cos, V, Ty); - else if (Len == 4 && !strcmp(Str, "cosh")) - return ConstantFoldFP(cosh, V, Ty); - else if (Len == 4 && !strcmp(Str, "cosf")) - return ConstantFoldFP(cos, V, Ty); + if (Name == "ceil") + return ConstantFoldFP(ceil, V, Ty, Context); + else if (Name == "cos") + return ConstantFoldFP(cos, V, Ty, Context); + else if (Name == "cosh") + return ConstantFoldFP(cosh, V, Ty, Context); + else if (Name == "cosf") + return ConstantFoldFP(cos, V, Ty, Context); break; case 'e': - if (Len == 3 && !strcmp(Str, "exp")) - return ConstantFoldFP(exp, V, Ty); + if (Name == "exp") + return ConstantFoldFP(exp, V, Ty, Context); break; case 'f': - if (Len == 4 && !strcmp(Str, "fabs")) - return ConstantFoldFP(fabs, V, Ty); - else if (Len == 5 && !strcmp(Str, "floor")) - return ConstantFoldFP(floor, V, Ty); + if (Name == "fabs") + return ConstantFoldFP(fabs, V, Ty, Context); + else if (Name == "floor") + return ConstantFoldFP(floor, V, Ty, Context); break; case 'l': - if (Len == 3 && !strcmp(Str, "log") && V > 0) - return ConstantFoldFP(log, V, Ty); - else if (Len == 5 && !strcmp(Str, "log10") && V > 0) - return ConstantFoldFP(log10, V, Ty); - else if (!strcmp(Str, "llvm.sqrt.f32") || - !strcmp(Str, "llvm.sqrt.f64")) { + if (Name == "log" && V > 0) + return ConstantFoldFP(log, V, Ty, Context); + else if (Name == "log10" && V > 0) + return ConstantFoldFP(log10, V, Ty, Context); + else if (Name == "llvm.sqrt.f32" || + Name == "llvm.sqrt.f64") { if (V >= -0.0) - return ConstantFoldFP(sqrt, V, Ty); + return ConstantFoldFP(sqrt, V, Ty, Context); else // Undefined return Constant::getNullValue(Ty); } break; case 's': - if (Len == 3 && !strcmp(Str, "sin")) - return ConstantFoldFP(sin, V, Ty); - else if (Len == 4 && !strcmp(Str, "sinh")) - return ConstantFoldFP(sinh, V, Ty); - else if (Len == 4 && !strcmp(Str, "sqrt") && V >= 0) - return ConstantFoldFP(sqrt, V, Ty); - 
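The single-operand path above evaluates the float variants by round-tripping through double, leaning on the comment's claim that (float)f((double)x) == f(x) for these particular functions. A quick empirical check of that claim for cos on the host libm:

#include <cmath>
#include <iostream>

int main() {
  float X = 1.5f;

  // What the constant folder computes for "cosf": evaluate in double,
  // then truncate the result back to float.
  float ViaDouble = (float)std::cos((double)X);

  // What the runtime's float overload would compute.
  float Direct = std::cos(X);

  // Whether these agree is a property of the host libm; the fold's
  // comment asserts it holds for the functions it handles.
  std::cout << (ViaDouble == Direct ? "match" : "mismatch") << "\n";
}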
else if (Len == 5 && !strcmp(Str, "sqrtf") && V >= 0) - return ConstantFoldFP(sqrt, V, Ty); - else if (Len == 4 && !strcmp(Str, "sinf")) - return ConstantFoldFP(sin, V, Ty); + if (Name == "sin") + return ConstantFoldFP(sin, V, Ty, Context); + else if (Name == "sinh") + return ConstantFoldFP(sinh, V, Ty, Context); + else if (Name == "sqrt" && V >= 0) + return ConstantFoldFP(sqrt, V, Ty, Context); + else if (Name == "sqrtf" && V >= 0) + return ConstantFoldFP(sqrt, V, Ty, Context); + else if (Name == "sinf") + return ConstantFoldFP(sin, V, Ty, Context); break; case 't': - if (Len == 3 && !strcmp(Str, "tan")) - return ConstantFoldFP(tan, V, Ty); - else if (Len == 4 && !strcmp(Str, "tanh")) - return ConstantFoldFP(tanh, V, Ty); + if (Name == "tan") + return ConstantFoldFP(tan, V, Ty, Context); + else if (Name == "tanh") + return ConstantFoldFP(tanh, V, Ty, Context); break; default: break; } - } else if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { - if (Len > 11 && !memcmp(Str, "llvm.bswap", 10)) - return ConstantInt::get(Op->getValue().byteSwap()); - else if (Len > 11 && !memcmp(Str, "llvm.ctpop", 10)) + return 0; + } + + + if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { + if (Name.startswith("llvm.bswap")) + return ConstantInt::get(Context, Op->getValue().byteSwap()); + else if (Name.startswith("llvm.ctpop")) return ConstantInt::get(Ty, Op->getValue().countPopulation()); - else if (Len > 10 && !memcmp(Str, "llvm.cttz", 9)) + else if (Name.startswith("llvm.cttz")) return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); - else if (Len > 10 && !memcmp(Str, "llvm.ctlz", 9)) + else if (Name.startswith("llvm.ctlz")) return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); + return 0; } - } else if (NumOperands == 2) { + + return 0; + } + + if (NumOperands == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { - if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy) + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty==Type::FloatTy ? - (double)Op1->getValueAPF().convertToFloat(): + double Op1V = Ty->isFloatTy() ? + (double)Op1->getValueAPF().convertToFloat() : Op1->getValueAPF().convertToDouble(); if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { - double Op2V = Ty==Type::FloatTy ? + if (Op2->getType() != Op1->getType()) + return 0; + + double Op2V = Ty->isFloatTy() ? 
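The integer branch above maps llvm.bswap/ctpop/cttz/ctlz straight onto APInt operations. The same four operations in a standalone sketch, assuming GCC/Clang builtins as stand-ins for the APInt methods:

#include <cstdint>
#include <iostream>

int main() {
  uint32_t V = 0x00f00000;

  uint32_t Swapped = __builtin_bswap32(V);   // llvm.bswap
  int Pop = __builtin_popcount(V);           // llvm.ctpop
  int Ctz = __builtin_ctz(V);                // llvm.cttz (requires V != 0)
  int Clz = __builtin_clz(V);                // llvm.ctlz (requires V != 0)

  std::cout << std::hex << Swapped << std::dec << " "
            << Pop << " " << Ctz << " " << Clz << "\n";
  // f000 4 20 8
}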
(double)Op2->getValueAPF().convertToFloat(): Op2->getValueAPF().convertToDouble(); - if (Len == 3 && !strcmp(Str, "pow")) { - return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - } else if (Len == 4 && !strcmp(Str, "fmod")) { - return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - } else if (Len == 5 && !strcmp(Str, "atan2")) { - return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); - } + if (Name == "pow") + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty, Context); + if (Name == "fmod") + return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty, Context); + if (Name == "atan2") + return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty, Context); } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { - if (!strcmp(Str, "llvm.powi.f32")) { - return ConstantFP::get(APFloat((float)std::pow((float)Op1V, + if (Name == "llvm.powi.f32") + return ConstantFP::get(Context, APFloat((float)std::pow((float)Op1V, (int)Op2C->getZExtValue()))); - } else if (!strcmp(Str, "llvm.powi.f64")) { - return ConstantFP::get(APFloat((double)std::pow((double)Op1V, + if (Name == "llvm.powi.f64") + return ConstantFP::get(Context, APFloat((double)std::pow((double)Op1V, (int)Op2C->getZExtValue()))); + } + return 0; + } + + + if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { + if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::uadd_with_overflow: { + Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. + Constant *Ops[] = { + Res, ConstantExpr::getICmp(CmpInst::ICMP_ULT, Res, Op1) // overflow. + }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::usub_with_overflow: { + Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. + Constant *Ops[] = { + Res, ConstantExpr::getICmp(CmpInst::ICMP_UGT, Res, Op1) // overflow. + }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::sadd_with_overflow: { + Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. + Constant *Overflow = ConstantExpr::getSelect( + ConstantExpr::getICmp(CmpInst::ICMP_SGT, + ConstantInt::get(Op1->getType(), 0), Op1), + ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op2), + ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op2)); // overflow. + + Constant *Ops[] = { Res, Overflow }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::ssub_with_overflow: { + Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. + Constant *Overflow = ConstantExpr::getSelect( + ConstantExpr::getICmp(CmpInst::ICMP_SGT, + ConstantInt::get(Op2->getType(), 0), Op2), + ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op1), + ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op1)); // overflow. 
+ + Constant *Ops[] = { Res, Overflow }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } } } + + return 0; } + return 0; } return 0; } diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp index 6c549e6345e6..2bbe2e0ecb4f 100644 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ b/lib/Analysis/DbgInfoPrinter.cpp @@ -90,10 +90,9 @@ void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) { } void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) { - DISubprogram Subprogram(cast<GlobalVariable>(FS->getSubprogram())); - std::string Res1, Res2; - Out << "; fully qualified function name: " << Subprogram.getDisplayName(Res1) - << " return type: " << Subprogram.getReturnTypeName(Res2) + DISubprogram Subprogram(FS->getSubprogram()); + Out << "; fully qualified function name: " << Subprogram.getDisplayName() + << " return type: " << Subprogram.getReturnTypeName() << " at line " << Subprogram.getLineNumber() << "\n\n"; } @@ -152,7 +151,7 @@ bool PrintDbgInfo::runOnFunction(Function &F) { Printed = true; } - Out << *i; + Out << *i << '\n'; printVariableDeclaration(i); if (const User *U = dyn_cast<User>(i)) { diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 9eecc339b483..7bb7e9b4af2d 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -18,12 +18,13 @@ #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/DebugLoc.h" -#include "llvm/Support/Streams.h" - +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::dwarf; @@ -32,18 +33,12 @@ using namespace llvm::dwarf; //===----------------------------------------------------------------------===// /// ValidDebugInfo - Return true if V represents valid debug info value. -bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) { - if (!V) - return false; - - GlobalVariable *GV = dyn_cast<GlobalVariable>(V->stripPointerCasts()); - if (!GV) - return false; - - if (!GV->hasInternalLinkage () && !GV->hasLinkOnceLinkage()) +/// FIXME : Add DIDescriptor.isValid() +bool DIDescriptor::ValidDebugInfo(MDNode *N, CodeGenOpt::Level OptLevel) { + if (!N) return false; - DIDescriptor DI(GV); + DIDescriptor DI(N); // Check current version. Allow Version6 for now. unsigned Version = DI.getVersion(); @@ -53,13 +48,13 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) { unsigned Tag = DI.getTag(); switch (Tag) { case DW_TAG_variable: - assert(DIVariable(GV).Verify() && "Invalid DebugInfo value"); + assert(DIVariable(N).Verify() && "Invalid DebugInfo value"); break; case DW_TAG_compile_unit: - assert(DICompileUnit(GV).Verify() && "Invalid DebugInfo value"); + assert(DICompileUnit(N).Verify() && "Invalid DebugInfo value"); break; case DW_TAG_subprogram: - assert(DISubprogram(GV).Verify() && "Invalid DebugInfo value"); + assert(DISubprogram(N).Verify() && "Invalid DebugInfo value"); break; case DW_TAG_lexical_block: // FIXME: This interfers with the quality of generated code during @@ -74,84 +69,75 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) { return true; } -DIDescriptor::DIDescriptor(GlobalVariable *GV, unsigned RequiredTag) { - DbgGV = GV; - - // If this is non-null, check to see if the Tag matches. If not, set to null. 
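The *.with.overflow folds at the top of this chunk derive the overflow bit from ordinary compares: unsigned add overflows iff the result is less than an operand, and the signed cases select between two compares on the sign of one operand. The same predicates on 32-bit values, sketched with unsigned arithmetic so the wraparound stays well defined:

#include <cstdint>
#include <iostream>

// uadd.with.overflow: Res < Op1 iff the add wrapped.
static bool uaddOverflows(uint32_t A, uint32_t B) {
  return uint32_t(A + B) < A;
}

// sadd.with.overflow, as in the fold above:
//   if 0 > A: overflow iff Res > B;  otherwise: overflow iff Res < B.
static bool saddOverflows(int32_t A, int32_t B) {
  int32_t Res = int32_t(uint32_t(A) + uint32_t(B));  // wrapping add
  return (A < 0) ? (Res > B) : (Res < B);
}

int main() {
  std::cout << uaddOverflows(0xffffffffu, 1) << "\n";  // 1
  std::cout << saddOverflows(INT32_MAX, 1) << "\n";    // 1
  std::cout << saddOverflows(-5, 3) << "\n";           // 0
}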
- if (GV && getTag() != RequiredTag) - DbgGV = 0; -} +DIDescriptor::DIDescriptor(MDNode *N, unsigned RequiredTag) { + DbgNode = N; -const std::string & -DIDescriptor::getStringField(unsigned Elt, std::string &Result) const { - if (DbgGV == 0) { - Result.clear(); - return Result; + // If this is non-null, check to see if the Tag matches. If not, set to null. + if (N && getTag() != RequiredTag) { + DbgNode = 0; } +} - Constant *C = DbgGV->getInitializer(); - if (C == 0 || Elt >= C->getNumOperands()) { - Result.clear(); - return Result; - } +const char * +DIDescriptor::getStringField(unsigned Elt) const { + if (DbgNode == 0) + return NULL; - // Fills in the string if it succeeds - if (!GetConstantStringInfo(C->getOperand(Elt), Result)) - Result.clear(); + if (Elt < DbgNode->getNumElements()) + if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getElement(Elt))) + return MDS->getString().data(); - return Result; + return NULL; } uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { - if (DbgGV == 0) return 0; - - Constant *C = DbgGV->getInitializer(); - if (C == 0 || Elt >= C->getNumOperands()) + if (DbgNode == 0) return 0; - if (ConstantInt *CI = dyn_cast<ConstantInt>(C->getOperand(Elt))) - return CI->getZExtValue(); + if (Elt < DbgNode->getNumElements()) + if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getElement(Elt))) + return CI->getZExtValue(); + return 0; } DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { - if (DbgGV == 0) return DIDescriptor(); - - Constant *C = DbgGV->getInitializer(); - if (C == 0 || Elt >= C->getNumOperands()) + if (DbgNode == 0) return DIDescriptor(); - C = C->getOperand(Elt); - return DIDescriptor(dyn_cast<GlobalVariable>(C->stripPointerCasts())); + if (Elt < DbgNode->getNumElements() && DbgNode->getElement(Elt)) + return DIDescriptor(dyn_cast<MDNode>(DbgNode->getElement(Elt))); + + return DIDescriptor(); } GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { - if (DbgGV == 0) return 0; - - Constant *C = DbgGV->getInitializer(); - if (C == 0 || Elt >= C->getNumOperands()) + if (DbgNode == 0) return 0; - C = C->getOperand(Elt); - return dyn_cast<GlobalVariable>(C->stripPointerCasts()); + if (Elt < DbgNode->getNumElements()) + return dyn_cast_or_null<GlobalVariable>(DbgNode->getElement(Elt)); + return 0; } //===----------------------------------------------------------------------===// -// Simple Descriptor Constructors and other Methods +// Predicates //===----------------------------------------------------------------------===// -// Needed by DIVariable::getType(). -DIType::DIType(GlobalVariable *GV) : DIDescriptor(GV) { - if (!GV) return; - unsigned tag = getTag(); - if (tag != dwarf::DW_TAG_base_type && !DIDerivedType::isDerivedType(tag) && - !DICompositeType::isCompositeType(tag)) - DbgGV = 0; +/// isBasicType - Return true if the specified tag is legal for +/// DIBasicType. +bool DIDescriptor::isBasicType() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_base_type; } -/// isDerivedType - Return true if the specified tag is legal for -/// DIDerivedType. -bool DIType::isDerivedType(unsigned Tag) { +/// isDerivedType - Return true if the specified tag is legal for DIDerivedType. 
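The rewritten field accessors above are the core of the GlobalVariable-to-MDNode migration: every read bounds-checks the element index and degrades to a neutral value (null, 0, an empty descriptor) rather than digging through an initializer. That defensive shape over a hypothetical loosely typed node:

#include <cstdint>
#include <iostream>
#include <string>
#include <variant>
#include <vector>

// Hypothetical stand-in for MDNode: loosely typed elements by index.
struct Node {
  std::vector<std::variant<std::string, uint64_t>> Elements;
};

// getStringField: null node, missing slot, or mistyped slot all yield "".
static std::string getStringField(const Node *N, unsigned Elt) {
  if (!N || Elt >= N->Elements.size())
    return "";
  if (auto *S = std::get_if<std::string>(&N->Elements[Elt]))
    return *S;
  return "";
}

static uint64_t getUInt64Field(const Node *N, unsigned Elt) {
  if (!N || Elt >= N->Elements.size())
    return 0;
  if (auto *I = std::get_if<uint64_t>(&N->Elements[Elt]))
    return *I;
  return 0;
}

int main() {
  Node N{{std::string("file.c"), uint64_t(42)}};
  std::cout << getStringField(&N, 0) << " " << getUInt64Field(&N, 1) << "\n";
  std::cout << getUInt64Field(&N, 9) << "\n";   // out of range -> 0
}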
+bool DIDescriptor::isDerivedType() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + switch (Tag) { case dwarf::DW_TAG_typedef: case dwarf::DW_TAG_pointer_type: @@ -163,16 +149,18 @@ bool DIType::isDerivedType(unsigned Tag) { case dwarf::DW_TAG_inheritance: return true; default: - // FIXME: Even though it doesn't make sense, CompositeTypes are current - // modelled as DerivedTypes, this should return true for them as well. - return false; + // CompositeTypes are currently modelled as DerivedTypes. + return isCompositeType(); } } /// isCompositeType - Return true if the specified tag is legal for /// DICompositeType. -bool DIType::isCompositeType(unsigned TAG) { - switch (TAG) { +bool DIDescriptor::isCompositeType() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + switch (Tag) { case dwarf::DW_TAG_array_type: case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: @@ -187,7 +175,10 @@ bool DIType::isCompositeType(unsigned TAG) { } /// isVariable - Return true if the specified tag is legal for DIVariable. -bool DIVariable::isVariable(unsigned Tag) { +bool DIDescriptor::isVariable() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + switch (Tag) { case dwarf::DW_TAG_auto_variable: case dwarf::DW_TAG_arg_variable: @@ -198,19 +189,126 @@ bool DIVariable::isVariable(unsigned Tag) { } } +/// isType - Return true if the specified tag is legal for DIType. +bool DIDescriptor::isType() const { + return isBasicType() || isCompositeType() || isDerivedType(); +} + +/// isSubprogram - Return true if the specified tag is legal for +/// DISubprogram. +bool DIDescriptor::isSubprogram() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_subprogram; +} + +/// isGlobalVariable - Return true if the specified tag is legal for +/// DIGlobalVariable. +bool DIDescriptor::isGlobalVariable() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_variable; +} + +/// isGlobal - Return true if the specified tag is legal for DIGlobal. +bool DIDescriptor::isGlobal() const { + return isGlobalVariable(); +} + +/// isScope - Return true if the specified tag is one of the scope +/// related tag. +bool DIDescriptor::isScope() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + switch (Tag) { + case dwarf::DW_TAG_compile_unit: + case dwarf::DW_TAG_lexical_block: + case dwarf::DW_TAG_subprogram: + return true; + default: + break; + } + return false; +} + +/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit. +bool DIDescriptor::isCompileUnit() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_compile_unit; +} + +/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block. +bool DIDescriptor::isLexicalBlock() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_lexical_block; +} + +/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type. +bool DIDescriptor::isSubrange() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_subrange_type; +} + +/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator. 
+bool DIDescriptor::isEnumerator() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_enumerator; +} + +//===----------------------------------------------------------------------===// +// Simple Descriptor Constructors and other Methods +//===----------------------------------------------------------------------===// + +DIType::DIType(MDNode *N) : DIDescriptor(N) { + if (!N) return; + if (!isBasicType() && !isDerivedType() && !isCompositeType()) { + DbgNode = 0; + } +} + unsigned DIArray::getNumElements() const { - assert (DbgGV && "Invalid DIArray"); - Constant *C = DbgGV->getInitializer(); - assert (C && "Invalid DIArray initializer"); - return C->getNumOperands(); + assert (DbgNode && "Invalid DIArray"); + return DbgNode->getNumElements(); +} + +/// replaceAllUsesWith - Replace all uses of debug info referenced by +/// this descriptor. After this completes, the current debug info value +/// is erased. +void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) { + if (isNull()) + return; + + assert (!D.isNull() && "Can not replace with null"); + + // Since we use a TrackingVH for the node, its easy for clients to manufacture + // legitimate situations where they want to replaceAllUsesWith() on something + // which, due to uniquing, has merged with the source. We shield clients from + // this detail by allowing a value to be replaced with replaceAllUsesWith() + // itself. + if (getNode() != D.getNode()) { + MDNode *Node = DbgNode; + Node->replaceAllUsesWith(D.getNode()); + delete Node; + } } /// Verify - Verify that a compile unit is well formed. bool DICompileUnit::Verify() const { - if (isNull()) + if (isNull()) return false; - std::string Res; - if (getFilename(Res).empty()) + const char *N = getFilename(); + if (!N) return false; // It is possible that directory and produce string is empty. return true; @@ -218,26 +316,26 @@ bool DICompileUnit::Verify() const { /// Verify - Verify that a type descriptor is well formed. bool DIType::Verify() const { - if (isNull()) + if (isNull()) return false; - if (getContext().isNull()) + if (getContext().isNull()) return false; DICompileUnit CU = getCompileUnit(); - if (!CU.isNull() && !CU.Verify()) + if (!CU.isNull() && !CU.Verify()) return false; return true; } /// Verify - Verify that a composite type descriptor is well formed. 
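replaceAllUsesWith above explicitly tolerates being asked to replace a node with itself, because uniquing can merge the replacement into the source behind the caller's back. A miniature of that guard, with a hypothetical tracked use-list in place of MDNode's:

#include <iostream>
#include <vector>

struct Node {
  int Id;
  std::vector<Node **> Uses;   // hypothetical tracked use-list

  void replaceAllUsesWith(Node *D) {
    // Uniquing can merge the replacement with the source, so replacing a
    // node with itself must be a safe no-op, not a use-after-free.
    if (this == D)
      return;
    for (Node **U : Uses)      // retarget every use, then die
      *U = D;
    Uses.clear();
    delete this;
  }
};

int main() {
  Node *A = new Node{1, {}};
  Node *Use = A;
  A->Uses.push_back(&Use);

  A->replaceAllUsesWith(A);          // no-op: A merged with itself
  std::cout << Use->Id << "\n";      // still 1

  Node *B = new Node{2, {}};
  Use->replaceAllUsesWith(B);        // real replacement: Use retargeted
  std::cout << Use->Id << "\n";      // 2
  delete B;
}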
bool DICompositeType::Verify() const { - if (isNull()) + if (isNull()) return false; - if (getContext().isNull()) + if (getContext().isNull()) return false; DICompileUnit CU = getCompileUnit(); - if (!CU.isNull() && !CU.Verify()) + if (!CU.isNull() && !CU.Verify()) return false; return true; } @@ -246,12 +344,12 @@ bool DICompositeType::Verify() const { bool DISubprogram::Verify() const { if (isNull()) return false; - + if (getContext().isNull()) return false; DICompileUnit CU = getCompileUnit(); - if (!CU.Verify()) + if (!CU.Verify()) return false; DICompositeType Ty = getType(); @@ -264,12 +362,12 @@ bool DISubprogram::Verify() const { bool DIGlobalVariable::Verify() const { if (isNull()) return false; - + if (getContext().isNull()) return false; DICompileUnit CU = getCompileUnit(); - if (!CU.isNull() && !CU.Verify()) + if (!CU.isNull() && !CU.Verify()) return false; DIType Ty = getType(); @@ -286,7 +384,7 @@ bool DIGlobalVariable::Verify() const { bool DIVariable::Verify() const { if (isNull()) return false; - + if (getContext().isNull()) return false; @@ -312,15 +410,38 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { /// information for the function F. bool DISubprogram::describes(const Function *F) { assert (F && "Invalid function"); - std::string Name; - getLinkageName(Name); - if (Name.empty()) - getName(Name); - if (!Name.empty() && (strcmp(Name.c_str(), F->getNameStart()) == false)) + const char *Name = getLinkageName(); + if (!Name) + Name = getName(); + if (strcmp(F->getName().data(), Name) == 0) return true; return false; } +const char *DIScope::getFilename() const { + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getFilename(); + else if (isSubprogram()) + return DISubprogram(DbgNode).getFilename(); + else if (isCompileUnit()) + return DICompileUnit(DbgNode).getFilename(); + else + assert (0 && "Invalid DIScope!"); + return NULL; +} + +const char *DIScope::getDirectory() const { + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getDirectory(); + else if (isSubprogram()) + return DISubprogram(DbgNode).getDirectory(); + else if (isCompileUnit()) + return DICompileUnit(DbgNode).getDirectory(); + else + assert (0 && "Invalid DIScope!"); + return NULL; +} + //===----------------------------------------------------------------------===// // DIDescriptor: dump routines for all descriptors. //===----------------------------------------------------------------------===// @@ -328,69 +449,67 @@ bool DISubprogram::describes(const Function *F) { /// dump - Print descriptor. void DIDescriptor::dump() const { - cerr << "[" << dwarf::TagString(getTag()) << "] "; - cerr << std::hex << "[GV:" << DbgGV << "]" << std::dec; + errs() << "[" << dwarf::TagString(getTag()) << "] "; + errs().write_hex((intptr_t) &*DbgNode) << ']'; } /// dump - Print compile unit. void DICompileUnit::dump() const { if (getLanguage()) - cerr << " [" << dwarf::LanguageString(getLanguage()) << "] "; + errs() << " [" << dwarf::LanguageString(getLanguage()) << "] "; - std::string Res1, Res2; - cerr << " [" << getDirectory(Res1) << "/" << getFilename(Res2) << " ]"; + errs() << " [" << getDirectory() << "/" << getFilename() << " ]"; } /// dump - Print type. 
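DISubprogram::describes above prefers the linkage (mangled) name and falls back to the display name before comparing against the function. That fallback chain in a standalone sketch:

#include <iostream>
#include <string>

// Mirrors DISubprogram::describes: prefer the linkage name, fall back to
// the plain name, then compare against the function's actual name.
static bool describes(const std::string &FuncName,
                      const std::string &LinkageName,
                      const std::string &Name) {
  const std::string &N = !LinkageName.empty() ? LinkageName : Name;
  return !N.empty() && N == FuncName;
}

int main() {
  std::cout << describes("_Z3foov", "_Z3foov", "foo") << "\n";  // 1
  std::cout << describes("foo", "", "foo") << "\n";             // 1 fallback
  std::cout << describes("bar", "", "foo") << "\n";             // 0
}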
void DIType::dump() const { if (isNull()) return; - std::string Res; - if (!getName(Res).empty()) - cerr << " [" << Res << "] "; + if (const char *Res = getName()) + errs() << " [" << Res << "] "; unsigned Tag = getTag(); - cerr << " [" << dwarf::TagString(Tag) << "] "; + errs() << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context getCompileUnit().dump(); - cerr << " [" - << getLineNumber() << ", " - << getSizeInBits() << ", " - << getAlignInBits() << ", " - << getOffsetInBits() - << "] "; - - if (isPrivate()) - cerr << " [private] "; + errs() << " [" + << getLineNumber() << ", " + << getSizeInBits() << ", " + << getAlignInBits() << ", " + << getOffsetInBits() + << "] "; + + if (isPrivate()) + errs() << " [private] "; else if (isProtected()) - cerr << " [protected] "; + errs() << " [protected] "; if (isForwardDecl()) - cerr << " [fwd] "; - - if (isBasicType(Tag)) - DIBasicType(DbgGV).dump(); - else if (isDerivedType(Tag)) - DIDerivedType(DbgGV).dump(); - else if (isCompositeType(Tag)) - DICompositeType(DbgGV).dump(); + errs() << " [fwd] "; + + if (isBasicType()) + DIBasicType(DbgNode).dump(); + else if (isDerivedType()) + DIDerivedType(DbgNode).dump(); + else if (isCompositeType()) + DICompositeType(DbgNode).dump(); else { - cerr << "Invalid DIType\n"; + errs() << "Invalid DIType\n"; return; } - cerr << "\n"; + errs() << "\n"; } /// dump - Print basic type. void DIBasicType::dump() const { - cerr << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; + errs() << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; } /// dump - Print derived type. void DIDerivedType::dump() const { - cerr << "\n\t Derived From: "; getTypeDerivedFrom().dump(); + errs() << "\n\t Derived From: "; getTypeDerivedFrom().dump(); } /// dump - Print composite type. @@ -398,54 +517,72 @@ void DICompositeType::dump() const { DIArray A = getTypeArray(); if (A.isNull()) return; - cerr << " [" << A.getNumElements() << " elements]"; + errs() << " [" << A.getNumElements() << " elements]"; } /// dump - Print global. void DIGlobal::dump() const { - std::string Res; - if (!getName(Res).empty()) - cerr << " [" << Res << "] "; + if (const char *Res = getName()) + errs() << " [" << Res << "] "; unsigned Tag = getTag(); - cerr << " [" << dwarf::TagString(Tag) << "] "; + errs() << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context getCompileUnit().dump(); - cerr << " [" << getLineNumber() << "] "; + errs() << " [" << getLineNumber() << "] "; if (isLocalToUnit()) - cerr << " [local] "; + errs() << " [local] "; if (isDefinition()) - cerr << " [def] "; + errs() << " [def] "; - if (isGlobalVariable(Tag)) - DIGlobalVariable(DbgGV).dump(); + if (isGlobalVariable()) + DIGlobalVariable(DbgNode).dump(); - cerr << "\n"; + errs() << "\n"; } /// dump - Print subprogram. void DISubprogram::dump() const { - DIGlobal::dump(); + if (const char *Res = getName()) + errs() << " [" << Res << "] "; + + unsigned Tag = getTag(); + errs() << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().dump(); + errs() << " [" << getLineNumber() << "] "; + + if (isLocalToUnit()) + errs() << " [local] "; + + if (isDefinition()) + errs() << " [def] "; + + errs() << "\n"; } /// dump - Print global variable. void DIGlobalVariable::dump() const { - cerr << " ["; getGlobal()->dump(); cerr << "] "; + errs() << " ["; + getGlobal()->dump(); + errs() << "] "; } /// dump - Print variable. 
void DIVariable::dump() const { - std::string Res; - if (!getName(Res).empty()) - cerr << " [" << Res << "] "; + if (const char *Res = getName()) + errs() << " [" << Res << "] "; getCompileUnit().dump(); - cerr << " [" << getLineNumber() << "] "; + errs() << " [" << getLineNumber() << "] "; getType().dump(); - cerr << "\n"; + errs() << "\n"; + + // FIXME: Dump complex addresses } //===----------------------------------------------------------------------===// @@ -453,98 +590,46 @@ void DIVariable::dump() const { //===----------------------------------------------------------------------===// DIFactory::DIFactory(Module &m) - : M(m), StopPointFn(0), FuncStartFn(0), RegionStartFn(0), RegionEndFn(0), + : M(m), VMContext(M.getContext()), StopPointFn(0), FuncStartFn(0), + RegionStartFn(0), RegionEndFn(0), DeclareFn(0) { - EmptyStructPtr = PointerType::getUnqual(StructType::get()); -} - -/// getCastToEmpty - Return this descriptor as a Constant* with type '{}*'. -/// This is only valid when the descriptor is non-null. -Constant *DIFactory::getCastToEmpty(DIDescriptor D) { - if (D.isNull()) return Constant::getNullValue(EmptyStructPtr); - return ConstantExpr::getBitCast(D.getGV(), EmptyStructPtr); + EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext)); } Constant *DIFactory::GetTagConstant(unsigned TAG) { assert((TAG & LLVMDebugVersionMask) == 0 && "Tag too large for debug encoding!"); - return ConstantInt::get(Type::Int32Ty, TAG | LLVMDebugVersion); -} - -Constant *DIFactory::GetStringConstant(const std::string &String) { - // Check string cache for previous edition. - Constant *&Slot = StringCache[String]; - - // Return Constant if previously defined. - if (Slot) return Slot; - - const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty); - - // If empty string then use a i8* null instead. - if (String.empty()) - return Slot = ConstantPointerNull::get(DestTy); - - // Construct string as an llvm constant. - Constant *ConstStr = ConstantArray::get(String); - - // Otherwise create and return a new string global. - GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true, - GlobalVariable::InternalLinkage, - ConstStr, ".str", &M); - StrGV->setSection("llvm.metadata"); - return Slot = ConstantExpr::getBitCast(StrGV, DestTy); + return ConstantInt::get(Type::getInt32Ty(VMContext), TAG | LLVMDebugVersion); } //===----------------------------------------------------------------------===// // DIFactory: Primary Constructors //===----------------------------------------------------------------------===// -/// GetOrCreateArray - Create an descriptor for an array of descriptors. +/// GetOrCreateArray - Create an descriptor for an array of descriptors. /// This implicitly uniques the arrays created. DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) { - SmallVector<Constant*, 16> Elts; - - for (unsigned i = 0; i != NumTys; ++i) - Elts.push_back(getCastToEmpty(Tys[i])); - - Constant *Init = ConstantArray::get(ArrayType::get(EmptyStructPtr, - Elts.size()), - Elts.data(), Elts.size()); - // If we already have this array, just return the uniqued version. 
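GetTagConstant above packs the debug info version into the high bits of every tag constant, which is what the version check in ValidDebugInfo later peels back off. The packing in standalone form; the shift and mask values here are illustrative, the real constants live in llvm/Support/Dwarf.h:

#include <cassert>
#include <cstdint>
#include <iostream>

// Illustrative values: version in the high half-word, tag in the low.
const uint32_t LLVMDebugVersion     = 7u << 16;
const uint32_t LLVMDebugVersionMask = 0xffff0000u;

static uint32_t getTagConstant(uint32_t Tag) {
  assert((Tag & LLVMDebugVersionMask) == 0 && "Tag too large!");
  return Tag | LLVMDebugVersion;
}

int main() {
  const uint32_t DW_TAG_subprogram = 0x2e;
  uint32_t Packed = getTagConstant(DW_TAG_subprogram);

  // What getTag()/getVersion() recover on the way back out.
  std::cout << std::hex
            << (Packed & ~LLVMDebugVersionMask) << " "   // 2e
            << (Packed & LLVMDebugVersionMask) << "\n";  // 70000
}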
- DIDescriptor &Entry = SimpleConstantCache[Init]; - if (!Entry.isNull()) return DIArray(Entry.getGV()); - - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.array", &M); - GV->setSection("llvm.metadata"); - Entry = DIDescriptor(GV); - return DIArray(GV); + SmallVector<Value*, 16> Elts; + + if (NumTys == 0) + Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); + else + for (unsigned i = 0; i != NumTys; ++i) + Elts.push_back(Tys[i].getNode()); + + return DIArray(MDNode::get(VMContext,Elts.data(), Elts.size())); } /// GetOrCreateSubrange - Create a descriptor for a value range. This /// implicitly uniques the values returned. DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subrange_type), - ConstantInt::get(Type::Int64Ty, Lo), - ConstantInt::get(Type::Int64Ty, Hi) + ConstantInt::get(Type::getInt64Ty(VMContext), Lo), + ConstantInt::get(Type::getInt64Ty(VMContext), Hi) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - // If we already have this range, just return the uniqued version. - DIDescriptor &Entry = SimpleConstantCache[Init]; - if (!Entry.isNull()) return DISubrange(Entry.getGV()); - - M.addTypeName("llvm.dbg.subrange.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.subrange", &M); - GV->setSection("llvm.metadata"); - Entry = DIDescriptor(GV); - return DISubrange(GV); + return DISubrange(MDNode::get(VMContext, &Elts[0], 3)); } @@ -552,92 +637,69 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { /// CreateCompileUnit - Create a new descriptor for the specified compile /// unit. Note that this does not unique compile units within the module. DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID, - const std::string &Filename, - const std::string &Directory, - const std::string &Producer, + StringRef Filename, + StringRef Directory, + StringRef Producer, bool isMain, bool isOptimized, const char *Flags, unsigned RunTimeVer) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_compile_unit), - Constant::getNullValue(EmptyStructPtr), - ConstantInt::get(Type::Int32Ty, LangID), - GetStringConstant(Filename), - GetStringConstant(Directory), - GetStringConstant(Producer), - ConstantInt::get(Type::Int1Ty, isMain), - ConstantInt::get(Type::Int1Ty, isOptimized), - GetStringConstant(Flags), - ConstantInt::get(Type::Int32Ty, RunTimeVer) + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + ConstantInt::get(Type::getInt32Ty(VMContext), LangID), + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + MDString::get(VMContext, Producer), + ConstantInt::get(Type::getInt1Ty(VMContext), isMain), + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), + MDString::get(VMContext, Flags), + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.compile_unit.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::LinkOnceAnyLinkage, - Init, "llvm.dbg.compile_unit", &M); - GV->setSection("llvm.metadata"); - return DICompileUnit(GV); + + return DICompileUnit(MDNode::get(VMContext, &Elts[0], 10)); } /// CreateEnumerator - Create a single enumerator value. 
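GetOrCreateSubrange above no longer needs the old SimpleConstantCache because MDNode::get uniques structurally: asking twice with the same elements yields the same node. A toy uniquing map showing the idea, with a hypothetical key and node type:

#include <iostream>
#include <map>
#include <vector>

struct Node { std::vector<long> Elements; };

// Hypothetical stand-in for MDNode::get: structurally identical element
// lists map to the same node, so callers get uniquing for free.
static Node *getNode(const std::vector<long> &Elts) {
  static std::map<std::vector<long>, Node> Uniqued;
  auto It = Uniqued.find(Elts);
  if (It == Uniqued.end())
    It = Uniqued.emplace(Elts, Node{Elts}).first;
  return &It->second;
}

int main() {
  const long DW_TAG_subrange_type = 0x21;
  Node *A = getNode({DW_TAG_subrange_type, 0, 9});   // subrange [0, 9]
  Node *B = getNode({DW_TAG_subrange_type, 0, 9});   // same elements
  std::cout << (A == B ? "uniqued" : "distinct") << "\n";  // uniqued
}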
-DIEnumerator DIFactory::CreateEnumerator(const std::string &Name, uint64_t Val){ - Constant *Elts[] = { +DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){ + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_enumerator), - GetStringConstant(Name), - ConstantInt::get(Type::Int64Ty, Val) + MDString::get(VMContext, Name), + ConstantInt::get(Type::getInt64Ty(VMContext), Val) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.enumerator.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.enumerator", &M); - GV->setSection("llvm.metadata"); - return DIEnumerator(GV); + return DIEnumerator(MDNode::get(VMContext, &Elts[0], 3)); } /// CreateBasicType - Create a basic type like int, float, etc. DIBasicType DIFactory::CreateBasicType(DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, unsigned Encoding) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_base_type), - getCastToEmpty(Context), - GetStringConstant(Name), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNumber), - ConstantInt::get(Type::Int64Ty, SizeInBits), - ConstantInt::get(Type::Int64Ty, AlignInBits), - ConstantInt::get(Type::Int64Ty, OffsetInBits), - ConstantInt::get(Type::Int32Ty, Flags), - ConstantInt::get(Type::Int32Ty, Encoding) + Context.getNode(), + MDString::get(VMContext, Name), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.basictype.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.basictype", &M); - GV->setSection("llvm.metadata"); - return DIBasicType(GV); + return DIBasicType(MDNode::get(VMContext, &Elts[0], 10)); } /// CreateDerivedType - Create a derived type like const qualified type, /// pointer, typedef, etc. 
DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -645,33 +707,25 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(Tag), - getCastToEmpty(Context), - GetStringConstant(Name), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNumber), - ConstantInt::get(Type::Int64Ty, SizeInBits), - ConstantInt::get(Type::Int64Ty, AlignInBits), - ConstantInt::get(Type::Int64Ty, OffsetInBits), - ConstantInt::get(Type::Int32Ty, Flags), - getCastToEmpty(DerivedFrom) + Context.getNode(), + MDString::get(VMContext, Name), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom.getNode(), }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.derivedtype.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.derivedtype", &M); - GV->setSection("llvm.metadata"); - return DIDerivedType(GV); + return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10)); } /// CreateCompositeType - Create a composite type like array, struct, etc. DICompositeType DIFactory::CreateCompositeType(unsigned Tag, DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -682,143 +736,143 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag, DIArray Elements, unsigned RuntimeLang) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(Tag), - getCastToEmpty(Context), - GetStringConstant(Name), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNumber), - ConstantInt::get(Type::Int64Ty, SizeInBits), - ConstantInt::get(Type::Int64Ty, AlignInBits), - ConstantInt::get(Type::Int64Ty, OffsetInBits), - ConstantInt::get(Type::Int32Ty, Flags), - getCastToEmpty(DerivedFrom), - getCastToEmpty(Elements), - ConstantInt::get(Type::Int32Ty, RuntimeLang) + Context.getNode(), + MDString::get(VMContext, Name), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom.getNode(), + Elements.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.composite.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.composite", &M); - GV->setSection("llvm.metadata"); - return DICompositeType(GV); + return DICompositeType(MDNode::get(VMContext, &Elts[0], 12)); } /// CreateSubprogram - Create a new descriptor for the specified subprogram. /// See comments in DISubprogram for descriptions of these fields. This /// method does not unique the generated descriptors. 
-DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, - const std::string &Name, - const std::string &DisplayName, - const std::string &LinkageName, +DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, + StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, bool isDefinition) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subprogram), - Constant::getNullValue(EmptyStructPtr), - getCastToEmpty(Context), - GetStringConstant(Name), - GetStringConstant(DisplayName), - GetStringConstant(LinkageName), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNo), - getCastToEmpty(Type), - ConstantInt::get(Type::Int1Ty, isLocalToUnit), - ConstantInt::get(Type::Int1Ty, isDefinition) + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Context.getNode(), + MDString::get(VMContext, Name), + MDString::get(VMContext, DisplayName), + MDString::get(VMContext, LinkageName), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Type.getNode(), + ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.subprogram.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::LinkOnceAnyLinkage, - Init, "llvm.dbg.subprogram", &M); - GV->setSection("llvm.metadata"); - return DISubprogram(GV); + return DISubprogram(MDNode::get(VMContext, &Elts[0], 11)); } /// CreateGlobalVariable - Create a new descriptor for the specified global. DIGlobalVariable -DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name, - const std::string &DisplayName, - const std::string &LinkageName, +DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type,bool isLocalToUnit, bool isDefinition, llvm::GlobalVariable *Val) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_variable), - Constant::getNullValue(EmptyStructPtr), - getCastToEmpty(Context), - GetStringConstant(Name), - GetStringConstant(DisplayName), - GetStringConstant(LinkageName), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNo), - getCastToEmpty(Type), - ConstantInt::get(Type::Int1Ty, isLocalToUnit), - ConstantInt::get(Type::Int1Ty, isDefinition), - ConstantExpr::getBitCast(Val, EmptyStructPtr) + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Context.getNode(), + MDString::get(VMContext, Name), + MDString::get(VMContext, DisplayName), + MDString::get(VMContext, LinkageName), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Type.getNode(), + ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), + Val }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.global_variable.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::LinkOnceAnyLinkage, - Init, "llvm.dbg.global_variable", &M); - GV->setSection("llvm.metadata"); - return DIGlobalVariable(GV); + + Value *const *Vs = &Elts[0]; + MDNode *Node = MDNode::get(VMContext,Vs, 12); + + // Create a named metadata so that we do not 
lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); + NMD->addElement(Node); + + return DIGlobalVariable(Node); } /// CreateVariable - Create a new descriptor for the specified variable. DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNo, DIType Type) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(Tag), - getCastToEmpty(Context), - GetStringConstant(Name), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNo), - getCastToEmpty(Type) + Context.getNode(), + MDString::get(VMContext, Name), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Type.getNode(), }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.variable.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.variable", &M); - GV->setSection("llvm.metadata"); - return DIVariable(GV); + return DIVariable(MDNode::get(VMContext, &Elts[0], 6)); +} + + +/// CreateComplexVariable - Create a new descriptor for the specified variable +/// which has a complex address expression for its address. +DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context, + const std::string &Name, + DICompileUnit CompileUnit, + unsigned LineNo, + DIType Type, SmallVector<Value *, 9> &addr) { + SmallVector<Value *, 9> Elts; + Elts.push_back(GetTagConstant(Tag)); + Elts.push_back(Context.getNode()); + Elts.push_back(MDString::get(VMContext, Name)); + Elts.push_back(CompileUnit.getNode()); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)); + Elts.push_back(Type.getNode()); + Elts.insert(Elts.end(), addr.begin(), addr.end()); + + return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size())); } /// CreateBlock - This creates a descriptor for a lexical block with the -/// specified parent context. -DIBlock DIFactory::CreateBlock(DIDescriptor Context) { - Constant *Elts[] = { +/// specified parent VMContext. +DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context) { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_lexical_block), - getCastToEmpty(Context) + Context.getNode() + }; + return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 2)); +} + +/// CreateLocation - Creates a debug info location. +DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, + DIScope S, DILocation OrigLoc) { + Value *Elts[] = { + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo), + S.getNode(), + OrigLoc.getNode(), }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.block.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.block", &M); - GV->setSection("llvm.metadata"); - return DIBlock(GV); + return DILocation(MDNode::get(VMContext, &Elts[0], 4)); } @@ -830,17 +884,17 @@ DIBlock DIFactory::CreateBlock(DIDescriptor Context) { /// inserting it at the end of the specified basic block. void DIFactory::InsertStopPoint(DICompileUnit CU, unsigned LineNo, unsigned ColNo, BasicBlock *BB) { - + // Lazily construct llvm.dbg.stoppoint function. 
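The llvm.dbg.gv named metadata registered at the top of this chunk exists purely to root the new descriptors: an MDNode with no users has nothing keeping it alive. A loose analogy with shared/weak ownership, rather than the real MDNode lifetime rules:

#include <iostream>
#include <memory>
#include <vector>

struct Descriptor { const char *Name; };

int main() {
  // The "named metadata": a module-level list holding a reference to
  // every descriptor that must survive until the module is consumed.
  std::vector<std::shared_ptr<Descriptor>> NamedMD;

  std::weak_ptr<Descriptor> Observer;
  {
    auto DIG = std::make_shared<Descriptor>(Descriptor{"llvm.dbg.gv entry"});
    Observer = DIG;
    NamedMD.push_back(DIG);   // root it, as getOrInsertNamedMetadata +
  }                           // addElement do in the diff above

  // The local handle is gone, but the anchored descriptor is not.
  std::cout << (Observer.expired() ? "lost" : "kept") << "\n";  // kept
}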
if (!StopPointFn) - StopPointFn = llvm::Intrinsic::getDeclaration(&M, + StopPointFn = llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::dbg_stoppoint); - + // Invoke llvm.dbg.stoppoint Value *Args[] = { - llvm::ConstantInt::get(llvm::Type::Int32Ty, LineNo), - llvm::ConstantInt::get(llvm::Type::Int32Ty, ColNo), - getCastToEmpty(CU) + ConstantInt::get(llvm::Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(llvm::Type::getInt32Ty(VMContext), ColNo), + CU.getNode() }; CallInst::Create(StopPointFn, Args, Args+3, "", BB); } @@ -851,9 +905,9 @@ void DIFactory::InsertSubprogramStart(DISubprogram SP, BasicBlock *BB) { // Lazily construct llvm.dbg.func.start. if (!FuncStartFn) FuncStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_func_start); - + // Call llvm.dbg.func.start which also implicitly sets a stoppoint. - CallInst::Create(FuncStartFn, getCastToEmpty(SP), "", BB); + CallInst::Create(FuncStartFn, SP.getNode(), "", BB); } /// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to @@ -864,7 +918,7 @@ void DIFactory::InsertRegionStart(DIDescriptor D, BasicBlock *BB) { RegionStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_start); // Call llvm.dbg.func.start. - CallInst::Create(RegionStartFn, getCastToEmpty(D), "", BB); + CallInst::Create(RegionStartFn, D.getNode(), "", BB); } /// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to @@ -875,19 +929,220 @@ void DIFactory::InsertRegionEnd(DIDescriptor D, BasicBlock *BB) { RegionEndFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_end); // Call llvm.dbg.region.end. - CallInst::Create(RegionEndFn, getCastToEmpty(D), "", BB); + CallInst::Create(RegionEndFn, D.getNode(), "", BB); } /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. -void DIFactory::InsertDeclare(Value *Storage, DIVariable D, BasicBlock *BB) { +void DIFactory::InsertDeclare(Value *Storage, DIVariable D, + Instruction *InsertBefore) { // Cast the storage to a {}* for the call to llvm.dbg.declare. - Storage = new BitCastInst(Storage, EmptyStructPtr, "", BB); - + Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore); + if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - Value *Args[] = { Storage, getCastToEmpty(D) }; - CallInst::Create(DeclareFn, Args, Args+2, "", BB); + Value *Args[] = { Storage, D.getNode() }; + CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); +} + +/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. +void DIFactory::InsertDeclare(Value *Storage, DIVariable D, + BasicBlock *InsertAtEnd) { + // Cast the storage to a {}* for the call to llvm.dbg.declare. + Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd); + + if (!DeclareFn) + DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); + + Value *Args[] = { Storage, D.getNode() }; + CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd); +} + + +//===----------------------------------------------------------------------===// +// DebugInfoFinder implementations. +//===----------------------------------------------------------------------===// + +/// processModule - Process entire module and collect debug info. 
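A minimal driver for the finder defined below, as a sketch only: processModule is the API added in this patch, while the result-iteration accessors are not shown in these hunks and are assumed here.

    DebugInfoFinder Finder;
    Finder.processModule(M);  // populates the CUs, SPs, GVs and TYs vectors
    // Walking the collected MDNodes afterwards would go through accessors
    // on DebugInfoFinder (hypothetical here, e.g. iterators over CUs/SPs).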
+void DebugInfoFinder::processModule(Module &M) { + +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + MetadataContext &TheMetadata = M.getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); +#endif + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; + ++BI) { + if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(BI)) + processStopPoint(SPI); + else if (DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) + processFuncStart(FSI); + else if (DbgRegionStartInst *DRS = dyn_cast<DbgRegionStartInst>(BI)) + processRegionStart(DRS); + else if (DbgRegionEndInst *DRE = dyn_cast<DbgRegionEndInst>(BI)) + processRegionEnd(DRE); + else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + processDeclare(DDI); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + else if (MDDbgKind) { + if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI)) { + DILocation Loc(L); + DIScope S(Loc.getScope().getNode()); + if (S.isCompileUnit()) + addCompileUnit(DICompileUnit(S.getNode())); + else if (S.isSubprogram()) + processSubprogram(DISubprogram(S.getNode())); + else if (S.isLexicalBlock()) + processLexicalBlock(DILexicalBlock(S.getNode())); + } + } +#endif + } + + NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); + if (!NMD) + return; + + for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) { + DIGlobalVariable DIG(cast<MDNode>(NMD->getElement(i))); + if (addGlobalVariable(DIG)) { + addCompileUnit(DIG.getCompileUnit()); + processType(DIG.getType()); + } + } +} + +/// processType - Process DIType. +void DebugInfoFinder::processType(DIType DT) { + if (!addType(DT)) + return; + + addCompileUnit(DT.getCompileUnit()); + if (DT.isCompositeType()) { + DICompositeType DCT(DT.getNode()); + processType(DCT.getTypeDerivedFrom()); + DIArray DA = DCT.getTypeArray(); + if (!DA.isNull()) + for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) { + DIDescriptor D = DA.getElement(i); + DIType TypeE = DIType(D.getNode()); + if (!TypeE.isNull()) + processType(TypeE); + else + processSubprogram(DISubprogram(D.getNode())); + } + } else if (DT.isDerivedType()) { + DIDerivedType DDT(DT.getNode()); + if (!DDT.isNull()) + processType(DDT.getTypeDerivedFrom()); + } +} + +/// processLexicalBlock +void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { + if (LB.isNull()) + return; + DIScope Context = LB.getContext(); + if (Context.isLexicalBlock()) + return processLexicalBlock(DILexicalBlock(Context.getNode())); + else + return processSubprogram(DISubprogram(Context.getNode())); +} + +/// processSubprogram - Process DISubprogram. +void DebugInfoFinder::processSubprogram(DISubprogram SP) { + if (SP.isNull()) + return; + if (!addSubprogram(SP)) + return; + addCompileUnit(SP.getCompileUnit()); + processType(SP.getType()); +} + +/// processStopPoint - Process DbgStopPointInst. +void DebugInfoFinder::processStopPoint(DbgStopPointInst *SPI) { + MDNode *Context = dyn_cast<MDNode>(SPI->getContext()); + addCompileUnit(DICompileUnit(Context)); +} + +/// processFuncStart - Process DbgFuncStartInst. +void DebugInfoFinder::processFuncStart(DbgFuncStartInst *FSI) { + MDNode *SP = dyn_cast<MDNode>(FSI->getSubprogram()); + processSubprogram(DISubprogram(SP)); +} + +/// processRegionStart - Process DbgRegionStart. 
+void DebugInfoFinder::processRegionStart(DbgRegionStartInst *DRS) {
+  MDNode *SP = dyn_cast<MDNode>(DRS->getContext());
+  processSubprogram(DISubprogram(SP));
+}
+
+/// processRegionEnd - Process DbgRegionEnd.
+void DebugInfoFinder::processRegionEnd(DbgRegionEndInst *DRE) {
+  MDNode *SP = dyn_cast<MDNode>(DRE->getContext());
+  processSubprogram(DISubprogram(SP));
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+  DIVariable DV(cast<MDNode>(DDI->getVariable()));
+  if (DV.isNull())
+    return;
+
+  if (!NodesSeen.insert(DV.getNode()))
+    return;
+
+  addCompileUnit(DV.getCompileUnit());
+  processType(DV.getType());
+}
+
+/// addType - Add type into Tys.
+bool DebugInfoFinder::addType(DIType DT) {
+  if (DT.isNull())
+    return false;
+
+  if (!NodesSeen.insert(DT.getNode()))
+    return false;
+
+  TYs.push_back(DT.getNode());
+  return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+  if (CU.isNull())
+    return false;
+
+  if (!NodesSeen.insert(CU.getNode()))
+    return false;
+
+  CUs.push_back(CU.getNode());
+  return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+  if (DIG.isNull())
+    return false;
+
+  if (!NodesSeen.insert(DIG.getNode()))
+    return false;
+
+  GVs.push_back(DIG.getNode());
+  return true;
+}
+
+/// addSubprogram - Add subprogram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+  if (SP.isNull())
+    return false;
+
+  if (!NodesSeen.insert(SP.getNode()))
+    return false;
+
+  SPs.push_back(SP.getNode());
+  return true;
 }
 
 namespace llvm {
@@ -939,30 +1194,17 @@ namespace llvm {
   Value *findDbgGlobalDeclare(GlobalVariable *V) {
     const Module *M = V->getParent();
-    const Type *Ty = M->getTypeByName("llvm.dbg.global_variable.type");
-    if (!Ty) return 0;
-
-    Ty = PointerType::get(Ty, 0);
-
-    Value *Val = V->stripPointerCasts();
-    for (Value::use_iterator I = Val->use_begin(), E = Val->use_end();
-         I != E; ++I) {
-      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I)) {
-        if (CE->getOpcode() == Instruction::BitCast) {
-          Value *VV = CE;
-
-          while (VV->hasOneUse())
-            VV = *VV->use_begin();
+    NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+    if (!NMD)
+      return 0;
 
-          if (VV->getType() == Ty)
-            return VV;
-        }
-      }
+    for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) {
+      DIGlobalVariable DIG(cast_or_null<MDNode>(NMD->getElement(i)));
+      if (DIG.isNull())
+        continue;
+      if (DIG.getGlobal() == V)
+        return DIG.getNode();
     }
-
-    if (Val->getType() == Ty)
-      return Val;
-
     return 0;
   }
@@ -990,8 +1232,8 @@ namespace llvm {
     return 0;
   }
 
-  bool getLocationInfo(const Value *V, std::string &DisplayName,
-                       std::string &Type, unsigned &LineNo, std::string &File,
+bool getLocationInfo(const Value *V, std::string &DisplayName,
+                     std::string &Type, unsigned &LineNo, std::string &File,
                    std::string &Dir) {
     DICompileUnit Unit;
     DIType TypeD;
@@ -999,81 +1241,56 @@ namespace llvm {
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
       Value *DIGV = findDbgGlobalDeclare(GV);
       if (!DIGV) return false;
-      DIGlobalVariable Var(cast<GlobalVariable>(DIGV));
+      DIGlobalVariable Var(cast<MDNode>(DIGV));
 
-      Var.getDisplayName(DisplayName);
+      if (const char *D = Var.getDisplayName())
+        DisplayName = D;
       LineNo = Var.getLineNumber();
       Unit = Var.getCompileUnit();
       TypeD = Var.getType();
     } else {
       const DbgDeclareInst *DDI = findDbgDeclare(V);
       if (!DDI) return false;
-      DIVariable 
Var(cast<GlobalVariable>(DDI->getVariable())); + DIVariable Var(cast<MDNode>(DDI->getVariable())); - Var.getName(DisplayName); + if (const char *D = Var.getName()) + DisplayName = D; LineNo = Var.getLineNumber(); Unit = Var.getCompileUnit(); TypeD = Var.getType(); } - TypeD.getName(Type); - Unit.getFilename(File); - Unit.getDirectory(Dir); + if (const char *T = TypeD.getName()) + Type = T; + if (const char *F = Unit.getFilename()) + File = F; + if (const char *D = Unit.getDirectory()) + Dir = D; return true; } - /// CollectDebugInfoAnchors - Collect debugging information anchors. - void CollectDebugInfoAnchors(Module &M, - SmallVector<GlobalVariable *, 2> &CUs, - SmallVector<GlobalVariable *, 4> &GVs, - SmallVector<GlobalVariable *, 4> &SPs) { - - for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); - GVI != E; GVI++) { - GlobalVariable *GV = GVI; - if (GV->hasName() && strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0 - && GV->isConstant() && GV->hasInitializer()) { - DICompileUnit C(GV); - if (C.isNull() == false) { - CUs.push_back(GV); - continue; - } - DIGlobalVariable G(GV); - if (G.isNull() == false) { - GVs.push_back(GV); - continue; - } - DISubprogram S(GV); - if (S.isNull() == false) { - SPs.push_back(GV); - continue; - } - } - } - } - - /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug /// info intrinsic. - bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI, + bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI, CodeGenOpt::Level OptLev) { return DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLev); } - /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgFuncStartInst &FSI, CodeGenOpt::Level OptLev) { return DIDescriptor::ValidDebugInfo(FSI.getSubprogram(), OptLev); } - /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgRegionStartInst &RSI, CodeGenOpt::Level OptLev) { return DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLev); } - /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgRegionEndInst &REI, CodeGenOpt::Level OptLev) { @@ -1081,14 +1298,14 @@ namespace llvm { } - /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgDeclareInst &DI, CodeGenOpt::Level OptLev) { return DIDescriptor::ValidDebugInfo(DI.getVariable(), OptLev); } - /// ExtractDebugLocation - Extract debug location information + /// ExtractDebugLocation - Extract debug location information /// from llvm.dbg.stoppoint intrinsic. DebugLoc ExtractDebugLocation(DbgStopPointInst &SPI, DebugLocTracker &DebugLocInfo) { @@ -1096,7 +1313,7 @@ namespace llvm { Value *Context = SPI.getContext(); // If this location is already tracked then use it. 
- DebugLocTuple Tuple(cast<GlobalVariable>(Context), SPI.getLine(), + DebugLocTuple Tuple(cast<MDNode>(Context), NULL, SPI.getLine(), SPI.getColumn()); DenseMap<DebugLocTuple, unsigned>::iterator II = DebugLocInfo.DebugIdMap.find(Tuple); @@ -1107,23 +1324,48 @@ namespace llvm { unsigned Id = DebugLocInfo.DebugLocations.size(); DebugLocInfo.DebugLocations.push_back(Tuple); DebugLocInfo.DebugIdMap[Tuple] = Id; - + + return DebugLoc::get(Id); + } + + /// ExtractDebugLocation - Extract debug location information + /// from DILocation. + DebugLoc ExtractDebugLocation(DILocation &Loc, + DebugLocTracker &DebugLocInfo) { + DebugLoc DL; + MDNode *Context = Loc.getScope().getNode(); + MDNode *InlinedLoc = NULL; + if (!Loc.getOrigLocation().isNull()) + InlinedLoc = Loc.getOrigLocation().getNode(); + // If this location is already tracked then use it. + DebugLocTuple Tuple(Context, InlinedLoc, Loc.getLineNumber(), + Loc.getColumnNumber()); + DenseMap<DebugLocTuple, unsigned>::iterator II + = DebugLocInfo.DebugIdMap.find(Tuple); + if (II != DebugLocInfo.DebugIdMap.end()) + return DebugLoc::get(II->second); + + // Add a new location entry. + unsigned Id = DebugLocInfo.DebugLocations.size(); + DebugLocInfo.DebugLocations.push_back(Tuple); + DebugLocInfo.DebugIdMap[Tuple] = Id; + return DebugLoc::get(Id); } - /// ExtractDebugLocation - Extract debug location information + /// ExtractDebugLocation - Extract debug location information /// from llvm.dbg.func_start intrinsic. DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI, DebugLocTracker &DebugLocInfo) { DebugLoc DL; Value *SP = FSI.getSubprogram(); - DISubprogram Subprogram(cast<GlobalVariable>(SP)); + DISubprogram Subprogram(cast<MDNode>(SP)); unsigned Line = Subprogram.getLineNumber(); DICompileUnit CU(Subprogram.getCompileUnit()); // If this location is already tracked then use it. - DebugLocTuple Tuple(CU.getGV(), Line, /* Column */ 0); + DebugLocTuple Tuple(CU.getNode(), NULL, Line, /* Column */ 0); DenseMap<DebugLocTuple, unsigned>::iterator II = DebugLocInfo.DebugIdMap.find(Tuple); if (II != DebugLocInfo.DebugIdMap.end()) @@ -1133,13 +1375,13 @@ namespace llvm { unsigned Id = DebugLocInfo.DebugLocations.size(); DebugLocInfo.DebugLocations.push_back(Tuple); DebugLocInfo.DebugIdMap[Tuple] = Id; - + return DebugLoc::get(Id); } /// isInlinedFnStart - Return true if FSI is starting an inlined function. bool isInlinedFnStart(DbgFuncStartInst &FSI, const Function *CurrentFn) { - DISubprogram Subprogram(cast<GlobalVariable>(FSI.getSubprogram())); + DISubprogram Subprogram(cast<MDNode>(FSI.getSubprogram())); if (Subprogram.describes(CurrentFn)) return false; @@ -1148,11 +1390,10 @@ namespace llvm { /// isInlinedFnEnd - Return true if REI is ending an inlined function. 
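Both ExtractDebugLocation overloads above share one memoization shape: build a DebugLocTuple key, probe DebugIdMap, and append to DebugLocations only on a miss. The pattern in isolation (a sketch; the helper name is invented, the member types are the ones used above):

    static unsigned internLocation(DebugLocTracker &DLT,
                                   const DebugLocTuple &Tuple) {
      DenseMap<DebugLocTuple, unsigned>::iterator I =
        DLT.DebugIdMap.find(Tuple);
      if (I != DLT.DebugIdMap.end())
        return I->second;                        // already tracked
      unsigned Id = DLT.DebugLocations.size();   // next free id
      DLT.DebugLocations.push_back(Tuple);
      DLT.DebugIdMap[Tuple] = Id;
      return Id;
    }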
  bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn) {
-    DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext()));
+    DISubprogram Subprogram(cast<MDNode>(REI.getContext()));
     if (Subprogram.isNull() || Subprogram.describes(CurrentFn))
       return false;
 
     return true;
   }
-
 }
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 3fb65265472d..1c9159dfbfcc 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -60,9 +60,11 @@
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/System/Atomic.h"
@@ -84,7 +86,9 @@
 #define FULL_UNIVERSAL 0
 
 using namespace llvm;
+#ifndef NDEBUG
 STATISTIC(NumIters      , "Number of iterations to reach convergence");
+#endif
 STATISTIC(NumConstraints, "Number of constraints");
 STATISTIC(NumNodes      , "Number of nodes");
 STATISTIC(NumUnified    , "Number of variables unified");
@@ -507,7 +511,7 @@ namespace {
 #ifndef NDEBUG
         V->dump();
 #endif
-        assert(0 && "Value does not have a node in the points-to graph!");
+        llvm_unreachable("Value does not have a node in the points-to graph!");
       }
       return I->second;
     }
@@ -589,9 +593,12 @@ namespace {
     friend class InstVisitor<Andersens>;
     void visitReturnInst(ReturnInst &RI);
     void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); }
-    void visitCallInst(CallInst &CI) { visitCallSite(CallSite(&CI)); }
+    void visitCallInst(CallInst &CI) {
+      if (isMalloc(&CI)) visitAllocationInst(CI);
+      else visitCallSite(CallSite(&CI));
+    }
     void visitCallSite(CallSite CS);
-    void visitAllocationInst(AllocationInst &AI);
+    void visitAllocationInst(Instruction &I);
     void visitLoadInst(LoadInst &LI);
     void visitStoreInst(StoreInst &SI);
     void visitGetElementPtrInst(GetElementPtrInst &GEP);
@@ -606,7 +613,7 @@ namespace {
     //===------------------------------------------------------------------===//
     // Implement Analyze interface
     //
-    void print(std::ostream &O, const Module* M) const {
+    void print(raw_ostream &O, const Module*) const {
       PrintPointsToGraph();
     }
   };
@@ -614,7 +621,8 @@ namespace {
 
 char Andersens::ID = 0;
 static RegisterPass<Andersens>
-X("anders-aa", "Andersen's Interprocedural Alias Analysis", false, true);
+X("anders-aa", "Andersen's Interprocedural Alias Analysis (experimental)",
+  false, true);
 static RegisterAnalysisGroup<AliasAnalysis> Y(X);
 
 // Initialize Timestamp Counter (static). 
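The Andersens hunks below migrate the old DOUT stream to the DEBUG(errs() << ...) idiom. The idiom in isolation (a compilable sketch; the DEBUG_TYPE string is arbitrary for the example):

    #define DEBUG_TYPE "anders-aa"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void noteRenumbering(unsigned Old, unsigned New) {
      // Compiled away in release builds; printed only under -debug
      // (or -debug-only=anders-aa) in asserts builds.
      DEBUG(errs() << "Renumbering node " << Old << " to node " << New << "\n");
    }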
@@ -786,6 +794,8 @@ void Andersens::IdentifyObjects(Module &M) { ValueNodes[&*II] = NumObjects++; if (AllocationInst *AI = dyn_cast<AllocationInst>(&*II)) ObjectNodes[AI] = NumObjects++; + else if (isMalloc(&*II)) + ObjectNodes[&*II] = NumObjects++; } // Calls to inline asm need to be added as well because the callee isn't @@ -825,11 +835,11 @@ unsigned Andersens::getNodeForConstantPointer(Constant *C) { case Instruction::BitCast: return getNodeForConstantPointer(CE->getOperand(0)); default: - cerr << "Constant Expr not yet handled: " << *CE << "\n"; - assert(0); + errs() << "Constant Expr not yet handled: " << *CE << "\n"; + llvm_unreachable(0); } } else { - assert(0 && "Unknown constant pointer!"); + llvm_unreachable("Unknown constant pointer!"); } return 0; } @@ -852,11 +862,11 @@ unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) { case Instruction::BitCast: return getNodeForConstantPointerTarget(CE->getOperand(0)); default: - cerr << "Constant Expr not yet handled: " << *CE << "\n"; - assert(0); + errs() << "Constant Expr not yet handled: " << *CE << "\n"; + llvm_unreachable(0); } } else { - assert(0 && "Unknown constant pointer!"); + llvm_unreachable("Unknown constant pointer!"); } return 0; } @@ -996,7 +1006,7 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) { if (!isa<PointerType>(V->getType())) return true; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (dyn_cast<LoadInst>(*UI)) { + if (isa<LoadInst>(*UI)) { return false; } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { if (V == SI->getOperand(1)) { @@ -1027,7 +1037,7 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) { } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) return true; // Allow comparison against null. - } else if (dyn_cast<FreeInst>(*UI)) { + } else if (isa<FreeInst>(*UI)) { return false; } else { return true; @@ -1060,7 +1070,7 @@ void Andersens::CollectConstraints(Module &M) { Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I), ObjectIndex)); - if (I->hasInitializer()) { + if (I->hasDefinitiveInitializer()) { AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer()); } else { // If it doesn't have an initializer (i.e. it's defined in another @@ -1152,15 +1162,15 @@ void Andersens::visitInstruction(Instruction &I) { return; default: // Is this something we aren't handling yet? 
- cerr << "Unknown instruction: " << I; - abort(); + errs() << "Unknown instruction: " << I; + llvm_unreachable(0); } } -void Andersens::visitAllocationInst(AllocationInst &AI) { - unsigned ObjectIndex = getObject(&AI); - GraphNodes[ObjectIndex].setValue(&AI); - Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI), +void Andersens::visitAllocationInst(Instruction &I) { + unsigned ObjectIndex = getObject(&I); + GraphNodes[ObjectIndex].setValue(&I); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(I), ObjectIndex)); } @@ -1243,7 +1253,7 @@ void Andersens::visitSelectInst(SelectInst &SI) { } void Andersens::visitVAArg(VAArgInst &I) { - assert(0 && "vaarg not handled yet!"); + llvm_unreachable("vaarg not handled yet!"); } /// AddConstraintsForCall - Add constraints for a call with actual arguments @@ -1395,12 +1405,6 @@ bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const { return Result; } -void dumpToDOUT(SparseBitVector<> *bitmap) { -#ifndef NDEBUG - dump(*bitmap, DOUT); -#endif -} - /// Clump together address taken variables so that the points-to sets use up /// less space and can be operated on faster. @@ -1424,7 +1428,7 @@ void Andersens::ClumpAddressTaken() { unsigned Pos = NewPos++; Translate[i] = Pos; NewGraphNodes.push_back(GraphNodes[i]); - DOUT << "Renumbering node " << i << " to node " << Pos << "\n"; + DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n"); } // I believe this ends up being faster than making two vectors and splicing @@ -1434,7 +1438,7 @@ void Andersens::ClumpAddressTaken() { unsigned Pos = NewPos++; Translate[i] = Pos; NewGraphNodes.push_back(GraphNodes[i]); - DOUT << "Renumbering node " << i << " to node " << Pos << "\n"; + DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n"); } } @@ -1443,7 +1447,7 @@ void Andersens::ClumpAddressTaken() { unsigned Pos = NewPos++; Translate[i] = Pos; NewGraphNodes.push_back(GraphNodes[i]); - DOUT << "Renumbering node " << i << " to node " << Pos << "\n"; + DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n"); } } @@ -1515,7 +1519,7 @@ void Andersens::ClumpAddressTaken() { /// receive &D from E anyway. void Andersens::HVN() { - DOUT << "Beginning HVN\n"; + DEBUG(errs() << "Beginning HVN\n"); // Build a predecessor graph. This is like our constraint graph with the // edges going in the opposite direction, and there are edges for all the // constraints, instead of just copy constraints. We also build implicit @@ -1586,7 +1590,7 @@ void Andersens::HVN() { Node2DFS.clear(); Node2Deleted.clear(); Node2Visited.clear(); - DOUT << "Finished HVN\n"; + DEBUG(errs() << "Finished HVN\n"); } @@ -1710,7 +1714,7 @@ void Andersens::HVNValNum(unsigned NodeIndex) { /// and is equivalent to value numbering the collapsed constraint graph /// including evaluating unions. void Andersens::HU() { - DOUT << "Beginning HU\n"; + DEBUG(errs() << "Beginning HU\n"); // Build a predecessor graph. This is like our constraint graph with the // edges going in the opposite direction, and there are edges for all the // constraints, instead of just copy constraints. We also build implicit @@ -1790,7 +1794,7 @@ void Andersens::HU() { } // PEClass nodes will be deleted by the deleting of N->PointsTo in our caller. Set2PEClass.clear(); - DOUT << "Finished HU\n"; + DEBUG(errs() << "Finished HU\n"); } @@ -1968,12 +1972,12 @@ void Andersens::RewriteConstraints() { // to anything. 
if (LHSLabel == 0) { DEBUG(PrintNode(&GraphNodes[LHSNode])); - DOUT << " is a non-pointer, ignoring constraint.\n"; + DEBUG(errs() << " is a non-pointer, ignoring constraint.\n"); continue; } if (RHSLabel == 0) { DEBUG(PrintNode(&GraphNodes[RHSNode])); - DOUT << " is a non-pointer, ignoring constraint.\n"; + DEBUG(errs() << " is a non-pointer, ignoring constraint.\n"); continue; } // This constraint may be useless, and it may become useless as we translate @@ -2021,19 +2025,19 @@ void Andersens::PrintLabels() const { if (i < FirstRefNode) { PrintNode(&GraphNodes[i]); } else if (i < FirstAdrNode) { - DOUT << "REF("; + DEBUG(errs() << "REF("); PrintNode(&GraphNodes[i-FirstRefNode]); - DOUT <<")"; + DEBUG(errs() <<")"); } else { - DOUT << "ADR("; + DEBUG(errs() << "ADR("); PrintNode(&GraphNodes[i-FirstAdrNode]); - DOUT <<")"; + DEBUG(errs() <<")"); } - DOUT << " has pointer label " << GraphNodes[i].PointerEquivLabel + DEBUG(errs() << " has pointer label " << GraphNodes[i].PointerEquivLabel << " and SCC rep " << VSSCCRep[i] << " and is " << (GraphNodes[i].Direct ? "Direct" : "Not direct") - << "\n"; + << "\n"); } } @@ -2047,7 +2051,7 @@ void Andersens::PrintLabels() const { /// operation are stored in SDT and are later used in SolveContraints() /// and UniteNodes(). void Andersens::HCD() { - DOUT << "Starting HCD.\n"; + DEBUG(errs() << "Starting HCD.\n"); HCDSCCRep.resize(GraphNodes.size()); for (unsigned i = 0; i < GraphNodes.size(); ++i) { @@ -2096,7 +2100,7 @@ void Andersens::HCD() { Node2Visited.clear(); Node2Deleted.clear(); HCDSCCRep.clear(); - DOUT << "HCD complete.\n"; + DEBUG(errs() << "HCD complete.\n"); } // Component of HCD: @@ -2168,7 +2172,7 @@ void Andersens::Search(unsigned Node) { /// Optimize the constraints by performing offline variable substitution and /// other optimizations. void Andersens::OptimizeConstraints() { - DOUT << "Beginning constraint optimization\n"; + DEBUG(errs() << "Beginning constraint optimization\n"); SDTActive = false; @@ -2252,7 +2256,7 @@ void Andersens::OptimizeConstraints() { // HCD complete. - DOUT << "Finished constraint optimization\n"; + DEBUG(errs() << "Finished constraint optimization\n"); FirstRefNode = 0; FirstAdrNode = 0; } @@ -2260,7 +2264,7 @@ void Andersens::OptimizeConstraints() { /// Unite pointer but not location equivalent variables, now that the constraint /// graph is built. 
void Andersens::UnitePointerEquivalences() { - DOUT << "Uniting remaining pointer equivalences\n"; + DEBUG(errs() << "Uniting remaining pointer equivalences\n"); for (unsigned i = 0; i < GraphNodes.size(); ++i) { if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) { unsigned Label = GraphNodes[i].PointerEquivLabel; @@ -2269,7 +2273,7 @@ void Andersens::UnitePointerEquivalences() { UniteNodes(i, PENLEClass2Node[Label]); } } - DOUT << "Finished remaining pointer equivalences\n"; + DEBUG(errs() << "Finished remaining pointer equivalences\n"); PENLEClass2Node.clear(); } @@ -2425,7 +2429,7 @@ void Andersens::SolveConstraints() { std::vector<unsigned int> RSV; #endif while( !CurrWL->empty() ) { - DOUT << "Starting iteration #" << ++NumIters << "\n"; + DEBUG(errs() << "Starting iteration #" << ++NumIters << "\n"); Node* CurrNode; unsigned CurrNodeIndex; @@ -2728,11 +2732,11 @@ unsigned Andersens::UniteNodes(unsigned First, unsigned Second, SecondNode->OldPointsTo = NULL; NumUnified++; - DOUT << "Unified Node "; + DEBUG(errs() << "Unified Node "); DEBUG(PrintNode(FirstNode)); - DOUT << " and Node "; + DEBUG(errs() << " and Node "); DEBUG(PrintNode(SecondNode)); - DOUT << "\n"; + DEBUG(errs() << "\n"); if (SDTActive) if (SDT[Second] >= 0) { @@ -2777,17 +2781,17 @@ unsigned Andersens::FindNode(unsigned NodeIndex) const { void Andersens::PrintNode(const Node *N) const { if (N == &GraphNodes[UniversalSet]) { - cerr << "<universal>"; + errs() << "<universal>"; return; } else if (N == &GraphNodes[NullPtr]) { - cerr << "<nullptr>"; + errs() << "<nullptr>"; return; } else if (N == &GraphNodes[NullObject]) { - cerr << "<null>"; + errs() << "<null>"; return; } if (!N->getValue()) { - cerr << "artificial" << (intptr_t) N; + errs() << "artificial" << (intptr_t) N; return; } @@ -2796,85 +2800,85 @@ void Andersens::PrintNode(const Node *N) const { if (Function *F = dyn_cast<Function>(V)) { if (isa<PointerType>(F->getFunctionType()->getReturnType()) && N == &GraphNodes[getReturnNode(F)]) { - cerr << F->getName() << ":retval"; + errs() << F->getName() << ":retval"; return; } else if (F->getFunctionType()->isVarArg() && N == &GraphNodes[getVarargNode(F)]) { - cerr << F->getName() << ":vararg"; + errs() << F->getName() << ":vararg"; return; } } if (Instruction *I = dyn_cast<Instruction>(V)) - cerr << I->getParent()->getParent()->getName() << ":"; + errs() << I->getParent()->getParent()->getName() << ":"; else if (Argument *Arg = dyn_cast<Argument>(V)) - cerr << Arg->getParent()->getName() << ":"; + errs() << Arg->getParent()->getName() << ":"; if (V->hasName()) - cerr << V->getName(); + errs() << V->getName(); else - cerr << "(unnamed)"; + errs() << "(unnamed)"; - if (isa<GlobalValue>(V) || isa<AllocationInst>(V)) + if (isa<GlobalValue>(V) || isa<AllocationInst>(V) || isMalloc(V)) if (N == &GraphNodes[getObject(V)]) - cerr << "<mem>"; + errs() << "<mem>"; } void Andersens::PrintConstraint(const Constraint &C) const { if (C.Type == Constraint::Store) { - cerr << "*"; + errs() << "*"; if (C.Offset != 0) - cerr << "("; + errs() << "("; } PrintNode(&GraphNodes[C.Dest]); if (C.Type == Constraint::Store && C.Offset != 0) - cerr << " + " << C.Offset << ")"; - cerr << " = "; + errs() << " + " << C.Offset << ")"; + errs() << " = "; if (C.Type == Constraint::Load) { - cerr << "*"; + errs() << "*"; if (C.Offset != 0) - cerr << "("; + errs() << "("; } else if (C.Type == Constraint::AddressOf) - cerr << "&"; + errs() << "&"; PrintNode(&GraphNodes[C.Src]); if (C.Offset != 0 && C.Type != Constraint::Store) - cerr << " + " << 
C.Offset; + errs() << " + " << C.Offset; if (C.Type == Constraint::Load && C.Offset != 0) - cerr << ")"; - cerr << "\n"; + errs() << ")"; + errs() << "\n"; } void Andersens::PrintConstraints() const { - cerr << "Constraints:\n"; + errs() << "Constraints:\n"; for (unsigned i = 0, e = Constraints.size(); i != e; ++i) PrintConstraint(Constraints[i]); } void Andersens::PrintPointsToGraph() const { - cerr << "Points-to graph:\n"; + errs() << "Points-to graph:\n"; for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) { const Node *N = &GraphNodes[i]; if (FindNode(i) != i) { PrintNode(N); - cerr << "\t--> same as "; + errs() << "\t--> same as "; PrintNode(&GraphNodes[FindNode(i)]); - cerr << "\n"; + errs() << "\n"; } else { - cerr << "[" << (N->PointsTo->count()) << "] "; + errs() << "[" << (N->PointsTo->count()) << "] "; PrintNode(N); - cerr << "\t--> "; + errs() << "\t--> "; bool first = true; for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); bi != N->PointsTo->end(); ++bi) { if (!first) - cerr << ", "; + errs() << ", "; PrintNode(&GraphNodes[*bi]); first = false; } - cerr << "\n"; + errs() << "\n"; } } } diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 6dabcdb94bf1..e2b288d1ba96 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -18,8 +18,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" -#include <ostream> +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -54,7 +53,7 @@ public: CallsExternalNode = new CallGraphNode(0); Root = 0; - // Add every function to the call graph... + // Add every function to the call graph. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) addToCallGraph(I); @@ -68,30 +67,21 @@ public: AU.setPreservesAll(); } - void print(std::ostream *o, const Module *M) const { - if (o) print(*o, M); - } - - virtual void print(std::ostream &o, const Module *M) const { - o << "CallGraph Root is: "; + virtual void print(raw_ostream &OS, const Module *) const { + OS << "CallGraph Root is: "; if (Function *F = getRoot()->getFunction()) - o << F->getName() << "\n"; - else - o << "<<null function: 0x" << getRoot() << ">>\n"; + OS << F->getName() << "\n"; + else { + OS << "<<null function: 0x" << getRoot() << ">>\n"; + } - CallGraph::print(o, M); + CallGraph::print(OS, 0); } virtual void releaseMemory() { destroy(); } - /// dump - Print out this call graph. 
- /// - inline void dump() const { - print(cerr, Mod); - } - CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; } CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; } @@ -179,21 +169,20 @@ void CallGraph::initialize(Module &M) { } void CallGraph::destroy() { - if (!FunctionMap.empty()) { - for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); - I != E; ++I) - delete I->second; - FunctionMap.clear(); - } + if (FunctionMap.empty()) return; + + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + delete I->second; + FunctionMap.clear(); } -void CallGraph::print(std::ostream &OS, const Module *M) const { +void CallGraph::print(raw_ostream &OS, Module*) const { for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I) I->second->print(OS); } - void CallGraph::dump() const { - print(cerr, 0); + print(errs(), 0); } //===----------------------------------------------------------------------===// @@ -207,7 +196,7 @@ void CallGraph::dump() const { // is to dropAllReferences before calling this. // Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { - assert(CGN->CalledFunctions.empty() && "Cannot remove function from call " + assert(CGN->empty() && "Cannot remove function from call " "graph if it references other functions!"); Function *F = CGN->getFunction(); // Get the function for the call graph node delete CGN; // Delete the call graph node for this func @@ -217,20 +206,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { return F; } -// changeFunction - This method changes the function associated with this -// CallGraphNode, for use by transformations that need to change the prototype -// of a Function (thus they must create a new Function and move the old code -// over). -void CallGraph::changeFunction(Function *OldF, Function *NewF) { - iterator I = FunctionMap.find(OldF); - CallGraphNode *&New = FunctionMap[NewF]; - assert(I != FunctionMap.end() && I->second && !New && - "OldF didn't exist in CG or NewF already does!"); - New = I->second; - New->F = NewF; - FunctionMap.erase(I); -} - // getOrInsertFunction - This method is identical to calling operator[], but // it will insert a new CallGraphNode for the specified function if one does // not already exist. @@ -242,11 +217,13 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { return CGN = new CallGraphNode(const_cast<Function*>(F)); } -void CallGraphNode::print(std::ostream &OS) const { +void CallGraphNode::print(raw_ostream &OS) const { if (Function *F = getFunction()) - OS << "Call graph node for function: '" << F->getName() <<"'\n"; + OS << "Call graph node for function: '" << F->getName() << "'"; else - OS << "Call graph node <<null function: 0x" << this << ">>:\n"; + OS << "Call graph node <<null function>>"; + + OS << "<<0x" << this << ">> #uses=" << getNumReferences() << '\n'; for (const_iterator I = begin(), E = end(); I != E; ++I) if (Function *FI = I->second->getFunction()) @@ -256,7 +233,7 @@ void CallGraphNode::print(std::ostream &OS) const { OS << "\n"; } -void CallGraphNode::dump() const { print(cerr); } +void CallGraphNode::dump() const { print(errs()); } /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. 
Note that this method takes linear time, so it @@ -264,8 +241,10 @@ void CallGraphNode::dump() const { print(cerr); } void CallGraphNode::removeCallEdgeFor(CallSite CS) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); - if (I->first == CS) { - CalledFunctions.erase(I); + if (I->first == CS.getInstruction()) { + I->second->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); return; } } @@ -278,6 +257,7 @@ void CallGraphNode::removeCallEdgeFor(CallSite CS) { void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) { for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i) if (CalledFunctions[i].second == Callee) { + Callee->DropRef(); CalledFunctions[i] = CalledFunctions.back(); CalledFunctions.pop_back(); --i; --e; @@ -290,21 +270,27 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callee to remove!"); CallRecord &CR = *I; - if (CR.second == Callee && !CR.first.getInstruction()) { - CalledFunctions.erase(I); + if (CR.second == Callee && CR.first == 0) { + Callee->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); return; } } } -/// replaceCallSite - Make the edge in the node for Old CallSite be for -/// New CallSite instead. Note that this method takes linear time, so it -/// should be used sparingly. -void CallGraphNode::replaceCallSite(CallSite Old, CallSite New) { +/// replaceCallEdge - This method replaces the edge in the node for the +/// specified call site with a new one. Note that this method takes linear +/// time, so it should be used sparingly. +void CallGraphNode::replaceCallEdge(CallSite CS, + CallSite NewCS, CallGraphNode *NewNode){ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { - assert(I != CalledFunctions.end() && "Cannot find callsite to replace!"); - if (I->first == Old) { - I->first = New; + assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); + if (I->first == CS.getInstruction()) { + I->second->DropRef(); + I->first = NewCS.getInstruction(); + I->second = NewNode; + NewNode->AddRef(); return; } } diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 3880d0a10bb6..a96a5c591f83 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -15,22 +15,25 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "cgscc-passmgr" #include "llvm/CallGraphSCCPass.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/PassManagers.h" #include "llvm/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// // CGPassManager // -/// CGPassManager manages FPPassManagers and CalLGraphSCCPasses. +/// CGPassManager manages FPPassManagers and CallGraphSCCPasses. 
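The CallGraphNode editing methods above now key edges by the underlying call Instruction and keep the callee's reference count balanced (AddRef/DropRef). A hedged sketch of a transform that rewrites a call and keeps the graph in sync (CG is the CallGraph analysis; Caller is a Function*; OldCall, NewCall and NewCallee are stand-in names, not from this patch):

    CallGraphNode *CallerNode = CG[Caller];
    // replaceCallEdge drops a reference on the old callee's node and takes
    // one on the new, so no manual ref bookkeeping is needed here.
    CallerNode->replaceCallEdge(CallSite(OldCall), CallSite(NewCall),
                                CG.getOrInsertFunction(NewCallee));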
namespace { class CGPassManager : public ModulePass, public PMDataManager { - public: static char ID; explicit CGPassManager(int Depth) @@ -56,7 +59,7 @@ public: // Print passes managed by this manager void dumpPassStructure(unsigned Offset) { - llvm::cerr << std::string(Offset*2, ' ') << "Call Graph SCC Pass Manager\n"; + errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); P->dumpPassStructure(Offset + 1); @@ -65,56 +68,275 @@ public: } Pass *getContainedPass(unsigned N) { - assert ( N < PassVector.size() && "Pass number out of range!"); - Pass *FP = static_cast<Pass *>(PassVector[N]); - return FP; + assert(N < PassVector.size() && "Pass number out of range!"); + return static_cast<Pass *>(PassVector[N]); } virtual PassManagerType getPassManagerType() const { return PMT_CallGraphPassManager; } + +private: + bool RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate); + void RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, CallGraph &CG, + bool IsCheckingMode); }; -} +} // end anonymous namespace. char CGPassManager::ID = 0; + +bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate) { + bool Changed = false; + if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass*>(P)) { + if (!CallGraphUpToDate) { + RefreshCallGraph(CurSCC, CG, false); + CallGraphUpToDate = true; + } + + Timer *T = StartPassTimer(CGSP); + Changed = CGSP->runOnSCC(CurSCC); + StopPassTimer(CGSP, T); + + // After the CGSCCPass is done, when assertions are enabled, use + // RefreshCallGraph to verify that the callgraph was correctly updated. +#ifndef NDEBUG + if (Changed) + RefreshCallGraph(CurSCC, CG, true); +#endif + + return Changed; + } + + FPPassManager *FPP = dynamic_cast<FPPassManager *>(P); + assert(FPP && "Invalid CGPassManager member"); + + // Run pass P on all functions in the current SCC. + for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) { + if (Function *F = CurSCC[i]->getFunction()) { + dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); + Timer *T = StartPassTimer(FPP); + Changed |= FPP->runOnFunction(*F); + StopPassTimer(FPP, T); + } + } + + // The function pass(es) modified the IR, they may have clobbered the + // callgraph. + if (Changed && CallGraphUpToDate) { + DEBUG(errs() << "CGSCCPASSMGR: Pass Dirtied SCC: " + << P->getPassName() << '\n'); + CallGraphUpToDate = false; + } + return Changed; +} + + +/// RefreshCallGraph - Scan the functions in the specified CFG and resync the +/// callgraph with the call sites found in it. This is used after +/// FunctionPasses have potentially munged the callgraph, and can be used after +/// CallGraphSCC passes to verify that they correctly updated the callgraph. +/// +void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, + CallGraph &CG, bool CheckingMode) { + DenseMap<Value*, CallGraphNode*> CallSites; + + DEBUG(errs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() + << " nodes:\n"; + for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) + CurSCC[i]->dump(); + ); + + bool MadeChange = false; + + // Scan all functions in the SCC. + for (unsigned sccidx = 0, e = CurSCC.size(); sccidx != e; ++sccidx) { + CallGraphNode *CGN = CurSCC[sccidx]; + Function *F = CGN->getFunction(); + if (F == 0 || F->isDeclaration()) continue; + + // Walk the function body looking for call sites. 
Sync up the call sites in + // CGN with those actually in the function. + + // Get the set of call sites currently in the function. + for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { + // If this call site is null, then the function pass deleted the call + // entirely and the WeakVH nulled it out. + if (I->first == 0 || + // If we've already seen this call site, then the FunctionPass RAUW'd + // one call with another, which resulted in two "uses" in the edge + // list of the same call. + CallSites.count(I->first) || + + // If the call edge is not from a call or invoke, then the function + // pass RAUW'd a call with another value. This can happen when + // constant folding happens of well known functions etc. + CallSite::get(I->first).getInstruction() == 0) { + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // Just remove the edge from the set of callees, keep track of whether + // I points to the last element of the vector. + bool WasLast = I + 1 == E; + CGN->removeCallEdge(I); + + // If I pointed to the last element of the vector, we have to bail out: + // iterator checking rejects comparisons of the resultant pointer with + // end. + if (WasLast) + break; + E = CGN->end(); + continue; + } + + assert(!CallSites.count(I->first) && + "Call site occurs in node multiple times"); + CallSites.insert(std::make_pair(I->first, I->second)); + ++I; + } + + // Loop over all of the instructions in the function, getting the callsites. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS = CallSite::get(I); + if (!CS.getInstruction() || isa<DbgInfoIntrinsic>(I)) continue; + + // If this call site already existed in the callgraph, just verify it + // matches up to expectations and remove it from CallSites. + DenseMap<Value*, CallGraphNode*>::iterator ExistingIt = + CallSites.find(CS.getInstruction()); + if (ExistingIt != CallSites.end()) { + CallGraphNode *ExistingNode = ExistingIt->second; + + // Remove from CallSites since we have now seen it. + CallSites.erase(ExistingIt); + + // Verify that the callee is right. + if (ExistingNode->getFunction() == CS.getCalledFunction()) + continue; + + // If we are in checking mode, we are not allowed to actually mutate + // the callgraph. If this is a case where we can infer that the + // callgraph is less precise than it could be (e.g. an indirect call + // site could be turned direct), don't reject it in checking mode, and + // don't tweak it to be more precise. + if (CheckingMode && CS.getCalledFunction() && + ExistingNode->getFunction() == 0) + continue; + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If not, we either went from a direct call to indirect, indirect to + // direct, or direct to different direct. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) + CalleeNode = CG.getOrInsertFunction(Callee); + else + CalleeNode = CG.getCallsExternalNode(); + + // Update the edge target in CGN. + for (CallGraphNode::iterator I = CGN->begin(); ; ++I) { + assert(I != CGN->end() && "Didn't find call entry"); + if (I->first == CS.getInstruction()) { + I->second = CalleeNode; + break; + } + } + MadeChange = true; + continue; + } + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If the call site didn't exist in the CGN yet, add it. 
We assume that + // newly introduced call sites won't be indirect. This could be fixed + // in the future. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) + CalleeNode = CG.getOrInsertFunction(Callee); + else + CalleeNode = CG.getCallsExternalNode(); + + CGN->addCalledFunction(CS, CalleeNode); + MadeChange = true; + } + + // After scanning this function, if we still have entries in callsites, then + // they are dangling pointers. WeakVH should save us for this, so abort if + // this happens. + assert(CallSites.empty() && "Dangling pointers found in call sites map"); + + // Periodically do an explicit clear to remove tombstones when processing + // large scc's. + if ((sccidx & 15) == 0) + CallSites.clear(); + } + + DEBUG(if (MadeChange) { + errs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; + for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) + CurSCC[i]->dump(); + } else { + errs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; + } + ); +} + /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. bool CGPassManager::runOnModule(Module &M) { CallGraph &CG = getAnalysis<CallGraph>(); bool Changed = doInitialization(CG); - // Walk SCC - for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); - I != E; ++I) { - - // Run all passes on current SCC - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - Pass *P = getContainedPass(Index); - - dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, ""); + std::vector<CallGraphNode*> CurSCC; + + // Walk the callgraph in bottom-up SCC order. + for (scc_iterator<CallGraph*> CGI = scc_begin(&CG), E = scc_end(&CG); + CGI != E;) { + // Copy the current SCC and increment past it so that the pass can hack + // on the SCC if it wants to without invalidating our iterator. + CurSCC = *CGI; + ++CGI; + + + // CallGraphUpToDate - Keep track of whether the callgraph is known to be + // up-to-date or not. The CGSSC pass manager runs two types of passes: + // CallGraphSCC Passes and other random function passes. Because other + // random function passes are not CallGraph aware, they may clobber the + // call graph by introducing new calls or deleting other ones. This flag + // is set to false when we run a function pass so that we know to clean up + // the callgraph when we need to run a CGSCCPass again. + bool CallGraphUpToDate = true; + + // Run all passes on current SCC. + for (unsigned PassNo = 0, e = getNumContainedPasses(); + PassNo != e; ++PassNo) { + Pass *P = getContainedPass(PassNo); + + // If we're in -debug-pass=Executions mode, construct the SCC node list, + // otherwise avoid constructing this string as it is expensive. + if (isPassDebuggingExecutionsOrMore()) { + std::string Functions; +#ifndef NDEBUG + raw_string_ostream OS(Functions); + for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) { + if (i) OS << ", "; + CurSCC[i]->print(OS); + } + OS.flush(); +#endif + dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); + } dumpRequiredSet(P); initializeAnalysisImpl(P); - StartPassTimer(P); - if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass *>(P)) - Changed |= CGSP->runOnSCC(*I); // TODO : What if CG is changed ? 
- else { - FPPassManager *FPP = dynamic_cast<FPPassManager *>(P); - assert (FPP && "Invalid CGPassManager member"); - - // Run pass P on all functions current SCC - std::vector<CallGraphNode*> &SCC = *I; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); - if (F) { - dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getNameStart()); - Changed |= FPP->runOnFunction(*F); - } - } - } - StopPassTimer(P); + // Actually run this pass on the current SCC. + Changed |= RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); @@ -125,6 +347,11 @@ bool CGPassManager::runOnModule(Module &M) { recordAvailableAnalysis(P); removeDeadPasses(P, "", ON_CG_MSG); } + + // If the callgraph was left out of date (because the last pass run was a + // functionpass), refresh it before we move on to the next SCC. + if (!CallGraphUpToDate) + RefreshCallGraph(CurSCC, CG, false); } Changed |= doFinalization(CG); return Changed; diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp index 920ee374555f..c4fb0b9a4e3d 100644 --- a/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/lib/Analysis/IPA/FindUsedTypes.cpp @@ -92,13 +92,12 @@ bool FindUsedTypes::runOnModule(Module &m) { // passed in, then the types are printed symbolically if possible, using the // symbol table from the module. // -void FindUsedTypes::print(std::ostream &OS, const Module *M) const { - raw_os_ostream RO(OS); - RO << "Types in use by this module:\n"; +void FindUsedTypes::print(raw_ostream &OS, const Module *M) const { + OS << "Types in use by this module:\n"; for (std::set<const Type *>::const_iterator I = UsedTypes.begin(), E = UsedTypes.end(); I != E; ++I) { - RO << " "; - WriteTypeSymbolic(RO, *I, M); - RO << '\n'; + OS << " "; + WriteTypeSymbolic(OS, *I, M); + OS << '\n'; } } diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 2e9884aa01b4..f5c110841292 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -23,6 +23,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InstIterator.h" @@ -236,6 +237,9 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true; + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) { + if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest)) + return true; } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { // Make sure that this is just the function being called, not that it is // passing into the function. @@ -299,7 +303,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Check the value being stored. Value *Ptr = SI->getOperand(0)->getUnderlyingObject(); - if (isa<MallocInst>(Ptr)) { + if (isa<MallocInst>(Ptr) || isMalloc(Ptr)) { // Okay, easy case. } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) { Function *F = CI->getCalledFunction(); @@ -435,7 +439,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (cast<StoreInst>(*II).isVolatile()) // Treat volatile stores as reading memory somewhere. 
FunctionEffect |= Ref; - } else if (isa<MallocInst>(*II) || isa<FreeInst>(*II)) { + } else if (isa<MallocInst>(*II) || isa<FreeInst>(*II) || + isMalloc(&cast<Instruction>(*II))) { FunctionEffect |= ModRef; } diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index caeb14bef373..543e017fc9dd 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -19,7 +19,6 @@ #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/ADT/STLExtras.h" @@ -39,7 +38,7 @@ Pass *llvm::createIVUsersPass() { /// containsAddRecFromDifferentLoop - Determine whether expression S involves a /// subexpression that is an AddRec from a loop other than L. An outer loop /// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { +static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { // This is very common, put it first. if (isa<SCEVConstant>(S)) return false; @@ -54,7 +53,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop)) + if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) return false; } return true; @@ -80,10 +79,10 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { /// a mix of loop invariant and loop variant expressions. The start cannot, /// however, contain an AddRec from a different loop, unless that loop is an /// outer loop of the current loop. -static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop, - const SCEV* &Start, const SCEV* &Stride, +static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, + const SCEV *&Start, const SCEV *&Stride, ScalarEvolution *SE, DominatorTree *DT) { - const SCEV* TheAddRec = Start; // Initialize to zero. + const SCEV *TheAddRec = Start; // Initialize to zero. // If the outer level is an AddExpr, the operands are all start values except // for a nested AddRecExpr. @@ -109,9 +108,9 @@ static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop, // Use getSCEVAtScope to attempt to simplify other loops out of // the picture. - const SCEV* AddRecStart = AddRec->getStart(); + const SCEV *AddRecStart = AddRec->getStart(); AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - const SCEV* AddRecStride = AddRec->getStepRecurrence(*SE); + const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE); // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other // than an outer loop of the current loop, reject it. LSR has no concept of @@ -122,15 +121,15 @@ static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop, Start = SE->getAddExpr(Start, AddRecStart); - // If stride is an instruction, make sure it dominates the loop preheader. + // If stride is an instruction, make sure it properly dominates the header. // Otherwise we could end up with a use before def situation. 
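For intuition about the start/stride split performed here: a use whose recurrence is {x,+,4}<L>, with x loop-invariant, yields Start = x and Stride = 4. A source-level shape producing exactly that recurrence (illustrative only):

    long f(long x, long n) {
      long Sum = 0;
      for (long i = 0, v = x; i != n; ++i, v += 4)
        Sum += v;   // SCEV of v is {x,+,4}<L>: Start = x, Stride = 4
      return Sum;
    }

Only when the stride is not a constant, as in the check that follows, does the dominance test matter.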
if (!isa<SCEVConstant>(AddRecStride)) { - BasicBlock *Preheader = L->getLoopPreheader(); - if (!AddRecStride->dominates(Preheader, DT)) + BasicBlock *Header = L->getHeader(); + if (!AddRecStride->properlyDominates(Header, DT)) return false; - DOUT << "[" << L->getHeader()->getName() - << "] Variable stride: " << *AddRec << "\n"; + DEBUG(errs() << "[" << L->getHeader()->getName() + << "] Variable stride: " << *AddRec << "\n"); } Stride = AddRecStride; @@ -196,13 +195,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return true; // Instruction already handled. // Get the symbolic expression for this instruction. - const SCEV* ISE = SE->getSCEV(I); + const SCEV *ISE = SE->getSCEV(I); if (isa<SCEVCouldNotCompute>(ISE)) return false; // Get the start and stride for this expression. Loop *UseLoop = LI->getLoopFor(I->getParent()); - const SCEV* Start = SE->getIntegerSCEV(0, ISE->getType()); - const SCEV* Stride = Start; + const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType()); + const SCEV *Stride = Start; if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) return false; // Non-reducible symbolic expression, bail out. @@ -228,14 +227,14 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (LI->getLoopFor(User->getParent()) != L) { if (isa<PHINode>(User) || Processed.count(User) || !AddUsersIfInteresting(User)) { - DOUT << "FOUND USER in other loop: " << *User - << " OF SCEV: " << *ISE << "\n"; + DEBUG(errs() << "FOUND USER in other loop: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } } else if (Processed.count(User) || !AddUsersIfInteresting(User)) { - DOUT << "FOUND USER: " << *User - << " OF SCEV: " << *ISE << "\n"; + DEBUG(errs() << "FOUND USER: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } @@ -254,10 +253,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) { // The value used will be incremented by the stride more than we are // expecting, so subtract this off. - const SCEV* NewStart = SE->getMinusSCEV(Start, Stride); + const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); StrideUses->addUser(NewStart, User, I); StrideUses->Users.back().setIsUseOfPostIncrementedValue(true); - DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; + DEBUG(errs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); } else { StrideUses->addUser(Start, User, I); } @@ -295,9 +294,9 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { /// getReplacementExpr - Return a SCEV expression which computes the /// value of the OperandValToReplace of the given IVStrideUse. -const SCEV* IVUsers::getReplacementExpr(const IVStrideUse &U) const { +const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { // Start with zero. - const SCEV* RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); + const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); // Create the basic add recurrence. RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L); // Add the offset in a separate step, because it may be loop-variant. @@ -308,7 +307,7 @@ const SCEV* IVUsers::getReplacementExpr(const IVStrideUse &U) const { RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride); // Evaluate the expression out of the loop, if possible. 
if (!L->contains(U.getUser()->getParent())) { - const SCEV* ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); + const SCEV *ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); if (ExitVal->isLoopInvariant(L)) RetVal = ExitVal; } @@ -325,7 +324,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << ":\n"; for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { - std::map<const SCEV*, IVUsersOfOneStride*>::const_iterator SI = + std::map<const SCEV *, IVUsersOfOneStride*>::const_iterator SI = IVUsesByStride.find(StrideOrder[Stride]); assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n"; @@ -340,15 +339,11 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << " (post-inc)"; OS << " in "; UI->getUser()->print(OS); + OS << '\n'; } } } -void IVUsers::print(std::ostream &o, const Module *M) const { - raw_os_ostream OS(o); - print(OS, M); -} - void IVUsers::dump() const { print(errs()); } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp new file mode 100644 index 000000000000..3b0d2c90aeb5 --- /dev/null +++ b/lib/Analysis/InlineCost.cpp @@ -0,0 +1,338 @@ +//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inline cost analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Support/CallSite.h" +#include "llvm/CallingConv.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +// CountCodeReductionForConstant - Figure out an approximation for how many +// instructions will be constant folded if the specified value is constant. +// +unsigned InlineCostAnalyzer::FunctionInfo:: + CountCodeReductionForConstant(Value *V) { + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (isa<BranchInst>(*UI)) + Reduction += 40; // Eliminating a conditional branch is a big win + else if (SwitchInst *SI = dyn_cast<SwitchInst>(*UI)) + // Eliminating a switch is a big win, proportional to the number of edges + // deleted. + Reduction += (SI->getNumSuccessors()-1) * 40; + else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + // Turning an indirect call into a direct call is a BIG win + Reduction += CI->getCalledValue() == V ? 500 : 0; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + // Turning an indirect call into a direct call is a BIG win + Reduction += II->getCalledValue() == V ? 500 : 0; + } else { + // Figure out if this instruction will be removed due to simple constant + // propagation. + Instruction &Inst = cast<Instruction>(**UI); + + // We can't constant propagate instructions which have effects or + // read memory. + // + // FIXME: It would be nice to capture the fact that a load from a + // pointer-to-constant-global is actually a *really* good thing to zap. + // Unfortunately, we don't know the pointer that may get propagated here, + // so we can't make this decision. 
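Editorial aside: the walk in CountCodeReductionForConstant is a transitive estimate: a user becomes foldable once all of its operands are constant, and folding it may in turn make its own users foldable. A standalone sketch of the same counting scheme over a toy use-graph, counting each folded instruction once (the real heuristic weights branches, switches, and indirect calls differently; the types here are hypothetical, not the LLVM IR classes):

    #include <iostream>
    #include <vector>

    // Toy instruction: it folds away once every operand is constant.
    struct ToyInst {
      std::vector<ToyInst*> Users; // instructions that consume this value
      int UnknownOperands;         // operands not yet known constant
    };

    // Count instructions removable once V is known to be constant,
    // mirroring the "fold, then recurse into users" estimate.
    static unsigned countReduction(ToyInst *V) {
      unsigned Reduction = 0;
      for (ToyInst *U : V->Users)
        if (--U->UnknownOperands == 0) { // all operands now constant
          Reduction += 1;                // this user folds away...
          Reduction += countReduction(U);// ...and may unlock its users
        }
      return Reduction;
    }

    int main() {
      ToyInst C, B, A;                   // A feeds B, B feeds C
      A.Users = {&B}; A.UnknownOperands = 1;
      B.Users = {&C}; B.UnknownOperands = 1;
      C.UnknownOperands = 1;
      std::cout << countReduction(&A) << "\n"; // prints 2: both B and C fold
      return 0;
    }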
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || + isa<AllocationInst>(Inst)) + continue; + + bool AllOperandsConstant = true; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) + if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { + AllOperandsConstant = false; + break; + } + + if (AllOperandsConstant) { + // We will get to remove this instruction... + Reduction += 7; + + // And any other instructions that use it which become constants + // themselves. + Reduction += CountCodeReductionForConstant(&Inst); + } + } + + return Reduction; +} + +// CountCodeReductionForAlloca - Figure out an approximation of how much smaller +// the function will be if it is inlined into a context where an argument +// becomes an alloca. +// +unsigned InlineCostAnalyzer::FunctionInfo:: + CountCodeReductionForAlloca(Value *V) { + if (!isa<PointerType>(V->getType())) return 0; // Not a pointer + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + Instruction *I = cast<Instruction>(*UI); + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + Reduction += 10; + else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If the GEP has variable indices, we won't be able to do much with it. + if (!GEP->hasAllConstantIndices()) + Reduction += CountCodeReductionForAlloca(GEP)+15; + } else { + // If there is some other strange instruction, we're not going to be able + // to do much if we inline this. + return 0; + } + } + + return Reduction; +} + +/// analyzeBasicBlock - Fill in the current structure with information gleaned +/// from the specified block. +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { + ++NumBlocks; + + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + if (isa<PHINode>(II)) continue; // PHI nodes don't count. + + // Special handling for calls. + if (isa<CallInst>(II) || isa<InvokeInst>(II)) { + if (isa<DbgInfoIntrinsic>(II)) + continue; // Debug intrinsics don't count as size. + + CallSite CS = CallSite::get(const_cast<Instruction*>(&*II)); + + // If this function contains a call to setjmp or _setjmp, never inline + // it. This is a hack because we depend on the user marking their local + // variables as volatile if they are live across a setjmp call, and they + // probably won't do this in callers. + if (Function *F = CS.getCalledFunction()) + if (F->isDeclaration() && + (F->getName() == "setjmp" || F->getName() == "_setjmp")) + NeverInline = true; + + // Calls often compile into many machine instructions. Bump up their + // cost to reflect this. + if (!isa<IntrinsicInst>(II)) + NumInsts += InlineConstants::CallPenalty; + } + + // These, too, are calls. + if (isa<MallocInst>(II) || isa<FreeInst>(II)) + NumInsts += InlineConstants::CallPenalty; + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (!AI->isStaticAlloca()) + this->usesDynamicAlloca = true; + } + + if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType())) + ++NumVectorInsts; + + // Noop casts, including ptr <-> int, don't count. + if (const CastInst *CI = dyn_cast<CastInst>(II)) { + if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || + isa<PtrToIntInst>(CI)) + continue; + } else if (const GetElementPtrInst *GEPI = + dyn_cast<GetElementPtrInst>(II)) { + // If a GEP has all constant indices, it will probably be folded with + // a load/store. 
+ if (GEPI->hasAllConstantIndices()) + continue; + } + + if (isa<ReturnInst>(II)) + ++NumRets; + + ++NumInsts; + } +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void CodeMetrics::analyzeFunction(Function *F) { + // Look at the size of the callee. + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + analyzeBasicBlock(&*BB); +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { + Metrics.analyzeFunction(F); + + // A function with exactly one return has it removed during the inlining + // process (see InlineFunction), so don't count it. + // FIXME: This knowledge should really be encoded outside of FunctionInfo. + if (Metrics.NumRets==1) + --Metrics.NumInsts; + + // Check out all of the arguments to the function, figuring out how much + // code can be eliminated if one of the arguments is a constant. + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I), + CountCodeReductionForAlloca(I))); +} + +// getInlineCost - The heuristic used to determine if we should inline the +// function call or not. +// +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, + SmallPtrSet<const Function *, 16> &NeverInline) { + Instruction *TheCall = CS.getInstruction(); + Function *Callee = CS.getCalledFunction(); + Function *Caller = TheCall->getParent()->getParent(); + + // Don't inline functions which can be redefined at link-time to mean + // something else. Don't inline functions marked noinline. + if (Callee->mayBeOverridden() || + Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee)) + return llvm::InlineCost::getNever(); + + // InlineCost - This value measures how good an inline candidate this call + // site is. A lower inline cost makes it more likely for the call to + // be inlined. This value may go negative. + // + int InlineCost = 0; + + // If there is only one call of the function, and it has internal linkage, + // make it almost guaranteed to be inlined. + // + if (Callee->hasLocalLinkage() && Callee->hasOneUse()) + InlineCost += InlineConstants::LastCallToStaticBonus; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (Callee->getCallingConv() == CallingConv::Cold) + InlineCost += InlineConstants::ColdccPenalty; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + if (isa<UnreachableInst>(II->getNormalDest()->begin())) + InlineCost += InlineConstants::NoreturnPenalty; + } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) + InlineCost += InlineConstants::NoreturnPenalty; + + // Get information about the callee... + FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI.Metrics.NumBlocks == 0) + CalleeFI.analyzeFunction(Callee); + + // If we should never inline this, return a huge cost. + if (CalleeFI.Metrics.NeverInline) + return InlineCost::getNever(); + + // FIXME: It would be nice to kill off CalleeFI.NeverInline.
Then we + // could move this up and avoid computing the FunctionInfo for + // things we are going to just return always inline for. This + // requires handling setjmp somewhere else, however. + if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) + return InlineCost::getAlways(); + + if (CalleeFI.Metrics.usesDynamicAlloca) { + // Get information about the caller... + FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; + + // If we haven't calculated this information yet, do so now. + if (CallerFI.Metrics.NumBlocks == 0) + CallerFI.analyzeFunction(Caller); + + // Don't inline a callee with dynamic alloca into a caller without them. + // Functions containing dynamic allocas are inefficient in various ways; + // don't create more inefficiency. + if (!CallerFI.Metrics.usesDynamicAlloca) + return InlineCost::getNever(); + } + + // Add to the inline quality for properties that make the call valuable to + // inline. This includes factors that indicate that the result of inlining + // the function will be optimizable. Currently this just looks at arguments + // passed into the function. + // + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I, ++ArgNo) { + // Each argument passed in has a cost at both the caller and the callee + // sides. This favors functions that take many arguments over functions + // that take few arguments. + InlineCost -= 20; + + // If this is a function being passed in, it is very likely that we will be + // able to turn an indirect function call into a direct function call. + if (isa<Function>(I)) + InlineCost -= 100; + + // If an alloca is passed in, inlining this function is likely to allow + // significant future optimization possibilities (like scalar promotion and + // scalarization), so encourage the inlining of the function. + // + else if (isa<AllocaInst>(I)) { + if (ArgNo < CalleeFI.ArgumentWeights.size()) + InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight; + + // If this is a constant being passed into the function, use the argument + // weights calculated for the callee to determine how much will be folded + // away with this information. + } else if (isa<Constant>(I)) { + if (ArgNo < CalleeFI.ArgumentWeights.size()) + InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight; + } + } + + // Now that we have considered all of the factors that make the call site more + // likely to be inlined, look at factors that make us not want to inline it. + + // Don't inline into something too big, which would make it bigger. + // "size" here is the number of basic blocks, not instructions. + // + InlineCost += Caller->size()/15; + + // Look at the size of the callee. Each instruction counts as 5. + InlineCost += CalleeFI.Metrics.NumInsts*5; + + return llvm::InlineCost::get(InlineCost); +} + +// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a +// higher threshold to determine if the function call should be inlined. +float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { + Function *Callee = CS.getCalledFunction(); + + // Get information about the callee... + FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI.Metrics.NumBlocks == 0) + CalleeFI.analyzeFunction(Callee); + + float Factor = 1.0f; + // Single BB functions are often written to be inlined.
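Editorial aside: stripped of the IR bookkeeping, the cost computed in getInlineCost above is a running integer of bonuses and penalties. A standalone sketch of that arithmetic (the named constants are illustrative placeholders, since this diff does not show the actual InlineConstants values):

    #include <iostream>

    // Illustrative stand-ins for InlineConstants; treat these numbers
    // as placeholders, not the real LLVM values.
    static const int LastCallToStaticBonus = -15000;
    static const int ColdccPenalty         = 2000;
    static const int NoreturnPenalty       = 10000;

    static int inlineCostSketch(bool soleInternalCall, bool coldCC, bool noReturn,
                                unsigned numArgs, unsigned funcPtrArgs,
                                unsigned callerBlocks, unsigned calleeInsts) {
      int Cost = 0;
      if (soleInternalCall) Cost += LastCallToStaticBonus;
      if (coldCC)           Cost += ColdccPenalty;
      if (noReturn)         Cost += NoreturnPenalty;
      Cost -= numArgs * 20;      // each argument makes inlining a bit more attractive
      Cost -= funcPtrArgs * 100; // function-pointer args may become direct calls
      Cost += callerBlocks / 15; // don't bloat an already-large caller
      Cost += calleeInsts * 5;   // each callee instruction counts as 5
      return Cost;               // lower (or negative) means "more inlinable"
    }

    int main() {
      // A small static helper called exactly once: strongly negative, so inline it.
      std::cout << inlineCostSketch(true, false, false, 2, 0, 30, 40) << "\n";
      return 0;
    }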
+ if (CalleeFI.Metrics.NumBlocks == 1) + Factor += 0.5f; + + // Be more aggressive if the function contains a good chunk (if it makes up + // at least 10% of the instructions) of vector instructions. + if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2) + Factor += 2.0f; + else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10) + Factor += 1.5f; + return Factor; +} diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 2b34ad3b070d..83724caf5210 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -16,8 +16,9 @@ #include "llvm/Pass.h" #include "llvm/Function.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstVisitor.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -46,8 +47,8 @@ namespace { #include "llvm/Instruction.def" void visitInstruction(Instruction &I) { - cerr << "Instruction Count does not know about " << I; - abort(); + errs() << "Instruction Count does not know about " << I; + llvm_unreachable(0); } public: static char ID; // Pass identification, replacement for typeid @@ -58,7 +59,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } - virtual void print(std::ostream &O, const Module *M) const {} + virtual void print(raw_ostream &O, const Module *M) const {} }; } diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp index 16b194723071..ca9cdcaf2464 100644 --- a/lib/Analysis/Interval.cpp +++ b/lib/Analysis/Interval.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/Interval.h" #include "llvm/BasicBlock.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -29,29 +30,29 @@ bool Interval::isLoop() const { // There is a loop in this interval iff one of the predecessors of the header // node lives in the interval. for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode); - I != E; ++I) { - if (contains(*I)) return true; - } + I != E; ++I) + if (contains(*I)) + return true; return false; } -void Interval::print(std::ostream &o) const { - o << "-------------------------------------------------------------\n" +void Interval::print(raw_ostream &OS) const { + OS << "-------------------------------------------------------------\n" << "Interval Contents:\n"; // Print out all of the basic blocks in the interval...
for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) - o << **I << "\n"; + OS << **I << "\n"; - o << "Interval Predecessors:\n"; + OS << "Interval Predecessors:\n"; for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(), E = Predecessors.end(); I != E; ++I) - o << **I << "\n"; + OS << **I << "\n"; - o << "Interval Successors:\n"; + OS << "Interval Successors:\n"; for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(), E = Successors.end(); I != E; ++I) - o << **I << "\n"; + OS << **I << "\n"; } diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp index cb8a85da552a..1f17b77a5b96 100644 --- a/lib/Analysis/IntervalPartition.cpp +++ b/lib/Analysis/IntervalPartition.cpp @@ -32,7 +32,7 @@ void IntervalPartition::releaseMemory() { RootInterval = 0; } -void IntervalPartition::print(std::ostream &O, const Module*) const { +void IntervalPartition::print(raw_ostream &O, const Module*) const { for(unsigned i = 0, e = Intervals.size(); i != e; ++i) Intervals[i]->print(O); } diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index 971e6e7accb4..741965929890 100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Function.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetData.h" using namespace llvm; // Register this pass... @@ -37,7 +36,6 @@ LibCallAliasAnalysis::~LibCallAliasAnalysis() { void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired<TargetData>(); AU.setPreservesAll(); // Does not transform code } diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index 29850471f7dc..e0060c3e89b1 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -57,9 +57,6 @@ const LibCallFunctionInfo *LibCallInfo::getFunctionInfo(Function *F) const { } // Look up this function in the string map. - const char *ValueName = F->getNameStart(); - StringMap<const LibCallFunctionInfo*>::iterator I = - Map->find(ValueName, ValueName+F->getNameLen()); - return I != Map->end() ? I->second : 0; + return Map->lookup(F->getName()); } diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp index f6057839266f..32d22662c341 100644 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/lib/Analysis/LoopDependenceAnalysis.cpp @@ -15,18 +15,33 @@ // // TODO: adapt as implementation progresses. 
// +// TODO: document lingo (pair, subscript, index) +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lda" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopDependenceAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Instructions.h" +#include "llvm/Operator.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" using namespace llvm; +STATISTIC(NumAnswered, "Number of dependence queries answered"); +STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed"); +STATISTIC(NumDependent, "Number of pairs with dependent accesses"); +STATISTIC(NumIndependent, "Number of pairs with independent accesses"); +STATISTIC(NumUnknown, "Number of pairs with unknown accesses"); + LoopPass *llvm::createLoopDependenceAnalysisPass() { return new LoopDependenceAnalysis(); } @@ -44,14 +59,14 @@ static inline bool IsMemRefInstr(const Value *V) { return I && (I->mayReadFromMemory() || I->mayWriteToMemory()); } -static void GetMemRefInstrs( - const Loop *L, SmallVectorImpl<Instruction*> &memrefs) { +static void GetMemRefInstrs(const Loop *L, + SmallVectorImpl<Instruction*> &Memrefs) { for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); - b != be; ++b) + b != be; ++b) for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end(); - i != ie; ++i) + i != ie; ++i) if (IsMemRefInstr(i)) - memrefs.push_back(i); + Memrefs.push_back(i); } static bool IsLoadOrStoreInst(Value *I) { @@ -63,53 +78,223 @@ static Value *GetPointerOperand(Value *I) { return i->getPointerOperand(); if (StoreInst *i = dyn_cast<StoreInst>(I)) return i->getPointerOperand(); - assert(0 && "Value is no load or store instruction!"); + llvm_unreachable("Value is no load or store instruction!"); // Never reached. 
return 0; } +static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA, + const Value *A, + const Value *B) { + const Value *aObj = A->getUnderlyingObject(); + const Value *bObj = B->getUnderlyingObject(); + return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()), + bObj, AA->getTypeStoreSize(bObj->getType())); +} + +static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) { + return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L); +} + //===----------------------------------------------------------------------===// // Dependence Testing //===----------------------------------------------------------------------===// -bool LoopDependenceAnalysis::isDependencePair(const Value *x, - const Value *y) const { - return IsMemRefInstr(x) && - IsMemRefInstr(y) && - (cast<const Instruction>(x)->mayWriteToMemory() || - cast<const Instruction>(y)->mayWriteToMemory()); +bool LoopDependenceAnalysis::isDependencePair(const Value *A, + const Value *B) const { + return IsMemRefInstr(A) && + IsMemRefInstr(B) && + (cast<const Instruction>(A)->mayWriteToMemory() || + cast<const Instruction>(B)->mayWriteToMemory()); +} + +bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A, + Value *B, + DependencePair *&P) { + void *insertPos = 0; + FoldingSetNodeID id; + id.AddPointer(A); + id.AddPointer(B); + + P = Pairs.FindNodeOrInsertPos(id, insertPos); + if (P) return true; + + P = PairAllocator.Allocate<DependencePair>(); + new (P) DependencePair(id, A, B); + Pairs.InsertNode(P, insertPos); + return false; +} + +void LoopDependenceAnalysis::getLoops(const SCEV *S, + DenseSet<const Loop*>* Loops) const { + // Refactor this into an SCEVVisitor, if efficiency becomes a concern. + for (const Loop *L = this->L; L != 0; L = L->getParentLoop()) + if (!S->isLoopInvariant(L)) + Loops->insert(L); +} + +bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const { + DenseSet<const Loop*> loops; + getLoops(S, &loops); + return loops.empty(); +} + +bool LoopDependenceAnalysis::isAffine(const SCEV *S) const { + const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S); + return isLoopInvariant(S) || (rec && rec->isAffine()); +} + +bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const { + return isLoopInvariant(A) && isLoopInvariant(B); +} + +bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const { + DenseSet<const Loop*> loops; + getLoops(A, &loops); + getLoops(B, &loops); + return loops.size() == 1; +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseZIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!"); + return A == B ? Dependent : Independent; } -bool LoopDependenceAnalysis::depends(Value *src, Value *dst) { - assert(isDependencePair(src, dst) && "Values form no dependence pair!"); - DOUT << "== LDA test ==\n" << *src << *dst; +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseSIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + return Unknown; // TODO: Implement. +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseMIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + return Unknown; // TODO: Implement. +} - // We only analyse loads and stores; for possible memory accesses by e.g. - // free, call, or invoke instructions we conservatively assume dependence. 
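Editorial aside: a ZIV ("zero index variable") subscript pair contains no induction variable at all, so the two accesses can only touch the same element when the loop-invariant subscripts are equal; analyseZIV above reduces to exactly that test. A standalone illustration with plain integers standing in for loop-invariant SCEVs (with symbolic values, unequal expressions would not always prove independence, so this is the simplified constant case):

    #include <cassert>

    enum DependenceResult { Independent, Dependent };

    // ZIV test: both subscripts are loop-invariant, so A[x] and A[y]
    // refer to the same element on every iteration iff x == y.
    static DependenceResult analyseZIVSketch(long x, long y) {
      return x == y ? Dependent : Independent;
    }

    int main() {
      assert(analyseZIVSketch(3, 3) == Dependent);   // A[3] vs A[3]
      assert(analyseZIVSketch(3, 7) == Independent); // A[3] vs A[7]
      return 0;
    }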
- if (!IsLoadOrStoreInst(src) || !IsLoadOrStoreInst(dst)) - return true; +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseSubscript(const SCEV *A, + const SCEV *B, + Subscript *S) const { + DEBUG(errs() << " Testing subscript: " << *A << ", " << *B << "\n"); - Value *srcPtr = GetPointerOperand(src); - Value *dstPtr = GetPointerOperand(dst); - const Value *srcObj = srcPtr->getUnderlyingObject(); - const Value *dstObj = dstPtr->getUnderlyingObject(); - AliasAnalysis::AliasResult alias = AA->alias( - srcObj, AA->getTargetData().getTypeStoreSize(srcObj->getType()), - dstObj, AA->getTargetData().getTypeStoreSize(dstObj->getType())); + if (A == B) { + DEBUG(errs() << " -> [D] same SCEV\n"); + return Dependent; + } - // If we don't know whether or not the two objects alias, assume dependence. - if (alias == AliasAnalysis::MayAlias) - return true; + if (!isAffine(A) || !isAffine(B)) { + DEBUG(errs() << " -> [?] not affine\n"); + return Unknown; + } - // If the objects noalias, they are distinct, accesses are independent. - if (alias == AliasAnalysis::NoAlias) - return false; + if (isZIVPair(A, B)) + return analyseZIV(A, B, S); - // TODO: the underlying objects MustAlias, test for dependence + if (isSIVPair(A, B)) + return analyseSIV(A, B, S); - // We couldn't establish a more precise result, so we have to conservatively - // assume full dependence. - return true; + return analyseMIV(A, B, S); +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analysePair(DependencePair *P) const { + DEBUG(errs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n"); + + // We only analyse loads and stores but no possible memory accesses by e.g. + // free, call, or invoke instructions. + if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) { + DEBUG(errs() << "--> [?] no load/store\n"); + return Unknown; + } + + Value *aPtr = GetPointerOperand(P->A); + Value *bPtr = GetPointerOperand(P->B); + + switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) { + case AliasAnalysis::MayAlias: + // We can not analyse objects if we do not know about their aliasing. + DEBUG(errs() << "---> [?] may alias\n"); + return Unknown; + + case AliasAnalysis::NoAlias: + // If the objects noalias, they are distinct, accesses are independent. + DEBUG(errs() << "---> [I] no alias\n"); + return Independent; + + case AliasAnalysis::MustAlias: + break; // The underlying objects alias, test accesses for dependence. + } + + const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr); + const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr); + + if (!aGEP || !bGEP) + return Unknown; + + // FIXME: Is filtering coupled subscripts necessary? + + // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding + // trailing zeroes to the smaller GEP, if needed. + typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy; + GEPOpdPairsTy opds; + for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(), + aEnd = aGEP->idx_end(), + bIdx = bGEP->idx_begin(), + bEnd = bGEP->idx_end(); + aIdx != aEnd && bIdx != bEnd; + aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) { + const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE); + const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE); + opds.push_back(std::make_pair(aSCEV, bSCEV)); + } + + if (!opds.empty() && opds[0].first != opds[0].second) { + // We cannot (yet) handle arbitrary GEP pointer offsets. 
By limiting + // + // TODO: this could be relaxed by adding the size of the underlying object + // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we + // know that x is a [100 x i8]*, we could modify the first subscript to be + // (i, 200-i) instead of (i, -i). + return Unknown; + } + + // Now analyse the collected operand pairs (skipping the GEP ptr offsets). + for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end(); + i != end; ++i) { + Subscript subscript; + DependenceResult result = analyseSubscript(i->first, i->second, &subscript); + if (result != Dependent) { + // We either proved independence or failed to analyse this subscript. + // Further subscripts will not improve the situation, so abort early. + return result; + } + P->Subscripts.push_back(subscript); + } + // We successfully analysed all subscripts but failed to prove independence. + return Dependent; +} + +bool LoopDependenceAnalysis::depends(Value *A, Value *B) { + assert(isDependencePair(A, B) && "Values form no dependence pair!"); + ++NumAnswered; + + DependencePair *p; + if (!findOrInsertDependencePair(A, B, p)) { + // The pair is not cached, so analyse it. + ++NumAnalysed; + switch (p->Result = analysePair(p)) { + case Dependent: ++NumDependent; break; + case Independent: ++NumIndependent; break; + case Unknown: ++NumUnknown; break; + } + } + return p->Result != Independent; } //===----------------------------------------------------------------------===// @@ -123,14 +308,19 @@ bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) { return false; } +void LoopDependenceAnalysis::releaseMemory() { + Pairs.clear(); + PairAllocator.Reset(); +} + void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive<AliasAnalysis>(); AU.addRequiredTransitive<ScalarEvolution>(); } -static void PrintLoopInfo( - raw_ostream &OS, LoopDependenceAnalysis *LDA, const Loop *L) { +static void PrintLoopInfo(raw_ostream &OS, + LoopDependenceAnalysis *LDA, const Loop *L) { if (!L->empty()) return; // ignore non-innermost loops SmallVector<Instruction*, 8> memrefs; @@ -142,14 +332,14 @@ static void PrintLoopInfo( OS << " Load/store instructions: " << memrefs.size() << "\n"; for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), - end = memrefs.end(); x != end; ++x) - OS << "\t" << (x - memrefs.begin()) << ": " << **x; + end = memrefs.end(); x != end; ++x) + OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n"; OS << " Pairwise dependence results:\n"; for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), - end = memrefs.end(); x != end; ++x) + end = memrefs.end(); x != end; ++x) for (SmallVector<Instruction*, 8>::const_iterator y = x + 1; - y != end; ++y) + y != end; ++y) if (LDA->isDependencePair(*x, *y)) OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin()) << ": " << (LDA->depends(*x, *y) ? 
"dependent" : "independent") @@ -160,8 +350,3 @@ void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const { // TODO: doc why const_cast is safe PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L); } - -void LoopDependenceAnalysis::print(std::ostream &OS, const Module *M) const { - raw_os_ostream os(OS); - print(os, M); -} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index bb535894efab..ce2d29f331b6 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -20,12 +20,22 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/CommandLine.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include <algorithm> using namespace llvm; +// Always verify loopinfo if expensive checking is enabled. +#ifdef XDEBUG +bool VerifyLoopInfo = true; +#else +bool VerifyLoopInfo = false; +#endif +static cl::opt<bool,true> +VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), + cl::desc("Verify loop info (time consuming)")); + char LoopInfo::ID = 0; static RegisterPass<LoopInfo> X("loops", "Natural Loop Information", true, true); @@ -34,6 +44,338 @@ X("loops", "Natural Loop Information", true, true); // Loop implementation // +/// isLoopInvariant - Return true if the specified value is loop invariant +/// +bool Loop::isLoopInvariant(Value *V) const { + if (Instruction *I = dyn_cast<Instruction>(V)) + return isLoopInvariant(I); + return true; // All non-instructions are loop invariant +} + +/// isLoopInvariant - Return true if the specified instruction is +/// loop-invariant. +/// +bool Loop::isLoopInvariant(Instruction *I) const { + return !contains(I->getParent()); +} + +/// makeLoopInvariant - If the given value is an instruciton inside of the +/// loop and it can be hoisted, do so to make it trivially loop-invariant. +/// Return true if the value after any hoisting is loop invariant. This +/// function can be used as a slightly more aggressive replacement for +/// isLoopInvariant. +/// +/// If InsertPt is specified, it is the point to hoist instructions to. +/// If null, the terminator of the loop preheader is used. +/// +bool Loop::makeLoopInvariant(Value *V, bool &Changed, + Instruction *InsertPt) const { + if (Instruction *I = dyn_cast<Instruction>(V)) + return makeLoopInvariant(I, Changed, InsertPt); + return true; // All non-instructions are loop-invariant. +} + +/// makeLoopInvariant - If the given instruction is inside of the +/// loop and it can be hoisted, do so to make it trivially loop-invariant. +/// Return true if the instruction after any hoisting is loop invariant. This +/// function can be used as a slightly more aggressive replacement for +/// isLoopInvariant. +/// +/// If InsertPt is specified, it is the point to hoist instructions to. +/// If null, the terminator of the loop preheader is used. +/// +bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, + Instruction *InsertPt) const { + // Test if the value is already loop-invariant. + if (isLoopInvariant(I)) + return true; + if (!I->isSafeToSpeculativelyExecute()) + return false; + if (I->mayReadFromMemory()) + return false; + // Determine the insertion point, unless one was given. + if (!InsertPt) { + BasicBlock *Preheader = getLoopPreheader(); + // Without a preheader, hoisting is not feasible. 
+ if (!Preheader) + return false; + InsertPt = Preheader->getTerminator(); + } + // Don't hoist instructions with loop-variant operands. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt)) + return false; + // Hoist. + I->moveBefore(InsertPt); + Changed = true; + return true; +} + +/// getCanonicalInductionVariable - Check to see if the loop has a canonical +/// induction variable: an integer recurrence that starts at 0 and increments +/// by one each time through the loop. If so, return the phi node that +/// corresponds to it. +/// +/// The IndVarSimplify pass transforms loops to have a canonical induction +/// variable. +/// +PHINode *Loop::getCanonicalInductionVariable() const { + BasicBlock *H = getHeader(); + + BasicBlock *Incoming = 0, *Backedge = 0; + typedef GraphTraits<Inverse<BasicBlock*> > InvBlockTraits; + InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(H); + assert(PI != InvBlockTraits::child_end(H) && + "Loop must have at least one backedge!"); + Backedge = *PI++; + if (PI == InvBlockTraits::child_end(H)) return 0; // dead loop + Incoming = *PI++; + if (PI != InvBlockTraits::child_end(H)) return 0; // multiple backedges? + + if (contains(Incoming)) { + if (contains(Backedge)) + return 0; + std::swap(Incoming, Backedge); + } else if (!contains(Backedge)) + return 0; + + // Loop over all of the PHI nodes, looking for a canonical indvar. + for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + if (ConstantInt *CI = + dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming))) + if (CI->isNullValue()) + if (Instruction *Inc = + dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge))) + if (Inc->getOpcode() == Instruction::Add && + Inc->getOperand(0) == PN) + if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1))) + if (CI->equalsInt(1)) + return PN; + } + return 0; +} + +/// getCanonicalInductionVariableIncrement - Return the LLVM value that holds +/// the canonical induction variable value for the "next" iteration of the +/// loop. This always succeeds if getCanonicalInductionVariable succeeds. +/// +Instruction *Loop::getCanonicalInductionVariableIncrement() const { + if (PHINode *PN = getCanonicalInductionVariable()) { + bool P1InLoop = contains(PN->getIncomingBlock(1)); + return cast<Instruction>(PN->getIncomingValue(P1InLoop)); + } + return 0; +} + +/// getTripCount - Return a loop-invariant LLVM value indicating the number of +/// times the loop will be executed. Note that this means that the backedge +/// of the loop executes N-1 times. If the trip-count cannot be determined, +/// this returns null. +/// +/// The IndVarSimplify pass transforms loops to have a form that this +/// function easily understands. +/// +Value *Loop::getTripCount() const { + // Canonical loops will end with a 'cmp ne I, V', where I is the incremented + // canonical induction variable and V is the trip count of the loop. 
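Editorial aside: in source terms, the canonical form this comment describes is a counter that starts at 0, steps by 1, and exits on an inequality test against the trip count. A sketch of the shape being matched (ordinary C++, annotated with the roles the analysis assigns; this stands in for the IR pattern, it is not the analysis itself):

    #include <iostream>

    int main() {
      const int N = 10;          // "V": the loop-invariant trip count
      int body = 0;
      // "I" below is the canonical induction variable: starts at 0, steps by 1.
      // The backedge test 'i != N' is the 'cmp ne I, V' that getTripCount
      // matches, so the body runs N times and the backedge executes N-1 times.
      for (int i = 0; i != N; ++i)
        ++body;
      std::cout << body << "\n"; // prints 10
      return 0;
    }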
+ Instruction *Inc = getCanonicalInductionVariableIncrement(); + if (Inc == 0) return 0; + PHINode *IV = cast<PHINode>(Inc->getOperand(0)); + + BasicBlock *BackedgeBlock = + IV->getIncomingBlock(contains(IV->getIncomingBlock(1))); + + if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator())) + if (BI->isConditional()) { + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { + if (ICI->getOperand(0) == Inc) { + if (BI->getSuccessor(0) == getHeader()) { + if (ICI->getPredicate() == ICmpInst::ICMP_NE) + return ICI->getOperand(1); + } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { + return ICI->getOperand(1); + } + } + } + } + + return 0; +} + +/// getSmallConstantTripCount - Returns the trip count of this loop as a +/// normal unsigned value, if possible. Returns 0 if the trip count is unknown +/// or not constant. Will also return 0 if the trip count is very large +/// (>= 2^32). +unsigned Loop::getSmallConstantTripCount() const { + Value* TripCount = this->getTripCount(); + if (TripCount) { + if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) { + // Guard against huge trip counts. + if (TripCountC->getValue().getActiveBits() <= 32) { + return (unsigned)TripCountC->getZExtValue(); + } + } + } + return 0; +} + +/// getSmallConstantTripMultiple - Returns the largest constant divisor of the +/// trip count of this loop as a normal unsigned value, if possible. This +/// means that the actual trip count is always a multiple of the returned +/// value (don't forget the trip count could very well be zero as well!). +/// +/// Returns 1 if the trip count is unknown or not guaranteed to be the +/// multiple of a constant (which is also the case if the trip count is simply +/// constant; use getSmallConstantTripCount for that case). Will also return 1 +/// if the trip count is very large (>= 2^32). +unsigned Loop::getSmallConstantTripMultiple() const { + Value* TripCount = this->getTripCount(); + // This will hold the ConstantInt result, if any + ConstantInt *Result = NULL; + if (TripCount) { + // See if the trip count is constant itself + Result = dyn_cast<ConstantInt>(TripCount); + // if not, see if it is a multiplication + if (!Result) + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) { + switch (BO->getOpcode()) { + case BinaryOperator::Mul: + Result = dyn_cast<ConstantInt>(BO->getOperand(1)); + break; + default: + break; + } + } + } + // Guard against huge trip counts. + if (Result && Result->getValue().getActiveBits() <= 32) { + return (unsigned)Result->getZExtValue(); + } else { + return 1; + } +} + +/// isLCSSAForm - Return true if the Loop is in LCSSA form +bool Loop::isLCSSAForm() const { + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end()); + + for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { + BasicBlock *BB = *BI; + for (BasicBlock ::iterator I = BB->begin(), E = BB->end(); I != E;++I) + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(*UI)) { + UserBB = P->getIncomingBlock(UI); + } + + // Check the current block, as a fast-path. Most values are used in + // the same block they are defined in.
+ if (UserBB != BB && !LoopBBs.count(UserBB)) + return false; + } + } + + return true; +} + +/// isLoopSimplifyForm - Return true if the Loop is in the form that +/// the LoopSimplify form transforms loops to, which is sometimes called +/// normal form. +bool Loop::isLoopSimplifyForm() const { + // Normal-form loops have a preheader. + if (!getLoopPreheader()) + return false; + // Normal-form loops have a single backedge. + if (!getLoopLatch()) + return false; + // Each predecessor of each exit block of a normal loop is contained + // within the loop. + SmallVector<BasicBlock *, 4> ExitBlocks; + getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + for (pred_iterator PI = pred_begin(ExitBlocks[i]), + PE = pred_end(ExitBlocks[i]); PI != PE; ++PI) + if (!contains(*PI)) + return false; + // All the requirements are met. + return true; +} + +/// getUniqueExitBlocks - Return all unique successor blocks of this loop. +/// These are the blocks _outside of the current loop_ which are branched to. +/// This assumes that the loop is in canonical form. +/// +void +Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { + assert(isLoopSimplifyForm() && + "getUniqueExitBlocks assumes the loop is in canonical form!"); + + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end()); + std::sort(LoopBBs.begin(), LoopBBs.end()); + + SmallVector<BasicBlock *, 32> switchExitBlocks; + + for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) { + + BasicBlock *current = *BI; + switchExitBlocks.clear(); + + typedef GraphTraits<BasicBlock *> BlockTraits; + typedef GraphTraits<Inverse<BasicBlock *> > InvBlockTraits; + for (BlockTraits::ChildIteratorType I = + BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); + I != E; ++I) { + // If block is inside the loop then it is not an exit block. + if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) + continue; + + InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(*I); + BasicBlock *firstPred = *PI; + + // If current basic block is this exit block's first predecessor + // then only insert exit block into the output ExitBlocks vector. + // This ensures that the same exit block is not inserted twice into + // ExitBlocks vector. + if (current != firstPred) + continue; + + // If a terminator has more than two successors, for example SwitchInst, + // then it is possible that there are multiple edges from current block + // to one exit block. + if (std::distance(BlockTraits::child_begin(current), + BlockTraits::child_end(current)) <= 2) { + ExitBlocks.push_back(*I); + continue; + } + + // In case of multiple edges from current block to exit block, collect + // only one edge in ExitBlocks. Use switchExitBlocks to keep track of + // duplicate edges. + if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I) + == switchExitBlocks.end()) { + switchExitBlocks.push_back(*I); + ExitBlocks.push_back(*I); + } + } + } +} + +/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one +/// block, return that block. Otherwise return null.
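Editorial aside: the deduplication in getUniqueExitBlocks boils down to walking every successor edge that leaves the loop and recording each target block once. A standalone sketch using a set over a toy CFG (hypothetical types, not the LLVM classes, which instead deduplicate via first-predecessor checks and a side vector for switches):

    #include <iostream>
    #include <set>
    #include <vector>

    struct ToyBlock { std::vector<ToyBlock*> Succs; };

    // Collect each block outside the loop that is branched to from inside,
    // without duplicates: the essence of getUniqueExitBlocks.
    static std::vector<ToyBlock*>
    uniqueExitBlocks(const std::set<ToyBlock*> &LoopBlocks) {
      std::set<ToyBlock*> Seen;
      std::vector<ToyBlock*> Exits;
      for (ToyBlock *BB : LoopBlocks)
        for (ToyBlock *Succ : BB->Succs)
          if (!LoopBlocks.count(Succ) && Seen.insert(Succ).second)
            Exits.push_back(Succ); // first time we reach this exit
      return Exits;
    }

    int main() {
      ToyBlock Exit, Latch, Header;
      Header.Succs = {&Latch, &Exit};  // conditional exit from the header
      Latch.Succs  = {&Header, &Exit}; // and another edge to the same exit
      std::vector<ToyBlock*> Exits = uniqueExitBlocks({&Header, &Latch});
      std::cout << Exits.size() << "\n"; // prints 1: the exit is recorded once
      return 0;
    }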
+BasicBlock *Loop::getUniqueExitBlock() const { + SmallVector<BasicBlock *, 8> UniqueExitBlocks; + getUniqueExitBlocks(UniqueExitBlocks); + if (UniqueExitBlocks.size() == 1) + return UniqueExitBlocks[0]; + return 0; +} + //===----------------------------------------------------------------------===// // LoopInfo implementation // @@ -43,7 +385,29 @@ bool LoopInfo::runOnFunction(Function &) { return false; } +void LoopInfo::verifyAnalysis() const { + // LoopInfo is a FunctionPass, but verifying every loop in the function + // each time verifyAnalysis is called is very expensive. The + // -verify-loop-info option can enable this. In order to perform some + // checking by default, LoopPass has been taught to call verifyLoop + // manually during loop pass sequences. + + if (!VerifyLoopInfo) return; + + for (iterator I = begin(), E = end(); I != E; ++I) { + assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); + (*I)->verifyLoopNest(); + } + + // TODO: check BBMap consistency. +} + void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<DominatorTree>(); } + +void LoopInfo::print(raw_ostream &OS, const Module*) const { + LI.print(OS); +} + diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index ee03556f2741..43463cd8ef1c 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -21,7 +21,6 @@ using namespace llvm; // char LPPassManager::ID = 0; -/// LPPassManager manages FPPassManagers and CalLGraphSCCPasses. LPPassManager::LPPassManager(int Depth) : FunctionPass(&ID), PMDataManager(Depth) { @@ -111,17 +110,21 @@ void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) { else LI->addTopLevelLoop(L); + insertLoopIntoQueue(L); +} + +void LPPassManager::insertLoopIntoQueue(Loop *L) { // Insert L into loop queue if (L == CurrentLoop) redoLoop(L); - else if (!ParentLoop) + else if (!L->getParentLoop()) // This is top level loop. LQ.push_front(L); else { - // Insert L after ParentLoop + // Insert L after the parent loop. for (std::deque<Loop *>::iterator I = LQ.begin(), E = LQ.end(); I != E; ++I) { - if (*I == ParentLoop) { + if (*I == L->getParentLoop()) { // deque does not support insert after. ++I; LQ.insert(I, 1, L); @@ -217,41 +220,66 @@ bool LPPassManager::runOnFunction(Function &F) { skipThisLoop = false; redoThisLoop = false; - // Run all passes on current SCC + // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); - dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, ""); + dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, + CurrentLoop->getHeader()->getNameStr()); dumpRequiredSet(P); initializeAnalysisImpl(P); LoopPass *LP = dynamic_cast<LoopPass *>(P); + assert(LP && "Invalid LPPassManager member"); { PassManagerPrettyStackEntry X(LP, *CurrentLoop->getHeader()); - StartPassTimer(P); - assert(LP && "Invalid LPPassManager member"); + Timer *T = StartPassTimer(P); Changed |= LP->runOnLoop(CurrentLoop, *this); - StopPassTimer(P); + StopPassTimer(P, T); } if (Changed) - dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, ""); + dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, + skipThisLoop ? "<deleted>" : + CurrentLoop->getHeader()->getNameStr()); dumpPreservedSet(P); - verifyPreservedAnalysis(LP); + if (!skipThisLoop) { + // Manually check that this loop is still healthy. 
This is done + // instead of relying on LoopInfo::verifyLoop since LoopInfo + // is a function pass and it's really expensive to verify every + // loop in the function every time. That level of checking can be + // enabled with the -verify-loop-info option. + Timer *T = StartPassTimer(LI); + CurrentLoop->verifyLoop(); + StopPassTimer(LI, T); + + // Then call the regular verifyAnalysis functions. + verifyPreservedAnalysis(LP); + } + removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); - removeDeadPasses(P, "", ON_LOOP_MSG); - - // If dominator information is available then verify the info if requested. - verifyDomInfo(*LP, F); + removeDeadPasses(P, + skipThisLoop ? "<deleted>" : + CurrentLoop->getHeader()->getNameStr(), + ON_LOOP_MSG); if (skipThisLoop) // Do not run other passes on this loop. break; } + // If the loop was deleted, release all the loop passes. This frees up + // some memory, and avoids trouble with the pass manager trying to call + // verifyAnalysis on them. + if (skipThisLoop) + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + freePass(P, "<deleted>", ON_LOOP_MSG); + } + // Pop the loop from queue after running all passes. LQ.pop_back(); @@ -272,7 +300,7 @@ bool LPPassManager::runOnFunction(Function &F) { /// Print passes managed by this manager void LPPassManager::dumpPassStructure(unsigned Offset) { - llvm::cerr << std::string(Offset*2, ' ') << "Loop Pass Manager\n"; + errs().indent(Offset*2) << "Loop Pass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); P->dumpPassStructure(Offset + 1); diff --git a/lib/Analysis/MallocHelper.cpp b/lib/Analysis/MallocHelper.cpp new file mode 100644 index 000000000000..89051d178838 --- /dev/null +++ b/lib/Analysis/MallocHelper.cpp @@ -0,0 +1,230 @@ +//===-- MallocHelper.cpp - Functions to identify malloc calls -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions identifies calls to malloc, bitcasts of malloc +// calls, and the types and array sizes associated with them. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MallocHelper.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Analysis/ConstantFolding.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// malloc Call Utility Functions. +// + +/// isMalloc - Returns true if the value is either a malloc call or a +/// bitcast of the result of a malloc call. +bool llvm::isMalloc(const Value* I) { + return extractMallocCall(I) || extractMallocCallFromBitCast(I); +} + +static bool isMallocCall(const CallInst *CI) { + if (!CI) + return false; + + const Module* M = CI->getParent()->getParent()->getParent(); + Function *MallocFunc = M->getFunction("malloc"); + + if (CI->getOperand(0) != MallocFunc) + return false; + + // Check malloc prototype. + // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin + // attribute will exist.
+ const FunctionType *FTy = MallocFunc->getFunctionType(); + if (FTy->getNumParams() != 1) + return false; + if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) { + if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64) + return false; + return true; + } + + return false; +} + +/// extractMallocCall - Returns the corresponding CallInst if the instruction +/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we +/// ignore InvokeInst here. +const CallInst* llvm::extractMallocCall(const Value* I) { + const CallInst *CI = dyn_cast<CallInst>(I); + return (isMallocCall(CI)) ? CI : NULL; +} + +CallInst* llvm::extractMallocCall(Value* I) { + CallInst *CI = dyn_cast<CallInst>(I); + return (isMallocCall(CI)) ? CI : NULL; +} + +static bool isBitCastOfMallocCall(const BitCastInst* BCI) { + if (!BCI) + return false; + + return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0))); +} + +/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the +/// instruction is a bitcast of the result of a malloc call. +CallInst* llvm::extractMallocCallFromBitCast(Value* I) { + BitCastInst *BCI = dyn_cast<BitCastInst>(I); + return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0)) + : NULL; +} + +const CallInst* llvm::extractMallocCallFromBitCast(const Value* I) { + const BitCastInst *BCI = dyn_cast<BitCastInst>(I); + return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0)) + : NULL; +} + +static bool isArrayMallocHelper(const CallInst *CI, LLVMContext &Context, + const TargetData* TD) { + if (!CI) + return false; + + const Type* T = getMallocAllocatedType(CI); + + // We can only identify an array malloc if we know the type of the malloc + // call. + if (!T) return false; + + Value* MallocArg = CI->getOperand(1); + Constant *ElementSize = ConstantExpr::getSizeOf(T); + ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize, + MallocArg->getType()); + Constant *FoldedElementSize = ConstantFoldConstantExpression( + cast<ConstantExpr>(ElementSize), + Context, TD); + + + if (isa<ConstantExpr>(MallocArg)) + return (MallocArg != ElementSize); + + BinaryOperator *BI = dyn_cast<BinaryOperator>(MallocArg); + if (!BI) + return false; + + if (BI->getOpcode() == Instruction::Mul) + // ArraySize * ElementSize + if (BI->getOperand(1) == ElementSize || + (FoldedElementSize && BI->getOperand(1) == FoldedElementSize)) + return true; + + // TODO: Detect case where MallocArg mul has been transformed to shl. + + return false; +} + +/// isArrayMalloc - Returns the corresponding CallInst if the instruction +/// matches the malloc call IR generated by CallInst::CreateMalloc(). This +/// means that it is a malloc call with one bitcast use AND the malloc call's +/// size argument is: +/// 1. a constant not equal to the malloc's allocated type +/// or +/// 2. the result of a multiplication by the malloc's allocated type +/// Otherwise it returns NULL. +/// The unique bitcast is needed to determine the type/size of the array +/// allocation. +CallInst* llvm::isArrayMalloc(Value* I, LLVMContext &Context, + const TargetData* TD) { + CallInst *CI = extractMallocCall(I); + return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL; +} + +const CallInst* llvm::isArrayMalloc(const Value* I, LLVMContext &Context, + const TargetData* TD) { + const CallInst *CI = extractMallocCall(I); + return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL; +} + +/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// This PointerType is the result type of the call's only bitcast use. +/// If there is no unique bitcast use, then return NULL. +const PointerType* llvm::getMallocType(const CallInst* CI) { + assert(isMalloc(CI) && "GetMallocType and not malloc call"); + + const BitCastInst* BCI = NULL; + + // Determine if CallInst has a bitcast use. + for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end(); + UI != E; ) + if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)))) + break; + + // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's + // destination type. + if (BCI && CI->hasOneUse()) + return cast<PointerType>(BCI->getDestTy()); + + // Malloc call was not bitcast, so type is the malloc function's return type. + if (!BCI) + return cast<PointerType>(CI->getType()); + + // Type could not be determined. + return NULL; +} + +/// getMallocAllocatedType - Returns the Type allocated by malloc call. This +/// Type is the result type of the call's only bitcast use. If there is no +/// unique bitcast use, then return NULL. +const Type* llvm::getMallocAllocatedType(const CallInst* CI) { + const PointerType* PT = getMallocType(CI); + return PT ? PT->getElementType() : NULL; +} + +/// isConstantOne - Return true only if val is constant int 1. +static bool isConstantOne(Value *val) { + return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne(); +} + +/// getMallocArraySize - Returns the array size of a malloc call. The array +/// size is computed in 1 of 3 ways: +/// 1. If the element type is of size 1, then array size is the argument to +/// malloc. +/// 2. Else if the malloc's argument is a constant, the array size is that +/// argument divided by the element type's size. +/// 3. Else the malloc argument must be a multiplication and the array size is +/// the first operand of the multiplication. +/// This function returns constant 1 if: +/// 1. The malloc call's allocated type cannot be determined. +/// 2. IR wasn't created by a call to CallInst::CreateMalloc() with a non-NULL +/// ArraySize. +Value* llvm::getMallocArraySize(CallInst* CI, LLVMContext &Context, + const TargetData* TD) { + // Match CreateMalloc's use of constant 1 array-size for non-array mallocs. + if (!isArrayMalloc(CI, Context, TD)) + return ConstantInt::get(CI->getOperand(1)->getType(), 1); + + Value* MallocArg = CI->getOperand(1); + assert(getMallocAllocatedType(CI) && "getMallocArraySize and no type"); + Constant *ElementSize = ConstantExpr::getSizeOf(getMallocAllocatedType(CI)); + ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize, + MallocArg->getType()); + + Constant* CO = dyn_cast<Constant>(MallocArg); + BinaryOperator* BO = dyn_cast<BinaryOperator>(MallocArg); + assert((isConstantOne(ElementSize) || CO || BO) && + "getMallocArraySize and malformed malloc IR"); + + if (isConstantOne(ElementSize)) + return MallocArg; + + if (CO) + return CO->getOperand(0); + + // TODO: Detect case where MallocArg mul has been transformed to shl.
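Editorial aside: the three cases the getMallocArraySize comment enumerates are plain arithmetic on the size argument. A standalone sketch of the recovery, assuming the multiplication is still visible as such (hypothetical representation; the real code pattern-matches LLVM IR instead of using a struct like this):

    #include <cassert>
    #include <cstdint>

    // Hypothetical flattened view of a malloc size argument.
    struct SizeArg {
      bool IsMul;      // true when the argument is 'ArraySize * ElementSize'
      uint64_t MulLHS; // first multiplication operand (the array size)
      uint64_t Value;  // the argument itself, when it is not a multiplication
    };

    // Recover the array size of 'malloc(Arg)', mirroring the three cases
    // enumerated in the comment above.
    static uint64_t mallocArraySize(const SizeArg &Arg, uint64_t ElementSize) {
      if (ElementSize == 1) return Arg.Value;         // case 1: argument is the size
      if (!Arg.IsMul) return Arg.Value / ElementSize; // case 2: constant argument
      return Arg.MulLHS;                              // case 3: first mul operand
    }

    int main() {
      SizeArg Mul = {true, 25, 0};
      assert(mallocArraySize(Mul, 4) == 25);   // malloc(25 * 4) -> 25 elements
      SizeArg Plain = {false, 0, 100};
      assert(mallocArraySize(Plain, 4) == 25); // malloc(100), 4-byte elements
      return 0;
    }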
+ + assert(BO && "getMallocArraySize not constant but not multiplication either"); + return BO->getOperand(0); +} diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 3b2102955f33..d6400757a513 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -16,16 +16,15 @@ #define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Function.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/PredIteratorCache.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetData.h" using namespace llvm; STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); @@ -71,12 +70,10 @@ void MemoryDependenceAnalysis::releaseMemory() { void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive<AliasAnalysis>(); - AU.addRequiredTransitive<TargetData>(); } bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); - TD = &getAnalysis<TargetData>(); if (PredCache == 0) PredCache.reset(new PredIteratorCache()); return false; @@ -112,10 +109,10 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, uint64_t PointerSize = 0; if (StoreInst *S = dyn_cast<StoreInst>(Inst)) { Pointer = S->getPointerOperand(); - PointerSize = TD->getTypeStoreSize(S->getOperand(0)->getType()); + PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType()); } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { Pointer = V->getOperand(0); - PointerSize = TD->getTypeStoreSize(V->getType()); + PointerSize = AA->getTypeStoreSize(V->getType()); } else if (FreeInst *F = dyn_cast<FreeInst>(Inst)) { Pointer = F->getPointerOperand(); @@ -185,7 +182,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // a load depends on another must aliased load from the same value. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { Value *Pointer = LI->getPointerOperand(); - uint64_t PointerSize = TD->getTypeStoreSize(LI->getType()); + uint64_t PointerSize = AA->getTypeStoreSize(LI->getType()); // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = @@ -211,7 +208,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. Value *Pointer = SI->getPointerOperand(); - uint64_t PointerSize = TD->getTypeStoreSize(SI->getOperand(0)->getType()); + uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType()); // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = @@ -228,15 +225,19 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // the allocation, return Def. This means that there is no dependence and // the access can be optimized based on that. For example, a load could // turn into undef. - if (AllocationInst *AI = dyn_cast<AllocationInst>(Inst)) { + // Note: Only determine this to be a malloc if Inst is the malloc call, not + // a subsequent bitcast of the malloc call result. 
There can be stores to + // the malloced memory between the malloc call and its bitcast uses, and we + // need to continue scanning until the malloc call. + if (isa<AllocationInst>(Inst) || extractMallocCall(Inst)) { Value *AccessPtr = MemPtr->getUnderlyingObject(); - if (AccessPtr == AI || - AA->alias(AI, 1, AccessPtr, 1) == AliasAnalysis::MustAlias) - return MemDepResult::getDef(AI); + if (AccessPtr == Inst || + AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); continue; } - + // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) { case AliasAnalysis::NoModRef: @@ -302,7 +303,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); else { MemPtr = SI->getPointerOperand(); - MemSize = TD->getTypeStoreSize(SI->getOperand(0)->getType()); + MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType()); } } else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) { // If this is a volatile load, don't mess around with it. Just return the @@ -311,7 +312,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); else { MemPtr = LI->getPointerOperand(); - MemSize = TD->getTypeStoreSize(LI->getType()); + MemSize = AA->getTypeStoreSize(LI->getType()); } } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { CallSite QueryCS = CallSite::get(QueryInst); @@ -513,7 +514,7 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB, // We know that the pointer value is live into FromBB find the def/clobbers // from presecessors. const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType(); - uint64_t PointeeSize = TD->getTypeStoreSize(EltTy); + uint64_t PointeeSize = AA->getTypeStoreSize(EltTy); // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying @@ -599,6 +600,42 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, return Dep; } +/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain +/// number of elements in the array that are already properly ordered. This is +/// optimized for the case when only a few entries are added. +static void +SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, + unsigned NumSortedEntries) { + switch (Cache.size() - NumSortedEntries) { + case 0: + // done, no new entries. + break; + case 2: { + // Two new entries, insert the last one into place. + MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back(); + Cache.pop_back(); + MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache.begin(), Cache.end()-1, Val); + Cache.insert(Entry, Val); + // FALL THROUGH. + } + case 1: + // One new entry, Just insert the new value at the appropriate position. + if (Cache.size() != 1) { + MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back(); + Cache.pop_back(); + MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache.begin(), Cache.end(), Val); + Cache.insert(Entry, Val); + } + break; + default: + // Added many values, do a full scale sort. 
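The switch above ends in the full-scale sort that follows in the next hunk; the complete pattern, keeping a mostly-sorted vector ordered after a few appends, is worth seeing in isolation. A self-contained sketch, with int entries standing in for NonLocalDepEntry (resort is an illustrative name):

#include <algorithm>
#include <iostream>
#include <vector>

// Keep Cache sorted when only the tail past NumSorted is new: binary-search
// insertion for one or two stragglers, a full sort otherwise.
static void resort(std::vector<int> &Cache, unsigned NumSorted) {
  switch (Cache.size() - NumSorted) {
  case 0:
    break;                                    // nothing appended
  case 2: {
    int Val = Cache.back();                   // place the last entry first...
    Cache.pop_back();
    Cache.insert(std::upper_bound(Cache.begin(), Cache.end() - 1, Val), Val);
    // ...then fall through to place the remaining one.
  }
  case 1: {
    int Val = Cache.back();
    Cache.pop_back();
    Cache.insert(std::upper_bound(Cache.begin(), Cache.end(), Val), Val);
    break;
  }
  default:
    std::sort(Cache.begin(), Cache.end());    // many new entries: just sort
  }
}

int main() {
  std::vector<int> C = {1, 4, 9, 3, 7};       // first three entries sorted
  resort(C, 3);
  for (int v : C) std::cout << v << ' ';      // 1 3 4 7 9
  std::cout << '\n';
}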
+ std::sort(Cache.begin(), Cache.end()); + break; + } +} + /// getNonLocalPointerDepFromBB - Perform a dependency query based on /// pointer/pointeesize starting at the end of StartBB. Add any clobber/def @@ -731,10 +768,22 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // If we do need to do phi translation, then there are a bunch of different // cases, because we have to find a Value* live in the predecessor block. We // know that PtrInst is defined in this block at least. + + // We may have added values to the cache list before this PHI translation. + // If so, we haven't done anything to ensure that the cache remains sorted. + // Sort it now (if needed) so that recursive invocations of + // getNonLocalPointerDepFromBB and other routines that could reuse the cache + // value will only see properly sorted cache arrays. + if (Cache && NumSortedEntries != Cache->size()) { + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); + NumSortedEntries = Cache->size(); + } // If this is directly a PHI node, just use the incoming values for each // pred as the phi translated version. if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) { + Cache = 0; + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { BasicBlock *Pred = *PI; Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred); @@ -759,15 +808,6 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, goto PredTranslationFailure; } - // We may have added values to the cache list before this PHI - // translation. If so, we haven't done anything to ensure that the - // cache remains sorted. Sort it now (if needed) so that recursive - // invocations of getNonLocalPointerDepFromBB that could reuse the cache - // value will only see properly sorted cache arrays. - if (Cache && NumSortedEntries != Cache->size()) - std::sort(Cache->begin(), Cache->end()); - Cache = 0; - // FIXME: it is entirely possible that PHI translating will end up with // the same value. Consider PHI translating something like: // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* @@ -779,7 +819,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, Result, Visited)) goto PredTranslationFailure; } - + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->second; @@ -806,11 +846,8 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->second; NumSortedEntries = Cache->size(); - } else if (NumSortedEntries != Cache->size()) { - std::sort(Cache->begin(), Cache->end()); - NumSortedEntries = Cache->size(); } - + // Since we did phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all @@ -841,33 +878,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, } // Okay, we're done now. If we added new values to the cache, re-sort it. - switch (Cache->size()-NumSortedEntries) { - case 0: - // done, no new entries. - break; - case 2: { - // Two new entries, insert the last one into place. - NonLocalDepEntry Val = Cache->back(); - Cache->pop_back(); - NonLocalDepInfo::iterator Entry = - std::upper_bound(Cache->begin(), Cache->end()-1, Val); - Cache->insert(Entry, Val); - // FALL THROUGH. - } - case 1: - // One new entry, Just insert the new value at the appropriate position. 
- if (Cache->size() != 1) { - NonLocalDepEntry Val = Cache->back(); - Cache->pop_back(); - NonLocalDepInfo::iterator Entry = - std::upper_bound(Cache->begin(), Cache->end(), Val); - Cache->insert(Entry, Val); - } - break; - default: - // Added many values, do a full scale sort. - std::sort(Cache->begin(), Cache->end()); - } + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); DEBUG(AssertSorted(*Cache)); return false; } diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp new file mode 100644 index 000000000000..43f4af36d81c --- /dev/null +++ b/lib/Analysis/PointerTracking.cpp @@ -0,0 +1,265 @@ +//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements tracking of pointer bounds. +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MallocHelper.h" +#include "llvm/Analysis/PointerTracking.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Value.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +char PointerTracking::ID = 0; +PointerTracking::PointerTracking() : FunctionPass(&ID) {} + +bool PointerTracking::runOnFunction(Function &F) { + predCache.clear(); + assert(analyzing.empty()); + FF = &F; + TD = getAnalysisIfAvailable<TargetData>(); + SE = &getAnalysis<ScalarEvolution>(); + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTree>(); + return false; +} + +void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredTransitive<DominatorTree>(); + AU.addRequiredTransitive<LoopInfo>(); + AU.addRequiredTransitive<ScalarEvolution>(); + AU.setPreservesAll(); +} + +bool PointerTracking::doInitialization(Module &M) { + const Type *PTy = Type::getInt8PtrTy(M.getContext()); + + // Find calloc(i64, i64) or calloc(i32, i32). + callocFunc = M.getFunction("calloc"); + if (callocFunc) { + const FunctionType *Ty = callocFunc->getFunctionType(); + + std::vector<const Type*> args, args2; + args.push_back(Type::getInt64Ty(M.getContext())); + args.push_back(Type::getInt64Ty(M.getContext())); + args2.push_back(Type::getInt32Ty(M.getContext())); + args2.push_back(Type::getInt32Ty(M.getContext())); + const FunctionType *Calloc1Type = + FunctionType::get(PTy, args, false); + const FunctionType *Calloc2Type = + FunctionType::get(PTy, args2, false); + if (Ty != Calloc1Type && Ty != Calloc2Type) + callocFunc = 0; // Give up + } + + // Find realloc(i8*, i64) or realloc(i8*, i32). 
+ reallocFunc = M.getFunction("realloc"); + if (reallocFunc) { + const FunctionType *Ty = reallocFunc->getFunctionType(); + std::vector<const Type*> args, args2; + args.push_back(PTy); + args.push_back(Type::getInt64Ty(M.getContext())); + args2.push_back(PTy); + args2.push_back(Type::getInt32Ty(M.getContext())); + + const FunctionType *Realloc1Type = + FunctionType::get(PTy, args, false); + const FunctionType *Realloc2Type = + FunctionType::get(PTy, args2, false); + if (Ty != Realloc1Type && Ty != Realloc2Type) + reallocFunc = 0; // Give up + } + return false; +} + +// Calculates the number of elements allocated for pointer P, +// the type of the element is stored in Ty. +const SCEV *PointerTracking::computeAllocationCount(Value *P, + const Type *&Ty) const { + Value *V = P->stripPointerCasts(); + if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) { + Value *arraySize = AI->getArraySize(); + Ty = AI->getAllocatedType(); + // arraySize elements of type Ty. + return SE->getSCEV(arraySize); + } + + if (CallInst *CI = extractMallocCall(V)) { + Value *arraySize = getMallocArraySize(CI, P->getContext(), TD); + Ty = getMallocAllocatedType(CI); + if (!Ty || !arraySize) return SE->getCouldNotCompute(); + // arraySize elements of type Ty. + return SE->getSCEV(arraySize); + } + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (GV->hasDefinitiveInitializer()) { + Constant *C = GV->getInitializer(); + if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { + Ty = ATy->getElementType(); + return SE->getConstant(Type::getInt32Ty(P->getContext()), + ATy->getNumElements()); + } + } + Ty = GV->getType(); + return SE->getConstant(Type::getInt32Ty(P->getContext()), 1); + //TODO: implement more tracking for globals + } + + if (CallInst *CI = dyn_cast<CallInst>(V)) { + CallSite CS(CI); + Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + const Loop *L = LI->getLoopFor(CI->getParent()); + if (F == callocFunc) { + Ty = Type::getInt8Ty(P->getContext()); + // calloc allocates arg0*arg1 bytes. + return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)), + SE->getSCEV(CS.getArgument(1))), + L); + } else if (F == reallocFunc) { + Ty = Type::getInt8Ty(P->getContext()); + // realloc allocates arg1 bytes. + return SE->getSCEVAtScope(CS.getArgument(1), L); + } + } + + return SE->getCouldNotCompute(); +} + +// Calculates the number of elements of type Ty allocated for P. +const SCEV *PointerTracking::computeAllocationCountForType(Value *P, + const Type *Ty) + const { + const Type *elementTy; + const SCEV *Count = computeAllocationCount(P, elementTy); + if (isa<SCEVCouldNotCompute>(Count)) + return Count; + if (elementTy == Ty) + return Count; + + if (!TD) // need TargetData from this point forward + return SE->getCouldNotCompute(); + + uint64_t elementSize = TD->getTypeAllocSize(elementTy); + uint64_t wantSize = TD->getTypeAllocSize(Ty); + if (elementSize == wantSize) + return Count; + if (elementSize % wantSize) //fractional counts not possible + return SE->getCouldNotCompute(); + return SE->getMulExpr(Count, SE->getConstant(Count->getType(), + elementSize/wantSize)); +} + +const SCEV *PointerTracking::getAllocationElementCount(Value *V) const { + // We only deal with pointers. 
+ const PointerType *PTy = cast<PointerType>(V->getType()); + return computeAllocationCountForType(V, PTy->getElementType()); +} + +const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const { + return computeAllocationCountForType(V, Type::getInt8Ty(V->getContext())); +} + +// Helper for isLoopGuardedBy that checks the swapped and inverted predicate too +enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L, + Predicate Pred, + const SCEV *A, + const SCEV *B) const { + if (SE->isLoopGuardedByCond(L, Pred, A, B)) + return AlwaysTrue; + Pred = ICmpInst::getSwappedPredicate(Pred); + if (SE->isLoopGuardedByCond(L, Pred, B, A)) + return AlwaysTrue; + + Pred = ICmpInst::getInversePredicate(Pred); + if (SE->isLoopGuardedByCond(L, Pred, B, A)) + return AlwaysFalse; + Pred = ICmpInst::getSwappedPredicate(Pred); + if (SE->isLoopGuardedByCond(L, Pred, A, B)) + return AlwaysTrue; + return Unknown; +} + +enum SolverResult PointerTracking::checkLimits(const SCEV *Offset, + const SCEV *Limit, + BasicBlock *BB) +{ + //FIXME: merge implementation + return Unknown; +} + +void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base, + const SCEV *&Limit, + const SCEV *&Offset) const +{ + Pointer = Pointer->stripPointerCasts(); + Base = Pointer->getUnderlyingObject(); + Limit = getAllocationSizeInBytes(Base); + if (isa<SCEVCouldNotCompute>(Limit)) { + Base = 0; + Offset = Limit; + return; + } + + Offset = SE->getMinusSCEV(SE->getSCEV(Pointer), SE->getSCEV(Base)); + if (isa<SCEVCouldNotCompute>(Offset)) { + Base = 0; + Limit = Offset; + } +} + +void PointerTracking::print(raw_ostream &OS, const Module* M) const { + // Calling some PT methods may cause caches to be updated, however + // this should be safe for the same reason its safe for SCEV. 
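The isLoopGuardedBy helper above leans on two ICmpInst identities: a predicate holds for (A, B) iff its swapped form holds for (B, A), and proving the inverse predicate proves the original AlwaysFalse. By that reading, the routine's final branch arguably ought to return AlwaysFalse rather than AlwaysTrue, since it tests the swapped inverse. A toy model of the intended cascade, with a one-fact oracle standing in for ScalarEvolution (guardedBy, swapped, inverted are illustrative names):

#include <iostream>

enum Pred { LT, GT, LE, GE };
enum SolverResult { AlwaysTrue, AlwaysFalse, Unknown };

// Swapping flips the operand order; inverting negates the truth value.
static Pred swapped(Pred P)  { switch (P) { case LT: return GT; case GT: return LT;
                                            case LE: return GE; default: return LE; } }
static Pred inverted(Pred P) { switch (P) { case LT: return GE; case GT: return LE;
                                            case LE: return GT; default: return LT; } }

// Stand-in for SE->isLoopGuardedByCond: only "knows" the single fact a < b.
static bool guardedBy(Pred P, char A, char B) {
  return P == LT && A == 'a' && B == 'b';
}

// Try the predicate, its swapped form, then both forms of the inverse.
static SolverResult solve(Pred P, char A, char B) {
  if (guardedBy(P, A, B)) return AlwaysTrue;
  if (guardedBy(swapped(P), B, A)) return AlwaysTrue;
  if (guardedBy(inverted(P), A, B)) return AlwaysFalse;
  if (guardedBy(swapped(inverted(P)), B, A)) return AlwaysFalse;
  return Unknown;
}

int main() {
  std::cout << solve(LT, 'a', 'b') << " "    // 0: AlwaysTrue
            << solve(GE, 'a', 'b') << " "    // 1: AlwaysFalse, inverse is a<b
            << solve(LT, 'b', 'a') << "\n";  // 2: Unknown to this oracle
}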
+ PointerTracking &PT = *const_cast<PointerTracking*>(this); + for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) { + if (!isa<PointerType>(I->getType())) + continue; + Value *Base; + const SCEV *Limit, *Offset; + getPointerOffset(&*I, Base, Limit, Offset); + if (!Base) + continue; + + if (Base == &*I) { + const SCEV *S = getAllocationElementCount(Base); + OS << *Base << " ==> " << *S << " elements, "; + OS << *Limit << " bytes allocated\n"; + continue; + } + OS << &*I << " -- base: " << *Base; + OS << " offset: " << *Offset; + + enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent()); + switch (res) { + case AlwaysTrue: + OS << " always safe\n"; + break; + case AlwaysFalse: + OS << " always unsafe\n"; + break; + case Unknown: + OS << " <<unknown>>\n"; + break; + } + } +} + +static RegisterPass<PointerTracking> X("pointertracking", + "Track pointer bounds", false, true); diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index 4853c2ac87b7..69d6b47bbee4 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -33,15 +33,19 @@ F("postdomtree", "Post-Dominator Tree Construction", true, true); bool PostDominatorTree::runOnFunction(Function &F) { DT->recalculate(F); - DEBUG(DT->dump()); + DEBUG(DT->print(errs())); return false; } -PostDominatorTree::~PostDominatorTree() -{ +PostDominatorTree::~PostDominatorTree() { delete DT; } +void PostDominatorTree::print(raw_ostream &OS, const Module *) const { + DT->print(OS); +} + + FunctionPass* llvm::createPostDomTree() { return new PostDominatorTree(); } diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp new file mode 100644 index 000000000000..c585c1dced04 --- /dev/null +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -0,0 +1,310 @@ +//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a concrete implementation of profiling information that +// estimates the profiling information in a very crude and unimaginative way. 
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-estimator"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+using namespace llvm;
+
+static cl::opt<double>
+LoopWeight(
+    "profile-estimator-loop-weight", cl::init(10),
+    cl::value_desc("loop-weight"),
+    cl::desc("Number of loop executions used for profile-estimator")
+);
+
+namespace {
+  class VISIBILITY_HIDDEN ProfileEstimatorPass :
+      public FunctionPass, public ProfileInfo {
+    double ExecCount;
+    LoopInfo *LI;
+    std::set<BasicBlock*> BBToVisit;
+    std::map<Loop*,double> LoopExitWeights;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    explicit ProfileEstimatorPass(const double execcount = 0)
+      : FunctionPass(&ID), ExecCount(execcount) {
+      if (execcount == 0) ExecCount = LoopWeight;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      AU.addRequired<LoopInfo>();
+    }
+
+    virtual const char *getPassName() const {
+      return "Profiling information estimator";
+    }
+
+    /// run - Estimate the profile information for the specified function.
+    virtual bool runOnFunction(Function &F);
+
+    virtual void recurseBasicBlock(BasicBlock *BB);
+
+    void inline printEdgeWeight(Edge);
+  };
+} // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+static RegisterPass<ProfileEstimatorPass>
+X("profile-estimator", "Estimate profiling information", false, true);
+
+static RegisterAnalysisGroup<ProfileInfo> Y(X);
+
+namespace llvm {
+  const PassInfo *ProfileEstimatorPassID = &X;
+
+  FunctionPass *createProfileEstimatorPass() {
+    return new ProfileEstimatorPass();
+  }
+
+  /// createProfileEstimatorPass - This function returns a Pass that estimates
+  /// profiling information using the given loop execution count.
+  Pass *createProfileEstimatorPass(const unsigned execcount) {
+    return new ProfileEstimatorPass(execcount);
+  }
+}
+
+static double ignoreMissing(double w) {
+  if (w == ProfileInfo::MissingValue) return 0;
+  return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+  DEBUG(errs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+  DEBUG(errs() << "-- Weight of Edge " << E << ":"
+               << format("%g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weights of the incoming edges must be equal to the block weight, which must
+// in turn be equal to the sum of the weights of the outgoing edges.
+// Since the flow of a block is determined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again,
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+  // Break the recursion if this BasicBlock was already visited.
+  if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+  // Read the LoopInfo for this block.
+  bool BBisHeader = LI->isLoopHeader(BB);
+  Loop* BBLoop = LI->getLoopFor(BB);
+
+  // To get the block weight, read all incoming edges.
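Concretely, the estimator applies two rules: flow conservation (incoming weights sum to the block weight, which is then redistributed over the outgoing edges) and a fixed LoopWeight multiplier for loop headers. A back-of-the-envelope model on a diamond CFG, in plain C++ with no LLVM types (all names illustrative):

#include <iostream>

int main() {
  // entry -> {then, else} -> exit; for a fresh block both out-edges are
  // unweighted, so the estimator gives each an even share.
  double entry = 1.0;              // the (0,entry) edge seeds the flow
  double thenW = entry / 2;
  double elseW = entry / 2;
  double exitW = thenW + elseW;    // flow condition: the shares sum up again
  std::cout << exitW << "\n";      // 1: what goes in must come out

  // A loop header multiplies its incoming flow by (LoopWeight + 1):
  const double LoopWeight = 10;    // cl::opt profile-estimator-loop-weight
  double header = entry * (LoopWeight + 1);
  std::cout << header << "\n";     // 11: one entry plus ten back-edge visits
}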
+  double BBWeight = 0;
+  std::set<BasicBlock*> ProcessedPreds;
+  for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+        bbi != bbe; ++bbi ) {
+    // If this block was not considered already, add weight.
+    Edge edge = getEdge(*bbi,BB);
+    double w = getEdgeWeight(edge);
+    if (ProcessedPreds.insert(*bbi).second) {
+      BBWeight += ignoreMissing(w);
+    }
+    // If this block is a loop header and the predecessor is contained in this
+    // loop, the edge is a backedge: continue and do not check whether the
+    // value is valid.
+    if (BBisHeader && BBLoop->contains(*bbi)) {
+      printEdgeError(edge, "but is backedge, continuing");
+      continue;
+    }
+    // If the edge's value is missing (and this is no loop header, and this is
+    // no backedge) return; this block cannot be estimated yet.
+    if (w == MissingValue) {
+      printEdgeError(edge, "returning");
+      return;
+    }
+  }
+  if (getExecutionCount(BB) != MissingValue) {
+    BBWeight = getExecutionCount(BB);
+  }
+
+  // Fetch all necessary information for current block.
+  SmallVector<Edge, 8> ExitEdges;
+  SmallVector<Edge, 8> Edges;
+  if (BBLoop) {
+    BBLoop->getExitEdges(ExitEdges);
+  }
+
+  // If this is a loop header, consider the following:
+  // Exactly the flow that is entering this block must exit this block too. So
+  // do the following:
+  // *) get all the exit edges, read the flow that is already leaving this
+  // loop, remember the edges that do not have any flow on them right now.
+  // (The edges that already have flow on them are most likely exiting edges of
+  // other loops; do not touch those flows because the previously calculated
+  // loop headers would not be exact anymore.)
+  // *) In case there is not a single exiting edge left, create one at the loop
+  // latch to prevent the flow from building up in the loop.
+  // *) Take the flow that is not leaving the loop already and distribute it on
+  // the remaining exiting edges.
+  // (This ensures that all flow that enters the loop also leaves it.)
+  // *) Increase the flow into the loop by increasing the weight of this block.
+  // There is at least one incoming backedge that will bring us this flow later
+  // on. (So that the flow condition in this node is valid again.)
+  if (BBisHeader) {
+    double incoming = BBWeight;
+    // Subtract the flow leaving the loop.
+    std::set<Edge> ProcessedExits;
+    for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+         ee = ExitEdges.end(); ei != ee; ++ei) {
+      if (ProcessedExits.insert(*ei).second) {
+        double w = getEdgeWeight(*ei);
+        if (w == MissingValue) {
+          Edges.push_back(*ei);
+        } else {
+          incoming -= w;
+        }
+      }
+    }
+    // If no exit edges, create one:
+    if (Edges.size() == 0) {
+      BasicBlock *Latch = BBLoop->getLoopLatch();
+      if (Latch) {
+        Edge edge = getEdge(Latch,0);
+        EdgeInformation[BB->getParent()][edge] = BBWeight;
+        printEdgeWeight(edge);
+        edge = getEdge(Latch, BB);
+        EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+        printEdgeWeight(edge);
+      }
+    }
+    // Distribute remaining weight onto the exit edges.
+    for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+         ei != ee; ++ei) {
+      EdgeInformation[BB->getParent()][*ei] += incoming/Edges.size();
+      printEdgeWeight(*ei);
+    }
+    // Increase flow into the loop.
+    BBWeight *= (ExecCount+1);
+  }
+
+  BlockInformation[BB->getParent()][BB] = BBWeight;
+  // Up until now we considered only the loop exiting edges; now we have a
+  // definite block weight and must distribute it onto the outgoing edges.
+ // Since there may be already flow attached to some of the edges, read this + // flow first and remember the edges that have still now flow attached. + Edges.clear(); + std::set<BasicBlock*> ProcessedSuccs; + + succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + // Also check for (BB,0) edges that may already contain some flow. (But only + // in case there are no successors.) + if (bbi == bbe) { + Edge edge = getEdge(BB,0); + EdgeInformation[BB->getParent()][edge] = BBWeight; + printEdgeWeight(edge); + } + for ( ; bbi != bbe; ++bbi ) { + if (ProcessedSuccs.insert(*bbi).second) { + Edge edge = getEdge(BB,*bbi); + double w = getEdgeWeight(edge); + if (w != MissingValue) { + BBWeight -= getEdgeWeight(edge); + } else { + Edges.push_back(edge); + } + } + } + + // Finally we know what flow is still not leaving the block, distribute this + // flow onto the empty edges. + for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); + ei != ee; ++ei) { + EdgeInformation[BB->getParent()][*ei] += BBWeight/Edges.size(); + printEdgeWeight(*ei); + } + + // This block is visited, mark this before the recursion. + BBToVisit.erase(BB); + + // Recurse into successors. + for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi) { + recurseBasicBlock(*bbi); + } +} + +bool ProfileEstimatorPass::runOnFunction(Function &F) { + if (F.isDeclaration()) return false; + + // Fetch LoopInfo and clear ProfileInfo for this function. + LI = &getAnalysis<LoopInfo>(); + FunctionInformation.erase(&F); + BlockInformation[&F].clear(); + EdgeInformation[&F].clear(); + + // Mark all blocks as to visit. + for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi) + BBToVisit.insert(bi); + + DEBUG(errs() << "Working on function " << F.getNameStr() << "\n"); + + // Since the entry block is the first one and has no predecessors, the edge + // (0,entry) is inserted with the starting weight of 1. + BasicBlock *entry = &F.getEntryBlock(); + BlockInformation[&F][entry] = 1; + Edge edge = getEdge(0,entry); + EdgeInformation[&F][edge] = 1; + printEdgeWeight(edge); + + // Since recurseBasicBlock() maybe returns with a block which was not fully + // estimated, use recurseBasicBlock() until everything is calculated. + recurseBasicBlock(entry); + while (BBToVisit.size() > 0) { + // Remember number of open blocks, this is later used to check if progress + // was made. + unsigned size = BBToVisit.size(); + + // Try to calculate all blocks in turn. + for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(), + be = BBToVisit.end(); bi != be; ++bi) { + recurseBasicBlock(*bi); + // If at least one block was finished, break because iterator may be + // invalid. + if (BBToVisit.size() < size) break; + } + + // If there was not a single block resovled, make some assumptions. + if (BBToVisit.size() == size) { + BasicBlock *BB = *(BBToVisit.begin()); + // Since this BB was not calculated because of missing incoming edges, + // set these edges to zero. 
+ for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi) { + Edge e = getEdge(*bbi,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + EdgeInformation[&F][e] = 0; + DEBUG(errs() << "Assuming edge weight: "); + printEdgeWeight(e); + } + } + } + } + + return false; +} diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index a0965b66da81..9efdd23081c4 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -17,6 +17,9 @@ #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" #include <set> using namespace llvm; @@ -26,56 +29,149 @@ char ProfileInfo::ID = 0; ProfileInfo::~ProfileInfo() {} -unsigned ProfileInfo::getExecutionCount(BasicBlock *BB) const { - pred_iterator PI = pred_begin(BB), PE = pred_end(BB); +const double ProfileInfo::MissingValue = -1; + +double ProfileInfo::getExecutionCount(const BasicBlock *BB) { + std::map<const Function*, BlockCounts>::iterator J = + BlockInformation.find(BB->getParent()); + if (J != BlockInformation.end()) { + BlockCounts::iterator I = J->second.find(BB); + if (I != J->second.end()) + return I->second; + } + + pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB); // Are there zero predecessors of this block? if (PI == PE) { // If this is the entry block, look for the Null -> Entry edge. if (BB == &BB->getParent()->getEntryBlock()) - return getEdgeWeight(0, BB); + return getEdgeWeight(getEdge(0, BB)); else return 0; // Otherwise, this is a dead block. } // Otherwise, if there are predecessors, the execution count of this block is - // the sum of the edge frequencies from the incoming edges. Note that if - // there are multiple edges from a predecessor to this block that we don't - // want to count its weight multiple times. For this reason, we keep track of - // the predecessors we've seen and only count them if we haven't run into them - // yet. - // - // We don't want to create an std::set unless we are dealing with a block that - // has a LARGE number of in-edges. Handle the common case of having only a - // few in-edges with special code. - // - BasicBlock *FirstPred = *PI; - unsigned Count = getEdgeWeight(FirstPred, BB); - ++PI; - if (PI == PE) return Count; // Quick exit for single predecessor blocks - - BasicBlock *SecondPred = *PI; - if (SecondPred != FirstPred) Count += getEdgeWeight(SecondPred, BB); - ++PI; - if (PI == PE) return Count; // Quick exit for two predecessor blocks - - BasicBlock *ThirdPred = *PI; - if (ThirdPred != FirstPred && ThirdPred != SecondPred) - Count += getEdgeWeight(ThirdPred, BB); - ++PI; - if (PI == PE) return Count; // Quick exit for three predecessor blocks - - std::set<BasicBlock*> ProcessedPreds; - ProcessedPreds.insert(FirstPred); - ProcessedPreds.insert(SecondPred); - ProcessedPreds.insert(ThirdPred); + // the sum of the edge frequencies from the incoming edges. 
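The new summation that follows treats MissingValue (-1) as an absorbing element: one unknown in-edge makes the whole block count unknown, and duplicate edges from the same predecessor are counted once. A minimal standalone version of that fold (blockCount is an illustrative name; predecessors are reduced to integer ids):

#include <iostream>
#include <set>
#include <utility>
#include <vector>

static const double MissingValue = -1;

// Sum in-edge weights over *unique* predecessors; any missing weight
// poisons the whole result.
static double blockCount(const std::vector<std::pair<int, double> > &InEdges) {
  std::set<int> Seen;    // predecessor ids already counted
  double Count = 0;
  for (const auto &E : InEdges)
    if (Seen.insert(E.first).second) {
      if (E.second == MissingValue) return MissingValue;
      Count += E.second;
    }
  return Count;
}

int main() {
  std::cout << blockCount({{1, 2.0}, {1, 2.0}, {2, 3.0}}) << "\n"; // 5, dup ignored
  std::cout << blockCount({{1, 2.0}, {2, MissingValue}}) << "\n";  // -1
}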
+  std::set<const BasicBlock*> ProcessedPreds;
+  double Count = 0;
   for (; PI != PE; ++PI)
-    if (ProcessedPreds.insert(*PI).second)
-      Count += getEdgeWeight(*PI, BB);
+    if (ProcessedPreds.insert(*PI).second) {
+      double w = getEdgeWeight(getEdge(*PI, BB));
+      if (w == MissingValue) {
+        Count = MissingValue;
+        break;
+      }
+      Count += w;
+    }
+
+  if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
+  return Count;
+}
+
+double ProfileInfo::getExecutionCount(const Function *F) {
+  std::map<const Function*, double>::iterator J =
+    FunctionInformation.find(F);
+  if (J != FunctionInformation.end())
+    return J->second;
+
+  // isDeclaration() is checked here and not at the start of the function to
+  // allow functions without a body still to have an execution count.
+  if (F->isDeclaration()) return MissingValue;
+
+  double Count = getExecutionCount(&F->getEntryBlock());
+  if (Count != MissingValue) FunctionInformation[F] = Count;
   return Count;
 }
 
+/// Replaces all occurrences of RmBB in the ProfileInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces the
+/// occurrences of RmBB with DestBB.
+void ProfileInfo::replaceAllUses(const BasicBlock *RmBB,
+                                 const BasicBlock *DestBB) {
+  DEBUG(errs() << "Replacing " << RmBB->getNameStr()
+               << " with " << DestBB->getNameStr() << "\n");
+  const Function *F = DestBB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  for (EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+       I != E; ++I) {
+    Edge e = I->first;
+    Edge newedge; bool foundedge = false;
+    if (e.first == RmBB) {
+      newedge = getEdge(DestBB, e.second);
+      foundedge = true;
+    }
+    if (e.second == RmBB) {
+      newedge = getEdge(e.first, DestBB);
+      foundedge = true;
+    }
+    if (foundedge) {
+      double w = getEdgeWeight(e);
+      EdgeInformation[F][newedge] = w;
+      DEBUG(errs() << "Replacing " << e << " with " << newedge << "\n");
+      J->second.erase(e);
+    }
+  }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since it is possible that there is more than one edge in the CFG from
+/// FirstBB to SecondBB, it is necessary to redirect the flow proportionally.
+void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
+                            const BasicBlock *SecondBB,
+                            const BasicBlock *NewBB,
+                            bool MergeIdenticalEdges) {
+  const Function *F = FirstBB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  // Generate edges and read current weight.
+  Edge e = getEdge(FirstBB, SecondBB);
+  Edge n1 = getEdge(FirstBB, NewBB);
+  Edge n2 = getEdge(NewBB, SecondBB);
+  EdgeWeights &ECs = J->second;
+  double w = ECs[e];
+
+  int succ_count = 0;
+  if (!MergeIdenticalEdges) {
+    // First count the edges from FirstBB to SecondBB; if there is more than
+    // one, only slice out a proportional part for NewBB.
+    for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+        BBI != BBE; ++BBI) {
+      if (*BBI == SecondBB) succ_count++;
+    }
+    // When the NewBB is completely new, increment the count by one so that
+    // the counts are properly distributed.
+    if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+  } else {
+    // When the edges are merged anyway, then redirect all flow.
+    succ_count = 1;
+  }
+  // We know now how many edges there are from FirstBB to SecondBB, reroute a
+  // proportional part of the edge weight over NewBB.
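The arithmetic that follows is easy to sanity-check in isolation: with k parallel (FirstBB, SecondBB) edges, a 1/k share of the old weight w is rerouted through NewBB, and the original edge keeps the remainder, vanishing entirely when k == 1. A sketch of just that bookkeeping (splitShare is an illustrative name):

#include <iostream>

// Reroute a 1/k share of edge weight w over a new block, as splitEdge does.
// Returns the weight left on the original edge; the rerouted share lands on
// (First,New), (New,Second) and on NewBB itself.
static double splitShare(double w, int k, double &reroutedShare) {
  reroutedShare = w / k;
  return (k == 1) ? 0.0 : w - reroutedShare;   // k == 1: old edge is erased
}

int main() {
  double share;
  double rest = splitShare(6.0, 3, share);     // three parallel edges
  std::cout << share << " " << rest << "\n";   // 2 on the new path, 4 remain
  rest = splitShare(6.0, 1, share);
  std::cout << share << " " << rest << "\n";   // 6 rerouted, old edge gone
}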
+ double neww = w / succ_count; + ECs[n1] += neww; + ECs[n2] += neww; + BlockInformation[F][NewBB] += neww; + if (succ_count == 1) { + ECs.erase(e); + } else { + ECs[e] -= neww; + } +} + +raw_ostream& llvm::operator<<(raw_ostream &O, ProfileInfo::Edge E) { + O << "("; + O << (E.first ? E.first->getNameStr() : "0"); + O << ","; + O << (E.second ? E.second->getNameStr() : "0"); + return O << ")"; +} //===----------------------------------------------------------------------===// // NoProfile ProfileInfo implementation diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp index adb2bdc42549..25481b2ee671 100644 --- a/lib/Analysis/ProfileInfoLoader.cpp +++ b/lib/Analysis/ProfileInfoLoader.cpp @@ -16,7 +16,7 @@ #include "llvm/Analysis/ProfileInfoTypes.h" #include "llvm/Module.h" #include "llvm/InstrTypes.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include <cstdio> #include <cstdlib> #include <map> @@ -26,10 +26,17 @@ using namespace llvm; // static inline unsigned ByteSwap(unsigned Var, bool Really) { if (!Really) return Var; - return ((Var & (255<< 0)) << 24) | - ((Var & (255<< 8)) << 8) | - ((Var & (255<<16)) >> 8) | - ((Var & (255<<24)) >> 24); + return ((Var & (255U<< 0U)) << 24U) | + ((Var & (255U<< 8U)) << 8U) | + ((Var & (255U<<16U)) >> 8U) | + ((Var & (255U<<24U)) >> 24U); +} + +static unsigned AddCounts(unsigned A, unsigned B) { + // If either value is undefined, use the other. + if (A == ProfileInfoLoader::Uncounted) return B; + if (B == ProfileInfoLoader::Uncounted) return A; + return A + B; } static void ReadProfilingBlock(const char *ToolName, FILE *F, @@ -38,7 +45,7 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F, // Read the number of entries... unsigned NumEntries; if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) { - cerr << ToolName << ": data packet truncated!\n"; + errs() << ToolName << ": data packet truncated!\n"; perror(0); exit(1); } @@ -49,35 +56,41 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F, // Read in the block of data... if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) { - cerr << ToolName << ": data packet truncated!\n"; + errs() << ToolName << ": data packet truncated!\n"; perror(0); exit(1); } - // Make sure we have enough space... + // Make sure we have enough space... The space is initialised to -1 to + // facitiltate the loading of missing values for OptimalEdgeProfiling. if (Data.size() < NumEntries) - Data.resize(NumEntries); + Data.resize(NumEntries, ProfileInfoLoader::Uncounted); // Accumulate the data we just read into the data. if (!ShouldByteSwap) { - for (unsigned i = 0; i != NumEntries; ++i) - Data[i] += TempSpace[i]; + for (unsigned i = 0; i != NumEntries; ++i) { + Data[i] = AddCounts(TempSpace[i], Data[i]); + } } else { - for (unsigned i = 0; i != NumEntries; ++i) - Data[i] += ByteSwap(TempSpace[i], true); + for (unsigned i = 0; i != NumEntries; ++i) { + Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]); + } } } +const unsigned ProfileInfoLoader::Uncounted = ~0U; + // ProfileInfoLoader ctor - Read the specified profiling data file, exiting the // program if the file is invalid or broken. 
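Both loader helpers above are small enough to verify standalone: the byte swap is the usual 32-bit endian flip, and AddCounts treats ~0U as an "uncounted" sentinel so that merging two profile runs never turns a missing counter into a wrapped sum. A self-contained check (byteSwap and addCounts mirror the patch's helpers under illustrative names):

#include <cstdio>

static const unsigned Uncounted = ~0U;

static unsigned byteSwap(unsigned V) {           // 32-bit endian flip
  return ((V & 0x000000FFU) << 24) | ((V & 0x0000FF00U) << 8) |
         ((V & 0x00FF0000U) >> 8)  | ((V & 0xFF000000U) >> 24);
}

static unsigned addCounts(unsigned A, unsigned B) {
  if (A == Uncounted) return B;                  // missing: take the other run
  if (B == Uncounted) return A;
  return A + B;                                  // both present: accumulate
}

int main() {
  std::printf("%08x\n", byteSwap(0x12345678));   // 78563412
  std::printf("%u\n", addCounts(Uncounted, 7));  // 7, not a wrapped sum
  std::printf("%u\n", addCounts(3, 7));          // 10
}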
// ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, const std::string &Filename, - Module &TheModule) : - M(TheModule), Warned(false) { - FILE *F = fopen(Filename.c_str(), "r"); + Module &TheModule) : + Filename(Filename), + M(TheModule), Warned(false) { + FILE *F = fopen(Filename.c_str(), "rb"); if (F == 0) { - cerr << ToolName << ": Error opening '" << Filename << "': "; + errs() << ToolName << ": Error opening '" << Filename << "': "; perror(0); exit(1); } @@ -95,7 +108,7 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, case ArgumentInfo: { unsigned ArgLength; if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) { - cerr << ToolName << ": arguments packet truncated!\n"; + errs() << ToolName << ": arguments packet truncated!\n"; perror(0); exit(1); } @@ -106,7 +119,7 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, if (ArgLength) if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) { - cerr << ToolName << ": arguments packet truncated!\n"; + errs() << ToolName << ": arguments packet truncated!\n"; perror(0); exit(1); } @@ -126,12 +139,16 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); break; + case OptEdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts); + break; + case BBTraceInfo: ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace); break; default: - cerr << ToolName << ": Unknown packet type #" << PacketType << "!\n"; + errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n"; exit(1); } } @@ -139,139 +156,3 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, fclose(F); } - -// getFunctionCounts - This method is used by consumers of function counting -// information. If we do not directly have function count information, we -// compute it from other, more refined, types of profile information. -// -void ProfileInfoLoader::getFunctionCounts(std::vector<std::pair<Function*, - unsigned> > &Counts) { - if (FunctionCounts.empty()) { - if (hasAccurateBlockCounts()) { - // Synthesize function frequency information from the number of times - // their entry blocks were executed. - std::vector<std::pair<BasicBlock*, unsigned> > BlockCounts; - getBlockCounts(BlockCounts); - - for (unsigned i = 0, e = BlockCounts.size(); i != e; ++i) - if (&BlockCounts[i].first->getParent()->getEntryBlock() == - BlockCounts[i].first) - Counts.push_back(std::make_pair(BlockCounts[i].first->getParent(), - BlockCounts[i].second)); - } else { - cerr << "Function counts are not available!\n"; - } - return; - } - - unsigned Counter = 0; - for (Module::iterator I = M.begin(), E = M.end(); - I != E && Counter != FunctionCounts.size(); ++I) - if (!I->isDeclaration()) - Counts.push_back(std::make_pair(I, FunctionCounts[Counter++])); -} - -// getBlockCounts - This method is used by consumers of block counting -// information. If we do not directly have block count information, we -// compute it from other, more refined, types of profile information. -// -void ProfileInfoLoader::getBlockCounts(std::vector<std::pair<BasicBlock*, - unsigned> > &Counts) { - if (BlockCounts.empty()) { - if (hasAccurateEdgeCounts()) { - // Synthesize block count information from edge frequency information. - // The block execution frequency is equal to the sum of the execution - // frequency of all outgoing edges from a block. - // - // If a block has no successors, this will not be correct, so we have to - // special case it. 
:( - std::vector<std::pair<Edge, unsigned> > EdgeCounts; - getEdgeCounts(EdgeCounts); - - std::map<BasicBlock*, unsigned> InEdgeFreqs; - - BasicBlock *LastBlock = 0; - TerminatorInst *TI = 0; - for (unsigned i = 0, e = EdgeCounts.size(); i != e; ++i) { - if (EdgeCounts[i].first.first != LastBlock) { - LastBlock = EdgeCounts[i].first.first; - TI = LastBlock->getTerminator(); - Counts.push_back(std::make_pair(LastBlock, 0)); - } - Counts.back().second += EdgeCounts[i].second; - unsigned SuccNum = EdgeCounts[i].first.second; - if (SuccNum >= TI->getNumSuccessors()) { - if (!Warned) { - cerr << "WARNING: profile info doesn't seem to match" - << " the program!\n"; - Warned = true; - } - } else { - // If this successor has no successors of its own, we will never - // compute an execution count for that block. Remember the incoming - // edge frequencies to add later. - BasicBlock *Succ = TI->getSuccessor(SuccNum); - if (Succ->getTerminator()->getNumSuccessors() == 0) - InEdgeFreqs[Succ] += EdgeCounts[i].second; - } - } - - // Now we have to accumulate information for those blocks without - // successors into our table. - for (std::map<BasicBlock*, unsigned>::iterator I = InEdgeFreqs.begin(), - E = InEdgeFreqs.end(); I != E; ++I) { - unsigned i = 0; - for (; i != Counts.size() && Counts[i].first != I->first; ++i) - /*empty*/; - if (i == Counts.size()) Counts.push_back(std::make_pair(I->first, 0)); - Counts[i].second += I->second; - } - - } else { - cerr << "Block counts are not available!\n"; - } - return; - } - - unsigned Counter = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - Counts.push_back(std::make_pair(BB, BlockCounts[Counter++])); - if (Counter == BlockCounts.size()) - return; - } -} - -// getEdgeCounts - This method is used by consumers of edge counting -// information. If we do not directly have edge count information, we compute -// it from other, more refined, types of profile information. -// -void ProfileInfoLoader::getEdgeCounts(std::vector<std::pair<Edge, - unsigned> > &Counts) { - if (EdgeCounts.empty()) { - cerr << "Edge counts not available, and no synthesis " - << "is implemented yet!\n"; - return; - } - - unsigned Counter = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (unsigned i = 0, e = BB->getTerminator()->getNumSuccessors(); - i != e; ++i) { - Counts.push_back(std::make_pair(Edge(BB, i), EdgeCounts[Counter++])); - if (Counter == EdgeCounts.size()) - return; - } -} - -// getBBTrace - This method is used by consumers of basic-block trace -// information. -// -void ProfileInfoLoader::getBBTrace(std::vector<BasicBlock *> &Trace) { - if (BBTrace.empty ()) { - cerr << "Basic block trace is not available!\n"; - return; - } - cerr << "Basic block trace loading is not implemented yet!\n"; -} diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index 0a8a87bd0f97..89d90bca2166 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -11,18 +11,27 @@ // loads the information from a profile dump file. 
// //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "profile-loader" #include "llvm/BasicBlock.h" #include "llvm/InstrTypes.h" +#include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ProfileInfoLoader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallSet.h" +#include <set> using namespace llvm; +STATISTIC(NumEdgesRead, "The # of edges read."); + static cl::opt<std::string> ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"), cl::value_desc("filename"), @@ -31,6 +40,9 @@ ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"), namespace { class VISIBILITY_HIDDEN LoaderPass : public ModulePass, public ProfileInfo { std::string Filename; + std::set<Edge> SpanningTree; + std::set<const BasicBlock*> BBisUnvisited; + unsigned ReadCount; public: static char ID; // Class identification, replacement for typeinfo explicit LoaderPass(const std::string &filename = "") @@ -46,6 +58,12 @@ namespace { return "Profiling information loader"; } + // recurseBasicBlock() - Calculates the edge weights for as much basic + // blocks as possbile. + virtual void recurseBasicBlock(const BasicBlock *BB); + virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, unsigned &); + virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&); + /// run - Load the profile information from the specified file. virtual bool runOnModule(Module &M); }; @@ -66,25 +84,210 @@ Pass *llvm::createProfileLoaderPass(const std::string &Filename) { return new LoaderPass(Filename); } +void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc, + unsigned &uncalc, unsigned &count) { + double w; + if ((w = getEdgeWeight(edge)) == MissingValue) { + tocalc = edge; + uncalc++; + } else { + count+=w; + } +} + +// recurseBasicBlock - Visits all neighbours of a block and then tries to +// calculate the missing edge values. 
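The routine defined next is the heart of optimal edge profiling: only a spanning tree's worth of edges goes uninstrumented, so any block with exactly one unknown incident edge lets flow conservation solve for it as |in - out|. That single-block step, as a standalone sketch (solveUnknown is an illustrative name; edges are reduced to bare weights):

#include <cmath>
#include <iostream>
#include <vector>

static const double Missing = -1;

// Solve the single unknown edge around one block from flow conservation,
// sum(in) == sum(out). Returns Missing when zero or two-plus edges are unknown.
static double solveUnknown(const std::vector<double> &In,
                           const std::vector<double> &Out) {
  double in = 0, out = 0;
  int unknown = 0;
  for (double w : In)  { if (w == Missing) ++unknown; else in  += w; }
  for (double w : Out) { if (w == Missing) ++unknown; else out += w; }
  return unknown == 1 ? std::fabs(in - out) : Missing;
}

int main() {
  // in-edges 5 and ?, out-edges 7 and 2: the unknown in-edge must be 4.
  std::cout << solveUnknown({5, Missing}, {7, 2}) << "\n";
}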
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
+
+  // break recursion if already visited
+  if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
+  BBisUnvisited.erase(BB);
+  if (!BB) return;
+
+  for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+       bbi != bbe; ++bbi) {
+    recurseBasicBlock(*bbi);
+  }
+  for (pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+       bbi != bbe; ++bbi) {
+    recurseBasicBlock(*bbi);
+  }
+
+  Edge edgetocalc;
+  unsigned uncalculated = 0;
+
+  // collect weights of all incoming and outgoing edges, remember edges that
+  // have no value
+  unsigned incount = 0;
+  SmallSet<const BasicBlock*,8> pred_visited;
+  pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+  if (bbi==bbe) {
+    readEdgeOrRemember(getEdge(0, BB),edgetocalc,uncalculated,incount);
+  }
+  for (;bbi != bbe; ++bbi) {
+    if (pred_visited.insert(*bbi)) {
+      readEdgeOrRemember(getEdge(*bbi, BB),edgetocalc,uncalculated,incount);
+    }
+  }
+
+  unsigned outcount = 0;
+  SmallSet<const BasicBlock*,8> succ_visited;
+  succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+  if (sbbi==sbbe) {
+    readEdgeOrRemember(getEdge(BB, 0),edgetocalc,uncalculated,outcount);
+  }
+  for (;sbbi != sbbe; ++sbbi) {
+    if (succ_visited.insert(*sbbi)) {
+      readEdgeOrRemember(getEdge(BB, *sbbi),edgetocalc,uncalculated,outcount);
+    }
+  }
+
+  // if exactly one edge weight was missing, calculate it and remove it from
+  // the spanning tree
+  if (uncalculated == 1) {
+    if (incount < outcount) {
+      EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+    } else {
+      EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+    }
+    DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": "
+                 << format("%g", getEdgeWeight(edgetocalc)) << "\n");
+    SpanningTree.erase(edgetocalc);
+  }
+}
+
+void LoaderPass::readEdge(ProfileInfo::Edge e,
+                          std::vector<unsigned> &ECs) {
+  if (ReadCount < ECs.size()) {
+    double weight = ECs[ReadCount++];
+    if (weight != ProfileInfoLoader::Uncounted) {
+      // Here the data realm changes from the unsigned of the file to the
+      // double of the ProfileInfo. This conversion is safe because we know
+      // that everything that is representable in unsigned is also
+      // representable in double.
+      EdgeInformation[getFunction(e)][e] += (double)weight;
+
+      DEBUG(errs() << "--Read Edge Counter for " << e
+                   << " (# "<< (ReadCount-1) << "): "
+                   << (unsigned)getEdgeWeight(e) << "\n");
+    } else {
+      // This happens only if reading optimal profiling information, not when
+      // reading regular profiling information.
+ SpanningTree.insert(e); + } + } +} + bool LoaderPass::runOnModule(Module &M) { ProfileInfoLoader PIL("profile-loader", Filename, M); - EdgeCounts.clear(); - bool PrintedWarning = false; - - std::vector<std::pair<ProfileInfoLoader::Edge, unsigned> > ECs; - PIL.getEdgeCounts(ECs); - for (unsigned i = 0, e = ECs.size(); i != e; ++i) { - BasicBlock *BB = ECs[i].first.first; - unsigned SuccNum = ECs[i].first.second; - TerminatorInst *TI = BB->getTerminator(); - if (SuccNum >= TI->getNumSuccessors()) { - if (!PrintedWarning) { - cerr << "WARNING: profile information is inconsistent with " + + EdgeInformation.clear(); + std::vector<unsigned> Counters = PIL.getRawEdgeCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n"); + readEdge(getEdge(0,&F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); + } + } + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " << "the current program!\n"; - PrintedWarning = true; + } + NumEdgesRead = ReadCount; + } + + Counters = PIL.getRawOptimalEdgeCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n"); + readEdge(getEdge(0,&F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + readEdge(getEdge(BB,0), Counters); + } + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); + } } - } else { - EdgeCounts[std::make_pair(BB, TI->getSuccessor(SuccNum))]+= ECs[i].second; + while (SpanningTree.size() > 0) { +#if 0 + unsigned size = SpanningTree.size(); +#endif + BBisUnvisited.clear(); + for (std::set<Edge>::iterator ei = SpanningTree.begin(), + ee = SpanningTree.end(); ei != ee; ++ei) { + BBisUnvisited.insert(ei->first); + BBisUnvisited.insert(ei->second); + } + while (BBisUnvisited.size() > 0) { + recurseBasicBlock(*BBisUnvisited.begin()); + } +#if 0 + if (SpanningTree.size() == size) { + DEBUG(errs()<<"{"); + for (std::set<Edge>::iterator ei = SpanningTree.begin(), + ee = SpanningTree.end(); ei != ee; ++ei) { + DEBUG(errs()<<"("<<(ei->first?ei->first->getName():"0")<<"," + <<(ei->second?ei->second->getName():"0")<<"),"); + } + assert(0 && "No edge calculated!"); + } +#endif + } + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + NumEdgesRead = ReadCount; + } + + BlockInformation.clear(); + Counters = PIL.getRawBlockCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (ReadCount < Counters.size()) + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also + // representable in double. 
+ BlockInformation[F][BB] = (double)Counters[ReadCount++]; + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + } + + FunctionInformation.clear(); + Counters = PIL.getRawFunctionCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + if (ReadCount < Counters.size()) + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also + // representable in double. + FunctionInformation[F] = (double)Counters[ReadCount++]; + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; } } diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp new file mode 100644 index 000000000000..9766da5992df --- /dev/null +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -0,0 +1,343 @@ +//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that checks profiling information for +// plausibility. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-verifier" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" +#include <set> +using namespace llvm; + +static cl::opt<bool,false> +ProfileVerifierDisableAssertions("profile-verifier-noassert", + cl::desc("Disable assertions")); + +namespace { + class VISIBILITY_HIDDEN ProfileVerifierPass : public FunctionPass { + + struct DetailedBlockInfo { + const BasicBlock *BB; + double BBWeight; + double inWeight; + int inCount; + double outWeight; + int outCount; + }; + + ProfileInfo *PI; + std::set<const BasicBlock*> BBisVisited; + std::set<const Function*> FisVisited; + bool DisableAssertions; + + // When debugging is enabled, the verifier prints a whole slew of debug + // information, otherwise its just the assert. These are all the helper + // functions. + bool PrintedDebugTree; + std::set<const BasicBlock*> BBisPrinted; + void debugEntry(DetailedBlockInfo*); + void printDebugInfo(const BasicBlock *BB); + + public: + static char ID; // Class identification, replacement for typeinfo + + explicit ProfileVerifierPass () : FunctionPass(&ID) { + DisableAssertions = ProfileVerifierDisableAssertions; + } + explicit ProfileVerifierPass (bool da) : FunctionPass(&ID), + DisableAssertions(da) { + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<ProfileInfo>(); + } + + const char *getPassName() const { + return "Profiling information verifier"; + } + + /// run - Verify the profile information. 
+    bool runOnFunction(Function &F);
+    void recurseBasicBlock(const BasicBlock*);
+
+    bool   exitReachable(const Function*);
+    double ReadOrAssert(ProfileInfo::Edge);
+    void   CheckValue(bool, const char*, DetailedBlockInfo*);
+  };
+}  // End of anonymous namespace
+
+char ProfileVerifierPass::ID = 0;
+static RegisterPass<ProfileVerifierPass>
+X("profile-verifier", "Verify profiling information", false, true);
+
+namespace llvm {
+  FunctionPass *createProfileVerifierPass() {
+    return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
+  }
+}
+
+void ProfileVerifierPass::printDebugInfo(const BasicBlock *BB) {
+
+  if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+  double BBWeight = PI->getExecutionCount(BB);
+  if (BBWeight == ProfileInfo::MissingValue) { BBWeight = 0; }
+  double inWeight = 0;
+  int inCount = 0;
+  std::set<const BasicBlock*> ProcessedPreds;
+  for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+        bbi != bbe; ++bbi ) {
+    if (ProcessedPreds.insert(*bbi).second) {
+      ProfileInfo::Edge E = PI->getEdge(*bbi,BB);
+      double EdgeWeight = PI->getEdgeWeight(E);
+      if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
+      errs() << "calculated in-edge " << E << ": " << EdgeWeight << "\n";
+      inWeight += EdgeWeight;
+      inCount++;
+    }
+  }
+  double outWeight = 0;
+  int outCount = 0;
+  std::set<const BasicBlock*> ProcessedSuccs;
+  for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+        bbi != bbe; ++bbi ) {
+    if (ProcessedSuccs.insert(*bbi).second) {
+      ProfileInfo::Edge E = PI->getEdge(BB,*bbi);
+      double EdgeWeight = PI->getEdgeWeight(E);
+      if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
+      errs() << "calculated out-edge " << E << ": " << EdgeWeight << "\n";
+      outWeight += EdgeWeight;
+      outCount++;
+    }
+  }
+  errs()<<"Block "<<BB->getNameStr()<<" in "<<BB->getParent()->getNameStr()
+        <<",BBWeight="<<BBWeight<<",inWeight="<<inWeight<<",inCount="<<inCount
+        <<",outWeight="<<outWeight<<",outCount="<<outCount<<"\n";
+
+  // mark as visited and recurse into subnodes
+  BBisPrinted.insert(BB);
+  for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+        bbi != bbe; ++bbi ) {
+    printDebugInfo(*bbi);
+  }
+}
+
+void ProfileVerifierPass::debugEntry (DetailedBlockInfo *DI) {
+  errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
+         << DI->BB->getParent()->getNameStr() << ":";
+  errs() << "BBWeight=" << DI->BBWeight << ",";
+  errs() << "inWeight=" << DI->inWeight << ",";
+  errs() << "inCount=" << DI->inCount << ",";
+  errs() << "outWeight=" << DI->outWeight << ",";
+  errs() << "outCount=" << DI->outCount << "\n";
+  if (!PrintedDebugTree) {
+    PrintedDebugTree = true;
+    printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+  }
+}
+
+// This compares A and B, tolerating a small relative difference.
+static bool Equals(double A, double B) {
+  double maxRelativeError = 0.0000001;
+  if (A == B)
+    return true;
+  double relativeError;
+  if (fabs(B) > fabs(A))
+    relativeError = fabs((A - B) / B);
+  else
+    relativeError = fabs((A - B) / A);
+  if (relativeError <= maxRelativeError) return true;
+  return false;
+}
+
+// This checks if the function "exit" is reachable from a given function
+// via calls; this is necessary to check if a profile is valid despite the
+// counts not fitting exactly.
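Structurally, exitReachable below is a depth-first search over the static call graph, with the FisVisited set cutting cycles. A minimal standalone sketch of the same traversal over a toy adjacency-list call graph (the graph, names, and types are illustrative, not the LLVM API):

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

// Toy call graph: each function name maps to the functions it calls.
typedef std::map<std::string, std::vector<std::string> > CallGraph;

// DFS with a visited set, mirroring the FisVisited logic: cycles are cut
// by refusing to revisit a function.
static bool reaches(const CallGraph &CG, const std::string &From,
                    const std::string &Target,
                    std::set<std::string> &Visited) {
  if (From == Target) return true;
  if (!Visited.insert(From).second) return false;  // already visited
  CallGraph::const_iterator It = CG.find(From);
  if (It == CG.end()) return false;
  for (size_t i = 0, e = It->second.size(); i != e; ++i)
    if (reaches(CG, It->second[i], Target, Visited))
      return true;
  return false;
}

int main() {
  CallGraph CG;
  CG["main"].push_back("foo");
  CG["foo"].push_back("bar");
  CG["foo"].push_back("foo");        // recursion is handled by Visited
  CG["bar"].push_back("exit");
  std::set<std::string> V;
  assert(reaches(CG, "main", "exit", V));   // main -> foo -> bar -> exit
  V.clear();
  assert(!reaches(CG, "bar", "main", V));   // no path back up
  return 0;
}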
+bool ProfileVerifierPass::exitReachable(const Function *F) {
+  if (!F) return false;
+
+  if (FisVisited.count(F)) return false;
+
+  Function *Exit = F->getParent()->getFunction("exit");
+  if (Exit == F) {
+    return true;
+  }
+
+  FisVisited.insert(F);
+  bool exits = false;
+  for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+    if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
+      exits |= exitReachable(CI->getCalledFunction());
+      if (exits) break;
+    }
+  }
+  return exits;
+}
+
+#define ASSERTMESSAGE(M) \
+    errs() << (M) << "\n"; \
+    if (!DisableAssertions) assert(0 && (M));
+
+double ProfileVerifierPass::ReadOrAssert(ProfileInfo::Edge E) {
+  double EdgeWeight = PI->getEdgeWeight(E);
+  if (EdgeWeight == ProfileInfo::MissingValue) {
+    errs() << "Edge " << E << " in Function "
+           << ProfileInfo::getFunction(E)->getNameStr() << ": ";
+    ASSERTMESSAGE("ASSERT:Edge has missing value");
+    return 0;
+  } else {
+    return EdgeWeight;
+  }
+}
+
+void ProfileVerifierPass::CheckValue(bool Error, const char *Message,
+                                     DetailedBlockInfo *DI) {
+  if (Error) {
+    DEBUG(debugEntry(DI));
+    errs() << "Block " << DI->BB->getNameStr() << " in Function "
+           << DI->BB->getParent()->getNameStr() << ": ";
+    ASSERTMESSAGE(Message);
+  }
+  return;
+}
+
+// This calculates the Information for a block and then recurses into the
+// successors.
+void ProfileVerifierPass::recurseBasicBlock(const BasicBlock *BB) {
+
+  // Break the recursion by remembering all visited blocks.
+  if (BBisVisited.find(BB) != BBisVisited.end()) return;
+
+  // Use a data structure to store all the information, this can then be handed
+  // to debug printers.
+  DetailedBlockInfo DI;
+  DI.BB = BB;
+  DI.outCount = DI.inCount = DI.inWeight = DI.outWeight = 0;
+
+  // Read predecessors.
+  std::set<const BasicBlock*> ProcessedPreds;
+  pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+  // If there are none, check for (0,BB) edge.
+  if (bpi == bpe) {
+    DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
+    DI.inCount++;
+  }
+  for (;bpi != bpe; ++bpi) {
+    if (ProcessedPreds.insert(*bpi).second) {
+      DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+      DI.inCount++;
+    }
+  }
+
+  // Read successors.
+  std::set<const BasicBlock*> ProcessedSuccs;
+  succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+  // If there is a (BB,0) edge, consider it too. (This is done not only when
+  // there are no successors, but every time; not every function exits through
+  // a return block with no successors, e.g. a loop latch can serve as the
+  // return block.)
+  double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+  if (w != ProfileInfo::MissingValue) {
+    DI.outWeight += w;
+    DI.outCount++;
+  }
+  for (;bbi != bbe; ++bbi) {
+    if (ProcessedSuccs.insert(*bbi).second) {
+      DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+      DI.outCount++;
+    }
+  }
+
+  // Read block weight.
+  DI.BBWeight = PI->getExecutionCount(BB);
+  CheckValue(DI.BBWeight == ProfileInfo::MissingValue,
+             "ASSERT:BasicBlock has missing value", &DI);
+
+  // Check if this block is a setjmp target.
+  bool isSetJmpTarget = false;
+  if (DI.outWeight > DI.inWeight) {
+    for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
+         i != ie; ++i) {
+      if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+        Function *F = CI->getCalledFunction();
+        if (F && (F->getNameStr() == "_setjmp")) {
+          isSetJmpTarget = true; break;
+        }
+      }
+    }
+  }
+  // Check if this block eventually reaches exit.
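The checks recurseBasicBlock performs amount to a flow-conservation invariant: barring setjmp targets and calls that can reach exit(), the weight entering a block, the block's own count, and the weight leaving it must all agree. A hedged sketch of that invariant on plain numbers (the block shapes and counts are invented for illustration):

#include <cassert>
#include <numeric>
#include <vector>

// Flow conservation for one basic block: the sum of in-edge weights should
// match the block count, which should match the sum of out-edge weights
// (modulo the exit()/setjmp escape hatches handled by the pass).
static bool flowConserved(const std::vector<double> &In, double BlockCount,
                          const std::vector<double> &Out) {
  double InW  = std::accumulate(In.begin(),  In.end(),  0.0);
  double OutW = std::accumulate(Out.begin(), Out.end(), 0.0);
  return InW == BlockCount && BlockCount == OutW;
}

int main() {
  std::vector<double> In, Out;
  // A diamond: entry splits 60/40, the two arms rejoin.
  In.push_back(60.0);  In.push_back(40.0);
  Out.push_back(100.0);
  assert(flowConserved(In, 100.0, Out));
  // A dropped counter shows up as a conservation failure.
  Out[0] = 90.0;
  assert(!flowConserved(In, 100.0, Out));
  return 0;
}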
+  bool isExitReachable = false;
+  if (DI.inWeight > DI.outWeight) {
+    for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
+         i != ie; ++i) {
+      if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+        FisVisited.clear();
+        isExitReachable |= exitReachable(CI->getCalledFunction());
+        if (isExitReachable) break;
+      }
+    }
+  }
+
+  if (DI.inCount > 0 && DI.outCount == 0) {
+    // If this is a block with no successors.
+    if (!isSetJmpTarget) {
+      CheckValue(!Equals(DI.inWeight,DI.BBWeight),
+                 "ASSERT:inWeight and BBWeight do not match", &DI);
+    }
+  } else if (DI.inCount == 0 && DI.outCount > 0) {
+    // If this is a block with no predecessors.
+    if (!isExitReachable)
+      CheckValue(!Equals(DI.BBWeight,DI.outWeight),
+                 "ASSERT:BBWeight and outWeight do not match", &DI);
+  } else {
+    // If this block has successors and predecessors.
+    if (DI.inWeight > DI.outWeight && !isExitReachable)
+      CheckValue(!Equals(DI.inWeight,DI.outWeight),
+                 "ASSERT:inWeight and outWeight do not match", &DI);
+    if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+      CheckValue(!Equals(DI.inWeight,DI.outWeight),
+                 "ASSERT:inWeight and outWeight do not match", &DI);
+  }
+
+
+  // Mark this block as visited, recurse into successors.
+  BBisVisited.insert(BB);
+  for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+        bbi != bbe; ++bbi ) {
+    recurseBasicBlock(*bbi);
+  }
+}
+
+bool ProfileVerifierPass::runOnFunction(Function &F) {
+  PI = &getAnalysis<ProfileInfo>();
+
+  // Prepare global variables.
+  PrintedDebugTree = false;
+  BBisVisited.clear();
+
+  // Fetch entry block and recurse into it.
+  const BasicBlock *entry = &F.getEntryBlock();
+  recurseBasicBlock(entry);
+
+  if (!DisableAssertions)
+    assert((PI->getExecutionCount(&F)==PI->getExecutionCount(entry)) &&
+           "Function count and entry block count do not match");
+  return false;
+}
diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt
new file mode 100644
index 000000000000..c40109027299
--- /dev/null
+++ b/lib/Analysis/README.txt
@@ -0,0 +1,18 @@
+Analysis Opportunities:
+
+//===---------------------------------------------------------------------===//
+
+In test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll, the
+ScalarEvolution expression for %r is this:
+
+  {1,+,3,+,2}<loop>
+
+Outside the loop, this could be evaluated simply as (%n * %n); however,
+ScalarEvolution currently evaluates it as
+
+  (-2 + (2 * (trunc i65 (((zext i64 (-2 + %n) to i65) * (zext i64 (-1 + %n) to i65)) /u 2) to i64)) + (3 * %n))
+
+In addition to being much more complicated, it involves i65 arithmetic,
+which is very inefficient when expanded into code.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 408156265d24..62f3aa1dcae4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -14,9 +14,8 @@
 // There are several aspects to this library. First is the representation of
 // scalar expressions, which are represented as subclasses of the SCEV class.
 // These classes are used to represent certain types of subexpressions that we
-// can handle. These classes are reference counted, managed by the const SCEV*
-// class. We only create one SCEV of a particular shape, so pointer-comparisons
-// for equality are legal.
+// can handle. We only create one SCEV of a particular shape, so
+// pointer-comparisons for equality are legal.
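The README entry above can be checked by hand: evaluating the chrec {1,+,3,+,2} at iteration i gives 1*BC(i,0) + 3*BC(i,1) + 2*BC(i,2) = (i+1)^2, so after %n iterations (i = %n - 1) the exit value is indeed %n * %n. A small standalone sketch verifying this numerically (this is plain arithmetic, not the ScalarEvolution API):

#include <cassert>
#include <cstdint>

// Evaluate the add recurrence {1,+,3,+,2} at iteration It using the
// closed form Sum_k Op[k] * BC(It, k), as evaluateAtIteration does.
static uint64_t evalChrec(uint64_t It) {
  uint64_t BC0 = 1;                       // BC(It, 0)
  uint64_t BC1 = It;                      // BC(It, 1)
  uint64_t BC2 = It * (It - 1) / 2;       // BC(It, 2); 0 when It == 0
  return 1 * BC0 + 3 * BC1 + 2 * BC2;     // = (It + 1)^2
}

int main() {
  for (uint64_t N = 1; N < 1000; ++N)
    assert(evalChrec(N - 1) == N * N);    // exit value equals %n * %n
  return 0;
}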
// // One important aspect of the SCEV objects is that they are never cyclic, even // if there is a cycle in the dataflow for an expression (ie, a PHI node). If @@ -64,7 +63,10 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" @@ -74,12 +76,14 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ConstantRange.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include <algorithm> using namespace llvm; @@ -118,11 +122,6 @@ void SCEV::dump() const { errs() << '\n'; } -void SCEV::print(std::ostream &o) const { - raw_os_ostream OS(o); - print(OS); -} - bool SCEV::isZero() const { if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) return SC->getValue()->isZero(); @@ -142,33 +141,26 @@ bool SCEV::isAllOnesValue() const { } SCEVCouldNotCompute::SCEVCouldNotCompute() : - SCEV(scCouldNotCompute) {} - -void SCEVCouldNotCompute::Profile(FoldingSetNodeID &ID) const { - assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); -} + SCEV(FoldingSetNodeID(), scCouldNotCompute) {} bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { - assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); return false; } const Type *SCEVCouldNotCompute::getType() const { - assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); return 0; } bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const { - assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); return false; } -const SCEV * -SCEVCouldNotCompute::replaceSymbolicValuesWithConcrete( - const SCEV *Sym, - const SCEV *Conc, - ScalarEvolution &SE) const { - return this; +bool SCEVCouldNotCompute::hasOperand(const SCEV *) const { + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return false; } void SCEVCouldNotCompute::print(raw_ostream &OS) const { @@ -179,30 +171,26 @@ bool SCEVCouldNotCompute::classof(const SCEV *S) { return S->getSCEVType() == scCouldNotCompute; } -const SCEV* ScalarEvolution::getConstant(ConstantInt *V) { +const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { FoldingSetNodeID ID; ID.AddInteger(scConstant); ID.AddPointer(V); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate<SCEVConstant>(); - new (S) SCEVConstant(V); + new (S) SCEVConstant(ID, V); UniqueSCEVs.InsertNode(S, IP); return S; } -const SCEV* ScalarEvolution::getConstant(const APInt& Val) { - return getConstant(ConstantInt::get(Val)); +const SCEV *ScalarEvolution::getConstant(const APInt& Val) { + return getConstant(ConstantInt::get(getContext(), Val)); } -const SCEV* +const SCEV * ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { - return getConstant(ConstantInt::get(cast<IntegerType>(Ty), V, isSigned)); -} - -void 
SCEVConstant::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(scConstant); - ID.AddPointer(V); + return getConstant( + ConstantInt::get(cast<IntegerType>(Ty), V, isSigned)); } const Type *SCEVConstant::getType() const { return V->getType(); } @@ -211,22 +199,21 @@ void SCEVConstant::print(raw_ostream &OS) const { WriteAsOperand(OS, V, false); } -SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy, - const SCEV* op, const Type *ty) - : SCEV(SCEVTy), Op(op), Ty(ty) {} - -void SCEVCastExpr::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(getSCEVType()); - ID.AddPointer(Op); - ID.AddPointer(Ty); -} +SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeID &ID, + unsigned SCEVTy, const SCEV *op, const Type *ty) + : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return Op->dominates(BB, DT); } -SCEVTruncateExpr::SCEVTruncateExpr(const SCEV* op, const Type *ty) - : SCEVCastExpr(scTruncate, op, ty) { +bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + return Op->properlyDominates(BB, DT); +} + +SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scTruncate, op, ty) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot truncate non-integer value!"); @@ -236,8 +223,9 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const { OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } -SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEV* op, const Type *ty) - : SCEVCastExpr(scZeroExtend, op, ty) { +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scZeroExtend, op, ty) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot zero extend non-integer value!"); @@ -247,8 +235,9 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const { OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } -SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEV* op, const Type *ty) - : SCEVCastExpr(scSignExtend, op, ty) { +SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scSignExtend, op, ty) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot sign extend non-integer value!"); @@ -267,46 +256,6 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const { OS << ")"; } -const SCEV * -SCEVCommutativeExpr::replaceSymbolicValuesWithConcrete( - const SCEV *Sym, - const SCEV *Conc, - ScalarEvolution &SE) const { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const SCEV* H = - getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); - if (H != getOperand(i)) { - SmallVector<const SCEV*, 8> NewOps; - NewOps.reserve(getNumOperands()); - for (unsigned j = 0; j != i; ++j) - NewOps.push_back(getOperand(j)); - NewOps.push_back(H); - for (++i; i != e; ++i) - NewOps.push_back(getOperand(i)-> - replaceSymbolicValuesWithConcrete(Sym, Conc, SE)); - - if (isa<SCEVAddExpr>(this)) - return SE.getAddExpr(NewOps); - else if (isa<SCEVMulExpr>(this)) - return SE.getMulExpr(NewOps); - else if (isa<SCEVSMaxExpr>(this)) - return SE.getSMaxExpr(NewOps); - else if (isa<SCEVUMaxExpr>(this)) - return SE.getUMaxExpr(NewOps); - else - assert(0 && "Unknown commutative expr!"); - } - } - return 
this; -} - -void SCEVNAryExpr::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(getSCEVType()); - ID.AddInteger(Operands.size()); - for (unsigned i = 0, e = Operands.size(); i != e; ++i) - ID.AddPointer(Operands[i]); -} - bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (!getOperand(i)->dominates(BB, DT)) @@ -315,16 +264,22 @@ bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return true; } -void SCEVUDivExpr::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(scUDivExpr); - ID.AddPointer(LHS); - ID.AddPointer(RHS); +bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (!getOperand(i)->properlyDominates(BB, DT)) + return false; + } + return true; } bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return LHS->dominates(BB, DT) && RHS->dominates(BB, DT); } +bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT); +} + void SCEVUDivExpr::print(raw_ostream &OS) const { OS << "(" << *LHS << " /u " << *RHS << ")"; } @@ -338,38 +293,6 @@ const Type *SCEVUDivExpr::getType() const { return RHS->getType(); } -void SCEVAddRecExpr::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(scAddRecExpr); - ID.AddInteger(Operands.size()); - for (unsigned i = 0, e = Operands.size(); i != e; ++i) - ID.AddPointer(Operands[i]); - ID.AddPointer(L); -} - -const SCEV * -SCEVAddRecExpr::replaceSymbolicValuesWithConcrete(const SCEV *Sym, - const SCEV *Conc, - ScalarEvolution &SE) const { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const SCEV* H = - getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); - if (H != getOperand(i)) { - SmallVector<const SCEV*, 8> NewOps; - NewOps.reserve(getNumOperands()); - for (unsigned j = 0; j != i; ++j) - NewOps.push_back(getOperand(j)); - NewOps.push_back(H); - for (++i; i != e; ++i) - NewOps.push_back(getOperand(i)-> - replaceSymbolicValuesWithConcrete(Sym, Conc, SE)); - - return SE.getAddRecExpr(NewOps, L); - } - } - return this; -} - - bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const { // Add recurrences are never invariant in the function-body (null loop). if (!QueryLoop) @@ -396,9 +319,13 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const { OS << "}<" << L->getHeader()->getName() + ">"; } -void SCEVUnknown::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(scUnknown); - ID.AddPointer(V); +void SCEVFieldOffsetExpr::print(raw_ostream &OS) const { + // LLVM struct fields don't have names, so just print the field number. 
+ OS << "offsetof(" << *STy << ", " << FieldNo << ")"; +} + +void SCEVAllocSizeExpr::print(raw_ostream &OS) const { + OS << "sizeof(" << *AllocTy << ")"; } bool SCEVUnknown::isLoopInvariant(const Loop *L) const { @@ -417,6 +344,12 @@ bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const { return true; } +bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + if (Instruction *I = dyn_cast<Instruction>(getValue())) + return DT->properlyDominates(I->getParent(), BB); + return true; +} + const Type *SCEVUnknown::getType() const { return V->getType(); } @@ -429,6 +362,41 @@ void SCEVUnknown::print(raw_ostream &OS) const { // SCEV Utilities //===----------------------------------------------------------------------===// +static bool CompareTypes(const Type *A, const Type *B) { + if (A->getTypeID() != B->getTypeID()) + return A->getTypeID() < B->getTypeID(); + if (const IntegerType *AI = dyn_cast<IntegerType>(A)) { + const IntegerType *BI = cast<IntegerType>(B); + return AI->getBitWidth() < BI->getBitWidth(); + } + if (const PointerType *AI = dyn_cast<PointerType>(A)) { + const PointerType *BI = cast<PointerType>(B); + return CompareTypes(AI->getElementType(), BI->getElementType()); + } + if (const ArrayType *AI = dyn_cast<ArrayType>(A)) { + const ArrayType *BI = cast<ArrayType>(B); + if (AI->getNumElements() != BI->getNumElements()) + return AI->getNumElements() < BI->getNumElements(); + return CompareTypes(AI->getElementType(), BI->getElementType()); + } + if (const VectorType *AI = dyn_cast<VectorType>(A)) { + const VectorType *BI = cast<VectorType>(B); + if (AI->getNumElements() != BI->getNumElements()) + return AI->getNumElements() < BI->getNumElements(); + return CompareTypes(AI->getElementType(), BI->getElementType()); + } + if (const StructType *AI = dyn_cast<StructType>(A)) { + const StructType *BI = cast<StructType>(B); + if (AI->getNumElements() != BI->getNumElements()) + return AI->getNumElements() < BI->getNumElements(); + for (unsigned i = 0, e = AI->getNumElements(); i != e; ++i) + if (CompareTypes(AI->getElementType(i), BI->getElementType(i)) || + CompareTypes(BI->getElementType(i), AI->getElementType(i))) + return CompareTypes(AI->getElementType(i), BI->getElementType(i)); + } + return false; +} + namespace { /// SCEVComplexityCompare - Return true if the complexity of the LHS is less /// than the complexity of the RHS. This comparator is used to canonicalize @@ -439,6 +407,10 @@ namespace { explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {} bool operator()(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return false; + // Primarily, sort the SCEVs by their getSCEVType(). if (LHS->getSCEVType() != RHS->getSCEVType()) return LHS->getSCEVType() < RHS->getSCEVType(); @@ -495,6 +467,8 @@ namespace { // Compare constant values. if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) { const SCEVConstant *RC = cast<SCEVConstant>(RHS); + if (LC->getValue()->getBitWidth() != RC->getValue()->getBitWidth()) + return LC->getValue()->getBitWidth() < RC->getValue()->getBitWidth(); return LC->getValue()->getValue().ult(RC->getValue()->getValue()); } @@ -539,7 +513,22 @@ namespace { return operator()(LC->getOperand(), RC->getOperand()); } - assert(0 && "Unknown SCEV kind!"); + // Compare offsetof expressions. 
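SCEVComplexityCompare above deliberately orders expressions by kind first and then by content, never by object address, so that sorting an operand list always yields the same canonical order (and x + y and y + x fold to the same node). A toy comparator in the same spirit (the Term type and its fields are illustrative):

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

// Deterministic ordering: coarse kind first, content as tie-breaker.
// Pointer addresses never participate, so the result is reproducible.
struct Term {
  int Kind;           // e.g. constants sort before unknowns
  std::string Name;   // content tie-breaker
};

static bool lessComplex(const Term &L, const Term &R) {
  if (L.Kind != R.Kind)
    return L.Kind < R.Kind;
  return L.Name < R.Name;
}

int main() {
  Term a0 = {1, "y"}, a1 = {0, "2"}, a2 = {1, "x"};
  std::vector<Term> A; A.push_back(a0); A.push_back(a1); A.push_back(a2);
  std::vector<Term> B; B.push_back(a2); B.push_back(a0); B.push_back(a1);
  std::stable_sort(A.begin(), A.end(), lessComplex);
  std::stable_sort(B.begin(), B.end(), lessComplex);
  // Both operand lists canonicalize to the same order: 2, x, y.
  for (size_t i = 0; i != A.size(); ++i)
    assert(A[i].Name == B[i].Name);
  return 0;
}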
+ if (const SCEVFieldOffsetExpr *LA = dyn_cast<SCEVFieldOffsetExpr>(LHS)) { + const SCEVFieldOffsetExpr *RA = cast<SCEVFieldOffsetExpr>(RHS); + if (CompareTypes(LA->getStructType(), RA->getStructType()) || + CompareTypes(RA->getStructType(), LA->getStructType())) + return CompareTypes(LA->getStructType(), RA->getStructType()); + return LA->getFieldNo() < RA->getFieldNo(); + } + + // Compare sizeof expressions by the allocation type. + if (const SCEVAllocSizeExpr *LA = dyn_cast<SCEVAllocSizeExpr>(LHS)) { + const SCEVAllocSizeExpr *RA = cast<SCEVAllocSizeExpr>(RHS); + return CompareTypes(LA->getAllocType(), RA->getAllocType()); + } + + llvm_unreachable("Unknown SCEV kind!"); return false; } }; @@ -555,7 +544,7 @@ namespace { /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. /// -static void GroupByComplexity(SmallVectorImpl<const SCEV*> &Ops, +static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, LoopInfo *LI) { if (Ops.size() < 2) return; // Noop if (Ops.size() == 2) { @@ -598,9 +587,9 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV*> &Ops, /// BinomialCoefficient - Compute BC(It, K). The result has width W. /// Assume, K > 0. -static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K, - ScalarEvolution &SE, - const Type* ResultTy) { +static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, + ScalarEvolution &SE, + const Type* ResultTy) { // Handle the simplest case efficiently. if (K == 1) return SE.getTruncateOrZeroExtend(It, ResultTy); @@ -690,16 +679,17 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K, MultiplyFactor = MultiplyFactor.trunc(W); // Calculate the product, at width T+W - const IntegerType *CalculationTy = IntegerType::get(CalculationBits); - const SCEV* Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); + const IntegerType *CalculationTy = IntegerType::get(SE.getContext(), + CalculationBits); + const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); for (unsigned i = 1; i != K; ++i) { - const SCEV* S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType())); + const SCEV *S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType())); Dividend = SE.getMulExpr(Dividend, SE.getTruncateOrZeroExtend(S, CalculationTy)); } // Divide by 2^T - const SCEV* DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); + const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); // Truncate the result, and divide by K! / 2^T. @@ -716,14 +706,14 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K, /// /// where BC(It, k) stands for binomial coefficient. /// -const SCEV* SCEVAddRecExpr::evaluateAtIteration(const SCEV* It, - ScalarEvolution &SE) const { - const SCEV* Result = getStart(); +const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, + ScalarEvolution &SE) const { + const SCEV *Result = getStart(); for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { // The computation is correct in the face of overflow provided that the // multiplication is performed _after_ the evaluation of the binomial // coefficient. 
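BinomialCoefficient evaluates BC(It, K) = It*(It-1)*...*(It-K+1) / K! at a width where the division is exact. The underlying divisibility fact, that a product of j consecutive integers is divisible by j!, also permits an overflow-conscious evaluation on plain machine integers; a hedged sketch:

#include <cassert>
#include <cstdint>

// BC(N, K) with interleaved multiply/divide: after multiplying in the
// j-th factor the running value equals C(N, j) * j, so dividing by j is
// exact at every step and intermediates stay small.
static uint64_t choose(uint64_t N, unsigned K) {
  uint64_t R = 1;
  for (unsigned J = 1; J <= K; ++J)
    R = R * (N - J + 1) / J;
  return R;
}

int main() {
  assert(choose(10, 3) == 120);
  assert(choose(6, 0) == 1);
  assert(choose(49, 6) == 13983816);   // stays exact step by step
  return 0;
}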
- const SCEV* Coeff = BinomialCoefficient(It, i, SE, getType()); + const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); if (isa<SCEVCouldNotCompute>(Coeff)) return Coeff; @@ -736,14 +726,21 @@ const SCEV* SCEVAddRecExpr::evaluateAtIteration(const SCEV* It, // SCEV Expression folder implementations //===----------------------------------------------------------------------===// -const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op, - const Type *Ty) { +const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && "This is not a truncating conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); + FoldingSetNodeID ID; + ID.AddInteger(scTruncate); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) return getConstant( @@ -763,26 +760,23 @@ const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op, // If the input value is a chrec scev, truncate the chrec's operands. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { - SmallVector<const SCEV*, 4> Operands; + SmallVector<const SCEV *, 4> Operands; for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); return getAddRecExpr(Operands, AddRec->getLoop()); } - FoldingSetNodeID ID; - ID.AddInteger(scTruncate); - ID.AddPointer(Op); - ID.AddPointer(Ty); - void *IP = 0; + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate<SCEVTruncateExpr>(); - new (S) SCEVTruncateExpr(Op, Ty); + new (S) SCEVTruncateExpr(ID, Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } -const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op, - const Type *Ty) { +const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, + const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -801,12 +795,33 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op, if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty); + // Before doing any expensive analysis, check to see if we've already + // computed a SCEV for this Op and Ty. + FoldingSetNodeID ID; + ID.AddInteger(scZeroExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can zero extend all of the // operands (often constants). This allows analysis of something like // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) if (AR->isAffine()) { + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // If we have special knowledge that this addrec won't overflow, + // we don't need to do any further analysis. 
+      if (AR->hasNoUnsignedWrap())
+        return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+                             getZeroExtendExpr(Step, Ty),
+                             L);
+
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
       // simply not analyzable, and it covers the case where this code is
@@ -815,28 +830,25 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
       // in infinite recursion. In the latter case, the analysis code will
       // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
-      const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
+      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
         // Manually compute the final value for AR, checking for
         // overflow.
-        const SCEV* Start = AR->getStart();
-        const SCEV* Step = AR->getStepRecurrence(*this);
 
         // Check whether the backedge-taken count can be losslessly cast to
         // the addrec's type. The count is always unsigned.
-        const SCEV* CastedMaxBECount =
+        const SCEV *CastedMaxBECount =
           getTruncateOrZeroExtend(MaxBECount, Start->getType());
-        const SCEV* RecastedMaxBECount =
+        const SCEV *RecastedMaxBECount =
           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
         if (MaxBECount == RecastedMaxBECount) {
-          const Type *WideTy =
-            IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+          const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
           // Check whether Start+Step*MaxBECount has no unsigned overflow.
-          const SCEV* ZMul =
+          const SCEV *ZMul =
             getMulExpr(CastedMaxBECount,
                        getTruncateOrZeroExtend(Step, Start->getType()));
-          const SCEV* Add = getAddExpr(Start, ZMul);
-          const SCEV* OperandExtendedAdd =
+          const SCEV *Add = getAddExpr(Start, ZMul);
+          const SCEV *OperandExtendedAdd =
             getAddExpr(getZeroExtendExpr(Start, WideTy),
                        getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                   getZeroExtendExpr(Step, WideTy)));
@@ -844,11 +856,11 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                  getZeroExtendExpr(Step, Ty),
-                                 AR->getLoop());
+                                 L);
 
           // Similar to above, only this time treat the step value as signed.
           // This covers loops that count down.
-          const SCEV* SMul =
+          const SCEV *SMul =
             getMulExpr(CastedMaxBECount,
                        getTruncateOrSignExtend(Step, Start->getType()));
           Add = getAddExpr(Start, SMul);
@@ -860,25 +872,50 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                  getSignExtendExpr(Step, Ty),
-                                 AR->getLoop());
+                                 L);
+        }
+
+        // If the backedge is guarded by a comparison with the pre-inc value,
+        // the addrec is safe. Also, if the entry is guarded by a comparison
+        // with the start value and the backedge is guarded by a comparison
+        // with the post-inc value, the addrec is safe.
+        if (isKnownPositive(Step)) {
+          const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+                                      getUnsignedRange(Step).getUnsignedMax());
+          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+              (isLoopGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
+                                           AR->getPostIncExpr(*this), N)))
+            // Return the expression with the addrec on the outside.
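The overflow test above recomputes Start + Step*MaxBECount at twice the bit width: for a non-negative step, the zero-extended narrow result matches the wide recomputation exactly when no iteration wrapped, and the zext can then be pushed inside the addrec. The same check on 8-bit values, as a minimal sketch (types and constants are illustrative):

#include <cassert>
#include <cstdint>

// Does the 8-bit addrec {Start,+,Step} stay in range for all trip counts
// up to MaxBECount? Redo the arithmetic in 16 bits and compare.
static bool zextIsSafe(uint8_t Start, uint8_t Step, uint8_t MaxBECount) {
  uint8_t Narrow = uint8_t(Start + Step * MaxBECount);            // may wrap
  uint16_t Wide = uint16_t(Start) + uint16_t(Step) * uint16_t(MaxBECount);
  return uint16_t(Narrow) == Wide;   // equal only if nothing wrapped
}

int main() {
  assert(zextIsSafe(0, 1, 99));      // for (uint8_t X = 0; X < 100; ++X)
  assert(!zextIsSafe(0, 3, 100));    // 300 wraps to 44 in 8 bits
  return 0;
}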
+            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+                                 getZeroExtendExpr(Step, Ty),
+                                 L);
+        } else if (isKnownNegative(Step)) {
+          const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
+                                      getSignedRange(Step).getSignedMin());
+          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) &&
+              (isLoopGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) ||
+               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
+                                           AR->getPostIncExpr(*this), N)))
+            // Return the expression with the addrec on the outside.
+            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+                                 getSignExtendExpr(Step, Ty),
+                                 L);
+        }
       }
     }
 
-  FoldingSetNodeID ID;
-  ID.AddInteger(scZeroExtend);
-  ID.AddPointer(Op);
-  ID.AddPointer(Ty);
-  void *IP = 0;
+  // The cast wasn't folded; create an explicit cast node.
+  // Recompute the insert position, as it may have been invalidated.
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   SCEV *S = SCEVAllocator.Allocate<SCEVZeroExtendExpr>();
-  new (S) SCEVZeroExtendExpr(Op, Ty);
+  new (S) SCEVZeroExtendExpr(ID, Op, Ty);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
 
-const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
-                                               const Type *Ty) {
+const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
+                                               const Type *Ty) {
   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
          "This is not an extending conversion!");
   assert(isSCEVable(Ty) &&
@@ -897,12 +934,33 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
     return getSignExtendExpr(SS->getOperand(), Ty);
 
+  // Before doing any expensive analysis, check to see if we've already
+  // computed a SCEV for this Op and Ty.
+  FoldingSetNodeID ID;
+  ID.AddInteger(scSignExtend);
+  ID.AddPointer(Op);
+  ID.AddPointer(Ty);
+  void *IP = 0;
+  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
   // If the input value is a chrec scev, and we can prove that the value
   // did not overflow the old, smaller, value, we can sign extend all of the
   // operands (often constants). This allows analysis of something like
   // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
     if (AR->isAffine()) {
+      const SCEV *Start = AR->getStart();
+      const SCEV *Step = AR->getStepRecurrence(*this);
+      unsigned BitWidth = getTypeSizeInBits(AR->getType());
+      const Loop *L = AR->getLoop();
+
+      // If we have special knowledge that this addrec won't overflow,
+      // we don't need to do any further analysis.
+      if (AR->hasNoSignedWrap())
+        return getAddRecExpr(getSignExtendExpr(Start, Ty),
+                             getSignExtendExpr(Step, Ty),
+                             L);
+
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
       // simply not analyzable, and it covers the case where this code is
@@ -911,28 +969,25 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
       // in infinite recursion. In the latter case, the analysis code will
       // cope with a conservative value, and it will take care to purge
       // that value once it has finished.
-      const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
+      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
         // Manually compute the final value for AR, checking for
         // overflow.
-        const SCEV* Start = AR->getStart();
-        const SCEV* Step = AR->getStepRecurrence(*this);
 
         // Check whether the backedge-taken count can be losslessly cast to
        // the addrec's type.
The count is always unsigned. - const SCEV* CastedMaxBECount = + const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType()); - const SCEV* RecastedMaxBECount = + const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { - const Type *WideTy = - IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); + const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. - const SCEV* SMul = + const SCEV *SMul = getMulExpr(CastedMaxBECount, getTruncateOrSignExtend(Step, Start->getType())); - const SCEV* Add = getAddExpr(Start, SMul); - const SCEV* OperandExtendedAdd = + const SCEV *Add = getAddExpr(Start, SMul); + const SCEV *OperandExtendedAdd = getAddExpr(getSignExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getSignExtendExpr(Step, WideTy))); @@ -940,19 +995,60 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - AR->getLoop()); + L); + + // Similar to above, only this time treat the step value as unsigned. + // This covers loops that count up with an unsigned step. + const SCEV *UMul = + getMulExpr(CastedMaxBECount, + getTruncateOrZeroExtend(Step, Start->getType())); + Add = getAddExpr(Start, UMul); + OperandExtendedAdd = + getAddExpr(getSignExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + } + + // If the backedge is guarded by a comparison with the pre-inc value + // the addrec is safe. Also, if the entry is guarded by a comparison + // with the start value and the backedge is guarded by a comparison + // with the post-inc value, the addrec is safe. + if (isKnownPositive(Step)) { + const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) - + getSignedRange(Step).getSignedMax()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || + (isLoopGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } else if (isKnownNegative(Step)) { + const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - + getSignedRange(Step).getSignedMin()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || + (isLoopGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); } } } - FoldingSetNodeID ID; - ID.AddInteger(scSignExtend); - ID.AddPointer(Op); - ID.AddPointer(Ty); - void *IP = 0; + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. 
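This find-or-insert discipline is classic hash-consing: probe the unique table with a structural key before doing any folding work, and re-probe afterwards because recursive calls may have created the node and moved the insertion point in the meantime. A map-based sketch of the idea (deliberately simplified; this is not the FoldingSet API):

#include <cassert>
#include <map>
#include <memory>
#include <utility>
#include <vector>

// A miniature uniquing table: the key encodes a node's kind and operands,
// so structurally equal requests hand back the same pointer, and pointer
// equality doubles as structural equality.
struct Expr {
  int Kind;
  std::vector<const Expr *> Ops;
};

typedef std::pair<int, std::vector<const Expr *> > Key;

static const Expr *getExpr(int Kind, const std::vector<const Expr *> &Ops) {
  static std::map<Key, std::unique_ptr<Expr> > Unique;
  Key K(Kind, Ops);
  std::map<Key, std::unique_ptr<Expr> >::iterator It = Unique.find(K);
  if (It != Unique.end())
    return It->second.get();                 // probe before building
  std::unique_ptr<Expr> Node(new Expr{Kind, Ops});
  const Expr *Raw = Node.get();
  Unique.emplace(std::move(K), std::move(Node));
  return Raw;
}

int main() {
  std::vector<const Expr *> None;
  const Expr *A = getExpr(1, None);
  const Expr *B = getExpr(1, None);
  assert(A == B);                            // same shape, same node
  std::vector<const Expr *> One(1, A);
  assert(getExpr(2, One) != getExpr(3, One));
  return 0;
}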
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate<SCEVSignExtendExpr>(); - new (S) SCEVSignExtendExpr(Op, Ty); + new (S) SCEVSignExtendExpr(ID, Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -960,8 +1056,8 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, /// getAnyExtendExpr - Return a SCEV for the given operand extended with /// unspecified bits out to the given type. /// -const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op, - const Type *Ty) { +const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, + const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -975,19 +1071,19 @@ const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op, // Peel off a truncate cast. if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) { - const SCEV* NewOp = T->getOperand(); + const SCEV *NewOp = T->getOperand(); if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) return getAnyExtendExpr(NewOp, Ty); return getTruncateOrNoop(NewOp, Ty); } // Next try a zext cast. If the cast is folded, use it. - const SCEV* ZExt = getZeroExtendExpr(Op, Ty); + const SCEV *ZExt = getZeroExtendExpr(Op, Ty); if (!isa<SCEVZeroExtendExpr>(ZExt)) return ZExt; // Next try a sext cast. If the cast is folded, use it. - const SCEV* SExt = getSignExtendExpr(Op, Ty); + const SCEV *SExt = getSignExtendExpr(Op, Ty); if (!isa<SCEVSignExtendExpr>(SExt)) return SExt; @@ -1025,10 +1121,10 @@ const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op, /// is also used as a check to avoid infinite recursion. /// static bool -CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M, - SmallVector<const SCEV*, 8> &NewOps, +CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, + SmallVector<const SCEV *, 8> &NewOps, APInt &AccumulatedConstant, - const SmallVectorImpl<const SCEV*> &Ops, + const SmallVectorImpl<const SCEV *> &Ops, const APInt &Scale, ScalarEvolution &SE) { bool Interesting = false; @@ -1049,9 +1145,9 @@ CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M, } else { // A multiplication of a constant with some other value. Update // the map. - SmallVector<const SCEV*, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); - const SCEV* Key = SE.getMulExpr(MulOps); - std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair = + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); + const SCEV *Key = SE.getMulExpr(MulOps); + std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = M.insert(std::make_pair(Key, NewScale)); if (Pair.second) { NewOps.push_back(Pair.first->first); @@ -1069,7 +1165,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M, AccumulatedConstant += Scale * C->getValue()->getValue(); } else { // An ordinary operand. Update the map. - std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair = + std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = M.insert(std::make_pair(Ops[i], Scale)); if (Pair.second) { NewOps.push_back(Pair.first->first); @@ -1095,7 +1191,8 @@ namespace { /// getAddExpr - Get a canonical add expression, or something simpler if /// possible. 
-const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
+const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+                                        bool HasNUW, bool HasNSW) {
   assert(!Ops.empty() && "Cannot get empty add!");
   if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
@@ -1139,13 +1236,13 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
     if (Ops[i] == Ops[i+1]) {      //  X + Y + Y  -->  X + Y*2
       // Found a match, merge the two values into a multiply, and add any
       // remaining values to the result.
-      const SCEV* Two = getIntegerSCEV(2, Ty);
-      const SCEV* Mul = getMulExpr(Ops[i], Two);
+      const SCEV *Two = getIntegerSCEV(2, Ty);
+      const SCEV *Mul = getMulExpr(Ops[i], Two);
       if (Ops.size() == 2) return Mul;
       Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
       Ops.push_back(Mul);
-      return getAddExpr(Ops);
+      return getAddExpr(Ops, HasNUW, HasNSW);
     }
 
   // Check for truncates. If all the operands are truncated from the same
@@ -1156,7 +1253,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
     const Type *DstType = Trunc->getType();
     const Type *SrcType = Trunc->getOperand()->getType();
-    SmallVector<const SCEV*, 8> LargeOps;
+    SmallVector<const SCEV *, 8> LargeOps;
     bool Ok = true;
     // Check all the operands to see if they can be represented in the
     // source type of the truncate.
@@ -1172,7 +1269,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
         // is much more likely to be foldable here.
         LargeOps.push_back(getSignExtendExpr(C, SrcType));
       } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
-        SmallVector<const SCEV*, 8> LargeMulOps;
+        SmallVector<const SCEV *, 8> LargeMulOps;
         for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
           if (const SCEVTruncateExpr *T =
                 dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
@@ -1200,7 +1297,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
     }
     if (Ok) {
       // Evaluate the expression in the larger type.
-      const SCEV* Fold = getAddExpr(LargeOps);
+      const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW);
       // If it folds to something simple, use it. Otherwise, don't.
       if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
         return getTruncateExpr(Fold, DstType);
@@ -1237,16 +1334,16 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
   // operands multiplied by constant values.
   if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
     uint64_t BitWidth = getTypeSizeInBits(Ty);
-    DenseMap<const SCEV*, APInt> M;
-    SmallVector<const SCEV*, 8> NewOps;
+    DenseMap<const SCEV *, APInt> M;
+    SmallVector<const SCEV *, 8> NewOps;
     APInt AccumulatedConstant(BitWidth, 0);
     if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                      Ops, APInt(BitWidth, 1), *this)) {
       // Some interesting folding opportunity is present, so it's worthwhile to
       // re-generate the operands list. Group the operands by constant scale,
       // to avoid multiplying by the same constant scale multiple times.
-      std::map<APInt, SmallVector<const SCEV*, 4>, APIntCompare> MulOpLists;
-      for (SmallVector<const SCEV*, 8>::iterator I = NewOps.begin(),
+      std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+      for (SmallVector<const SCEV *, 8>::iterator I = NewOps.begin(),
            E = NewOps.end(); I != E; ++I)
         MulOpLists[M.find(*I)->second].push_back(*I);
       // Re-generate the operands list.
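CollectAddOperandsWithScales reduces an add's operand list to (term, coefficient) pairs; that is what turns X + Y + Y into X + 2*Y and lets equal terms with opposite scales cancel. A toy version of the accumulation over string terms (illustrative only, not the SCEV types):

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Collect repeated add operands into term -> coefficient pairs, the core
// idea behind CollectAddOperandsWithScales.
static std::map<std::string, long>
collectScales(const std::vector<std::pair<std::string, long> > &Ops) {
  std::map<std::string, long> Scales;
  for (size_t i = 0, e = Ops.size(); i != e; ++i)
    Scales[Ops[i].first] += Ops[i].second;   // merge equal terms
  return Scales;
}

int main() {
  // x + y + y  -->  x + 2*y
  std::vector<std::pair<std::string, long> > Ops;
  Ops.push_back(std::make_pair("x", 1L));
  Ops.push_back(std::make_pair("y", 1L));
  Ops.push_back(std::make_pair("y", 1L));
  std::map<std::string, long> S = collectScales(Ops);
  assert(S["x"] == 1 && S["y"] == 2);
  // 3*z + (-3)*z cancels entirely.
  Ops.clear();
  Ops.push_back(std::make_pair("z", 3L));
  Ops.push_back(std::make_pair("z", -3L));
  assert(collectScales(Ops)["z"] == 0);
  return 0;
}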
@@ -1276,17 +1373,17 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) { for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) { // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) - const SCEV* InnerMul = Mul->getOperand(MulOp == 0); + const SCEV *InnerMul = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { // If the multiply has more than two operands, we must get the // Y*Z term. - SmallVector<const SCEV*, 4> MulOps(Mul->op_begin(), Mul->op_end()); + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul = getMulExpr(MulOps); } - const SCEV* One = getIntegerSCEV(1, Ty); - const SCEV* AddOne = getAddExpr(InnerMul, One); - const SCEV* OuterMul = getMulExpr(AddOne, Ops[AddOp]); + const SCEV *One = getIntegerSCEV(1, Ty); + const SCEV *AddOne = getAddExpr(InnerMul, One); + const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); @@ -1310,22 +1407,22 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) { OMulOp != e; ++OMulOp) if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) - const SCEV* InnerMul1 = Mul->getOperand(MulOp == 0); + const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul1 = getMulExpr(MulOps); } - const SCEV* InnerMul2 = OtherMul->getOperand(OMulOp == 0); + const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(), OtherMul->op_end()); MulOps.erase(MulOps.begin()+OMulOp); InnerMul2 = getMulExpr(MulOps); } - const SCEV* InnerMulSum = getAddExpr(InnerMul1,InnerMul2); - const SCEV* OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2); + const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); @@ -1346,7 +1443,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) { for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { // Scan all of the other operands to this add and add them to the vector if // they are loop invariant w.r.t. the recurrence. - SmallVector<const SCEV*, 8> LIOps; + SmallVector<const SCEV *, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1360,11 +1457,11 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) { // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} LIOps.push_back(AddRec->getStart()); - SmallVector<const SCEV*, 4> AddRecOps(AddRec->op_begin(), + SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), AddRec->op_end()); AddRecOps[0] = getAddExpr(LIOps); - const SCEV* NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); // If all of the other operands were loop invariant, we are done. 
if (Ops.size() == 1) return NewRec; @@ -1396,7 +1493,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) { } NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i)); } - const SCEV* NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop()); + const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop()); if (Ops.size() == 2) return NewAddRec; @@ -1420,16 +1517,19 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) { ID.AddPointer(Ops[i]); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVAddExpr>(); - new (S) SCEVAddExpr(Ops); + SCEVAddExpr *S = SCEVAllocator.Allocate<SCEVAddExpr>(); + new (S) SCEVAddExpr(ID, Ops); UniqueSCEVs.InsertNode(S, IP); + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); return S; } /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. -const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) { +const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, + bool HasNUW, bool HasNSW) { assert(!Ops.empty() && "Cannot get empty mul!"); #ifndef NDEBUG for (unsigned i = 1, e = Ops.size(); i != e; ++i) @@ -1457,7 +1557,8 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) { ++Idx; while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() * + ConstantInt *Fold = ConstantInt::get(getContext(), + LHSC->getValue()->getValue() * RHSC->getValue()->getValue()); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element @@ -1510,7 +1611,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) { for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { // Scan all of the other operands to this mul and add them to the vector if // they are loop invariant w.r.t. the recurrence. - SmallVector<const SCEV*, 8> LIOps; + SmallVector<const SCEV *, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1522,7 +1623,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) { // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} - SmallVector<const SCEV*, 4> NewOps; + SmallVector<const SCEV *, 4> NewOps; NewOps.reserve(AddRec->getNumOperands()); if (LIOps.size() == 1) { const SCEV *Scale = LIOps[0]; @@ -1530,13 +1631,13 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) { NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); } else { for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { - SmallVector<const SCEV*, 4> MulOps(LIOps.begin(), LIOps.end()); + SmallVector<const SCEV *, 4> MulOps(LIOps.begin(), LIOps.end()); MulOps.push_back(AddRec->getOperand(i)); NewOps.push_back(getMulExpr(MulOps)); } } - const SCEV* NewRec = getAddRecExpr(NewOps, AddRec->getLoop()); + const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop()); // If all of the other operands were loop invariant, we are done. 
if (Ops.size() == 1) return NewRec; @@ -1560,14 +1661,14 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) { if (AddRec->getLoop() == OtherAddRec->getLoop()) { // F * G --> {A,+,B} * {C,+,D} --> {A*C,+,F*D + G*B + B*D} const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; - const SCEV* NewStart = getMulExpr(F->getStart(), + const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart()); - const SCEV* B = F->getStepRecurrence(*this); - const SCEV* D = G->getStepRecurrence(*this); - const SCEV* NewStep = getAddExpr(getMulExpr(F, D), + const SCEV *B = F->getStepRecurrence(*this); + const SCEV *D = G->getStepRecurrence(*this); + const SCEV *NewStep = getAddExpr(getMulExpr(F, D), getMulExpr(G, B), getMulExpr(B, D)); - const SCEV* NewAddRec = getAddRecExpr(NewStart, NewStep, + const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, F->getLoop()); if (Ops.size() == 2) return NewAddRec; @@ -1591,14 +1692,16 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) { ID.AddPointer(Ops[i]); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVMulExpr>(); - new (S) SCEVMulExpr(Ops); + SCEVMulExpr *S = SCEVAllocator.Allocate<SCEVMulExpr>(); + new (S) SCEVMulExpr(ID, Ops); UniqueSCEVs.InsertNode(S, IP); + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); return S; } -/// getUDivExpr - Get a canonical multiply expression, or something simpler if -/// possible. +/// getUDivExpr - Get a canonical unsigned division expression, or something +/// simpler if possible. const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, const SCEV *RHS) { assert(getEffectiveSCEVType(LHS->getType()) == @@ -1607,7 +1710,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { if (RHSC->getValue()->equalsInt(1)) - return LHS; // X udiv 1 --> x + return LHS; // X udiv 1 --> x if (RHSC->isZero()) return getIntegerSCEV(0, LHS->getType()); // value is undefined @@ -1622,7 +1725,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (!RHSC->getValue()->getValue().isPowerOf2()) ++MaxShiftAmt; const IntegerType *ExtTy = - IntegerType::get(getTypeSizeInBits(Ty) + MaxShiftAmt); + IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) if (const SCEVConstant *Step = @@ -1633,24 +1736,24 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop())) { - SmallVector<const SCEV*, 4> Operands; + SmallVector<const SCEV *, 4> Operands; for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); return getAddRecExpr(Operands, AR->getLoop()); } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { - SmallVector<const SCEV*, 4> Operands; + SmallVector<const SCEV *, 4> Operands; for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) // Find an operand that's safely divisible. 
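These udiv folds fire only when the division provably loses nothing; distributing an unsigned division blindly is unsound because each operand may round down separately. A two-line numeric check makes the hazard concrete (a hedged sketch, not the SCEV machinery):

#include <cassert>
#include <cstdint>

// (A + B) /u C equals A /u C + B /u C only when no truncation occurs in
// the individual divisions; getUDivExpr guards its folds accordingly.
static bool divDistributes(uint32_t A, uint32_t B, uint32_t C) {
  return (A + B) / C == A / C + B / C;
}

int main() {
  assert(divDistributes(8, 12, 4));    // 20/4 == 2 + 3
  assert(!divDistributes(3, 3, 4));    // 6/4 == 1, but 0 + 0 == 0
  return 0;
}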
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { - const SCEV* Op = M->getOperand(i); - const SCEV* Div = getUDivExpr(Op, RHSC); + const SCEV *Op = M->getOperand(i); + const SCEV *Div = getUDivExpr(Op, RHSC); if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { - const SmallVectorImpl<const SCEV*> &MOperands = M->getOperands(); - Operands = SmallVector<const SCEV*, 4>(MOperands.begin(), + const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands(); + Operands = SmallVector<const SCEV *, 4>(MOperands.begin(), MOperands.end()); Operands[i] = Div; return getMulExpr(Operands); @@ -1659,13 +1762,13 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { - SmallVector<const SCEV*, 4> Operands; + SmallVector<const SCEV *, 4> Operands; for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { Operands.clear(); for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { - const SCEV* Op = getUDivExpr(A->getOperand(i), RHS); + const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); if (isa<SCEVUDivExpr>(Op) || getMulExpr(Op, RHS) != A->getOperand(i)) break; Operands.push_back(Op); @@ -1691,7 +1794,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate<SCEVUDivExpr>(); - new (S) SCEVUDivExpr(LHS, RHS); + new (S) SCEVUDivExpr(ID, LHS, RHS); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -1699,9 +1802,10 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. -const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start, - const SCEV* Step, const Loop *L) { - SmallVector<const SCEV*, 4> Operands; +const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, + const SCEV *Step, const Loop *L, + bool HasNUW, bool HasNSW) { + SmallVector<const SCEV *, 4> Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) if (StepChrec->getLoop() == L) { @@ -1711,14 +1815,15 @@ const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start, } Operands.push_back(Step); - return getAddRecExpr(Operands, L); + return getAddRecExpr(Operands, L, HasNUW, HasNSW); } /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. const SCEV * -ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands, - const Loop *L) { +ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, + const Loop *L, + bool HasNUW, bool HasNSW) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG for (unsigned i = 1, e = Operands.size(); i != e; ++i) @@ -1729,14 +1834,14 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands, if (Operands.back()->isZero()) { Operands.pop_back(); - return getAddRecExpr(Operands, L); // {X,+,0} --> X + return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X } // Canonicalize nested AddRecs in by nesting them in order of loop depth. 
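The round-trip guard in the (A+B)/C case matters: unsigned division only distributes over addition when every summand divides exactly, which is what the multiply-back test above establishes. A small numeric illustration:

#include <cassert>
#include <cstdint>

int main() {
  // Unsigned division does not distribute over addition in general:
  uint64_t A = 6, B = 10, C = 4;
  assert((A + B) / C != A / C + B / C);  // 4 on the left, 3 on the right
  // It does when every summand divides exactly:
  A = 8; B = 12;
  assert(A % C == 0 && B % C == 0);
  assert((A + B) / C == A / C + B / C);  // both sides are 5
  return 0;
}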
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { const Loop* NestedLoop = NestedAR->getLoop(); if (L->getLoopDepth() < NestedLoop->getLoopDepth()) { - SmallVector<const SCEV*, 4> NestedOperands(NestedAR->op_begin(), + SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(), NestedAR->op_end()); Operands[0] = NestedAR->getStart(); // AddRecs require their operands be loop-invariant with respect to their @@ -1758,7 +1863,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands, } if (AllInvariant) // Ok, both add recurrences are valid after the transformation. - return getAddRecExpr(NestedOperands, NestedLoop); + return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW); } // Reset Operands to its original state. Operands[0] = NestedAR; @@ -1773,22 +1878,24 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands, ID.AddPointer(L); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVAddRecExpr>(); - new (S) SCEVAddRecExpr(Operands, L); + SCEVAddRecExpr *S = SCEVAllocator.Allocate<SCEVAddRecExpr>(); + new (S) SCEVAddRecExpr(ID, Operands, L); UniqueSCEVs.InsertNode(S, IP); + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); return S; } const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { - SmallVector<const SCEV*, 2> Ops; + SmallVector<const SCEV *, 2> Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getSMaxExpr(Ops); } -const SCEV* -ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { +const SCEV * +ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { assert(!Ops.empty() && "Cannot get empty smax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1808,7 +1915,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get( + ConstantInt *Fold = ConstantInt::get(getContext(), APIntOps::smax(LHSC->getValue()->getValue(), RHSC->getValue()->getValue())); Ops[0] = getConstant(Fold); @@ -1871,21 +1978,21 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate<SCEVSMaxExpr>(); - new (S) SCEVSMaxExpr(Ops); + new (S) SCEVSMaxExpr(ID, Ops); UniqueSCEVs.InsertNode(S, IP); return S; } const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { - SmallVector<const SCEV*, 2> Ops; + SmallVector<const SCEV *, 2> Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getUMaxExpr(Ops); } -const SCEV* -ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { +const SCEV * +ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { assert(!Ops.empty() && "Cannot get empty umax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1905,7 +2012,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { // We found two constants, fold them together! 
- ConstantInt *Fold = ConstantInt::get( + ConstantInt *Fold = ConstantInt::get(getContext(), APIntOps::umax(LHSC->getValue()->getValue(), RHSC->getValue()->getValue())); Ops[0] = getConstant(Fold); @@ -1968,7 +2075,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate<SCEVUMaxExpr>(); - new (S) SCEVUMaxExpr(Ops); + new (S) SCEVUMaxExpr(ID, Ops); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -1985,7 +2092,77 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -const SCEV* ScalarEvolution::getUnknown(Value *V) { +const SCEV *ScalarEvolution::getFieldOffsetExpr(const StructType *STy, + unsigned FieldNo) { + // If we have TargetData we can determine the constant offset. + if (TD) { + const Type *IntPtrTy = TD->getIntPtrType(getContext()); + const StructLayout &SL = *TD->getStructLayout(STy); + uint64_t Offset = SL.getElementOffset(FieldNo); + return getIntegerSCEV(Offset, IntPtrTy); + } + + // Field 0 is always at offset 0. + if (FieldNo == 0) { + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); + return getIntegerSCEV(0, Ty); + } + + // Okay, it looks like we really DO need an offsetof expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scFieldOffset); + ID.AddPointer(STy); + ID.AddInteger(FieldNo); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVFieldOffsetExpr>(); + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); + new (S) SCEVFieldOffsetExpr(ID, Ty, STy, FieldNo); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getAllocSizeExpr(const Type *AllocTy) { + // If we have TargetData we can determine the constant size. + if (TD && AllocTy->isSized()) { + const Type *IntPtrTy = TD->getIntPtrType(getContext()); + return getIntegerSCEV(TD->getTypeAllocSize(AllocTy), IntPtrTy); + } + + // Expand an array size into the element size times the number + // of elements. + if (const ArrayType *ATy = dyn_cast<ArrayType>(AllocTy)) { + const SCEV *E = getAllocSizeExpr(ATy->getElementType()); + return getMulExpr( + E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()), + ATy->getNumElements()))); + } + + // Expand a vector size into the element size times the number + // of elements. + if (const VectorType *VTy = dyn_cast<VectorType>(AllocTy)) { + const SCEV *E = getAllocSizeExpr(VTy->getElementType()); + return getMulExpr( + E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()), + VTy->getNumElements()))); + } + + // Okay, it looks like we really DO need a sizeof expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scAllocSize); + ID.AddPointer(AllocTy); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVAllocSizeExpr>(); + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + new (S) SCEVAllocSizeExpr(ID, Ty, AllocTy); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getUnknown(Value *V) { // Don't attempt to do anything other than create a SCEVUnknown object // here. 
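getUMinExpr builds umin from umax and bitwise complement, using the fact that x -> ~x reverses the unsigned order. An exhaustive 8-bit check of the identity (standalone, ordinary modular narrowing assumed):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // umin(a,b) == ~umax(~a,~b), checked for all 8-bit pairs.
  for (unsigned a = 0; a != 256; ++a)
    for (unsigned b = 0; b != 256; ++b) {
      uint8_t x = (uint8_t)a, y = (uint8_t)b;
      uint8_t viaUMax = (uint8_t)~std::max<uint8_t>((uint8_t)~x, (uint8_t)~y);
      assert(viaUMax == std::min(x, y));
    }
  return 0;
}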
createSCEV only calls getUnknown after checking for all other // interesting possibilities, and any other code that calls getUnknown @@ -1997,7 +2174,7 @@ const SCEV* ScalarEvolution::getUnknown(Value *V) { void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate<SCEVUnknown>(); - new (S) SCEVUnknown(V); + new (S) SCEVUnknown(ID, V); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -2011,17 +2188,8 @@ const SCEV* ScalarEvolution::getUnknown(Value *V) { /// can optionally include pointer types if the ScalarEvolution class /// has access to target-specific information. bool ScalarEvolution::isSCEVable(const Type *Ty) const { - // Integers are always SCEVable. - if (Ty->isInteger()) - return true; - - // Pointers are SCEVable if TargetData information is available - // to provide pointer size information. - if (isa<PointerType>(Ty)) - return TD != NULL; - - // Otherwise it's not SCEVable. - return false; + // Integers and pointers are always SCEVable. + return Ty->isInteger() || isa<PointerType>(Ty); } /// getTypeSizeInBits - Return the size in bits of the specified type, @@ -2033,9 +2201,14 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { if (TD) return TD->getTypeSizeInBits(Ty); - // Otherwise, we support only integer types. - assert(Ty->isInteger() && "isSCEVable permitted a non-SCEVable type!"); - return Ty->getPrimitiveSizeInBits(); + // Integer types have fixed sizes. + if (Ty->isInteger()) + return Ty->getPrimitiveSizeInBits(); + + // The only other supported type is pointer. Without TargetData, conservatively + // assume pointers are 64-bit. + assert(isa<PointerType>(Ty) && "isSCEVable permitted a non-SCEVable type!"); + return 64; } /// getEffectiveSCEVType - Return a type with the same bitwidth as @@ -2048,58 +2221,60 @@ const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const { if (Ty->isInteger()) return Ty; + // The only other supported type is pointer. assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!"); - return TD->getIntPtrType(); -} + if (TD) return TD->getIntPtrType(getContext()); -const SCEV* ScalarEvolution::getCouldNotCompute() { - return &CouldNotCompute; + // Without TargetData, conservatively assume pointers are 64-bit. + return Type::getInt64Ty(getContext()); } -/// hasSCEV - Return true if the SCEV for this value has already been -/// computed. -bool ScalarEvolution::hasSCEV(Value *V) const { - return Scalars.count(V); +const SCEV *ScalarEvolution::getCouldNotCompute() { + return &CouldNotCompute; } /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the /// expression and create a new one. -const SCEV* ScalarEvolution::getSCEV(Value *V) { +const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - std::map<SCEVCallbackVH, const SCEV*>::iterator I = Scalars.find(V); + std::map<SCEVCallbackVH, const SCEV *>::iterator I = Scalars.find(V); if (I != Scalars.end()) return I->second; - const SCEV* S = createSCEV(V); + const SCEV *S = createSCEV(V); Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S)); return S; } /// getIntegerSCEV - Given a SCEVable type, create a constant for the /// specified signed integer value and return a SCEV for the constant.
-const SCEV* ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) { +const SCEV *ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) { const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); return getConstant(ConstantInt::get(ITy, Val)); } /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V /// -const SCEV* ScalarEvolution::getNegativeSCEV(const SCEV* V) { +const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) - return getConstant(cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); + return getConstant( + cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); const Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); - return getMulExpr(V, getConstant(ConstantInt::getAllOnesValue(Ty))); + return getMulExpr(V, + getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)))); } /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V -const SCEV* ScalarEvolution::getNotSCEV(const SCEV* V) { +const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) - return getConstant(cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); + return getConstant( + cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); const Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); - const SCEV* AllOnes = getConstant(ConstantInt::getAllOnesValue(Ty)); + const SCEV *AllOnes = + getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))); return getMinusSCEV(AllOnes, V); } @@ -2114,12 +2289,12 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the /// input value to the specified type. If the type must be extended, it is zero /// extended. -const SCEV* -ScalarEvolution::getTruncateOrZeroExtend(const SCEV* V, +const SCEV * +ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && - (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && + assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && + (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion @@ -2131,12 +2306,12 @@ ScalarEvolution::getTruncateOrZeroExtend(const SCEV* V, /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the /// input value to the specified type. If the type must be extended, it is sign /// extended. -const SCEV* -ScalarEvolution::getTruncateOrSignExtend(const SCEV* V, +const SCEV * +ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && - (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && + assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && + (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion @@ -2148,11 +2323,11 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV* V, /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the /// input value to the specified type. If the type must be extended, it is zero /// extended. The conversion must not be narrowing. 
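getNegativeSCEV and getNotSCEV above lean on two's-complement identities rather than dedicated node kinds: -V = -1*V and ~V = -1-V. A standalone spot check over 32-bit values:

#include <cassert>
#include <cstdint>

int main() {
  // Two's-complement identities behind the helpers above:
  //   -V == -1 * V   (getNegativeSCEV)
  //   ~V == -1 - V   (getNotSCEV)
  const uint32_t AllOnes = ~0u; // the all-ones constant, i.e. -1
  for (uint32_t i = 0; i != 1000; ++i) {
    uint32_t V = i * 2654435761u; // arbitrary sample values
    assert(AllOnes * V == 0u - V);
    assert(AllOnes - V == ~V);
  }
  return 0;
}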
-const SCEV* -ScalarEvolution::getNoopOrZeroExtend(const SCEV* V, const Type *Ty) { +const SCEV * +ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && - (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && + assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && + (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot noop or zero extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrZeroExtend cannot truncate!"); @@ -2164,11 +2339,11 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV* V, const Type *Ty) { /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the /// input value to the specified type. If the type must be extended, it is sign /// extended. The conversion must not be narrowing. -const SCEV* -ScalarEvolution::getNoopOrSignExtend(const SCEV* V, const Type *Ty) { +const SCEV * +ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && - (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && + assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && + (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot noop or sign extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrSignExtend cannot truncate!"); @@ -2181,11 +2356,11 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV* V, const Type *Ty) { /// the input value to the specified type. If the type must be extended, /// it is extended with unspecified bits. The conversion must not be /// narrowing. -const SCEV* -ScalarEvolution::getNoopOrAnyExtend(const SCEV* V, const Type *Ty) { +const SCEV * +ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && - (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && + assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && + (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot noop or any extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrAnyExtend cannot truncate!"); @@ -2196,11 +2371,11 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV* V, const Type *Ty) { /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the /// input value to the specified type. The conversion must not be widening. -const SCEV* -ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) { +const SCEV * +ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && - (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && + assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && + (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot truncate or noop with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && "getTruncateOrNoop cannot extend!"); @@ -2214,8 +2389,8 @@ ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) { /// with them. 
const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS) { - const SCEV* PromotedLHS = LHS; - const SCEV* PromotedRHS = RHS; + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); @@ -2230,8 +2405,8 @@ const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, /// with them. const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS) { - const SCEV* PromotedLHS = LHS; - const SCEV* PromotedRHS = RHS; + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); @@ -2241,34 +2416,60 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, return getUMinExpr(PromotedLHS, PromotedRHS); } -/// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for -/// the specified instruction and replaces any references to the symbolic value -/// SymName with the specified value. This is used during PHI resolution. +/// PushDefUseChildren - Push users of the given Instruction +/// onto the given Worklist. +static void +PushDefUseChildren(Instruction *I, + SmallVectorImpl<Instruction *> &Worklist) { + // Push the def-use children onto the Worklist stack. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + Worklist.push_back(cast<Instruction>(UI)); +} + +/// ForgetSymbolicName - This looks up computed SCEV values for all +/// instructions that depend on the given instruction and removes them from +/// the Scalars map if they reference SymName. This is used during PHI +/// resolution. void -ScalarEvolution::ReplaceSymbolicValueWithConcrete(Instruction *I, - const SCEV *SymName, - const SCEV *NewVal) { - std::map<SCEVCallbackVH, const SCEV*>::iterator SI = - Scalars.find(SCEVCallbackVH(I, this)); - if (SI == Scalars.end()) return; +ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) { + SmallVector<Instruction *, 16> Worklist; + PushDefUseChildren(I, Worklist); - const SCEV* NV = - SI->second->replaceSymbolicValuesWithConcrete(SymName, NewVal, *this); - if (NV == SI->second) return; // No change. + SmallPtrSet<Instruction *, 8> Visited; + Visited.insert(I); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; - SI->second = NV; // Update the scalars map! + std::map<SCEVCallbackVH, const SCEV*>::iterator It = + Scalars.find(static_cast<Value *>(I)); + if (It != Scalars.end()) { + // Short-circuit the def-use traversal if the symbolic name + // ceases to appear in expressions. + if (!It->second->hasOperand(SymName)) + continue; + + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, or it's a PHI that's in the process of being computed + // by createNodeForPHI. In the former case, additional loop trip + // count information isn't going to change anything. In the latter + // case, createNodeForPHI will perform the necessary updates on its + // own when it gets to that point. + if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) { + ValuesAtScopes.erase(It->second); + Scalars.erase(It); + } + } - // Any instruction values that use this instruction might also need to be - // updated!
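ForgetSymbolicName is a worklist walk over the def-use graph with a Visited set so cyclic uses terminate. A toy standalone model of that traversal; Node, Users, and Cache are hypothetical stand-ins for Instruction, use lists, and the Scalars map, and the hasOperand short-circuit is omitted for brevity:

#include <cassert>
#include <map>
#include <set>
#include <vector>

using Node = int;
static std::map<Node, std::vector<Node>> Users; // def -> its users
static std::set<Node> Cache;                    // stands in for Scalars

static void forget(Node I) {
  std::vector<Node> Worklist(Users[I].begin(), Users[I].end());
  std::set<Node> Visited{I};
  while (!Worklist.empty()) {
    Node N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue;                       // already visited; cycles are safe
    Cache.erase(N);                   // drop the memoized entry
    const std::vector<Node> &Us = Users[N];
    Worklist.insert(Worklist.end(), Us.begin(), Us.end());
  }
}

int main() {
  Users[1] = {2, 3}; Users[2] = {3}; Users[3] = {2}; // includes a cycle
  Cache = {1, 2, 3};
  forget(1); // invalidate everything reachable from node 1's users
  assert(Cache.count(1) && !Cache.count(2) && !Cache.count(3));
  return 0;
}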
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) - ReplaceSymbolicValueWithConcrete(cast<Instruction>(*UI), SymName, NewVal); + PushDefUseChildren(I, Worklist); + } } /// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in /// a loop header, making it a potential recurrence, or it doesn't. /// -const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { +const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (PN->getNumIncomingValues() == 2) // The loops have been canonicalized. if (const Loop *L = LI->getLoopFor(PN->getParent())) if (L->getHeader() == PN->getParent()) { @@ -2278,14 +2479,15 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { unsigned BackEdge = IncomingEdge^1; // While we are analyzing this PHI node, handle its value symbolically. - const SCEV* SymbolicName = getUnknown(PN); + const SCEV *SymbolicName = getUnknown(PN); assert(Scalars.find(PN) == Scalars.end() && "PHI node already processed?"); Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); // Using this symbolic name for the PHI, analyze the value coming around // the back-edge. - const SCEV* BEValue = getSCEV(PN->getIncomingValue(BackEdge)); + Value *BEValueV = PN->getIncomingValue(BackEdge); + const SCEV *BEValue = getSCEV(BEValueV); // NOTE: If BEValue is loop invariant, we know that the PHI node just // has a special value for the first iteration of the loop. @@ -2305,11 +2507,11 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { if (FoundIndex != Add->getNumOperands()) { // Create an add with everything but the specified operand. - SmallVector<const SCEV*, 8> Ops; + SmallVector<const SCEV *, 8> Ops; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if (i != FoundIndex) Ops.push_back(Add->getOperand(i)); - const SCEV* Accum = getAddExpr(Ops); + const SCEV *Accum = getAddExpr(Ops); // This is not a valid addrec if the step amount is varying each // loop iteration, but is not itself an addrec in this loop. @@ -2318,15 +2520,35 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); - const SCEV *PHISCEV = - getAddRecExpr(StartVal, Accum, L); + const SCEVAddRecExpr *PHISCEV = + cast<SCEVAddRecExpr>(getAddRecExpr(StartVal, Accum, L)); + + // If the increment doesn't overflow, then neither the addrec nor the + // post-increment will overflow. + if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) + if (OBO->getOperand(0) == PN && + getSCEV(OBO->getOperand(1)) == + PHISCEV->getStepRecurrence(*this)) { + const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this); + if (OBO->hasNoUnsignedWrap()) { + const_cast<SCEVAddRecExpr *>(PHISCEV) + ->setHasNoUnsignedWrap(true); + const_cast<SCEVAddRecExpr *>(PostInc) + ->setHasNoUnsignedWrap(true); + } + if (OBO->hasNoSignedWrap()) { + const_cast<SCEVAddRecExpr *>(PHISCEV) + ->setHasNoSignedWrap(true); + const_cast<SCEVAddRecExpr *>(PostInc) + ->setHasNoSignedWrap(true); + } + } // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and update all of the - // entries for the scalars that use the PHI (except for the PHI - // itself) to use the new analyzed value instead of the "symbolic" - // value. - ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV); + // to be symbolic. 
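The pattern recognized here, a header PHI whose back-edge value adds a loop-invariant step to the PHI itself, is exactly the affine recurrence {Start,+,Step}. A standalone simulation:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Start = 11, Step = 4;
  uint64_t Phi = Start;               // value of the PHI in the header
  for (uint64_t n = 0; n != 50; ++n) {
    assert(Phi == Start + n * Step);  // {Start,+,Step} at iteration n
    Phi += Step;                      // value coming around the back-edge
  }
  return 0;
}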
We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + Scalars[SCEVCallbackVH(PN, this)] = PHISCEV; return PHISCEV; } } @@ -2338,21 +2560,20 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { // Because the other in-value of i (0) fits the evolution of BEValue // i really is an addrec evolution. if (AddRec->getLoop() == L && AddRec->isAffine()) { - const SCEV* StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); + const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); // If StartVal = j.start - j.stride, we can use StartVal as the // initial step of the addrec evolution. if (StartVal == getMinusSCEV(AddRec->getOperand(0), AddRec->getOperand(1))) { - const SCEV* PHISCEV = + const SCEV *PHISCEV = getAddRecExpr(StartVal, AddRec->getOperand(1), L); // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and update all of the - // entries for the scalars that use the PHI (except for the PHI - // itself) to use the new analyzed value instead of the "symbolic" - // value. - ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV); + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + Scalars[SCEVCallbackVH(PN, this)] = PHISCEV; return PHISCEV; } } @@ -2361,6 +2582,10 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { return SymbolicName; } + // It's tempting to recognize PHIs with a unique incoming value, however + // this leads passes like indvars to break LCSSA form. Fortunately, such + // PHIs are rare, as instcombine zaps them. + // If it's not a loop phi, we can't handle it yet. return getUnknown(PN); } @@ -2368,14 +2593,14 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { /// createNodeForGEP - Expand GEP instructions into add and multiply /// operations. This allows them to be analyzed by regular SCEV code. /// -const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) { +const SCEV *ScalarEvolution::createNodeForGEP(Operator *GEP) { - const Type *IntPtrTy = TD->getIntPtrType(); + const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); Value *Base = GEP->getOperand(0); // Don't attempt to analyze GEPs over unsized objects. if (!cast<PointerType>(Base->getType())->getElementType()->isSized()) return getUnknown(GEP); - const SCEV* TotalOffset = getIntegerSCEV(0, IntPtrTy); + const SCEV *TotalOffset = getIntegerSCEV(0, IntPtrTy); gep_type_iterator GTI = gep_type_begin(GEP); for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()), E = GEP->op_end(); @@ -2384,22 +2609,16 @@ const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) { // Compute the (potentially symbolic) offset in bytes for this index. if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. - const StructLayout &SL = *TD->getStructLayout(STy); unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); - uint64_t Offset = SL.getElementOffset(FieldNo); TotalOffset = getAddExpr(TotalOffset, - getIntegerSCEV(Offset, IntPtrTy)); + getFieldOffsetExpr(STy, FieldNo)); } else { // For an array, add the element offset, explicitly scaled. - const SCEV* LocalOffset = getSCEV(Index); + const SCEV *LocalOffset = getSCEV(Index); if (!isa<PointerType>(LocalOffset->getType())) // Getelementptr indicies are signed. 
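createNodeForGEP lowers a GEP to base plus field offsets plus scaled array indices. The same arithmetic done concretely in host C++, with a made-up struct; offsetof supplies what getFieldOffsetExpr computes symbolically and sizeof what getAllocSizeExpr does:

#include <cassert>
#include <cstddef>
#include <cstdint>

struct S { int32_t a; int64_t b[4]; }; // an arbitrary example struct

int main() {
  // A GEP to s.b[2] decomposes into base + field offset + index*size.
  S s{};
  uintptr_t ByHand = reinterpret_cast<uintptr_t>(&s) +
                     offsetof(S, b) + 2 * sizeof(int64_t);
  assert(ByHand == reinterpret_cast<uintptr_t>(&s.b[2]));
  return 0;
}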
- LocalOffset = getTruncateOrSignExtend(LocalOffset, - IntPtrTy); - LocalOffset = - getMulExpr(LocalOffset, - getIntegerSCEV(TD->getTypeAllocSize(*GTI), - IntPtrTy)); + LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy); + LocalOffset = getMulExpr(LocalOffset, getAllocSizeExpr(*GTI)); TotalOffset = getAddExpr(TotalOffset, LocalOffset); } } @@ -2411,7 +2630,7 @@ const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) { /// the minimum number of times S is divisible by 2. For example, given {4,+,8} /// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S. uint32_t -ScalarEvolution::GetMinTrailingZeros(const SCEV* S) { +ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) return C->getValue()->getValue().countTrailingZeros(); @@ -2487,18 +2706,100 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV* S) { return 0; } -uint32_t -ScalarEvolution::GetMinLeadingZeros(const SCEV* S) { - // TODO: Handle other SCEV expression types here. +/// getUnsignedRange - Determine the unsigned range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) - return C->getValue()->getValue().countLeadingZeros(); + return ConstantRange(C->getValue()->getValue()); + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + ConstantRange X = getUnsignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getUnsignedRange(Add->getOperand(i))); + return X; + } + + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + ConstantRange X = getUnsignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getUnsignedRange(Mul->getOperand(i))); + return X; + } + + if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { + ConstantRange X = getUnsignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getUnsignedRange(SMax->getOperand(i))); + return X; + } + + if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { + ConstantRange X = getUnsignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getUnsignedRange(UMax->getOperand(i))); + return X; + } - if (const SCEVZeroExtendExpr *C = dyn_cast<SCEVZeroExtendExpr>(S)) { - // A zero-extension cast adds zero bits. 
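The {4,+,8} example in the comment above is easy to confirm, as is the additivity the multiply case depends on, ctz(a*b) >= ctz(a) + ctz(b). A standalone check with a naive trailing-zero counter:

#include <cassert>
#include <cstdint>

static unsigned ctz(uint32_t V) { // naive trailing-zero count, ctz(0)=32
  if (V == 0) return 32;
  unsigned N = 0;
  while (!(V & 1)) { V >>= 1; ++N; }
  return N;
}

int main() {
  // Every element of {4,+,8}, i.e. 4, 12, 20, ..., is 4 mod 8,
  // so it has at least 2 trailing zero bits.
  for (uint32_t n = 0; n != 64; ++n)
    assert(ctz(4 + 8 * n) >= 2);
  // For products, trailing zeros accumulate.
  assert(ctz(12 * 40) >= ctz(12) + ctz(40));
  return 0;
}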
- return GetMinLeadingZeros(C->getOperand()) + - (getTypeSizeInBits(C->getType()) - - getTypeSizeInBits(C->getOperand()->getType())); + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { + ConstantRange X = getUnsignedRange(UDiv->getLHS()); + ConstantRange Y = getUnsignedRange(UDiv->getRHS()); + return X.udiv(Y); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { + ConstantRange X = getUnsignedRange(ZExt->getOperand()); + return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth()); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { + ConstantRange X = getUnsignedRange(SExt->getOperand()); + return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth()); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { + ConstantRange X = getUnsignedRange(Trunc->getOperand()); + return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth()); + } + + ConstantRange FullSet(getTypeSizeInBits(S->getType()), true); + + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEV *T = getBackedgeTakenCount(AddRec->getLoop()); + const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T); + if (!Trip) return FullSet; + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + const Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); + + // Check for overflow. + // TODO: This is very conservative. + if (!(Step->isOne() && + isKnownPredicate(ICmpInst::ICMP_ULT, Start, End)) && + !(Step->isAllOnesValue() && + isKnownPredicate(ICmpInst::ICMP_UGT, Start, End))) + return FullSet; + + ConstantRange StartRange = getUnsignedRange(Start); + ConstantRange EndRange = getUnsignedRange(End); + APInt Min = APIntOps::umin(StartRange.getUnsignedMin(), + EndRange.getUnsignedMin()); + APInt Max = APIntOps::umax(StartRange.getUnsignedMax(), + EndRange.getUnsignedMax()); + if (Min.isMinValue() && Max.isMaxValue()) + return FullSet; + return ConstantRange(Min, Max+1); + } + } } if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { @@ -2507,73 +2808,128 @@ ScalarEvolution::GetMinLeadingZeros(const SCEV* S) { APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); - return Zeros.countLeadingOnes(); + if (Ones == ~Zeros + 1) + return FullSet; + return ConstantRange(Ones, ~Zeros + 1); } - return 1; + return FullSet; } -uint32_t -ScalarEvolution::GetMinSignBits(const SCEV* S) { - // TODO: Handle other SCEV expression types here. +/// getSignedRange - Determine the signed range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getSignedRange(const SCEV *S) { - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { - const APInt &A = C->getValue()->getValue(); - return A.isNegative() ? 
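The addrec case only trusts the start/end bound after ruling out wraparound (a unit step plus a ULT or UGT proof); under those conditions every intermediate value lies between the endpoints. Numerically, for a non-wrapping {10,+,1}:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Start = 10, Trips = 90; // {10,+,1} with 90 backedges
  const uint32_t End = Start + Trips;    // value after the last backedge
  for (uint32_t n = 0; n <= Trips; ++n) {
    uint32_t V = Start + n;
    assert(std::min(Start, End) <= V && V <= std::max(Start, End));
  }
  return 0;
}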
A.countLeadingOnes() : - A.countLeadingZeros(); + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return ConstantRange(C->getValue()->getValue()); + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + ConstantRange X = getSignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getSignedRange(Add->getOperand(i))); + return X; } - if (const SCEVSignExtendExpr *C = dyn_cast<SCEVSignExtendExpr>(S)) { - // A sign-extension cast adds sign bits. - return GetMinSignBits(C->getOperand()) + - (getTypeSizeInBits(C->getType()) - - getTypeSizeInBits(C->getOperand()->getType())); + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + ConstantRange X = getSignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getSignedRange(Mul->getOperand(i))); + return X; } - if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { - unsigned BitWidth = getTypeSizeInBits(A->getType()); - - // Special case decrementing a value (ADD X, -1): - if (const SCEVConstant *CRHS = dyn_cast<SCEVConstant>(A->getOperand(0))) - if (CRHS->isAllOnesValue()) { - SmallVector<const SCEV *, 4> OtherOps(A->op_begin() + 1, A->op_end()); - const SCEV *OtherOpsAdd = getAddExpr(OtherOps); - unsigned LZ = GetMinLeadingZeros(OtherOpsAdd); - - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if (LZ == BitWidth - 1) - return BitWidth; - - // If we are subtracting one from a positive number, there is no carry - // out of the result. - if (LZ > 0) - return GetMinSignBits(OtherOpsAdd); - } + if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { + ConstantRange X = getSignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getSignedRange(SMax->getOperand(i))); + return X; + } + + if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { + ConstantRange X = getSignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getSignedRange(UMax->getOperand(i))); + return X; + } + + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { + ConstantRange X = getSignedRange(UDiv->getLHS()); + ConstantRange Y = getSignedRange(UDiv->getRHS()); + return X.udiv(Y); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { + ConstantRange X = getSignedRange(ZExt->getOperand()); + return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth()); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { + ConstantRange X = getSignedRange(SExt->getOperand()); + return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth()); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { + ConstantRange X = getSignedRange(Trunc->getOperand()); + return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth()); + } - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. 
- unsigned Min = BitWidth; - for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { - unsigned N = GetMinSignBits(A->getOperand(i)); - Min = std::min(Min, N) - 1; - if (Min == 0) return 1; + ConstantRange FullSet(getTypeSizeInBits(S->getType()), true); + + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEV *T = getBackedgeTakenCount(AddRec->getLoop()); + const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T); + if (!Trip) return FullSet; + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + const Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); + + // Check for overflow. + // TODO: This is very conservative. + if (!(Step->isOne() && + isKnownPredicate(ICmpInst::ICMP_SLT, Start, End)) && + !(Step->isAllOnesValue() && + isKnownPredicate(ICmpInst::ICMP_SGT, Start, End))) + return FullSet; + + ConstantRange StartRange = getSignedRange(Start); + ConstantRange EndRange = getSignedRange(End); + APInt Min = APIntOps::smin(StartRange.getSignedMin(), + EndRange.getSignedMin()); + APInt Max = APIntOps::smax(StartRange.getSignedMax(), + EndRange.getSignedMax()); + if (Min.isMinSignedValue() && Max.isMaxSignedValue()) + return FullSet; + return ConstantRange(Min, Max+1); + } } - return 1; } if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - return ComputeNumSignBits(U->getValue(), TD); + unsigned BitWidth = getTypeSizeInBits(U->getType()); + unsigned NS = ComputeNumSignBits(U->getValue(), TD); + if (NS == 1) + return FullSet; + return + ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1); } - return 1; + return FullSet; } /// createSCEV - We know that there is no SCEV for the specified value. /// Analyze the expression. /// -const SCEV* ScalarEvolution::createSCEV(Value *V) { +const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) return getUnknown(V); @@ -2588,15 +2944,23 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { return getIntegerSCEV(0, V->getType()); else if (isa<UndefValue>(V)) return getIntegerSCEV(0, V->getType()); + else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) + return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee()); else return getUnknown(V); - User *U = cast<User>(V); + Operator *U = cast<Operator>(V); switch (Opcode) { case Instruction::Add: + // Don't transfer the NSW and NUW bits from the Add instruction to the + // Add expression, because the Instruction may be guarded by control + // flow and the no-overflow bits may not be valid for the expression in + // any context. return getAddExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::Mul: + // Don't transfer the NSW and NUW bits from the Mul instruction to the + // Mul expression, as with Add. 
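The SCEVUnknown fallback converts a known count of sign bits into a signed interval. A standalone check for NS = 25 sign bits in a 32-bit value (arithmetic right shift on negatives assumed, as C++20 guarantees and practice provides):

#include <cassert>
#include <cstdint>

int main() {
  // "At least NS sign bits" becomes the interval
  // [SignedMin >> (NS-1), (SignedMax >> (NS-1)) + 1).
  // For NS = 25 of 32 bits that is exactly [-128, 128).
  const unsigned NS = 25;
  int32_t Lo = INT32_MIN >> (NS - 1);
  int32_t Hi = INT32_MAX >> (NS - 1);
  assert(Lo == -128 && Hi == 127);
  for (int32_t v = -128; v != 128; ++v) // every value with 25 sign bits
    assert(Lo <= v && v <= Hi);
  return 0;
}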
return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::UDiv: @@ -2630,7 +2994,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) return getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), - IntegerType::get(BitWidth - LZ)), + IntegerType::get(getContext(), BitWidth - LZ)), U->getType()); } break; @@ -2643,11 +3007,23 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { // In order for this transformation to be safe, the LHS must be of the // form X*(2^n) and the Or constant must be less than 2^n. if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { - const SCEV* LHS = getSCEV(U->getOperand(0)); + const SCEV *LHS = getSCEV(U->getOperand(0)); const APInt &CIVal = CI->getValue(); if (GetMinTrailingZeros(LHS) >= - (CIVal.getBitWidth() - CIVal.countLeadingZeros())) - return getAddExpr(LHS, getSCEV(U->getOperand(1))); + (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { + // Build a plain add SCEV. + const SCEV *S = getAddExpr(LHS, getSCEV(CI)); + // If the LHS of the add was an addrec and it has no-wrap flags, + // transfer the no-wrap flags, since an or won't introduce a wrap. + if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); + if (OldAR->hasNoUnsignedWrap()) + const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoUnsignedWrap(true); + if (OldAR->hasNoSignedWrap()) + const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoSignedWrap(true); + } + return S; + } } break; case Instruction::Xor: @@ -2673,7 +3049,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { const Type *UTy = U->getType(); - const SCEV* Z0 = Z->getOperand(); + const SCEV *Z0 = Z->getOperand(); const Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); @@ -2699,7 +3075,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { // Turn shift left of a constant amount into a multiply. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); - Constant *X = ConstantInt::get( + Constant *X = ConstantInt::get(getContext(), APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } @@ -2709,7 +3085,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { // Turn logical shift right of a constant into a unsigned divide. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); - Constant *X = ConstantInt::get( + Constant *X = ConstantInt::get(getContext(), APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } @@ -2729,7 +3105,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { return getIntegerSCEV(0, U->getType()); // value is undefined return getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)), - IntegerType::get(Amt)), + IntegerType::get(getContext(), Amt)), U->getType()); } break; @@ -2749,18 +3125,12 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { return getSCEV(U->getOperand(0)); break; - case Instruction::IntToPtr: - if (!TD) break; // Without TD we can't analyze pointers. - return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)), - TD->getIntPtrType()); - - case Instruction::PtrToInt: - if (!TD) break; // Without TD we can't analyze pointers. 
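Three of the rewrites above, checked directly: an or of values with disjoint bits is an add (that is what the trailing-zeros test establishes), and constant shifts become multiplication or unsigned division by a power of two:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xABCD0000u, C = 0x1234u;    // disjoint bit ranges
  assert((X & C) == 0 && (X | C) == X + C); // or == add when disjoint
  for (uint32_t V = 0; V != 1000; ++V) {
    assert((V << 4) == V * 16);  // shl by k  == multiply by 2^k
    assert((V >> 4) == V / 16);  // lshr by k == udiv by 2^k
  }
  return 0;
}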
- return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)), - U->getType()); + // It's tempting to handle inttoptr and ptrtoint, however this can + // lead to pointer expressions which cannot be expanded to GEPs + // (because they may overflow). For now, the only pointer-typed + // expressions we handle are GEPs and address literals. case Instruction::GetElementPtr: - if (!TD) break; // Without TD we can't analyze pointers. return createNodeForGEP(U); case Instruction::PHI: @@ -2842,17 +3212,29 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { /// loop-invariant backedge-taken count (see /// hasLoopInvariantBackedgeTakenCount). /// -const SCEV* ScalarEvolution::getBackedgeTakenCount(const Loop *L) { +const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).Exact; } /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except /// return the least SCEV value that is known never to be less than the /// actual backedge taken count. -const SCEV* ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { +const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).Max; } +/// PushLoopPHIs - Push PHI nodes in the header of the given loop +/// onto the given Worklist. +static void +PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { + BasicBlock *Header = L->getHeader(); + + // Push all Loop-header PHIs onto the Worklist stack. + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + Worklist.push_back(PN); +} + const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert a CouldNotCompute for this loop. If the insertion @@ -2883,10 +3265,39 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Now that we know more about the trip count for this loop, forget any // existing SCEV values for PHI nodes in this loop since they are only - // conservative estimates made without the benefit - // of trip count information. - if (ItCount.hasAnyInfo()) - forgetLoopPHIs(L); + // conservative estimates made without the benefit of trip count + // information. This is similar to the code in + // forgetLoopBackedgeTakenCount, except that it handles SCEVUnknown PHI + // nodes specially. + if (ItCount.hasAnyInfo()) { + SmallVector<Instruction *, 16> Worklist; + PushLoopPHIs(L, Worklist); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + std::map<SCEVCallbackVH, const SCEV*>::iterator It = + Scalars.find(static_cast<Value *>(I)); + if (It != Scalars.end()) { + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, or it's a PHI that's in the process of being computed + // by createNodeForPHI. In the former case, additional loop trip + // count information isn't going to change anything. In the latter + // case, createNodeForPHI will perform the necessary updates on its + // own when it gets to that point. + if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) { + ValuesAtScopes.erase(It->second); + Scalars.erase(It); + } + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } + } } return Pair.first->second; } @@ -2897,37 +3308,25 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { /// is deleted.
void ScalarEvolution::forgetLoopBackedgeTakenCount(const Loop *L) { BackedgeTakenCounts.erase(L); - forgetLoopPHIs(L); -} -/// forgetLoopPHIs - Delete the memoized SCEVs associated with the -/// PHI nodes in the given loop. This is used when the trip count of -/// the loop may have changed. -void ScalarEvolution::forgetLoopPHIs(const Loop *L) { - BasicBlock *Header = L->getHeader(); - - // Push all Loop-header PHIs onto the Worklist stack, except those - // that are presently represented via a SCEVUnknown. SCEVUnknown for - // a PHI either means that it has an unrecognized structure, or it's - // a PHI that's in the progress of being computed by createNodeForPHI. - // In the former case, additional loop trip count information isn't - // going to change anything. In the later case, createNodeForPHI will - // perform the necessary updates on its own when it gets to that point. SmallVector<Instruction *, 16> Worklist; - for (BasicBlock::iterator I = Header->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) { - std::map<SCEVCallbackVH, const SCEV*>::iterator It = - Scalars.find((Value*)I); - if (It != Scalars.end() && !isa<SCEVUnknown>(It->second)) - Worklist.push_back(PN); - } + PushLoopPHIs(L, Worklist); + SmallPtrSet<Instruction *, 8> Visited; while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); - if (Scalars.erase(I)) - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) - Worklist.push_back(cast<Instruction>(UI)); + if (!Visited.insert(I)) continue; + + std::map<SCEVCallbackVH, const SCEV*>::iterator It = + Scalars.find(static_cast<Value *>(I)); + if (It != Scalars.end()) { + ValuesAtScopes.erase(It->second); + Scalars.erase(It); + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); } } @@ -2939,8 +3338,8 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { L->getExitingBlocks(ExitingBlocks); // Examine all exits and pick the most conservative values. - const SCEV* BECount = getCouldNotCompute(); - const SCEV* MaxBECount = getCouldNotCompute(); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); bool CouldNotComputeBECount = false; for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BackedgeTakenInfo NewBTI = @@ -3049,8 +3448,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); BackedgeTakenInfo BTI1 = ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); - const SCEV* BECount = getCouldNotCompute(); - const SCEV* MaxBECount = getCouldNotCompute(); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); if (L->contains(TBB)) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. @@ -3084,8 +3483,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); BackedgeTakenInfo BTI1 = ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); - const SCEV* BECount = getCouldNotCompute(); - const SCEV* MaxBECount = getCouldNotCompute(); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); if (L->contains(FBB)) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. 
@@ -3143,7 +3542,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { - const SCEV* ItCnt = + const SCEV *ItCnt = ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); if (!isa<SCEVCouldNotCompute>(ItCnt)) { unsigned BitWidth = getTypeSizeInBits(ItCnt->getType()); @@ -3153,8 +3552,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, } } - const SCEV* LHS = getSCEV(ExitCond->getOperand(0)); - const SCEV* RHS = getSCEV(ExitCond->getOperand(1)); + const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); + const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); // Try to evaluate any dependencies out of the loop. LHS = getSCEVAtScope(LHS, L); @@ -3177,20 +3576,20 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, ConstantRange CompRange( ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); - const SCEV* Ret = AddRec->getNumIterationsInRange(CompRange, *this); + const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; } switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - const SCEV* TC = HowFarToZero(getMinusSCEV(LHS, RHS), L); + const SCEV *TC = HowFarToZero(getMinusSCEV(LHS, RHS), L); if (!isa<SCEVCouldNotCompute>(TC)) return TC; break; } - case ICmpInst::ICMP_EQ: { - // Convert to: while (X-Y == 0) // while (X == Y) - const SCEV* TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + case ICmpInst::ICMP_EQ: { // while (X == Y) + // Convert to: while (X-Y == 0) + const SCEV *TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); if (!isa<SCEVCouldNotCompute>(TC)) return TC; break; } @@ -3234,8 +3633,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE) { - const SCEV* InVal = SE.getConstant(C); - const SCEV* Val = AddRec->evaluateAtIteration(InVal, SE); + const SCEV *InVal = SE.getConstant(C); + const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); assert(isa<SCEVConstant>(Val) && "Evaluation of SCEV at constant didn't fold correctly?"); return cast<SCEVConstant>(Val)->getValue(); @@ -3246,7 +3645,7 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, /// the addressed element of the initializer or null if the index expression is /// invalid. static Constant * -GetAddressedElementFromGlobal(GlobalVariable *GV, +GetAddressedElementFromGlobal(LLVMContext &Context, GlobalVariable *GV, const std::vector<ConstantInt*> &Indices) { Constant *Init = GV->getInitializer(); for (unsigned i = 0, e = Indices.size(); i != e; ++i) { @@ -3265,7 +3664,7 @@ GetAddressedElementFromGlobal(GlobalVariable *GV, if (Idx >= ATy->getNumElements()) return 0; // Bogus program Init = Constant::getNullValue(ATy->getElementType()); } else { - assert(0 && "Unknown constant aggregate type!"); + llvm_unreachable("Unknown constant aggregate type!"); } return 0; } else { @@ -3293,7 +3692,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( // Make sure that it is really a constant global we are gepping, with an // initializer, and make sure the first IDX is really 0. 
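The ICMP_NE case turns "while (X != Y)" into the question of when X - Y first reaches zero. A standalone illustration with a unit-stride counter:

#include <cassert>
#include <cstdint>

int main() {
  // For X = {0,+,1} against a loop-invariant N, the difference is
  // {-N,+,1}, which hits zero after exactly N backedges.
  const uint64_t N = 37;
  uint64_t X = 0, Backedges = 0;
  while (X != N) { ++X; ++Backedges; }
  assert(Backedges == N);
  return 0;
}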
GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); - if (!GV || !GV->isConstant() || !GV->hasInitializer() || + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) || !cast<Constant>(GEP->getOperand(1))->isNullValue()) return getCouldNotCompute(); @@ -3314,7 +3713,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. - const SCEV* Idx = getSCEV(VarIdx); + const SCEV *Idx = getSCEV(VarIdx); Idx = getSCEVAtScope(Idx, L); // We can only recognize very limited forms of loop index expressions, in @@ -3327,14 +3726,14 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( unsigned MaxSteps = MaxBruteForceIterations; for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { - ConstantInt *ItCst = - ConstantInt::get(cast<IntegerType>(IdxExpr->getType()), IterationNum); + ConstantInt *ItCst = ConstantInt::get( + cast<IntegerType>(IdxExpr->getType()), IterationNum); ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); // Form the GEP offset. Indexes[VarIdxNum] = Val; - Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); + Constant *Result = GetAddressedElementFromGlobal(getContext(), GV, Indexes); if (Result == 0) break; // Cannot compute! // Evaluate the condition for this iteration. @@ -3418,6 +3817,7 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) { if (Constant *C = dyn_cast<Constant>(V)) return C; if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV; Instruction *I = cast<Instruction>(V); + LLVMContext &Context = I->getParent()->getContext(); std::vector<Constant*> Operands; Operands.resize(I->getNumOperands()); @@ -3429,10 +3829,12 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) { if (const CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), - &Operands[0], Operands.size()); + &Operands[0], Operands.size(), + Context); else return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size()); + &Operands[0], Operands.size(), + Context); } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is @@ -3487,7 +3889,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, } } -/// ComputeBackedgeTakenCountExhaustively - If the trip is known to execute a +/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a /// constant number of times (the condition evolves only from constants), /// try to evaluate a few iterations of the loop until we get the exit /// condition gets a value of ExitWhen (true or false). If we cannot @@ -3526,7 +3928,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, if (CondVal->getValue() == uint64_t(ExitWhen)) { ++NumBruteForceTripCountsComputed; - return getConstant(Type::Int32Ty, IterationNum); + return getConstant(Type::getInt32Ty(getContext()), IterationNum); } // Compute the value of the PHI node for the next iteration. @@ -3540,7 +3942,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, return getCouldNotCompute(); } -/// getSCEVAtScope - Return a SCEV expression handle for the specified value +/// getSCEVAtScope - Return a SCEV expression for the specified value /// at the specified scope in the program. 
The L value specifies a loop /// nest to evaluate the expression at, where null is the top-level or a /// specified loop is immediately inside of the loop. @@ -3550,9 +3952,21 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, /// /// In the case that a relevant loop exit value cannot be computed, the /// original value V is returned. -const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { - // FIXME: this should be turned into a virtual method on SCEV! +const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { + // Check to see if we've folded this expression at this loop before. + std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V]; + std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair = + Values.insert(std::make_pair(L, static_cast<const SCEV *>(0))); + if (!Pair.second) + return Pair.first->second ? Pair.first->second : V; + // Otherwise compute it. + const SCEV *C = computeSCEVAtScope(V, L); + ValuesAtScopes[V][L] = C; + return C; +} + +const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (isa<SCEVConstant>(V)) return V; // If this instruction is evolved from a constant-evolving PHI, compute the @@ -3567,7 +3981,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // to see if the loop that contains it has a known backedge-taken // count. If so, we may be able to force computation of the exit // value. - const SCEV* BackedgeTakenCount = getBackedgeTakenCount(LI); + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); if (const SCEVConstant *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) { // Okay, we know how many times the containing loop executes. If @@ -3585,13 +3999,6 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // the arguments into constants, and if so, try to constant propagate the // result. This is particularly useful for computing loop exit values. if (CanConstantFold(I)) { - // Check to see if we've folded this instruction at this loop before. - std::map<const Loop *, Constant *> &Values = ValuesAtScopes[I]; - std::pair<std::map<const Loop *, Constant *>::iterator, bool> Pair = - Values.insert(std::make_pair(L, static_cast<Constant *>(0))); - if (!Pair.second) - return Pair.first->second ? 
&*getSCEV(Pair.first->second) : V; - std::vector<Constant*> Operands; Operands.reserve(I->getNumOperands()); for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { @@ -3605,7 +4012,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (!isSCEVable(Op->getType())) return V; - const SCEV* OpV = getSCEVAtScope(getSCEV(Op), L); + const SCEV* OpV = getSCEVAtScope(Op, L); if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) { Constant *C = SC->getValue(); if (C->getType() != Op->getType()) @@ -3634,11 +4041,12 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { Constant *C; if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), - &Operands[0], Operands.size()); + &Operands[0], Operands.size(), + getContext()); else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size()); - Pair.first->second = C; + &Operands[0], Operands.size(), + getContext()); return getSCEV(C); } } @@ -3651,7 +4059,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // Avoid performing the look-up in the common case where the specified // expression has no loop-variant portions. for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) { - const SCEV* OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); if (OpAtScope != Comm->getOperand(i)) { // Okay, at least one of these operands is loop variant but might be // foldable. Build a new instance of the folded commutative expression. @@ -3671,7 +4079,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { return getSMaxExpr(NewOps); if (isa<SCEVUMaxExpr>(Comm)) return getUMaxExpr(NewOps); - assert(0 && "Unknown commutative SCEV type!"); + llvm_unreachable("Unknown commutative SCEV type!"); } } // If we got here, all operands are loop invariant. @@ -3679,8 +4087,8 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { } if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) { - const SCEV* LHS = getSCEVAtScope(Div->getLHS(), L); - const SCEV* RHS = getSCEVAtScope(Div->getRHS(), L); + const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L); + const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L); if (LHS == Div->getLHS() && RHS == Div->getRHS()) return Div; // must be loop invariant return getUDivExpr(LHS, RHS); @@ -3692,7 +4100,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (!L || !AddRec->getLoop()->contains(L->getHeader())) { // To evaluate this recurrence, we need to know how many times the AddRec // loop iterates. Compute this now. - const SCEV* BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); if (BackedgeTakenCount == getCouldNotCompute()) return AddRec; // Then, evaluate the AddRec. 
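When the backedge-taken count of the AddRec's loop is a known constant, getSCEVAtScope can evaluate the recurrence at that iteration. A minimal sketch of the affine case only (the general case uses binomial coefficients; the names here are illustrative, not LLVM's):

    #include <cassert>
    #include <cstdint>

    // {Start,+,Step} evaluated after k backedges is Start + k*Step.
    uint64_t evalAffineAddRec(uint64_t Start, uint64_t Step, uint64_t k) {
      return Start + k * Step;
    }

    int main() {
      // {5,+,3} with a backedge-taken count of 4: exit value 5 + 4*3 = 17.
      assert(evalAffineAddRec(5, 3, 4) == 17);
    }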
@@ -3702,33 +4110,36 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { } if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) { - const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L); + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); if (Op == Cast->getOperand()) return Cast; // must be loop invariant return getZeroExtendExpr(Op, Cast->getType()); } if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) { - const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L); + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); if (Op == Cast->getOperand()) return Cast; // must be loop invariant return getSignExtendExpr(Op, Cast->getType()); } if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) { - const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L); + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); if (Op == Cast->getOperand()) return Cast; // must be loop invariant return getTruncateExpr(Op, Cast->getType()); } - assert(0 && "Unknown SCEV type!"); + if (isa<SCEVTargetDataConstant>(V)) + return V; + + llvm_unreachable("Unknown SCEV type!"); return 0; } /// getSCEVAtScope - This is a convenience function which does /// getSCEVAtScope(getSCEV(V), L). -const SCEV* ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { +const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { return getSCEVAtScope(getSCEV(V), L); } @@ -3741,7 +4152,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { /// A and B isn't important. /// /// If the equation does not have a solution, SCEVCouldNotCompute is returned. -static const SCEV* SolveLinEquationWithOverflow(const APInt &A, const APInt &B, +static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, ScalarEvolution &SE) { uint32_t BW = A.getBitWidth(); assert(BW == B.getBitWidth() && "Bit widths must be the same."); @@ -3784,7 +4195,7 @@ static const SCEV* SolveLinEquationWithOverflow(const APInt &A, const APInt &B, /// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which /// might be the same) or two SCEVCouldNotCompute objects. /// -static std::pair<const SCEV*,const SCEV*> +static std::pair<const SCEV *,const SCEV *> SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0)); @@ -3833,8 +4244,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { return std::make_pair(CNC, CNC); } - ConstantInt *Solution1 = ConstantInt::get((NegB + SqrtVal).sdiv(TwoA)); - ConstantInt *Solution2 = ConstantInt::get((NegB - SqrtVal).sdiv(TwoA)); + LLVMContext &Context = SE.getContext(); + + ConstantInt *Solution1 = + ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); + ConstantInt *Solution2 = + ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); return std::make_pair(SE.getConstant(Solution1), SE.getConstant(Solution2)); @@ -3843,7 +4258,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// HowFarToZero - Return the number of times a backedge comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. 
-const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { +const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // If the value is a constant if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { // If the value is already zero, the branch will execute zero times. @@ -3878,7 +4293,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // First, handle unitary steps. if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so: - return getNegativeSCEV(Start); // N = -Start (as unsigned) + return getNegativeSCEV(Start); // N = -Start (as unsigned) if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so: return Start; // N = Start (as unsigned) @@ -3891,7 +4306,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. - std::pair<const SCEV*,const SCEV*> Roots = SolveQuadraticEquation(AddRec, + std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec, *this); const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); @@ -3910,7 +4325,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // We can only use this value if the chrec ends up with an exact zero // value at this index. When solving for "X*X != 5", for example, we // should not accept a root of 2. - const SCEV* Val = AddRec->evaluateAtIteration(R1, *this); + const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); if (Val->isZero()) return R1; // We found a quadratic root! } @@ -3923,7 +4338,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { /// HowFarToNonZero - Return the number of times a backedge checking the /// specified value for nonzero will execute. If not computable, return /// CouldNotCompute -const SCEV* ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { +const SCEV *ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // Loops that look like: while (X == 0) are very strange indeed. We don't // handle them yet except for the trivial case. This could be expanded in the // future as needed. @@ -3984,7 +4399,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { /// more general, since a front-end may have replicated the controlling /// expression. /// -static bool HasSameValue(const SCEV* A, const SCEV* B) { +static bool HasSameValue(const SCEV *A, const SCEV *B) { // Quick check to see if they are the same SCEV. if (A == B) return true; @@ -3994,19 +4409,142 @@ static bool HasSameValue(const SCEV* A, const SCEV* B) { if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B)) if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue())) if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) - if (AI->isIdenticalTo(BI)) + if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory()) return true; // Otherwise assume they may have a different value. return false; } -/// isLoopGuardedByCond - Test whether entry to the loop is protected by -/// a conditional between LHS and RHS. This is used to help avoid max -/// expressions in loop trip counts. 
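HowFarToZero's unit-stride cases come from solving Start + N*Step == 0 (mod 2^BW) for N. A hedged standalone sketch of just those two cases, with a fixed 32-bit width assumed:

    #include <cassert>
    #include <cstdint>

    // Step == 1:  Start + N == 0 (mod 2^32)  =>  N == -Start (as unsigned).
    // Step == -1: Start - N == 0 (mod 2^32)  =>  N == Start.
    uint32_t howFarToZeroUnitStep(uint32_t Start, bool StepIsOne) {
      return StepIsOne ? 0u - Start : Start;
    }

    int main() {
      assert(howFarToZeroUnitStep(7, /*StepIsOne=*/false) == 7);     // {7,+,-1}
      assert(howFarToZeroUnitStep(0u - 7, /*StepIsOne=*/true) == 7); // {-7,+,1}
    }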
-bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, - ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS) { +bool ScalarEvolution::isKnownNegative(const SCEV *S) { + return getSignedRange(S).getSignedMax().isNegative(); +} + +bool ScalarEvolution::isKnownPositive(const SCEV *S) { + return getSignedRange(S).getSignedMin().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { + return !getSignedRange(S).getSignedMin().isNegative(); +} + +bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { + return !getSignedRange(S).getSignedMax().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonZero(const SCEV *S) { + return isKnownNegative(S) || isKnownPositive(S); +} + +bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + + if (HasSameValue(LHS, RHS)) + return ICmpInst::isTrueWhenEqual(Pred); + + switch (Pred) { + default: + llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + break; + case ICmpInst::ICMP_SGT: + Pred = ICmpInst::ICMP_SLT; + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLT: { + ConstantRange LHSRange = getSignedRange(LHS); + ConstantRange RHSRange = getSignedRange(RHS); + if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin())) + return true; + if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax())) + return false; + break; + } + case ICmpInst::ICMP_SGE: + Pred = ICmpInst::ICMP_SLE; + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLE: { + ConstantRange LHSRange = getSignedRange(LHS); + ConstantRange RHSRange = getSignedRange(RHS); + if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin())) + return true; + if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax())) + return false; + break; + } + case ICmpInst::ICMP_UGT: + Pred = ICmpInst::ICMP_ULT; + std::swap(LHS, RHS); + case ICmpInst::ICMP_ULT: { + ConstantRange LHSRange = getUnsignedRange(LHS); + ConstantRange RHSRange = getUnsignedRange(RHS); + if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin())) + return true; + if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax())) + return false; + break; + } + case ICmpInst::ICMP_UGE: + Pred = ICmpInst::ICMP_ULE; + std::swap(LHS, RHS); + case ICmpInst::ICMP_ULE: { + ConstantRange LHSRange = getUnsignedRange(LHS); + ConstantRange RHSRange = getUnsignedRange(RHS); + if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin())) + return true; + if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax())) + return false; + break; + } + case ICmpInst::ICMP_NE: { + if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet()) + return true; + if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet()) + return true; + + const SCEV *Diff = getMinusSCEV(LHS, RHS); + if (isKnownNonZero(Diff)) + return true; + break; + } + case ICmpInst::ICMP_EQ: + // The check at the top of the function catches the case where + // the values are known to be equal. + break; + } + return false; +} + +/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is +/// protected by a conditional between LHS and RHS. This is used +/// to eliminate casts. +bool +ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding).
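Each new isKnownPredicate case has the same shape: compare the extreme values of the two operands' ranges. A standalone model of the signed-less-than case, with inclusive [min, max] intervals standing in for ConstantRange (an assumption for the sketch):

    #include <cassert>
    #include <cstdint>

    enum Result { KnownTrue, KnownFalse, Unknown };

    // LHS in [LMin, LMax], RHS in [RMin, RMax]:
    // max(LHS) < min(RHS) proves LHS < RHS; min(LHS) >= max(RHS) refutes it.
    Result knownSLT(int64_t LMin, int64_t LMax, int64_t RMin, int64_t RMax) {
      if (LMax < RMin) return KnownTrue;
      if (LMin >= RMax) return KnownFalse;
      return Unknown;
    }

    int main() {
      assert(knownSLT(0, 9, 10, 20) == KnownTrue);
      assert(knownSLT(10, 20, 0, 9) == KnownFalse);
      assert(knownSLT(0, 15, 10, 20) == Unknown); // overlapping ranges
    }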
+ if (!L) return true; + + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + + BranchInst *LoopContinuePredicate = + dyn_cast<BranchInst>(Latch->getTerminator()); + if (!LoopContinuePredicate || + LoopContinuePredicate->isUnconditional()) + return false; + + return isImpliedCond(LoopContinuePredicate->getCondition(), Pred, LHS, RHS, + LoopContinuePredicate->getSuccessor(0) != L->getHeader()); +} + +/// isLoopGuardedByCond - Test whether entry to the loop is protected +/// by a conditional between LHS and RHS. This is used to help avoid max +/// expressions in loop trip counts, and to eliminate casts. +bool +ScalarEvolution::isLoopGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { // Interpret a null as meaning no loop, where there is obviously no guard // (interprocedural conditions notwithstanding). if (!L) return false; @@ -4027,136 +4565,308 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, LoopEntryPredicate->isUnconditional()) continue; - if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS, - LoopEntryPredicate->getSuccessor(0) != PredecessorDest)) + if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS, + LoopEntryPredicate->getSuccessor(0) != PredecessorDest)) return true; } return false; } -/// isNecessaryCond - Test whether the given CondValue value is a condition -/// which is at least as strict as the one described by Pred, LHS, and RHS. -bool ScalarEvolution::isNecessaryCond(Value *CondValue, - ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS, - bool Inverse) { +/// isImpliedCond - Test whether the condition described by Pred, LHS, +/// and RHS is true whenever the given Cond value evaluates to true. +bool ScalarEvolution::isImpliedCond(Value *CondValue, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + bool Inverse) { // Recursively handle And and Or conditions. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) - return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || - isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); + return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || + isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); } else if (BO->getOpcode() == Instruction::Or) { if (Inverse) - return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || - isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); + return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || + isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); } } ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue); if (!ICI) return false; + // Bail if the ICmp's operands' types are wider than the needed type + // before attempting to call getSCEV on them. This avoids infinite + // recursion, since the analysis of widening casts can require loop + // exit condition information for overflow checking, which would + // lead back here. + if (getTypeSizeInBits(LHS->getType()) < + getTypeSizeInBits(ICI->getOperand(0)->getType())) + return false; + // Now that we found a conditional branch that dominates the loop, check to // see if it is the comparison we are looking for.
- Value *PreCondLHS = ICI->getOperand(0); - Value *PreCondRHS = ICI->getOperand(1); - ICmpInst::Predicate Cond; + ICmpInst::Predicate FoundPred; if (Inverse) - Cond = ICI->getInversePredicate(); + FoundPred = ICI->getInversePredicate(); else - Cond = ICI->getPredicate(); + FoundPred = ICI->getPredicate(); + + const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); + const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); + + // Balance the types. The case where FoundLHS' type is wider than + // LHS' type is checked for above. + if (getTypeSizeInBits(LHS->getType()) > + getTypeSizeInBits(FoundLHS->getType())) { + if (CmpInst::isSigned(Pred)) { + FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); + } else { + FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); + } + } - if (Cond == Pred) - ; // An exact match. - else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE) - ; // The actual condition is beyond sufficient. - else - // Check a few special cases. - switch (Cond) { + // Canonicalize the query to match the way instcombine will have + // canonicalized the comparison. + // First, put a constant operand on the right. + if (isa<SCEVConstant>(LHS)) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + // Then, canonicalize comparisons with boundary cases. + if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { + const APInt &RA = RC->getValue()->getValue(); + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_UGE: + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + break; + } + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + break; + } + if (RA.isMinValue()) return true; + break; + case ICmpInst::ICMP_ULE: + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + break; + } + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + break; + } + if (RA.isMaxValue()) return true; + break; + case ICmpInst::ICMP_SGE: + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + break; + } + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + break; + } + if (RA.isMinSignedValue()) return true; + break; + case ICmpInst::ICMP_SLE: + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + break; + } + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + break; + } + if (RA.isMaxSignedValue()) return true; + break; case ICmpInst::ICMP_UGT: - if (Pred == ICmpInst::ICMP_ULT) { - std::swap(PreCondLHS, PreCondRHS); - Cond = ICmpInst::ICMP_ULT; + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_NE; break; } - return false; + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + break; + } + if (RA.isMaxValue()) return false; + break; + case ICmpInst::ICMP_ULT: + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + break; + } + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + break; + } + if (RA.isMinValue()) return false; + break; case ICmpInst::ICMP_SGT: - if (Pred == ICmpInst::ICMP_SLT) { - std::swap(PreCondLHS, PreCondRHS); - Cond = ICmpInst::ICMP_SLT; + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; break; } - return false; - case ICmpInst::ICMP_NE: - // Expressions like (x >u 0) are often 
canonicalized to (x != 0), - // so check for this case by checking if the NE is comparing against - // a minimum or maximum constant. - if (!ICmpInst::isTrueWhenEqual(Pred)) - if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) { - const APInt &A = CI->getValue(); - switch (Pred) { - case ICmpInst::ICMP_SLT: - if (A.isMaxSignedValue()) break; - return false; - case ICmpInst::ICMP_SGT: - if (A.isMinSignedValue()) break; - return false; - case ICmpInst::ICMP_ULT: - if (A.isMaxValue()) break; - return false; - case ICmpInst::ICMP_UGT: - if (A.isMinValue()) break; - return false; - default: - return false; - } - Cond = ICmpInst::ICMP_NE; - // NE is symmetric but the original comparison may not be. Swap - // the operands if necessary so that they match below. - if (isa<SCEVConstant>(LHS)) - std::swap(PreCondLHS, PreCondRHS); - break; - } - return false; - default: - // We weren't able to reconcile the condition. - return false; + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + break; + } + if (RA.isMaxSignedValue()) return false; + break; + case ICmpInst::ICMP_SLT: + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + break; + } + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + break; + } + if (RA.isMinSignedValue()) return false; + break; + } + } + + // Check to see if we can make the LHS or RHS match. + if (LHS == FoundRHS || RHS == FoundLHS) { + if (isa<SCEVConstant>(RHS)) { + std::swap(FoundLHS, FoundRHS); + FoundPred = ICmpInst::getSwappedPredicate(FoundPred); + } else { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); } + } - if (!PreCondLHS->getType()->isInteger()) return false; + // Check whether the found predicate is the same as the desired predicate. + if (FoundPred == Pred) + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); - const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS); - const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS); - return (HasSameValue(LHS, PreCondLHSSCEV) && - HasSameValue(RHS, PreCondRHSSCEV)) || - (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) && - HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))); + // Check whether swapping the found predicate makes it the same as the + // desired predicate. + if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { + if (isa<SCEVConstant>(RHS)) + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); + else + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), + RHS, LHS, FoundLHS, FoundRHS); + } + + // Check whether the actual condition is beyond sufficient. + if (FoundPred == ICmpInst::ICMP_EQ) + if (ICmpInst::isTrueWhenEqual(Pred)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + if (Pred == ICmpInst::ICMP_NE) + if (!ICmpInst::isTrueWhenEqual(FoundPred)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + + // Otherwise assume the worst. + return false; +} + +/// isImpliedCondOperands - Test whether the condition described by Pred, +/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, +/// and FoundRHS is true.
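The boundary-case rewrites above rest on identities such as (x u>= 1) == (x != 0) and (x u<= 0) == (x == 0), which shrink the number of predicate forms the later matching code has to handle. A quick standalone check of two of them:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x : {0u, 1u, 2u, 0xFFFFFFFFu}) {
        assert((x >= 1u) == (x != 0u)); // ICMP_UGE with RA == 1 -> ICMP_NE
        assert((x <= 0u) == (x == 0u)); // ICMP_ULE with RA == 0 -> ICMP_EQ
      }
    }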
+bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + return isImpliedCondOperandsHelper(Pred, LHS, RHS, + FoundLHS, FoundRHS) || + // ~x < ~y --> x > y + isImpliedCondOperandsHelper(Pred, LHS, RHS, + getNotSCEV(FoundRHS), + getNotSCEV(FoundLHS)); +} + +/// isImpliedCondOperandsHelper - Test whether the condition described by +/// Pred, LHS, and RHS is true whenever the condition described by Pred, +/// FoundLHS, and FoundRHS is true. +bool +ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + return true; + break; + } + + return false; } /// getBECount - Subtract the end and start values and divide by the step, /// rounding up, to get the number of times the backedge is executed. Return /// CouldNotCompute if an intermediate computation overflows. -const SCEV* ScalarEvolution::getBECount(const SCEV* Start, - const SCEV* End, - const SCEV* Step) { +const SCEV *ScalarEvolution::getBECount(const SCEV *Start, + const SCEV *End, + const SCEV *Step, + bool NoWrap) { const Type *Ty = Start->getType(); - const SCEV* NegOne = getIntegerSCEV(-1, Ty); - const SCEV* Diff = getMinusSCEV(End, Start); - const SCEV* RoundUp = getAddExpr(Step, NegOne); + const SCEV *NegOne = getIntegerSCEV(-1, Ty); + const SCEV *Diff = getMinusSCEV(End, Start); + const SCEV *RoundUp = getAddExpr(Step, NegOne); // Add an adjustment to the difference between End and Start so that // the division will effectively round up. - const SCEV* Add = getAddExpr(Diff, RoundUp); - - // Check Add for unsigned overflow. - // TODO: More sophisticated things could be done here. - const Type *WideTy = IntegerType::get(getTypeSizeInBits(Ty) + 1); - const SCEV* OperandExtendedAdd = - getAddExpr(getZeroExtendExpr(Diff, WideTy), - getZeroExtendExpr(RoundUp, WideTy)); - if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) - return getCouldNotCompute(); + const SCEV *Add = getAddExpr(Diff, RoundUp); + + if (!NoWrap) { + // Check Add for unsigned overflow. + // TODO: More sophisticated things could be done here.
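The "~x < ~y --> x > y" comment relies on bitwise-not being an order-reversing bijection on unsigned values (~x == 2^BW - 1 - x), which is what lets a fact about the negated found operands stand in for the swapped fact about the originals. The identity is easy to verify directly:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 5, y = 9;
      assert((~x < ~y) == (x > y)); // negation reverses unsigned order
      assert((~y < ~x) == (y > x));
    }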
+ const Type *WideTy = IntegerType::get(getContext(), + getTypeSizeInBits(Ty) + 1); + const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy); + const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy); + const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp); + if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) + return getCouldNotCompute(); + } return getUDivExpr(Add, Step); } @@ -4174,10 +4884,14 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (!AddRec || AddRec->getLoop() != L) return getCouldNotCompute(); + // Check to see if we have a flag which makes analysis easy. + bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() : + AddRec->hasNoUnsignedWrap(); + if (AddRec->isAffine()) { // FORNOW: We only support unit strides. unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); - const SCEV* Step = AddRec->getStepRecurrence(*this); + const SCEV *Step = AddRec->getStepRecurrence(*this); // TODO: handle non-constant strides. const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step); @@ -4186,7 +4900,10 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (CStep->isOne()) { // With unit stride, the iteration never steps past the limit value. } else if (CStep->getValue()->getValue().isStrictlyPositive()) { - if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) { + if (NoWrap) { + // We know the iteration won't step past the maximum value for its type. + ; + } else if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) { // Test whether a positive iteration can step past the limit // value and past the maximum value for its type in a single step. if (isSigned) { @@ -4213,39 +4930,37 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // treat m-n as signed nor unsigned due to overflow possibility. // First, we get the value of the LHS in the first iteration: n - const SCEV* Start = AddRec->getOperand(0); + const SCEV *Start = AddRec->getOperand(0); // Determine the minimum constant start value. - const SCEV *MinStart = isa<SCEVConstant>(Start) ? Start : - getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) : - APInt::getMinValue(BitWidth)); + const SCEV *MinStart = getConstant(isSigned ? + getSignedRange(Start).getSignedMin() : + getUnsignedRange(Start).getUnsignedMin()); // If we know that the condition is true in order to enter the loop, // then we know that it will run exactly (m-n)/s times. Otherwise, we // only know that it will execute (max(m,n)-n)/s times. In both cases, // the division must round up. - const SCEV* End = RHS; + const SCEV *End = RHS; if (!isLoopGuardedByCond(L, - isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + isSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, getMinusSCEV(Start, Step), RHS)) End = isSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); // Determine the maximum constant end value. - const SCEV* MaxEnd = - isa<SCEVConstant>(End) ? End : - getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth) - .ashr(GetMinSignBits(End) - 1) : - APInt::getMaxValue(BitWidth) - .lshr(GetMinLeadingZeros(End))); + const SCEV *MaxEnd = getConstant(isSigned ? + getSignedRange(End).getSignedMax() : + getUnsignedRange(End).getUnsignedMax()); // Finally, we subtract these two values and divide, rounding up, to get // the number of times the backedge is executed.
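getBECount computes ceil((End - Start) / Step) as (End - Start + Step - 1) / Step, and when no no-wrap flag is available it re-does the addition in a type one bit wider to detect overflow. A plain-integer model of both steps, with uint64_t playing the role of the wider type:

    #include <cassert>
    #include <cstdint>

    uint32_t beCount(uint32_t Start, uint32_t End, uint32_t Step, bool &Overflow) {
      uint32_t Diff = End - Start, RoundUp = Step - 1;
      uint32_t Add = Diff + RoundUp;
      // Redo the add with zero-extended operands; disagreement means wrap.
      Overflow = (uint64_t)Diff + (uint64_t)RoundUp != (uint64_t)Add;
      return Add / Step;
    }

    int main() {
      bool Ov;
      assert(beCount(0, 10, 3, Ov) == 4 && !Ov); // ceil(10/3) == 4
      beCount(0, 0xFFFFFFFFu, 0xFFFFFFFFu, Ov);
      assert(Ov); // Diff + RoundUp wraps, so the count is not trusted
    }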
- const SCEV* BECount = getBECount(Start, End, Step); + const SCEV *BECount = getBECount(Start, End, Step, NoWrap); // The maximum backedge count is similar, except using the minimum start // value and the maximum end value. - const SCEV* MaxBECount = getBECount(MinStart, MaxEnd, Step); + const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap); return BackedgeTakenInfo(BECount, MaxBECount); } @@ -4258,7 +4973,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, /// this is that it returns the first iteration number where the value is not in /// the condition, thus computing the exit count. If the iteration count can't /// be computed, an instance of SCEVCouldNotCompute is returned. -const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, +const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, ScalarEvolution &SE) const { if (Range.isFullSet()) // Infinite loop. return SE.getCouldNotCompute(); @@ -4266,9 +4981,9 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // If the start is a non-zero constant, shift the range to simplify things. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) if (!SC->getValue()->isZero()) { - SmallVector<const SCEV*, 4> Operands(op_begin(), op_end()); + SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); Operands[0] = SE.getIntegerSCEV(0, SC->getType()); - const SCEV* Shifted = SE.getAddRecExpr(Operands, getLoop()); + const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop()); if (const SCEVAddRecExpr *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted)) return ShiftedAddRec->getNumIterationsInRange( @@ -4307,7 +5022,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // The exit value should be (End+A)/A. APInt ExitVal = (End + A).udiv(A); - ConstantInt *ExitValue = ConstantInt::get(ExitVal); + ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); // Evaluate at the exit value. If we really did fall out of the valid // range, then we computed our trip count, otherwise wrap around or other @@ -4319,7 +5034,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // Ensure that the previous value is in the range. This is a sanity check. assert(Range.contains( EvaluateConstantChrecAtConstant(this, - ConstantInt::get(ExitVal - One), SE)->getValue()) && + ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && "Linear scev computation is off in a bad way!"); return SE.getConstant(ExitValue); } else if (isQuadratic()) { @@ -4327,12 +5042,12 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // quadratic equation to solve it. To do this, we must frame our problem in // terms of figuring out when zero is crossed, instead of when // Range.getUpper() is crossed. 
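The quadratic path only trusts a root after re-evaluating the chrec there and seeing an exact zero. In binomial form, {L,+,M,+,N} at iteration i is L + M*C(i,1) + N*C(i,2); a small standalone check using a chrec that does hit zero exactly:

    #include <cassert>
    #include <cstdint>

    int64_t evalQuadChrec(int64_t L, int64_t M, int64_t N, int64_t i) {
      return L + M * i + N * (i * (i - 1) / 2);
    }

    int main() {
      // {-9,+,1,+,2} takes the values -9, -8, -5, 0, ...: an exact zero at
      // i == 3, so `while (chrec != 0)` has a backedge-taken count of 3.
      assert(evalQuadChrec(-9, 1, 2, 2) == -5);
      assert(evalQuadChrec(-9, 1, 2, 3) == 0);
    }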
- SmallVector<const SCEV*, 4> NewOps(op_begin(), op_end()); + SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end()); NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); - const SCEV* NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); + const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); // Next, solve the constructed addrec - std::pair<const SCEV*,const SCEV*> Roots = + std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE); const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); @@ -4340,7 +5055,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // Pick the smallest positive root value. if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, - R1->getValue(), R2->getValue()))) { + R1->getValue(), R2->getValue()))) { if (CB->getZExtValue() == false) std::swap(R1, R2); // R1 is the minimum root now. @@ -4352,7 +5067,8 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, SE); if (Range.contains(R1Val->getValue())) { // The next iteration must be out of the range... - ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()+1); + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (!Range.contains(R1Val->getValue())) @@ -4362,7 +5078,8 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // If R1 was not in the range, then it is a good return value. Make // sure that R1-1 WAS in the range though, just in case. - ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()-1); + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (Range.contains(R1Val->getValue())) return R1; @@ -4381,22 +5098,21 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, //===----------------------------------------------------------------------===// void ScalarEvolution::SCEVCallbackVH::deleted() { - assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!"); + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); if (PHINode *PN = dyn_cast<PHINode>(getValPtr())) SE->ConstantEvolutionLoopExitValue.erase(PN); - if (Instruction *I = dyn_cast<Instruction>(getValPtr())) - SE->ValuesAtScopes.erase(I); SE->Scalars.erase(getValPtr()); // this now dangles! } void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) { - assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!"); + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); // Forget all the expressions associated with users of the old value, // so that future queries will recompute the expressions using the new // value. 
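That invalidation is a transitive walk over the use graph; the code that follows adds a Visited set so shared users are processed only once. A standalone model of the same worklist pattern, with integers standing in for Value users (an illustrative simplification):

    #include <cassert>
    #include <map>
    #include <set>
    #include <vector>

    void invalidate(int Root, const std::map<int, std::vector<int>> &Uses,
                    std::set<int> &Erased) {
      std::vector<int> Worklist;
      std::set<int> Visited;
      if (Uses.count(Root))
        Worklist = Uses.at(Root);
      while (!Worklist.empty()) {
        int U = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(U).second)
          continue;                 // already handled this shared user
        Erased.insert(U);           // forget its cached expression
        if (Uses.count(U))
          for (int UU : Uses.at(U)) // and everything built on top of it
            Worklist.push_back(UU);
      }
    }

    int main() {
      std::map<int, std::vector<int>> Uses = {{0, {1, 2}}, {1, {3}}, {2, {3}}};
      std::set<int> Erased;
      invalidate(0, Uses, Erased);
      assert(Erased == std::set<int>({1, 2, 3}));
    }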
SmallVector<User *, 16> Worklist; + SmallPtrSet<User *, 8> Visited; Value *Old = getValPtr(); bool DeleteOld = false; for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); @@ -4410,20 +5126,19 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) { DeleteOld = true; continue; } + if (!Visited.insert(U)) + continue; if (PHINode *PN = dyn_cast<PHINode>(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); - if (Instruction *I = dyn_cast<Instruction>(U)) - SE->ValuesAtScopes.erase(I); - if (SE->Scalars.erase(U)) - for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); - UI != UE; ++UI) - Worklist.push_back(*UI); + SE->Scalars.erase(U); + for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); } + // Delete the Old value if it (indirectly) references itself. if (DeleteOld) { if (PHINode *PN = dyn_cast<PHINode>(Old)) SE->ConstantEvolutionLoopExitValue.erase(PN); - if (Instruction *I = dyn_cast<Instruction>(Old)) - SE->ValuesAtScopes.erase(I); SE->Scalars.erase(Old); // this now dangles! } @@ -4502,21 +5217,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const { // out SCEV values of all instructions that are interesting. Doing // this potentially causes it to create new SCEV objects though, // which technically conflicts with the const qualifier. This isn't - // observable from outside the class though (the hasSCEV function - // notwithstanding), so casting away the const isn't dangerous. + // observable from outside the class though, so casting away the + // const isn't dangerous. ScalarEvolution &SE = *const_cast<ScalarEvolution*>(this); OS << "Classifying expressions for: " << F->getName() << "\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (isSCEVable(I->getType())) { - OS << *I; + OS << *I << '\n'; OS << " --> "; - const SCEV* SV = SE.getSCEV(&*I); + const SCEV *SV = SE.getSCEV(&*I); SV->print(OS); const Loop *L = LI->getLoopFor((*I).getParent()); - const SCEV* AtUse = SE.getSCEVAtScope(SV, L); + const SCEV *AtUse = SE.getSCEVAtScope(SV, L); if (AtUse != SV) { OS << " --> "; AtUse->print(OS); @@ -4524,7 +5239,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const { if (L) { OS << "\t\t" "Exits: "; - const SCEV* ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); + const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) { OS << "<<Unknown>>"; } else { @@ -4540,7 +5255,3 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const { PrintLoopInfo(OS, &SE, *I); } -void ScalarEvolution::print(std::ostream &o, const Module *M) const { - raw_os_ostream OS(o); - print(OS, M); -} diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp new file mode 100644 index 000000000000..cc79e6c3b130 --- /dev/null +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -0,0 +1,133 @@ +//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a +// simple alias analysis implemented in terms of ScalarEvolution queries. 
+// +// ScalarEvolution has a more complete understanding of pointer arithmetic +// than BasicAliasAnalysis' collection of ad-hoc analyses. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +namespace { + /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis + /// implementation that uses ScalarEvolution to answer queries. + class VISIBILITY_HIDDEN ScalarEvolutionAliasAnalysis : public FunctionPass, + public AliasAnalysis { + ScalarEvolution *SE; + + public: + static char ID; // Class identification, replacement for typeinfo + ScalarEvolutionAliasAnalysis() : FunctionPass(&ID), SE(0) {} + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + virtual AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + + Value *GetUnderlyingIdentifiedObject(const SCEV *S); + }; +} // End of anonymous namespace + +// Register this pass... +char ScalarEvolutionAliasAnalysis::ID = 0; +static RegisterPass<ScalarEvolutionAliasAnalysis> +X("scev-aa", "ScalarEvolution-based Alias Analysis", false, true); + +// Declare that we implement the AliasAnalysis interface +static RegisterAnalysisGroup<AliasAnalysis> Y(X); + +FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() { + return new ScalarEvolutionAliasAnalysis(); +} + +void +ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredTransitive<ScalarEvolution>(); + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +bool +ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) { + InitializeAliasAnalysis(this); + SE = &getAnalysis<ScalarEvolution>(); + return false; +} + +/// GetUnderlyingIdentifiedObject - Given an expression, try to find an +/// "identified object" (see AliasAnalysis::isIdentifiedObject) base +/// value. Return null if none was found. +Value * +ScalarEvolutionAliasAnalysis::GetUnderlyingIdentifiedObject(const SCEV *S) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // In an addrec, assume that the base will be in the start, rather + // than the step. + return GetUnderlyingIdentifiedObject(AR->getStart()); + } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { + // If there's a pointer operand, it'll be sorted at the end of the list. + const SCEV *Last = A->getOperand(A->getNumOperands()-1); + if (isa<PointerType>(Last->getType())) + return GetUnderlyingIdentifiedObject(Last); + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // Determine if we've found an Identified object. + Value *V = U->getValue(); + if (isIdentifiedObject(V)) + return V; + } + // No Identified object found. + return 0; +} + +AliasAnalysis::AliasResult +ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize, + const Value *B, unsigned BSize) { + // This is ScalarEvolutionAliasAnalysis. Get the SCEVs! + const SCEV *AS = SE->getSCEV(const_cast<Value *>(A)); + const SCEV *BS = SE->getSCEV(const_cast<Value *>(B)); + + // If they evaluate to the same expression, it's a MustAlias. + if (AS == BS) return MustAlias; + + // If something is known about the difference between the two addresses, + // see if it's enough to prove a NoAlias.
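The range check that follows proves NoAlias when each access is too small to reach the other: ASize <= umin(B - A) and BSize <= umin(A - B). A standalone model of that test, with 32-bit offsets assumed for the sketch:

    #include <cassert>
    #include <cstdint>

    // MinBA = smallest possible B - A, MinAB = smallest possible A - B,
    // both taken as wrapped unsigned values.
    bool provablyNoAlias(uint32_t MinBA, uint32_t ASize,
                         uint32_t MinAB, uint32_t BSize) {
      return ASize <= MinBA && BSize <= MinAB;
    }

    int main() {
      // Two 8-byte accesses always exactly 16 bytes apart:
      // min(B-A) == 16 and min(A-B) == 2^32 - 16, so they cannot overlap.
      assert(provablyNoAlias(16, 8, 0xFFFFFFF0u, 8));
    }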
+ if (SE->getEffectiveSCEVType(AS->getType()) == + SE->getEffectiveSCEVType(BS->getType())) { + unsigned BitWidth = SE->getTypeSizeInBits(AS->getType()); + APInt AI(BitWidth, ASize); + const SCEV *BA = SE->getMinusSCEV(BS, AS); + if (AI.ule(SE->getUnsignedRange(BA).getUnsignedMin())) { + APInt BI(BitWidth, BSize); + const SCEV *AB = SE->getMinusSCEV(AS, BS); + if (BI.ule(SE->getUnsignedRange(AB).getUnsignedMin())) + return NoAlias; + } + } + + // If ScalarEvolution can find an underlying object, form a new query. + // The correctness of this depends on ScalarEvolution not recognizing + // inttoptr and ptrtoint operators. + Value *AO = GetUnderlyingIdentifiedObject(AS); + Value *BO = GetUnderlyingIdentifiedObject(BS); + if ((AO && AO != A) || (BO && BO != B)) + if (alias(AO ? AO : A, AO ? ~0u : ASize, + BO ? BO : B, BO ? ~0u : BSize) == NoAlias) + return NoAlias; + + // Forward the query to the next analysis. + return AliasAnalysis::alias(A, ASize, B, BSize); +} diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 729a0c325448..d674ee847f11 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/LLVMContext.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -52,10 +53,9 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { return CE->getOperand(0); } - // FIXME: keep track of the cast instruction. if (Constant *C = dyn_cast<Constant>(V)) return ConstantExpr::getCast(Op, C, Ty); - + if (Argument *A = dyn_cast<Argument>(V)) { // Check to see if there is already a cast! for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); @@ -155,55 +155,95 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made /// unnecessary; in its place, just signed-divide Ops[i] by the scale and /// check to see if the divide was folded. -static bool FactorOutConstant(const SCEV* &S, - const SCEV* &Remainder, - const APInt &Factor, - ScalarEvolution &SE) { +static bool FactorOutConstant(const SCEV *&S, + const SCEV *&Remainder, + const SCEV *Factor, + ScalarEvolution &SE, + const TargetData *TD) { // Everything is divisible by one. - if (Factor == 1) + if (Factor->isOne()) + return true; + + // x/x == 1. + if (S == Factor) { + S = SE.getIntegerSCEV(1, S->getType()); return true; + } // For a Constant, check for a multiple of the given factor. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { - ConstantInt *CI = - ConstantInt::get(C->getValue()->getValue().sdiv(Factor)); - // If the quotient is zero and the remainder is non-zero, reject - // the value at this scale. It will be considered for subsequent - // smaller scales. - if (C->isZero() || !CI->isZero()) { - const SCEV* Div = SE.getConstant(CI); - S = Div; - Remainder = - SE.getAddExpr(Remainder, - SE.getConstant(C->getValue()->getValue().srem(Factor))); + // 0/x == 0. + if (C->isZero()) return true; + // Check for divisibility. + if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) { + ConstantInt *CI = + ConstantInt::get(SE.getContext(), + C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + // If the quotient is zero and the remainder is non-zero, reject + // the value at this scale. It will be considered for subsequent + // smaller scales. 
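FactorOutConstant's constant case is ordinary quotient/remainder splitting, with one twist: a zero quotient with a nonzero remainder is rejected so the operand can be retried at a smaller scale. In miniature, with plain signed division standing in for the APInt arithmetic:

    #include <cassert>

    // Try to write S as Factor*Q + R so Q can become a scaled array index.
    bool factorOut(long S, long Factor, long &Q, long &R) {
      Q = S / Factor;
      R = S % Factor;
      return S == 0 || Q != 0; // reject a zero quotient with a remainder
    }

    int main() {
      long Q, R;
      assert(factorOut(24, 8, Q, R) && Q == 3 && R == 0); // offset 24 -> index 3
      assert(factorOut(26, 8, Q, R) && Q == 3 && R == 2); // index 3, 2 bytes over
      assert(!factorOut(2, 8, Q, R)); // too small for this scale; retry smaller
    }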
+ if (!CI->isZero()) { + const SCEV *Div = SE.getConstant(CI); + S = Div; + Remainder = + SE.getAddExpr(Remainder, + SE.getConstant(C->getValue()->getValue().srem( + FC->getValue()->getValue()))); + return true; + } } } // In a Mul, check if there is a constant operand which is a multiple // of the given factor. - if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) - if (!C->getValue()->getValue().srem(Factor)) { - const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands(); - SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(), - MOperands.end()); - NewMulOps[0] = - SE.getConstant(C->getValue()->getValue().sdiv(Factor)); - S = SE.getMulExpr(NewMulOps); - return true; + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { + if (TD) { + // With TargetData, the size is known. Check if there is a constant + // operand which is a multiple of the given factor. If so, we can + // factor it. + const SCEVConstant *FC = cast<SCEVConstant>(Factor); + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) + if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { + const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands(); + SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(), + MOperands.end()); + NewMulOps[0] = + SE.getConstant(C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + S = SE.getMulExpr(NewMulOps); + return true; + } + } else { + // Without TargetData, check if Factor can be factored out of any of the + // Mul's operands. If so, we can just remove it. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *SOp = M->getOperand(i); + const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType()); + if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && + Remainder->isZero()) { + const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands(); + SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(), + MOperands.end()); + NewMulOps[i] = SOp; + S = SE.getMulExpr(NewMulOps); + return true; + } } + } + } // In an AddRec, check if both start and step are divisible. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { - const SCEV* Step = A->getStepRecurrence(SE); - const SCEV* StepRem = SE.getIntegerSCEV(0, Step->getType()); - if (!FactorOutConstant(Step, StepRem, Factor, SE)) + const SCEV *Step = A->getStepRecurrence(SE); + const SCEV *StepRem = SE.getIntegerSCEV(0, Step->getType()); + if (!FactorOutConstant(Step, StepRem, Factor, SE, TD)) return false; if (!StepRem->isZero()) return false; - const SCEV* Start = A->getStart(); - if (!FactorOutConstant(Start, Remainder, Factor, SE)) + const SCEV *Start = A->getStart(); + if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) return false; S = SE.getAddRecExpr(Start, Step, A->getLoop()); return true; @@ -212,15 +252,81 @@ static bool FactorOutConstant(const SCEV* &S, return false; } -/// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP -/// instead of using ptrtoint+arithmetic+inttoptr. This helps -/// BasicAliasAnalysis analyze the result. However, it suffers from the -/// underlying bug described in PR2831. Addition in LLVM currently always -/// has two's complement wrapping guaranteed. However, the semantics for -/// getelementptr overflow are ambiguous. 
In the common case though, this -/// expansion gets used when a GEP in the original code has been converted -/// into integer arithmetic, in which case the resulting code will be no -/// more undefined than it was originally. +/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs +/// is the number of SCEVAddRecExprs present, which are kept at the end of +/// the list. +/// +static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, + const Type *Ty, + ScalarEvolution &SE) { + unsigned NumAddRecs = 0; + for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i) + ++NumAddRecs; + // Group Ops into non-addrecs and addrecs. + SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs); + SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end()); + // Let ScalarEvolution sort and simplify the non-addrecs list. + const SCEV *Sum = NoAddRecs.empty() ? + SE.getIntegerSCEV(0, Ty) : + SE.getAddExpr(NoAddRecs); + // If it returned an add, use the operands. Otherwise it simplified + // the sum into a single value, so just use that. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum)) + Ops = Add->getOperands(); + else { + Ops.clear(); + if (!Sum->isZero()) + Ops.push_back(Sum); + } + // Then append the addrecs. + Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); +} + +/// SplitAddRecs - Flatten a list of add operands, moving addrec start values +/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}. +/// This helps expose more opportunities for folding parts of the expressions +/// into GEP indices. +/// +static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, + const Type *Ty, + ScalarEvolution &SE) { + // Find the addrecs. + SmallVector<const SCEV *, 8> AddRecs; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) { + const SCEV *Start = A->getStart(); + if (Start->isZero()) break; + const SCEV *Zero = SE.getIntegerSCEV(0, Ty); + AddRecs.push_back(SE.getAddRecExpr(Zero, + A->getStepRecurrence(SE), + A->getLoop())); + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) { + Ops[i] = Zero; + Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); + e += Add->getNumOperands(); + } else { + Ops[i] = Start; + } + } + if (!AddRecs.empty()) { + // Add the addrecs onto the end of the list. + Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); + // Resort the operand list, moving any constants to the front. + SimplifyAddOperands(Ops, Ty, SE); + } +} + +/// expandAddToGEP - Expand an addition expression with a pointer type into +/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps +/// BasicAliasAnalysis and other passes analyze the result. See the rules +/// for getelementptr vs. inttoptr in +/// http://llvm.org/docs/LangRef.html#pointeraliasing +/// for details. +/// +/// Design note: The correctness of using getelementptr here depends on +/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as +/// they may introduce pointer arithmetic which may not be safely converted +/// into getelementptr. /// /// Design note: It might seem desirable for this function to be more /// loop-aware. If some of the indices are loop-invariant while others @@ -237,92 +343,130 @@ static bool FactorOutConstant(const SCEV* &S, /// loop-invariant portions of expressions, after considering what /// can be folded using target addressing modes.
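SplitAddRecs is justified by the identity {A+B,+,C} == A + B + {0,+,C}: the start can be peeled off and folded into other GEP operands while the zero-based recurrence is handled separately. A quick numeric check of the identity for affine chrecs:

    #include <cassert>

    int chrec(int Start, int Step, int i) { return Start + Step * i; }

    int main() {
      int a = 3, b = 4, c = 5;
      for (int i = 0; i < 8; ++i)
        assert(chrec(a + b, c, i) == a + b + chrec(0, c, i));
    }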
/// -Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin, - const SCEV* const *op_end, +Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, + const SCEV *const *op_end, const PointerType *PTy, const Type *Ty, Value *V) { const Type *ElTy = PTy->getElementType(); SmallVector<Value *, 4> GepIndices; - SmallVector<const SCEV*, 8> Ops(op_begin, op_end); + SmallVector<const SCEV *, 8> Ops(op_begin, op_end); bool AnyNonZeroIndices = false; + // Split AddRecs up into parts as either of the parts may be usable + // without the other. + SplitAddRecs(Ops, Ty, SE); + // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. The first index in a GEP // indexes into the array implied by the pointer operand; the rest of // the indices index into the element or field type selected by the // preceding index. for (;;) { - APInt ElSize = APInt(SE.getTypeSizeInBits(Ty), - ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0); - SmallVector<const SCEV*, 8> NewOps; - SmallVector<const SCEV*, 8> ScaledOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - // Split AddRecs up into parts as either of the parts may be usable - // without the other. - if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) - if (!A->getStart()->isZero()) { - const SCEV* Start = A->getStart(); - Ops.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()), - A->getStepRecurrence(SE), - A->getLoop())); - Ops[i] = Start; - ++e; - } - // If the scale size is not 0, attempt to factor out a scale. - if (ElSize != 0) { - const SCEV* Op = Ops[i]; - const SCEV* Remainder = SE.getIntegerSCEV(0, Op->getType()); - if (FactorOutConstant(Op, Remainder, ElSize, SE)) { - ScaledOps.push_back(Op); // Op now has ElSize factored out. - NewOps.push_back(Remainder); - continue; + const SCEV *ElSize = SE.getAllocSizeExpr(ElTy); + // If the scale size is not 0, attempt to factor out a scale for + // array indexing. + SmallVector<const SCEV *, 8> ScaledOps; + if (ElTy->isSized() && !ElSize->isZero()) { + SmallVector<const SCEV *, 8> NewOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEV *Op = Ops[i]; + const SCEV *Remainder = SE.getIntegerSCEV(0, Ty); + if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + // Op now has ElSize factored out. + ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of operands + // we'll scan next iteration. + NewOps.push_back(Ops[i]); } } - // If the operand was not divisible, add it to the list of operands - // we'll scan next iteration. - NewOps.push_back(Ops[i]); + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); + } } - Ops = NewOps; - AnyNonZeroIndices |= !ScaledOps.empty(); + + // Record the scaled array index for this level of the type. If + // we didn't find any operands that could be factored, tentatively + // assume that element zero was selected (since the zero offset + // would obviously be folded away). Value *Scaled = ScaledOps.empty() ? Constant::getNullValue(Ty) : expandCodeFor(SE.getAddExpr(ScaledOps), Ty); GepIndices.push_back(Scaled); // Collect struct field index operands. - if (!Ops.empty()) - while (const StructType *STy = dyn_cast<StructType>(ElTy)) { + while (const StructType *STy = dyn_cast<StructType>(ElTy)) { + bool FoundFieldNo = false; + // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break; + if (SE.TD) { + // With TargetData, field offsets are known. See if a constant offset + // falls within any of the struct fields. + if (Ops.empty()) break; if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) if (SE.getTypeSizeInBits(C->getType()) <= 64) { const StructLayout &SL = *SE.TD->getStructLayout(STy); uint64_t FullOffset = C->getValue()->getZExtValue(); if (FullOffset < SL.getSizeInBytes()) { unsigned ElIdx = SL.getElementContainingOffset(FullOffset); - GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx)); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); ElTy = STy->getTypeAtIndex(ElIdx); Ops[0] = SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); AnyNonZeroIndices = true; - continue; + FoundFieldNo = true; } } - break; + } else { + // Without TargetData, just check for a SCEVFieldOffsetExpr of the + // appropriate struct type. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (const SCEVFieldOffsetExpr *FO = + dyn_cast<SCEVFieldOffsetExpr>(Ops[i])) + if (FO->getStructType() == STy) { + unsigned FieldNo = FO->getFieldNo(); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), + FieldNo)); + ElTy = STy->getTypeAtIndex(FieldNo); + Ops[i] = SE.getConstant(Ty, 0); + AnyNonZeroIndices = true; + FoundFieldNo = true; + break; + } + } + // If no struct field offsets were found, tentatively assume that + // field zero was selected (since the zero offset would obviously + // be folded away). + if (!FoundFieldNo) { + ElTy = STy->getTypeAtIndex(0u); + GepIndices.push_back( + Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); } + } - if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) ElTy = ATy->getElementType(); - continue; - } - break; + else + break; } // If none of the operands were convertable to proper GEP indices, cast // the base to i8* and do an ugly getelementptr with that. It's still // better than ptrtoint+arithmetic+inttoptr at least. if (!AnyNonZeroIndices) { + // Cast the base to i8*. V = InsertNoopCastOfTo(V, - Type::Int8Ty->getPointerTo(PTy->getAddressSpace())); + Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + + // Expand the operands for a plain byte offset. Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); // Fold a GEP with constant operands. @@ -345,12 +489,15 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin, } } - Value *GEP = Builder.CreateGEP(V, Idx, "scevgep"); + // Emit a GEP. + Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); InsertedValues.insert(GEP); return GEP; } - // Insert a pretty getelementptr. + // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, + // because ScalarEvolution may have changed the address arithmetic to + // compute a value which is beyond the end of the allocated object. Value *GEP = Builder.CreateGEP(V, GepIndices.begin(), GepIndices.end(), @@ -361,21 +508,37 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin, } Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + int NumOperands = S->getNumOperands(); const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expand(S->getOperand(S->getNumOperands()-1)); + + // Find the index of an operand to start with. Choose the operand with + // pointer type, if there is one, or the last operand otherwise. 
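// In IR terms the i8* fallback above still yields a single GEP; a
// hypothetical before/after in the typed-pointer syntax of this release
// (%p and %off are placeholders):
//
//   before:  %pi = ptrtoint %struct.T* %p to i64
//            %s  = add i64 %pi, %off
//            %q  = inttoptr i64 %s to %struct.T*
//   after:   %c  = bitcast %struct.T* %p to i8*
//            %q  = getelementptr i8* %c, i64 %off      ; "uglygep"
//
// Keeping the access as a GEP leaves the base pointer visible to
// BasicAliasAnalysis, per the pointer-aliasing rules cited above.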
+ int PIdx = 0; + for (; PIdx != NumOperands - 1; ++PIdx) + if (isa<PointerType>(S->getOperand(PIdx)->getType())) break; + + // Expand code for the operand that we chose. + Value *V = expand(S->getOperand(PIdx)); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. - if (SE.TD) - if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) { - const SmallVectorImpl<const SCEV*> &Ops = S->getOperands(); - return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], PTy, Ty, V); - } + if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) { + // Take the operand at PIdx out of the list. + const SmallVectorImpl<const SCEV *> &Ops = S->getOperands(); + SmallVector<const SCEV *, 8> NewOps; + NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx); + NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end()); + // Make a GEP. + return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V); + } + // Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer + // arithmetic. V = InsertNoopCastOfTo(V, Ty); // Emit a bunch of add instructions - for (int i = S->getNumOperands()-2; i >= 0; --i) { + for (int i = NumOperands-1; i >= 0; --i) { + if (i == PIdx) continue; Value *W = expandCodeFor(S->getOperand(i), Ty); V = InsertBinop(Instruction::Add, V, W); } @@ -422,7 +585,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { /// Move parts of Base into Rest to leave Base with the minimal /// expression that provides a pointer operand suitable for a /// GEP expansion. -static void ExposePointerBase(const SCEV* &Base, const SCEV* &Rest, +static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, ScalarEvolution &SE) { while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { Base = A->getStart(); @@ -433,7 +596,7 @@ static void ExposePointerBase(const SCEV* &Base, const SCEV* &Rest, } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { Base = A->getOperand(A->getNumOperands()-1); - SmallVector<const SCEV*, 8> NewAddOps(A->op_begin(), A->op_end()); + SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end()); NewAddOps.back() = Rest; Rest = SE.getAddExpr(NewAddOps); ExposePointerBase(Base, Rest, SE); @@ -457,11 +620,11 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (CanonicalIV && SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty)) { - const SCEV *Start = SE.getAnyExtendExpr(S->getStart(), - CanonicalIV->getType()); - const SCEV *Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE), - CanonicalIV->getType()); - Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop())); + const SmallVectorImpl<const SCEV *> &Ops = S->getOperands(); + SmallVector<const SCEV *, 4> NewOps(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(Ops[i], CanonicalIV->getType()); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop())); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = @@ -475,28 +638,26 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - const SmallVectorImpl<const SCEV*> &SOperands = S->getOperands(); - SmallVector<const SCEV*, 4> NewOps(SOperands.begin(), SOperands.end()); + const SmallVectorImpl<const SCEV *> &SOperands = S->getOperands(); + SmallVector<const SCEV *, 4> NewOps(SOperands.begin(), SOperands.end()); 
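// (NewOps is a full copy of the addrec's operands; the next line zeroes out
// the start so that Rest becomes {0,+,F}, while the start X is expanded
// separately, ideally as a GEP base via ExposePointerBase below.)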
NewOps[0] = SE.getIntegerSCEV(0, Ty); - const SCEV* Rest = SE.getAddRecExpr(NewOps, L); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. - if (SE.TD) { - const SCEV* Base = S->getStart(); - const SCEV* RestArray[1] = { Rest }; - // Dig into the expression to find the pointer base for a GEP. - ExposePointerBase(Base, RestArray[0], SE); - // If we found a pointer, expand the AddRec with a GEP. - if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { - // Make sure the Base isn't something exotic, such as a multiplied - // or divided pointer value. In those cases, the result type isn't - // actually a pointer type. - if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) { - Value *StartV = expand(Base); - assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); - return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); - } + const SCEV *Base = S->getStart(); + const SCEV *RestArray[1] = { Rest }; + // Dig into the expression to find the pointer base for a GEP. + ExposePointerBase(Base, RestArray[0], SE); + // If we found a pointer, expand the AddRec with a GEP. + if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { + // Make sure the Base isn't something exotic, such as a multiplied + // or divided pointer value. In those cases, the result type isn't + // actually a pointer type. + if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) { + Value *StartV = expand(Base); + assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); } } @@ -519,29 +680,22 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Create and insert the PHI node for the induction variable in the // specified loop. BasicBlock *Header = L->getHeader(); - BasicBlock *Preheader = L->getLoopPreheader(); PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin()); InsertedValues.insert(PN); - PN->addIncoming(Constant::getNullValue(Ty), Preheader); - pred_iterator HPI = pred_begin(Header); - assert(HPI != pred_end(Header) && "Loop with zero preds???"); - if (!L->contains(*HPI)) ++HPI; - assert(HPI != pred_end(Header) && L->contains(*HPI) && - "No backedge in loop?"); - - // Insert a unit add instruction right before the terminator corresponding - // to the back-edge. Constant *One = ConstantInt::get(Ty, 1); - Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next", - (*HPI)->getTerminator()); - InsertedValues.insert(Add); - - pred_iterator PI = pred_begin(Header); - if (*PI == Preheader) - ++PI; - PN->addIncoming(Add, *PI); - return PN; + for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); + HPI != HPE; ++HPI) + if (L->contains(*HPI)) { + // Insert a unit add instruction right before the terminator corresponding + // to the back-edge. + Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next", + (*HPI)->getTerminator()); + InsertedValues.insert(Add); + PN->addIncoming(Add, *HPI); + } else { + PN->addIncoming(Constant::getNullValue(Ty), *HPI); + } } // {0,+,F} --> {0,+,1} * F @@ -563,19 +717,19 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // folders, then expandCodeFor the closed form. This allows the folders to // simplify the expression without having to build a bunch of special code // into this folder. - const SCEV* IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. 
+ const SCEV *IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. // Promote S up to the canonical IV type, if the cast is foldable. - const SCEV* NewS = S; - const SCEV* Ext = SE.getNoopOrAnyExtend(S, I->getType()); + const SCEV *NewS = S; + const SCEV *Ext = SE.getNoopOrAnyExtend(S, I->getType()); if (isa<SCEVAddRecExpr>(Ext)) NewS = Ext; - const SCEV* V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); + const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; // Truncate the result down to the original type, if needed. - const SCEV* T = SE.getTruncateOrNoop(V, Ty); + const SCEV *T = SE.getTruncateOrNoop(V, Ty); return expand(T); } @@ -607,9 +761,15 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *LHS = expandCodeFor(S->getOperand(0), Ty); - for (unsigned i = 1; i < S->getNumOperands(); ++i) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } Value *RHS = expandCodeFor(S->getOperand(i), Ty); Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp"); InsertedValues.insert(ICmp); @@ -617,13 +777,23 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { InsertedValues.insert(Sel); LHS = Sel; } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); return LHS; } Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *LHS = expandCodeFor(S->getOperand(0), Ty); - for (unsigned i = 1; i < S->getNumOperands(); ++i) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } Value *RHS = expandCodeFor(S->getOperand(i), Ty); Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp"); InsertedValues.insert(ICmp); @@ -631,10 +801,22 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { InsertedValues.insert(Sel); LHS = Sel; } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); return LHS; } -Value *SCEVExpander::expandCodeFor(const SCEV* SH, const Type *Ty) { +Value *SCEVExpander::visitFieldOffsetExpr(const SCEVFieldOffsetExpr *S) { + return ConstantExpr::getOffsetOf(S->getStructType(), S->getFieldNo()); +} + +Value *SCEVExpander::visitAllocSizeExpr(const SCEVAllocSizeExpr *S) { + return ConstantExpr::getSizeOf(S->getAllocType()); +} + +Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) { // Expand the code for this SCEV. 
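// (Shape of the chain visitSMaxExpr above builds for three hypothetical i32
// values: %s1 = select (%c s> %b), %c, %b, and then the final result is
// select (%s1 s> %a), %s1, %a. visitUMaxExpr is identical with unsigned
// compares, and mixed pointer operands are bridged through
// InsertNoopCastOfTo as shown.)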
Value *V = expand(SH); if (Ty) { @@ -695,7 +877,7 @@ Value * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty) { assert(Ty->isInteger() && "Can only insert integer induction variables!"); - const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), + const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), SE.getIntegerSCEV(1, Ty), L); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index 543306854ced..b7844f022765 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -17,7 +17,9 @@ #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -27,7 +29,7 @@ using namespace llvm; AbstractLatticeFunction::~AbstractLatticeFunction() {} /// PrintValue - Render the specified lattice value to the specified stream. -void AbstractLatticeFunction::PrintValue(LatticeVal V, std::ostream &OS) { +void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) { if (V == UndefVal) OS << "undefined"; else if (V == OverdefinedVal) @@ -87,7 +89,7 @@ void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) { /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. void SparseSolver::MarkBlockExecutable(BasicBlock *BB) { - DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n"; + DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n"); BBExecutable.insert(BB); // Basic block is executable! BBWorkList.push_back(BB); // Add the block to the work list! } @@ -98,8 +100,8 @@ void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) return; // This edge is already known to be executable! - DOUT << "Marking Edge Executable: " << Source->getNameStart() - << " -> " << Dest->getNameStart() << "\n"; + DEBUG(errs() << "Marking Edge Executable: " << Source->getName() + << " -> " << Dest->getName() << "\n"); if (BBExecutable.count(Dest)) { // The destination is already executable, but we just made an edge @@ -153,7 +155,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, } // Constant condition variables mean the branch can only go a single way - Succs[C == ConstantInt::getFalse()] = true; + Succs[C == ConstantInt::getFalse(*Context)] = true; return; } @@ -221,6 +223,16 @@ void SparseSolver::visitTerminatorInst(TerminatorInst &TI) { } void SparseSolver::visitPHINode(PHINode &PN) { + // The lattice function may store more information on a PHINode than could be + // computed from its incoming values. For example, SSI form stores its sigma + // functions as PHINodes with a single incoming value. 
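The hook tested just below is the one a client overrides; a fragmentary
sketch of such a lattice function (SigmaAwareLattice is hypothetical, and
the other required callbacks are omitted):

  struct SigmaAwareLattice : public AbstractLatticeFunction {
    // Route SSI sigma functions, i.e. PHIs with a single incoming value,
    // through ComputeInstructionState instead of the generic merge rule.
    virtual bool IsSpecialCasedPHI(PHINode *PN) {
      return PN->getNumIncomingValues() == 1;
    }
  };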
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) { + LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this); + if (IV != LatticeFunc->getUntrackedVal()) + UpdateState(PN, IV); + return; + } + LatticeVal PNIV = getOrInitValueState(&PN); LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); @@ -283,7 +295,7 @@ void SparseSolver::Solve(Function &F) { Instruction *I = InstWorkList.back(); InstWorkList.pop_back(); - DOUT << "\nPopped off I-WL: " << *I; + DEBUG(errs() << "\nPopped off I-WL: " << *I << "\n"); // "I" got into the work list because it made a transition. See if any // users are both live and in need of updating. @@ -300,7 +312,7 @@ void SparseSolver::Solve(Function &F) { BasicBlock *BB = BBWorkList.back(); BBWorkList.pop_back(); - DOUT << "\nPopped off BBWL: " << *BB; + DEBUG(errs() << "\nPopped off BBWL: " << *BB); // Notify all instructions in this basic block that they are newly // executable. @@ -310,7 +322,7 @@ void SparseSolver::Solve(Function &F) { } } -void SparseSolver::Print(Function &F, std::ostream &OS) const { +void SparseSolver::Print(Function &F, raw_ostream &OS) const { OS << "\nFUNCTION: " << F.getNameStr() << "\n"; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (!BBExecutable.count(BB)) @@ -322,7 +334,7 @@ void SparseSolver::Print(Function &F, std::ostream &OS) const { OS << "; anon bb\n"; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { LatticeFunc->PrintValue(getLatticeState(I), OS); - OS << *I; + OS << *I << "\n"; } OS << "\n"; diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index 8f19fda953dd..c9b303b48b28 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -18,7 +18,7 @@ #include "llvm/Analysis/Trace.h" #include "llvm/Function.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; Function *Trace::getFunction() const { @@ -31,9 +31,9 @@ Module *Trace::getModule() const { /// print - Write trace to output stream. /// -void Trace::print(std::ostream &O) const { - Function *F = getFunction (); - O << "; Trace from function " << F->getName() << ", blocks:\n"; +void Trace::print(raw_ostream &O) const { + Function *F = getFunction(); + O << "; Trace from function " << F->getNameStr() << ", blocks:\n"; for (const_iterator i = begin(), e = end(); i != e; ++i) { O << "; "; WriteAsOperand(O, *i, true, getModule()); @@ -46,5 +46,5 @@ void Trace::print(std::ostream &O) const { /// output stream. /// void Trace::dump() const { - print(cerr); + print(errs()); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 07a18fe4de42..baa347a6638f 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -16,25 +16,16 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include <cstring> using namespace llvm; -/// getOpcode - If this is an Instruction or a ConstantExpr, return the -/// opcode value. Otherwise return UserOp1. -static unsigned getOpcode(const Value *V) { - if (const Instruction *I = dyn_cast<Instruction>(V)) - return I->getOpcode(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - return CE->getOpcode(); - // Use UserOp1 to mean there's no opcode. 
- return Instruction::UserOp1; -} - - /// ComputeMaskedBits - Determine which of the bits specified in Mask are /// known to be either zero or one and return them in the KnownZero/KnownOne /// bit sets. This code only analyzes bits in Mask, in order to short-circuit @@ -45,9 +36,15 @@ static unsigned getOpcode(const Value *V) { /// optimized based on the contradictory assumption that it is non-zero. /// Because instcombine aggressively folds operations with undef args anyway, /// this won't lose us code quality. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, APInt &KnownOne, - TargetData *TD, unsigned Depth) { + const TargetData *TD, unsigned Depth) { const unsigned MaxDepth = 6; assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); @@ -91,8 +88,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { unsigned Align = GV->getAlignment(); - if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) - Align = TD->getPrefTypeAlignment(GV->getType()->getElementType()); + if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { + const Type *ObjectType = GV->getType()->getElementType(); + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (!GV->isDeclaration() && !GV->mayBeOverridden()) + Align = TD->getPrefTypeAlignment(ObjectType); + else + Align = TD->getABITypeAlignment(ObjectType); + } if (Align > 0) KnownZero = Mask & APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); @@ -101,17 +106,28 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownOne.clear(); return; } + // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has + // the bits of its aliasee. + if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) { + KnownZero.clear(); KnownOne.clear(); + } else { + ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne, + TD, Depth+1); + } + return; + } KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything. if (Depth == MaxDepth || Mask == 0) return; // Limit search depth. - User *I = dyn_cast<User>(V); + Operator *I = dyn_cast<Operator>(V); if (!I) return; APInt KnownZero2(KnownZero), KnownOne2(KnownOne); - switch (getOpcode(I)) { + switch (I->getOpcode()) { default: break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -228,12 +244,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // FALL THROUGH and handle them the same as zext/trunc. case Instruction::ZExt: case Instruction::Trunc: { + const Type *SrcTy = I->getOperand(0)->getType(); + + unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - const Type *SrcTy = I->getOperand(0)->getType(); - unsigned SrcBitWidth = TD ? 
- TD->getTypeSizeInBits(SrcTy) : - SrcTy->getScalarSizeInBits(); + if (isa<PointerType>(SrcTy)) + SrcBitWidth = TD->getTypeSizeInBits(SrcTy); + else + SrcBitWidth = SrcTy->getScalarSizeInBits(); + APInt MaskIn(Mask); MaskIn.zextOrTrunc(SrcBitWidth); KnownZero.zextOrTrunc(SrcBitWidth); @@ -261,8 +281,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. - const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType()); - unsigned SrcBitWidth = SrcTy->getBitWidth(); + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); APInt MaskIn(Mask); MaskIn.trunc(SrcBitWidth); @@ -382,7 +401,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Determine which operand has more trailing zeros, and use that // many bits from the other operand. if (LHSKnownZeroOut > RHSKnownZeroOut) { - if (getOpcode(I) == Instruction::Add) { + if (I->getOpcode() == Instruction::Add) { APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); KnownZero |= KnownZero2 & Mask; KnownOne |= KnownOne2 & Mask; @@ -462,10 +481,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, Align = TD->getABITypeAlignment(AI->getType()->getElementType()); Align = std::max(Align, - (unsigned)TD->getABITypeAlignment(Type::DoubleTy)); + (unsigned)TD->getABITypeAlignment( + Type::getDoubleTy(V->getContext()))); Align = std::max(Align, - (unsigned)TD->getABITypeAlignment(Type::Int64Ty)); + (unsigned)TD->getABITypeAlignment( + Type::getInt64Ty(V->getContext()))); } } @@ -522,10 +543,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, for (unsigned i = 0; i != 2; ++i) { Value *L = P->getIncomingValue(i); Value *R = P->getIncomingValue(!i); - User *LU = dyn_cast<User>(L); + Operator *LU = dyn_cast<Operator>(L); if (!LU) continue; - unsigned Opcode = getOpcode(LU); + unsigned Opcode = LU->getOpcode(); // Check for operations that have the property that if // both their operands have low zero bits, the result // will have low zero bits. @@ -608,8 +629,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, - TargetData *TD, unsigned Depth) { + const TargetData *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); @@ -626,7 +653,8 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// /// 'Op' must have a scalar integer type. 
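As a usage sketch of the masked-bits interface above (V and TD are assumed
to come from the calling pass; this mirrors how MaskedValueIsZero drives it):

  unsigned BitWidth = V->getType()->getScalarSizeInBits();
  APInt Mask = APInt::getAllOnesValue(BitWidth);
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
  // Two known-zero low bits prove V is a multiple of 4, e.g. the address
  // of a sufficiently aligned GlobalValue per the logic above.
  if (KnownZero.countTrailingOnes() >= 2) {
    // ... V is at least 4-byte aligned ...
  }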
/// -unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) { +unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, + unsigned Depth) { assert((TD || V->getType()->isIntOrIntVector()) && "ComputeNumSignBits requires a TargetData object to operate " "on non-integer values!"); @@ -642,8 +670,8 @@ unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) { if (Depth == 6) return 1; // Limit search depth. - User *U = dyn_cast<User>(V); - switch (getOpcode(V)) { + Operator *U = dyn_cast<Operator>(V); + switch (Operator::getOpcode(V)) { default: break; case Instruction::SExt: Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth(); @@ -789,7 +817,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (Depth == 6) return 1; // Limit search depth. - const Instruction *I = dyn_cast<Instruction>(V); + const Operator *I = dyn_cast<Operator>(V); if (I == 0) return false; // (add x, 0.0) is guaranteed to return +0.0, not -0.0. @@ -810,15 +838,15 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (const CallInst *CI = dyn_cast<CallInst>(I)) if (const Function *F = CI->getCalledFunction()) { if (F->isDeclaration()) { - switch (F->getNameLen()) { - case 3: // abs(x) != -0.0 - if (!strcmp(F->getNameStart(), "abs")) return true; - break; - case 4: // abs[lf](x) != -0.0 - if (!strcmp(F->getNameStart(), "absf")) return true; - if (!strcmp(F->getNameStart(), "absl")) return true; - break; - } + // abs(x) != -0.0 + if (F->getName() == "abs") return true; + // fabs[lf](x) != -0.0 + if (F->getName() == "fabs") return true; + if (F->getName() == "fabsf") return true; + if (F->getName() == "fabsl") return true; + if (F->getName() == "sqrt" || F->getName() == "sqrtf" || + F->getName() == "sqrtl") + return CannotBeNegativeZero(CI->getOperand(1), Depth+1); } } @@ -831,10 +859,11 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { // indices from Idxs that should be left out when inserting into the resulting // struct. To is the result struct built so far, new insertvalue instructions // build on that. -Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, - SmallVector<unsigned, 10> &Idxs, - unsigned IdxSkip, - Instruction *InsertBefore) { +static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, + SmallVector<unsigned, 10> &Idxs, + unsigned IdxSkip, + LLVMContext &Context, + Instruction *InsertBefore) { const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType); if (STy) { // Save the original To argument so we can modify it @@ -845,7 +874,7 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, Idxs.push_back(i); Value *PrevTo = To; To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, - InsertBefore); + Context, InsertBefore); Idxs.pop_back(); if (!To) { // Couldn't find any inserted value for this index? Cleanup @@ -868,7 +897,7 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, // we might be able to find the complete struct somewhere. // Find the value that is at that particular spot - Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end()); + Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end(), Context); if (!V) return NULL; @@ -890,8 +919,9 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, // insertvalue instruction somewhere). 
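The -0.0 reasoning above exists to license folds like (fadd X, +0.0) -> X;
a minimal sketch of a caller (CanFoldAddZero is hypothetical):

  static bool CanFoldAddZero(Value *X) {
    // -0.0 + +0.0 evaluates to +0.0, so dropping the +0.0 addend is only
    // sound when X provably cannot be -0.0.
    return CannotBeNegativeZero(X);
  }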
// // All inserted insertvalue instructions are inserted before InsertBefore -Value *BuildSubAggregate(Value *From, const unsigned *idx_begin, - const unsigned *idx_end, Instruction *InsertBefore) { +static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin, + const unsigned *idx_end, LLVMContext &Context, + Instruction *InsertBefore) { assert(InsertBefore && "Must have someplace to insert!"); const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), idx_begin, @@ -900,7 +930,8 @@ Value *BuildSubAggregate(Value *From, const unsigned *idx_begin, SmallVector<unsigned, 10> Idxs(idx_begin, idx_end); unsigned IdxSkip = Idxs.size(); - return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); + return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, + Context, InsertBefore); } /// FindInsertedValue - Given an aggregrate and an sequence of indices, see if @@ -910,7 +941,8 @@ Value *BuildSubAggregate(Value *From, const unsigned *idx_begin, /// If InsertBefore is not null, this function will duplicate (modified) /// insertvalues when a part of a nested struct is extracted. Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, - const unsigned *idx_end, Instruction *InsertBefore) { + const unsigned *idx_end, LLVMContext &Context, + Instruction *InsertBefore) { // Nothing to index? Just return V then (this is useful at the end of our // recursion) if (idx_begin == idx_end) @@ -921,20 +953,20 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end) && "Invalid indices for type?"); const CompositeType *PTy = cast<CompositeType>(V->getType()); - + if (isa<UndefValue>(V)) return UndefValue::get(ExtractValueInst::getIndexedType(PTy, idx_begin, idx_end)); else if (isa<ConstantAggregateZero>(V)) return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy, - idx_begin, - idx_end)); + idx_begin, + idx_end)); else if (Constant *C = dyn_cast<Constant>(V)) { if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) // Recursively process this constant - return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1, idx_end, - InsertBefore); + return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1, + idx_end, Context, InsertBefore); } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { // Loop the indices for the insertvalue instruction in parallel with the // requested indices @@ -953,7 +985,8 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, // %C = insertvalue {i32, i32 } %A, i32 11, 1 // which allows the unused 0,0 element from the nested struct to be // removed. - return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore); + return BuildSubAggregate(V, idx_begin, req_idx, + Context, InsertBefore); else // We can't handle this without inserting insertvalues return 0; @@ -964,13 +997,13 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, // looking for, then. if (*req_idx != *i) return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end, - InsertBefore); + Context, InsertBefore); } // If we end up here, the indices of the insertvalue match with those // requested (though possibly only partially). Now we recursively look at // the inserted value, passing any remaining indices. 
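// (Worked example of this traversal: with the %A/%C insertvalue pair from
// the comment above, asking for index {1} of %C matches the outer
// insertvalue directly, so the recursion bottoms out at idx_begin == idx_end
// and returns the inserted i32 11 operand, with no memory traffic.)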
return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
- InsertBefore);
+ Context, InsertBefore);
} else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
// If we're extracting a value from an aggregrate that was extracted from
// something else, we can extract from that something else directly instead.
@@ -994,7 +1027,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
&& "Number of indices added not correct?");
return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
- InsertBefore);
+ Context, InsertBefore);
}
// Otherwise, we don't know (such as, extracting from a function return value
// or load instruction)
@@ -1035,7 +1068,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Make sure the index-ee is a pointer to array of i8.
const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
- if (AT == 0 || AT->getElementType() != Type::Int8Ty)
+ if (AT == 0 || AT->getElementType() != Type::getInt8Ty(V->getContext()))
return false;
// Check to make sure that the first operand of the GEP is an integer and
@@ -1056,11 +1089,16 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
StopAtNul);
}
+ if (MDString *MDStr = dyn_cast<MDString>(V)) {
+ Str = MDStr->getString();
+ return true;
+ }
+
// The GEP instruction, constant or instruction, must reference a global
// variable that is a constant and is initialized. The referenced constant
// initializer is the array that we'll use for optimization.
GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
- if (!GV || !GV->isConstant() || !GV->hasInitializer())
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
Constant *GlobalInit = GV->getInitializer();
@@ -1074,7 +1112,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Must be a Constant Array
ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (Array == 0 || Array->getType()->getElementType() != Type::Int8Ty)
+ if (Array == 0 ||
+ Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
return false;
// Get the number of elements in the array
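Finally, a usage sketch for the string helper above; V is a hypothetical i8*
into a constant global, and Offset/StopAtNul are assumed to default to 0 and
true as declared in ValueTracking.h:

  std::string Str;
  if (GetConstantStringInfo(V, Str)) {
    // For @msg = constant [6 x i8] c"hello\00", a V pointing at @msg
    // leaves Str == "hello": the search starts at offset 0 and the
    // terminating nul is not copied.
  }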