| author | Dimitry Andric <dim@FreeBSD.org> | 2011-02-20 12:57:14 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2011-02-20 12:57:14 +0000 |
| commit | cf099d11218cb6f6c5cce947d6738e347f07fb12 (patch) | |
| tree | d2b61ce94e654cb01a254d2195259db5f9cc3f3c /lib/Analysis | |
| parent | 49011b52fcba02a6051957b84705159f52fae4e4 (diff) | |
| download | src-cf099d11218cb6f6c5cce947d6738e347f07fb12.tar.gz src-cf099d11218cb6f6c5cce947d6738e347f07fb12.zip | |
Vendor import of llvm trunk r126079 (tag: vendor/llvm/llvm-r126079)
Notes:
svn path=/vendor/llvm/dist/; revision=218885
svn path=/vendor/llvm/llvm-r126079/; revision=218886; tag=vendor/llvm/llvm-r126079
Diffstat (limited to 'lib/Analysis')
57 files changed, 8388 insertions, 2481 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 1f2528fa560f..be02ddbaa534 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -30,12 +30,13 @@
 #include "llvm/Function.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/Target/TargetData.h"
 using namespace llvm;

 // Register the AliasAnalysis interface, providing a nice name to refer to.
-static RegisterAnalysisGroup<AliasAnalysis> Z("Alias Analysis");
+INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
 char AliasAnalysis::ID = 0;

 //===----------------------------------------------------------------------===//
@@ -43,15 +44,15 @@ char AliasAnalysis::ID = 0;
 //===----------------------------------------------------------------------===//

 AliasAnalysis::AliasResult
-AliasAnalysis::alias(const Value *V1, unsigned V1Size,
-                     const Value *V2, unsigned V2Size) {
+AliasAnalysis::alias(const Location &LocA, const Location &LocB) {
   assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->alias(V1, V1Size, V2, V2Size);
+  return AA->alias(LocA, LocB);
 }

-bool AliasAnalysis::pointsToConstantMemory(const Value *P) {
+bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
+                                           bool OrLocal) {
   assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->pointsToConstantMemory(P);
+  return AA->pointsToConstantMemory(Loc, OrLocal);
 }

 void AliasAnalysis::deleteValue(Value *V) {
@@ -64,49 +65,55 @@ void AliasAnalysis::copyValue(Value *From, Value *To) {
   AA->copyValue(From, To);
 }

+void AliasAnalysis::addEscapingUse(Use &U) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  AA->addEscapingUse(U);
+}
+
+
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
-                             const Value *P, unsigned Size) {
-  // Don't assert AA because BasicAA calls us in order to make use of the
-  // logic here.
+                             const Location &Loc) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");

   ModRefBehavior MRB = getModRefBehavior(CS);
   if (MRB == DoesNotAccessMemory)
     return NoModRef;

   ModRefResult Mask = ModRef;
-  if (MRB == OnlyReadsMemory)
+  if (onlyReadsMemory(MRB))
     Mask = Ref;
-  else if (MRB == AliasAnalysis::AccessesArguments) {
+
+  if (onlyAccessesArgPointees(MRB)) {
     bool doesAlias = false;
-    for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
-         AI != AE; ++AI)
-      if (!isNoAlias(*AI, ~0U, P, Size)) {
-        doesAlias = true;
-        break;
-      }
+    if (doesAccessArgPointees(MRB))
+      for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+           AI != AE; ++AI)
+        if (!isNoAlias(Location(*AI), Loc)) {
+          doesAlias = true;
+          break;
+        }

     if (!doesAlias)
       return NoModRef;
   }

-  // If P points to a constant memory location, the call definitely could not
+  // If Loc is a constant memory location, the call definitely could not
   // modify the memory location.
-  if ((Mask & Mod) && pointsToConstantMemory(P))
+  if ((Mask & Mod) && pointsToConstantMemory(Loc))
     Mask = ModRefResult(Mask & ~Mod);

-  // If this is BasicAA, don't forward.
+  // If this is the end of the chain, don't forward.
   if (!AA) return Mask;

   // Otherwise, fall back to the next AA in the chain. But we can merge
   // in any mask we've managed to compute.
-  return ModRefResult(AA->getModRefInfo(CS, P, Size) & Mask);
+  return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
 }

 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
-  // Don't assert AA because BasicAA calls us in order to make use of the
-  // logic here.
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");

   // If CS1 or CS2 are readnone, they don't interact.
   ModRefBehavior CS1B = getModRefBehavior(CS1);
@@ -116,45 +123,47 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
   if (CS2B == DoesNotAccessMemory) return NoModRef;

   // If they both only read from memory, there is no dependence.
-  if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory)
+  if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
     return NoModRef;

   AliasAnalysis::ModRefResult Mask = ModRef;

   // If CS1 only reads memory, the only dependence on CS2 can be
   // from CS1 reading memory written by CS2.
-  if (CS1B == OnlyReadsMemory)
+  if (onlyReadsMemory(CS1B))
     Mask = ModRefResult(Mask & Ref);

   // If CS2 only access memory through arguments, accumulate the mod/ref
   // information from CS1's references to the memory referenced by
   // CS2's arguments.
-  if (CS2B == AccessesArguments) {
+  if (onlyAccessesArgPointees(CS2B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
-    for (ImmutableCallSite::arg_iterator
-         I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
-      R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
-      if (R == Mask)
-        break;
-    }
+    if (doesAccessArgPointees(CS2B))
+      for (ImmutableCallSite::arg_iterator
+           I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+        R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
+        if (R == Mask)
+          break;
+      }
     return R;
   }

   // If CS1 only accesses memory through arguments, check if CS2 references
   // any of the memory referenced by CS1's arguments. If not, return NoModRef.
-  if (CS1B == AccessesArguments) {
+  if (onlyAccessesArgPointees(CS1B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
-    for (ImmutableCallSite::arg_iterator
-         I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
-      if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
-        R = Mask;
-        break;
-      }
+    if (doesAccessArgPointees(CS1B))
+      for (ImmutableCallSite::arg_iterator
+           I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
+        if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
+          R = Mask;
+          break;
+        }
     if (R == NoModRef)
       return R;
   }

-  // If this is BasicAA, don't forward.
+  // If this is the end of the chain, don't forward.
   if (!AA) return Mask;

   // Otherwise, fall back to the next AA in the chain. But we can merge
@@ -164,8 +173,7 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {

 AliasAnalysis::ModRefBehavior
 AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
-  // Don't assert AA because BasicAA calls us in order to make use of the
-  // logic here.
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");

   ModRefBehavior Min = UnknownModRefBehavior;

@@ -174,12 +182,12 @@ AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
   if (const Function *F = CS.getCalledFunction())
     Min = getModRefBehavior(F);

-  // If this is BasicAA, don't forward.
+  // If this is the end of the chain, don't forward.
   if (!AA) return Min;

   // Otherwise, fall back to the next AA in the chain. But we can merge
   // in any result we've managed to compute.
-  return std::min(AA->getModRefBehavior(CS), Min);
+  return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
 }

 AliasAnalysis::ModRefBehavior
@@ -188,20 +196,66 @@ AliasAnalysis::getModRefBehavior(const Function *F) {
   return AA->getModRefBehavior(F);
 }

-
 //===----------------------------------------------------------------------===//
 // AliasAnalysis non-virtual helper method implementation
 //===----------------------------------------------------------------------===//

+AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+  return Location(LI->getPointerOperand(),
+                  getTypeStoreSize(LI->getType()),
+                  LI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+  return Location(SI->getPointerOperand(),
+                  getTypeStoreSize(SI->getValueOperand()->getType()),
+                  SI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
+  return Location(VI->getPointerOperand(),
+                  UnknownSize,
+                  VI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
+  uint64_t Size = UnknownSize;
+  if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+    Size = C->getValue().getZExtValue();
+
+  // memcpy/memmove can have TBAA tags. For memcpy, they apply
+  // to both the source and the destination.
+  MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+  return Location(MTI->getRawSource(), Size, TBAATag);
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
+  uint64_t Size = UnknownSize;
+  if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+    Size = C->getValue().getZExtValue();
+
+  // memcpy/memmove can have TBAA tags. For memcpy, they apply
+  // to both the source and the destination.
+  MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+  return Location(MTI->getRawDest(), Size, TBAATag);
+}
+
+
+
 AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) {
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
   // Be conservative in the face of volatile.
   if (L->isVolatile())
     return ModRef;

   // If the load address doesn't alias the given address, it doesn't read
   // or write the specified memory.
-  if (!alias(L->getOperand(0), getTypeStoreSize(L->getType()), P, Size))
+  if (!alias(getLocation(L), Loc))
     return NoModRef;

   // Otherwise, a load just reads.
@@ -209,20 +263,19 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) {
 }

 AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size) {
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
   // Be conservative in the face of volatile.
   if (S->isVolatile())
     return ModRef;

   // If the store address cannot alias the pointer in question, then the
   // specified memory cannot be modified by the store.
-  if (!alias(S->getOperand(1),
-             getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
+  if (!alias(getLocation(S), Loc))
     return NoModRef;

   // If the pointer is a pointer to constant memory, then it could not have been
   // modified by this store.
-  if (pointsToConstantMemory(P))
+  if (pointsToConstantMemory(Loc))
     return NoModRef;

   // Otherwise, a store just writes.
@@ -230,29 +283,21 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size)
 }

 AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const VAArgInst *V, const Value *P, unsigned Size) {
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
   // If the va_arg address cannot alias the pointer in question, then the
   // specified memory cannot be accessed by the va_arg.
-  if (!alias(V->getOperand(0), UnknownSize, P, Size))
+  if (!alias(getLocation(V), Loc))
     return NoModRef;

   // If the pointer is a pointer to constant memory, then it could not have been
   // modified by this va_arg.
-  if (pointsToConstantMemory(P))
+  if (pointsToConstantMemory(Loc))
     return NoModRef;

   // Otherwise, a va_arg reads and writes.
   return ModRef;
 }

-
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getIntrinsicModRefBehavior(unsigned iid) {
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
-}
-
 // AliasAnalysis destructor: DO NOT move this to the header file for
 // AliasAnalysis or else clients of the AliasAnalysis class may not depend on
 // the AliasAnalysis.o file in the current .a file, causing alias analysis
@@ -277,16 +322,16 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 /// getTypeStoreSize - Return the TargetData store size for the given type,
 /// if known, or a conservative value otherwise.
 ///
-unsigned AliasAnalysis::getTypeStoreSize(const Type *Ty) {
-  return TD ? TD->getTypeStoreSize(Ty) : ~0u;
+uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+  return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
 }

 /// canBasicBlockModify - Return true if it is possible for execution of the
 /// specified basic block to modify the value pointed to by Ptr.
 ///
 bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
-                                        const Value *Ptr, unsigned Size) {
-  return canInstructionRangeModify(BB.front(), BB.back(), Ptr, Size);
+                                        const Location &Loc) {
+  return canInstructionRangeModify(BB.front(), BB.back(), Loc);
 }

 /// canInstructionRangeModify - Return true if it is possible for the execution
@@ -296,7 +341,7 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
 ///
 bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
                                               const Instruction &I2,
-                                              const Value *Ptr, unsigned Size) {
+                                              const Location &Loc) {
   assert(I1.getParent() == I2.getParent() &&
          "Instructions not in same basic block!");
   BasicBlock::const_iterator I = &I1;
@@ -304,7 +349,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
   ++E;  // Convert from inclusive to exclusive range.

   for (; I != E; ++I) // Check every instruction in range
-    if (getModRefInfo(I, Ptr, Size) & Mod)
+    if (getModRefInfo(I, Loc) & Mod)
       return true;
   return false;
 }
@@ -336,9 +381,3 @@ bool llvm::isIdentifiedObject(const Value *V) {
     return A->hasNoAliasAttr() || A->hasByValAttr();
   return false;
 }
-
-// Because of the way .a files work, we must force the BasicAA implementation to
-// be pulled in if the AliasAnalysis classes are pulled in.  Otherwise we run
-// the risk of AliasAnalysis being used, but the default implementation not
-// being linked into the tool that uses it.
-DEFINING_FILE_FOR(AliasAnalysis)
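The hunks above replace every (pointer, size) query pair with a single AliasAnalysis::Location bundling the pointer, the access size, and the TBAA tag, and add getLocation helpers for loads, stores, and va_args. As an editor's illustration only (not part of the imported patch), here is a minimal sketch of a client query under the new interface, using only the Location fields and helpers introduced in this diff:

```cpp
// Sketch: querying the Location-based interface introduced above.
// Assumes the r126079-era headers this import provides.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Instructions.h"
using namespace llvm;

static bool storeMayClobberLoad(AliasAnalysis &AA, StoreInst *SI, LoadInst *LI) {
  // getLocation bundles the pointer, the store size, and the MD_tbaa tag.
  AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
  AliasAnalysis::Location LoadLoc  = AA.getLocation(LI);
  // NoAlias is the only verdict that rules out an overlap; May, Partial,
  // and Must all leave a clobber possible.
  return AA.alias(StoreLoc, LoadLoc) != AliasAnalysis::NoAlias;
}
```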
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index b17804186a63..d947220e078d 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -29,13 +29,14 @@ PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
 namespace {
   class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
-    unsigned No, May, Must;
+    unsigned No, May, Partial, Must;
     unsigned NoMR, JustRef, JustMod, MR;
     Module *M;
   public:
     static char ID; // Class identification, replacement for typeinfo
     AliasAnalysisCounter() : ModulePass(ID) {
-      No = May = Must = 0;
+      initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
+      No = May = Partial = Must = 0;
       NoMR = JustRef = JustMod = MR = 0;
     }

@@ -44,7 +45,7 @@ namespace {
            << Val*100/Sum << "%)\n";
     }
     ~AliasAnalysisCounter() {
-      unsigned AASum = No+May+Must;
+      unsigned AASum = No+May+Partial+Must;
       unsigned MRSum = NoMR+JustRef+JustMod+MR;
       if (AASum + MRSum) { // Print a report if any counted queries occurred...
         errs() << "\n===== Alias Analysis Counter Report =====\n"
@@ -53,9 +54,12 @@ namespace {
         if (AASum) {
           printLine("no alias", No, AASum);
           printLine("may alias", May, AASum);
+          printLine("partial alias", Partial, AASum);
           printLine("must alias", Must, AASum);
           errs() << "  Alias Analysis Counter Summary: " << No*100/AASum << "%/"
-                 << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n";
+                 << May*100/AASum << "%/"
+                 << Partial*100/AASum << "%/"
+                 << Must*100/AASum<<"%\n\n";
         }

         errs() << "  " << MRSum << " Total Mod/Ref Queries Performed\n";
@@ -94,17 +98,16 @@ namespace {
     }

     // FIXME: We could count these too...
-    bool pointsToConstantMemory(const Value *P) {
-      return getAnalysis<AliasAnalysis>().pointsToConstantMemory(P);
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+      return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
     }

     // Forwarding functions: just delegate to a real AA implementation, counting
     // the number of responses...
-    AliasResult alias(const Value *V1, unsigned V1Size,
-                      const Value *V2, unsigned V2Size);
+    AliasResult alias(const Location &LocA, const Location &LocB);

     ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const Value *P, unsigned Size);
+                               const Location &Loc);
     ModRefResult getModRefInfo(ImmutableCallSite CS1,
                                ImmutableCallSite CS2) {
       return AliasAnalysis::getModRefInfo(CS1,CS2);
@@ -114,32 +117,32 @@ namespace {
 char AliasAnalysisCounter::ID = 0;
 INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
-                   "Count Alias Analysis Query Responses", false, true, false);
+                   "Count Alias Analysis Query Responses", false, true, false)

 ModulePass *llvm::createAliasAnalysisCounterPass() {
   return new AliasAnalysisCounter();
 }

 AliasAnalysis::AliasResult
-AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
-                            const Value *V2, unsigned V2Size) {
-  AliasResult R = getAnalysis<AliasAnalysis>().alias(V1, V1Size, V2, V2Size);
+AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
+  AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);

   const char *AliasString;
   switch (R) {
   default: llvm_unreachable("Unknown alias type!");
   case NoAlias:   No++;   AliasString = "No alias"; break;
   case MayAlias:  May++;  AliasString = "May alias"; break;
+  case PartialAlias: Partial++; AliasString = "Partial alias"; break;
   case MustAlias: Must++; AliasString = "Must alias"; break;
   }

   if (PrintAll || (PrintAllFailures && R == MayAlias)) {
     errs() << AliasString << ":\t";
-    errs() << "[" << V1Size << "B] ";
-    WriteAsOperand(errs(), V1, true, M);
+    errs() << "[" << LocA.Size << "B] ";
+    WriteAsOperand(errs(), LocA.Ptr, true, M);
     errs() << ", ";
-    errs() << "[" << V2Size << "B] ";
-    WriteAsOperand(errs(), V2, true, M);
+    errs() << "[" << LocB.Size << "B] ";
+    WriteAsOperand(errs(), LocB.Ptr, true, M);
     errs() << "\n";
   }

@@ -148,8 +151,8 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,

 AliasAnalysis::ModRefResult
 AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
-                                    const Value *P, unsigned Size) {
-  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, P, Size);
+                                    const Location &Loc) {
+  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);

   const char *MRString;
   switch (R) {
@@ -162,8 +165,8 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,

   if (PrintAll || (PrintAllFailures && R == ModRef)) {
     errs() << MRString << ": Ptr: ";
-    errs() << "[" << Size << "B] ";
-    WriteAsOperand(errs(), P, true, M);
+    errs() << "[" << Loc.Size << "B] ";
+    WriteAsOperand(errs(), Loc.Ptr, true, M);
     errs() << "\t<->" << *CS.getInstruction() << '\n';
   }
   return R;
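The counter now tallies four outcomes instead of three: this import adds PartialAlias to the AliasResult enum, so any client that switches over the result gains a case. An editor's sketch (not part of the patch) of the exhaustive switch, mirroring the counter's own handling above:

```cpp
// Sketch: exhaustive handling of the widened AliasResult enum,
// following the switch in AliasAnalysisCounter::alias.
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

static const char *aliasResultName(AliasAnalysis::AliasResult R) {
  switch (R) {
  case AliasAnalysis::NoAlias:      return "no alias";
  case AliasAnalysis::MayAlias:     return "may alias";
  case AliasAnalysis::PartialAlias: return "partial alias"; // new in this import
  case AliasAnalysis::MustAlias:    return "must alias";
  }
  return "unknown alias result";
}
```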
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index ce363cbc7bbd..1afc1b71d93e 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -36,6 +36,7 @@ static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);

 static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
 static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden);
 static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);

 static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
@@ -45,12 +46,14 @@ static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);

 namespace {
   class AAEval : public FunctionPass {
-    unsigned NoAlias, MayAlias, MustAlias;
+    unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
     unsigned NoModRef, Mod, Ref, ModRef;

   public:
     static char ID; // Pass identification, replacement for typeid
-    AAEval() : FunctionPass(ID) {}
+    AAEval() : FunctionPass(ID) {
+      initializeAAEvalPass(*PassRegistry::getPassRegistry());
+    }

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<AliasAnalysis>();
@@ -58,11 +61,12 @@ namespace {
     }

     bool doInitialization(Module &M) {
-      NoAlias = MayAlias = MustAlias = 0;
+      NoAlias = MayAlias = PartialAlias = MustAlias = 0;
       NoModRef = Mod = Ref = ModRef = 0;

       if (PrintAll) {
-        PrintNoAlias = PrintMayAlias = PrintMustAlias = true;
+        PrintNoAlias = PrintMayAlias = true;
+        PrintPartialAlias = PrintMustAlias = true;
         PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
       }
       return false;
@@ -74,8 +78,11 @@ namespace {
 }

 char AAEval::ID = 0;
-INITIALIZE_PASS(AAEval, "aa-eval",
-                "Exhaustive Alias Analysis Precision Evaluator", false, true);
+INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
+                "Exhaustive Alias Analysis Precision Evaluator", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AAEval, "aa-eval",
+                "Exhaustive Alias Analysis Precision Evaluator", false, true)

 FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }

@@ -155,7 +162,7 @@ bool AAEval::runOnFunction(Function &F) {
     }
   }

-  if (PrintNoAlias || PrintMayAlias || PrintMustAlias ||
+  if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias ||
       PrintNoModRef || PrintMod || PrintRef || PrintModRef)
     errs() << "Function: " << F.getName() << ": " << Pointers.size()
            << " pointers, " << CallSites.size() << " call sites\n";
@@ -163,12 +170,12 @@ bool AAEval::runOnFunction(Function &F) {
   // iterate over the worklist, and run the full (n^2)/2 disambiguations
   for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
        I1 != E; ++I1) {
-    unsigned I1Size = ~0u;
+    uint64_t I1Size = AliasAnalysis::UnknownSize;
     const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
     if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);

     for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
-      unsigned I2Size = ~0u;
+      uint64_t I2Size = AliasAnalysis::UnknownSize;
       const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
       if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);

@@ -179,6 +186,10 @@ bool AAEval::runOnFunction(Function &F) {
       case AliasAnalysis::MayAlias:
         PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
         ++MayAlias; break;
+      case AliasAnalysis::PartialAlias:
+        PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+                     F.getParent());
+        ++PartialAlias; break;
       case AliasAnalysis::MustAlias:
         PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
         ++MustAlias; break;
@@ -195,7 +206,7 @@ bool AAEval::runOnFunction(Function &F) {

   for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
        V != Ve; ++V) {
-    unsigned Size = ~0u;
+    uint64_t Size = AliasAnalysis::UnknownSize;
     const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
     if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);

@@ -250,7 +261,7 @@ static void PrintPercent(unsigned Num, unsigned Sum) {
 }

 bool AAEval::doFinalization(Module &M) {
-  unsigned AliasSum = NoAlias + MayAlias + MustAlias;
+  unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias;
   errs() << "===== Alias Analysis Evaluator Report =====\n";
   if (AliasSum == 0) {
     errs() << "  Alias Analysis Evaluator Summary: No pointers!\n";
@@ -260,10 +271,13 @@ bool AAEval::doFinalization(Module &M) {
     PrintPercent(NoAlias, AliasSum);
     errs() << "  " << MayAlias << " may alias responses ";
     PrintPercent(MayAlias, AliasSum);
+    errs() << "  " << PartialAlias << " partial alias responses ";
+    PrintPercent(PartialAlias, AliasSum);
     errs() << "  " << MustAlias << " must alias responses ";
     PrintPercent(MustAlias, AliasSum);
     errs() << "  Alias Analysis Evaluator Pointer Alias Summary: "
            << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+           << PartialAlias*100/AliasSum << "%/"
            << MustAlias*100/AliasSum << "%\n";
   }
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index b9fe64608c01..f15c05153e10 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -39,7 +39,9 @@ namespace {

   public:
     static char ID; // Class identification, replacement for typeinfo
-    AliasDebugger() : ModulePass(ID) {}
+    AliasDebugger() : ModulePass(ID) {
+      initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
+    }

     bool runOnModule(Module &M) {
       InitializeAliasAnalysis(this);                 // set up super class
@@ -92,17 +94,18 @@ namespace {
     //------------------------------------------------
     // Implement the AliasAnalysis API
     //
-    AliasResult alias(const Value *V1, unsigned V1Size,
-                      const Value *V2, unsigned V2Size) {
-      assert(Vals.find(V1) != Vals.end() && "Never seen value in AA before");
-      assert(Vals.find(V2) != Vals.end() && "Never seen value in AA before");
-      return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+    AliasResult alias(const Location &LocA, const Location &LocB) {
+      assert(Vals.find(LocA.Ptr) != Vals.end() &&
+             "Never seen value in AA before");
+      assert(Vals.find(LocB.Ptr) != Vals.end() &&
+             "Never seen value in AA before");
+      return AliasAnalysis::alias(LocA, LocB);
     }

     ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const Value *P, unsigned Size) {
-      assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
-      return AliasAnalysis::getModRefInfo(CS, P, Size);
+                               const Location &Loc) {
+      assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+      return AliasAnalysis::getModRefInfo(CS, Loc);
     }

     ModRefResult getModRefInfo(ImmutableCallSite CS1,
@@ -110,9 +113,9 @@ namespace {
       return AliasAnalysis::getModRefInfo(CS1,CS2);
     }

-    bool pointsToConstantMemory(const Value *P) {
-      assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
-      return AliasAnalysis::pointsToConstantMemory(P);
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+      assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+      return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
     }

     virtual void deleteValue(Value *V) {
@@ -129,7 +132,7 @@ namespace {

 char AliasDebugger::ID = 0;
 INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
-                   "AA use debugger", false, true, false);
+                   "AA use debugger", false, true, false)

 Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
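AAEval, AliasDebugger, and (below) AliasSetPrinter all move to the same registration idiom: an explicit initialize call in the constructor plus INITIALIZE_PASS_BEGIN / INITIALIZE_AG_DEPENDENCY / INITIALIZE_PASS_END (or INITIALIZE_AG_PASS without a trailing semicolon) at file scope. An editor's sketch of that pattern with a hypothetical pass name; the initializeMyAAClientPass declaration would normally come from InitializePasses.h:

```cpp
// Sketch: the pass-registration idiom these files adopt. "MyAAClient" and
// "my-aa-client" are hypothetical; the macro shape follows the AAEval hunks.
#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

namespace {
  struct MyAAClient : public FunctionPass {
    static char ID;
    MyAAClient() : FunctionPass(ID) {
      // Register eagerly so the pass is known even under static linking.
      initializeMyAAClientPass(*PassRegistry::getPassRegistry());
    }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<AliasAnalysis>();
      AU.setPreservesAll();
    }
    virtual bool runOnFunction(Function &F) { return false; }
  };
}

char MyAAClient::ID = 0;
INITIALIZE_PASS_BEGIN(MyAAClient, "my-aa-client",
                      "Example alias analysis client", false, true)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MyAAClient, "my-aa-client",
                    "Example alias analysis client", false, true)
```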
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index e74543bb508a..3a46976d66f7 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/Type.h"
 #include "llvm/Target/TargetData.h"
@@ -45,7 +46,12 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
     PointerRec *R = AS.getSomePointer();

     // If the pointers are not a must-alias pair, this set becomes a may alias.
-    if (AA.alias(L->getValue(), L->getSize(), R->getValue(), R->getSize())
+    if (AA.alias(AliasAnalysis::Location(L->getValue(),
+                                         L->getSize(),
+                                         L->getTBAAInfo()),
+                 AliasAnalysis::Location(R->getValue(),
+                                         R->getSize(),
+                                         R->getTBAAInfo()))
         != AliasAnalysis::MustAlias)
       AliasTy = MayAlias;
   }
@@ -87,7 +93,8 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) {
 }

 void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
-                          unsigned Size, bool KnownMustAlias) {
+                          uint64_t Size, const MDNode *TBAAInfo,
+                          bool KnownMustAlias) {
   assert(!Entry.hasAliasSet() && "Entry already in set!");

   // Check to see if we have to downgrade to _may_ alias.
@@ -95,16 +102,18 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
     if (PointerRec *P = getSomePointer()) {
       AliasAnalysis &AA = AST.getAliasAnalysis();
       AliasAnalysis::AliasResult Result =
-        AA.alias(P->getValue(), P->getSize(), Entry.getValue(), Size);
-      if (Result == AliasAnalysis::MayAlias)
+        AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
+                                         P->getTBAAInfo()),
+                 AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+      if (Result != AliasAnalysis::MustAlias)
         AliasTy = MayAlias;
       else                  // First entry of must alias must have maximum size!
-        P->updateSize(Size);
+        P->updateSizeAndTBAAInfo(Size, TBAAInfo);
       assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
     }

   Entry.setAliasSet(this);
-  Entry.updateSize(Size);
+  Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);

   // Add it to the end of the list...
   assert(*PtrListEnd == 0 && "End of list is not null?");
@@ -120,7 +129,7 @@ void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
   AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
   if (Behavior == AliasAnalysis::DoesNotAccessMemory)
     return;
-  else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
     AliasTy = MayAlias;
     AccessTy |= Refs;
     return;
@@ -134,7 +143,8 @@ void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
 /// aliasesPointer - Return true if the specified pointer "may" (or must)
 /// alias one of the members in the set.
 ///
-bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
+bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
+                              const MDNode *TBAAInfo,
                               AliasAnalysis &AA) const {
   if (AliasTy == MustAlias) {
     assert(CallSites.empty() && "Illegal must alias set!");
@@ -143,19 +153,26 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
     // SOME value in the set.
     PointerRec *SomePtr = getSomePointer();
     assert(SomePtr && "Empty must-alias set??");
-    return AA.alias(SomePtr->getValue(), SomePtr->getSize(), Ptr, Size);
+    return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
+                                            SomePtr->getSize(),
+                                            SomePtr->getTBAAInfo()),
+                    AliasAnalysis::Location(Ptr, Size, TBAAInfo));
   }

   // If this is a may-alias set, we have to check all of the pointers in the set
   // to be sure it doesn't alias the set...
   for (iterator I = begin(), E = end(); I != E; ++I)
-    if (AA.alias(Ptr, Size, I.getPointer(), I.getSize()))
+    if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo),
+                 AliasAnalysis::Location(I.getPointer(), I.getSize(),
+                                         I.getTBAAInfo())))
       return true;

   // Check the call sites list and invoke list...
   if (!CallSites.empty()) {
     for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
-      if (AA.getModRefInfo(CallSites[i], Ptr, Size) != AliasAnalysis::NoModRef)
+      if (AA.getModRefInfo(CallSites[i],
+                           AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
+            AliasAnalysis::NoModRef)
         return true;
   }

@@ -198,10 +215,11 @@ void AliasSetTracker::clear() {
 /// that may alias the pointer, merge them together and return the unified set.
 ///
 AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
-                                                  unsigned Size) {
+                                                  uint64_t Size,
+                                                  const MDNode *TBAAInfo) {
   AliasSet *FoundSet = 0;
   for (iterator I = begin(), E = end(); I != E; ++I) {
-    if (I->Forward || !I->aliasesPointer(Ptr, Size, AA)) continue;
+    if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;

     if (FoundSet == 0) {  // If this is the first alias set ptr can go into.
       FoundSet = I;       // Remember it.
@@ -216,9 +234,10 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
 /// containsPointer - Return true if the specified location is represented by
 /// this alias set, false otherwise.  This does not modify the AST object or
 /// alias sets.
-bool AliasSetTracker::containsPointer(Value *Ptr, unsigned Size) const {
+bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+                                      const MDNode *TBAAInfo) const {
   for (const_iterator I = begin(), E = end(); I != E; ++I)
-    if (!I->Forward && I->aliasesPointer(Ptr, Size, AA))
+    if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA))
       return true;
   return false;
 }
@@ -244,33 +263,34 @@ AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {

 /// getAliasSetForPointer - Return the alias set that the specified pointer
 /// lives in.
-AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, unsigned Size,
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
+                                                 const MDNode *TBAAInfo,
                                                  bool *New) {
   AliasSet::PointerRec &Entry = getEntryFor(Pointer);

   // Check to see if the pointer is already known.
   if (Entry.hasAliasSet()) {
-    Entry.updateSize(Size);
+    Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
     // Return the set!
     return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
   }

-  if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) {
+  if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) {
     // Add it to the alias set it aliases.
-    AS->addPointer(*this, Entry, Size);
+    AS->addPointer(*this, Entry, Size, TBAAInfo);
     return *AS;
   }

   if (New) *New = true;
   // Otherwise create a new alias set to hold the loaded pointer.
   AliasSets.push_back(new AliasSet());
-  AliasSets.back().addPointer(*this, Entry, Size);
+  AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo);
   return AliasSets.back();
 }

-bool AliasSetTracker::add(Value *Ptr, unsigned Size) {
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
   bool NewPtr;
-  addPointer(Ptr, Size, AliasSet::NoModRef, NewPtr);
+  addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr);
   return NewPtr;
 }

@@ -279,6 +299,7 @@ bool AliasSetTracker::add(LoadInst *LI) {
   bool NewPtr;
   AliasSet &AS = addPointer(LI->getOperand(0),
                             AA.getTypeStoreSize(LI->getType()),
+                            LI->getMetadata(LLVMContext::MD_tbaa),
                             AliasSet::Refs, NewPtr);
   if (LI->isVolatile()) AS.setVolatile();
   return NewPtr;
@@ -289,6 +310,7 @@ bool AliasSetTracker::add(StoreInst *SI) {
   Value *Val = SI->getOperand(0);
   AliasSet &AS = addPointer(SI->getOperand(1),
                             AA.getTypeStoreSize(Val->getType()),
+                            SI->getMetadata(LLVMContext::MD_tbaa),
                             AliasSet::Mods, NewPtr);
   if (SI->isVolatile()) AS.setVolatile();
   return NewPtr;
@@ -296,7 +318,9 @@ bool AliasSetTracker::add(StoreInst *SI) {

 bool AliasSetTracker::add(VAArgInst *VAAI) {
   bool NewPtr;
-  addPointer(VAAI->getOperand(0), ~0, AliasSet::ModRef, NewPtr);
+  addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize,
+             VAAI->getMetadata(LLVMContext::MD_tbaa),
+             AliasSet::ModRef, NewPtr);
   return NewPtr;
 }

@@ -358,6 +382,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
       bool X;
       for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
         AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+                                     ASI.getTBAAInfo(),
                                      (AliasSet::AccessType)AS.AccessTy, X);
         if (AS.isVolatile()) NewAS.setVolatile();
       }
@@ -393,31 +418,36 @@ void AliasSetTracker::remove(AliasSet &AS) {
   AS.removeFromTracker(*this);
 }

-bool AliasSetTracker::remove(Value *Ptr, unsigned Size) {
-  AliasSet *AS = findAliasSetForPointer(Ptr, Size);
+bool
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+  AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
   if (!AS) return false;
   remove(*AS);
   return true;
 }

 bool AliasSetTracker::remove(LoadInst *LI) {
-  unsigned Size = AA.getTypeStoreSize(LI->getType());
-  AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size);
+  uint64_t Size = AA.getTypeStoreSize(LI->getType());
+  const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
+  AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
   if (!AS) return false;
   remove(*AS);
   return true;
 }

 bool AliasSetTracker::remove(StoreInst *SI) {
-  unsigned Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
-  AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size);
+  uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+  const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
+  AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
   if (!AS) return false;
   remove(*AS);
   return true;
 }

 bool AliasSetTracker::remove(VAArgInst *VAAI) {
-  AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), ~0);
+  AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
+                                        AliasAnalysis::UnknownSize,
+                                        VAAI->getMetadata(LLVMContext::MD_tbaa));
   if (!AS) return false;
   remove(*AS);
   return true;
@@ -507,7 +537,9 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
   // Add it to the alias set it aliases...
   I = PointerMap.find(From);
   AliasSet *AS = I->second->getAliasSet(*this);
-  AS->addPointer(*this, Entry, I->second->getSize(), true);
+  AS->addPointer(*this, Entry, I->second->getSize(),
+                 I->second->getTBAAInfo(),
+                 true);
 }


@@ -587,7 +619,9 @@ namespace {
     AliasSetTracker *Tracker;
   public:
     static char ID; // Pass identification, replacement for typeid
-    AliasSetPrinter() : FunctionPass(ID) {}
+    AliasSetPrinter() : FunctionPass(ID) {
+      initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
+    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
@@ -607,5 +641,8 @@ namespace {
 }

 char AliasSetPrinter::ID = 0;
-INITIALIZE_PASS(AliasSetPrinter, "print-alias-sets",
-                "Alias Set Printer", false, true);
+INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
+                      "Alias Set Printer", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
+                    "Alias Set Printer", false, true)
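AliasSetTracker's interface grows a TBAA parameter throughout: add, remove, findAliasSetForPointer, and getAliasSetForPointer all now take (pointer, size, TBAA tag). An editor's sketch of feeding a load into a tracker under the widened add(Value*, uint64_t, const MDNode*) signature shown above; the tracker and load are assumed to come from the caller:

```cpp
// Sketch: adding a load's location to an AliasSetTracker with its TBAA tag.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

static void trackLoad(AliasSetTracker &AST, AliasAnalysis &AA, LoadInst *LI) {
  AST.add(LI->getPointerOperand(),
          AA.getTypeStoreSize(LI->getType()),
          LI->getMetadata(LLVMContext::MD_tbaa));
  // Equivalently, AST.add(LI) performs exactly this lookup itself.
}
```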
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 398dec7dd0a1..1af1c35f5392 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -8,22 +8,83 @@
 //===----------------------------------------------------------------------===//

 #include "llvm-c/Analysis.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Analysis/Verifier.h"
 #include <cstring>

 using namespace llvm;

+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void llvm::initializeAnalysis(PassRegistry &Registry) {
+  initializeAliasAnalysisAnalysisGroup(Registry);
+  initializeAliasAnalysisCounterPass(Registry);
+  initializeAAEvalPass(Registry);
+  initializeAliasDebuggerPass(Registry);
+  initializeAliasSetPrinterPass(Registry);
+  initializeNoAAPass(Registry);
+  initializeBasicAliasAnalysisPass(Registry);
+  initializeCFGViewerPass(Registry);
+  initializeCFGPrinterPass(Registry);
+  initializeCFGOnlyViewerPass(Registry);
+  initializeCFGOnlyPrinterPass(Registry);
+  initializePrintDbgInfoPass(Registry);
+  initializeDominanceFrontierPass(Registry);
+  initializeDomViewerPass(Registry);
+  initializeDomPrinterPass(Registry);
+  initializeDomOnlyViewerPass(Registry);
+  initializePostDomViewerPass(Registry);
+  initializeDomOnlyPrinterPass(Registry);
+  initializePostDomPrinterPass(Registry);
+  initializePostDomOnlyViewerPass(Registry);
+  initializePostDomOnlyPrinterPass(Registry);
+  initializeIVUsersPass(Registry);
+  initializeInstCountPass(Registry);
+  initializeIntervalPartitionPass(Registry);
+  initializeLazyValueInfoPass(Registry);
+  initializeLibCallAliasAnalysisPass(Registry);
+  initializeLintPass(Registry);
+  initializeLiveValuesPass(Registry);
+  initializeLoopDependenceAnalysisPass(Registry);
+  initializeLoopInfoPass(Registry);
+  initializeMemDepPrinterPass(Registry);
+  initializeMemoryDependenceAnalysisPass(Registry);
+  initializeModuleDebugInfoPrinterPass(Registry);
+  initializePostDominatorTreePass(Registry);
+  initializePostDominanceFrontierPass(Registry);
+  initializeProfileEstimatorPassPass(Registry);
+  initializeNoProfileInfoPass(Registry);
+  initializeNoPathProfileInfoPass(Registry);
+  initializeProfileInfoAnalysisGroup(Registry);
+  initializePathProfileInfoAnalysisGroup(Registry);
+  initializeLoaderPassPass(Registry);
+  initializePathProfileLoaderPassPass(Registry);
+  initializeProfileVerifierPassPass(Registry);
+  initializePathProfileVerifierPass(Registry);
+  initializeRegionInfoPass(Registry);
+  initializeRegionViewerPass(Registry);
+  initializeRegionPrinterPass(Registry);
+  initializeRegionOnlyViewerPass(Registry);
+  initializeRegionOnlyPrinterPass(Registry);
+  initializeScalarEvolutionPass(Registry);
+  initializeScalarEvolutionAliasAnalysisPass(Registry);
+  initializeTypeBasedAliasAnalysisPass(Registry);
+}
+
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
+  initializeAnalysis(*unwrap(R));
+}
+
 LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
                           char **OutMessages) {
   std::string Messages;
-  
+
   LLVMBool Result = verifyModule(*unwrap(M),
                                  static_cast<VerifierFailureAction>(Action),
                                  OutMessages? &Messages : 0);
-  
+
   if (OutMessages)
     *OutMessages = strdup(Messages.c_str());
-  
+
   return Result;
 }
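The new initializeAnalysis entry point gives host tools a single call that registers every pass in this library, with the C binding LLVMInitializeAnalysis layered on top as shown. An editor's sketch of C++-side usage, assuming only what the hunk above declares:

```cpp
// Sketch: one-shot registration of the Analysis library's passes via the
// initializeAnalysis entry point added above.
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
using namespace llvm;

static void registerAnalysisPasses() {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeAnalysis(Registry); // BasicAA, AAEval, ScalarEvolution, TBAA, ...
}
```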
if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0))) - return (C->getZExtValue() < Size); - return false; + return C->getZExtValue(); + return AliasAnalysis::UnknownSize; } else if (const Argument *A = dyn_cast<Argument>(V)) { if (A->hasByValAttr()) AccessTy = cast<PointerType>(A->getType())->getElementType(); else - return false; + return AliasAnalysis::UnknownSize; } else { - return false; + return AliasAnalysis::UnknownSize; } if (AccessTy->isSized()) - return TD.getTypeAllocSize(AccessTy) < Size; - return false; + return TD.getTypeAllocSize(AccessTy); + return AliasAnalysis::UnknownSize; } -//===----------------------------------------------------------------------===// -// NoAA Pass -//===----------------------------------------------------------------------===// - -namespace { - /// NoAA - This class implements the -no-aa pass, which always returns "I - /// don't know" for alias queries. NoAA is unlike other alias analysis - /// implementations, in that it does not chain to a previous analysis. As - /// such it doesn't follow many of the rules that other alias analyses must. - /// - struct NoAA : public ImmutablePass, public AliasAnalysis { - static char ID; // Class identification, replacement for typeinfo - NoAA() : ImmutablePass(ID) {} - explicit NoAA(char &PID) : ImmutablePass(PID) { } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - } - - virtual void initializePass() { - TD = getAnalysisIfAvailable<TargetData>(); - } - - virtual AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { - return MayAlias; - } - - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { - return UnknownModRefBehavior; - } - virtual ModRefBehavior getModRefBehavior(const Function *F) { - return UnknownModRefBehavior; - } - - virtual bool pointsToConstantMemory(const Value *P) { return false; } - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Value *P, unsigned Size) { - return ModRef; - } - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { - return ModRef; - } - - virtual void deleteValue(Value *V) {} - virtual void copyValue(Value *From, Value *To) {} - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const void *ID) { - if (ID == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - }; -} // End of anonymous namespace - -// Register this pass... -char NoAA::ID = 0; -INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", - "No Alias Analysis (always returns 'may' alias)", - true, true, false); +/// isObjectSmallerThan - Return true if we can prove that the object specified +/// by V is smaller than Size. +static bool isObjectSmallerThan(const Value *V, uint64_t Size, + const TargetData &TD) { + uint64_t ObjectSize = getObjectSize(V, TD); + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; +} -ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } +/// isObjectSize - Return true if we can prove that the object specified +/// by V has size Size. 
+static bool isObjectSize(const Value *V, uint64_t Size, + const TargetData &TD) { + uint64_t ObjectSize = getObjectSize(V, TD); + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size; +} //===----------------------------------------------------------------------===// // GetElementPtr Instruction Decomposition and Analysis @@ -272,14 +224,14 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, Value *CastOp = cast<CastInst>(V)->getOperand(0); unsigned OldWidth = Scale.getBitWidth(); unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); - Scale.trunc(SmallWidth); - Offset.trunc(SmallWidth); + Scale = Scale.trunc(SmallWidth); + Offset = Offset.trunc(SmallWidth); Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt; Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, TD, Depth+1); - Scale.zext(OldWidth); - Offset.zext(OldWidth); + Scale = Scale.zext(OldWidth); + Offset = Offset.zext(OldWidth); return Result; } @@ -299,7 +251,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, /// the gep cannot necessarily be reconstructed from its decomposed form. /// /// When TargetData is around, this function is capable of analyzing everything -/// that Value::getUnderlyingObject() can look through. When not, it just looks +/// that GetUnderlyingObject can look through. When not, it just looks /// through pointer casts. /// static const Value * @@ -328,6 +280,14 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, V = Op->getOperand(0); continue; } + + if (const Instruction *I = dyn_cast<Instruction>(V)) + // TODO: Get a DominatorTree and use it here. + if (const Value *Simplified = + SimplifyInstruction(const_cast<Instruction *>(I), TD)) { + V = Simplified; + continue; + } const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); if (GEPOp == 0) @@ -386,8 +346,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. - BaseOffs += IndexOffset.getZExtValue()*Scale; - Scale *= IndexScale.getZExtValue(); + BaseOffs += IndexOffset.getSExtValue()*Scale; + Scale *= IndexScale.getSExtValue(); // If we already had an occurrance of this index variable, merge this @@ -407,7 +367,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // pointer size. if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) { Scale <<= ShiftBits; - Scale >>= ShiftBits; + Scale = (int64_t)Scale >> ShiftBits; } if (Scale) { @@ -485,25 +445,34 @@ static bool notDifferentParent(const Value *O1, const Value *O2) { #endif namespace { - /// BasicAliasAnalysis - This is the default alias analysis implementation. - /// Because it doesn't chain to a previous alias analysis (like -no-aa), it - /// derives from the NoAA class. - struct BasicAliasAnalysis : public NoAA { + /// BasicAliasAnalysis - This is the primary alias analysis implementation. 
+ struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { static char ID; // Class identification, replacement for typeinfo - BasicAliasAnalysis() : NoAA(ID) {} + BasicAliasAnalysis() : ImmutablePass(ID) { + initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + } - virtual AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { + virtual AliasResult alias(const Location &LocA, + const Location &LocB) { assert(Visited.empty() && "Visited must be cleared after use!"); - assert(notDifferentParent(V1, V2) && + assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && "BasicAliasAnalysis doesn't support interprocedural queries."); - AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size); + AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag, + LocB.Ptr, LocB.Size, LocB.TBAATag); Visited.clear(); return Alias; } virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Value *P, unsigned Size); + const Location &Loc); virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { @@ -513,7 +482,7 @@ namespace { /// pointsToConstantMemory - Chase pointers until we find a (constant /// global) or not. - virtual bool pointsToConstantMemory(const Value *P); + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); /// getModRefBehavior - Return the behavior when calling the given /// call site. @@ -539,46 +508,102 @@ namespace { // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP // instruction against another. - AliasResult aliasGEP(const GEPOperator *V1, unsigned V1Size, - const Value *V2, unsigned V2Size, + AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo, const Value *UnderlyingV1, const Value *UnderlyingV2); // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI // instruction against another. - AliasResult aliasPHI(const PHINode *PN, unsigned PNSize, - const Value *V2, unsigned V2Size); + AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize, + const MDNode *PNTBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo); /// aliasSelect - Disambiguate a Select instruction against another value. - AliasResult aliasSelect(const SelectInst *SI, unsigned SISize, - const Value *V2, unsigned V2Size); - - AliasResult aliasCheck(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size); + AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize, + const MDNode *SITBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo); + + AliasResult aliasCheck(const Value *V1, uint64_t V1Size, + const MDNode *V1TBAATag, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAATag); }; } // End of anonymous namespace // Register this pass... char BasicAliasAnalysis::ID = 0; INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa", - "Basic Alias Analysis (default AA impl)", - false, true, true); + "Basic Alias Analysis (stateless AA impl)", + false, true, false) ImmutablePass *llvm::createBasicAliasAnalysisPass() { return new BasicAliasAnalysis(); } +/// pointsToConstantMemory - Returns whether the given pointer value +/// points to memory that is local to the function, with global constants being +/// considered local to all functions. 
+bool +BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) { + assert(Visited.empty() && "Visited must be cleared after use!"); + + unsigned MaxLookup = 8; + SmallVector<const Value *, 16> Worklist; + Worklist.push_back(Loc.Ptr); + do { + const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD); + if (!Visited.insert(V)) { + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + + // An alloca instruction defines local memory. + if (OrLocal && isa<AllocaInst>(V)) + continue; + + // A global constant counts as local memory for our purposes. + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + // Note: this doesn't require GV to be "ODR" because it isn't legal for a + // global to be marked constant in some modules and non-constant in + // others. GV may even be a declaration, not a definition. + if (!GV->isConstant()) { + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + continue; + } + + // If both select values point to local memory, then so does the select. + if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + // If all values incoming to a phi node point to local memory, then so does + // the phi. + if (const PHINode *PN = dyn_cast<PHINode>(V)) { + // Don't bother inspecting phi nodes with many operands. + if (PN->getNumIncomingValues() > MaxLookup) { + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } -/// pointsToConstantMemory - Chase pointers until we find a (constant -/// global) or not. -bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) { - if (const GlobalVariable *GV = - dyn_cast<GlobalVariable>(P->getUnderlyingObject())) - // Note: this doesn't require GV to be "ODR" because it isn't legal for a - // global to be marked constant in some modules and non-constant in others. - // GV may even be a declaration, not a definition. - return GV->isConstant(); + // Otherwise be conservative. + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - return NoAA::pointsToConstantMemory(P); + } while (!Worklist.empty() && --MaxLookup); + + Visited.clear(); + return Worklist.empty(); } /// getModRefBehavior - Return the behavior when calling the given call site. @@ -596,22 +621,32 @@ BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { Min = OnlyReadsMemory; // The AliasAnalysis base class has some smarts, lets use them. - return std::min(AliasAnalysis::getModRefBehavior(CS), Min); + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); } /// getModRefBehavior - Return the behavior when calling the given function. /// For use when the call site is not known. AliasAnalysis::ModRefBehavior BasicAliasAnalysis::getModRefBehavior(const Function *F) { + // If the function declares it doesn't access memory, we can't do better. if (F->doesNotAccessMemory()) - // Can't do better than this. return DoesNotAccessMemory; + + // For intrinsics, we can check the table. + if (unsigned iid = F->getIntrinsicID()) { +#define GET_INTRINSIC_MODREF_BEHAVIOR +#include "llvm/Intrinsics.gen" +#undef GET_INTRINSIC_MODREF_BEHAVIOR + } + + ModRefBehavior Min = UnknownModRefBehavior; + + // If the function declares it only reads memory, go with that. 
if (F->onlyReadsMemory()) - return OnlyReadsMemory; - if (unsigned id = F->getIntrinsicID()) - return getIntrinsicModRefBehavior(id); + Min = OnlyReadsMemory; - return NoAA::getModRefBehavior(F); + // Otherwise be conservative. + return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); } /// getModRefInfo - Check to see if the specified callsite can clobber the @@ -620,13 +655,13 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) { /// simple "address taken" analysis on local objects. AliasAnalysis::ModRefResult BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const Value *P, unsigned Size) { - assert(notDifferentParent(CS.getInstruction(), P) && + const Location &Loc) { + assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) && "AliasAnalysis query involving multiple functions!"); - const Value *Object = P->getUnderlyingObject(); + const Value *Object = GetUnderlyingObject(Loc.Ptr, TD); - // If this is a tail call and P points to a stack location, we know that + // If this is a tail call and Loc.Ptr points to a stack location, we know that // the tail call cannot access or modify the local stack. // We cannot exclude byval arguments here; these belong to the caller of // the current function not to the current function, and a tail callee @@ -650,11 +685,11 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)) continue; - // If this is a no-capture pointer argument, see if we can tell that it + // If this is a no-capture pointer argument, see if we can tell that it // is impossible to alias the pointer we're checking. If not, we have to // assume that the call could touch the pointer, even though it doesn't // escape. - if (!isNoAlias(cast<Value>(CI), UnknownSize, P, UnknownSize)) { + if (!isNoAlias(Location(cast<Value>(CI)), Loc)) { PassedAsArg = true; break; } @@ -664,6 +699,8 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return NoModRef; } + ModRefResult Min = ModRef; + // Finally, handle specific knowledge of intrinsics. const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); if (II != 0) @@ -671,15 +708,20 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, default: break; case Intrinsic::memcpy: case Intrinsic::memmove: { - unsigned Len = UnknownSize; + uint64_t Len = UnknownSize; if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) Len = LenCI->getZExtValue(); Value *Dest = II->getArgOperand(0); Value *Src = II->getArgOperand(1); - if (isNoAlias(Dest, Len, P, Size)) { - if (isNoAlias(Src, Len, P, Size)) + // If it can't overlap the source dest, then it doesn't modref the loc. + if (isNoAlias(Location(Dest, Len), Loc)) { + if (isNoAlias(Location(Src, Len), Loc)) return NoModRef; - return Ref; + // If it can't overlap the dest, then worst case it reads the loc. + Min = Ref; + } else if (isNoAlias(Location(Src, Len), Loc)) { + // If it can't overlap the source, then worst case it mutates the loc. + Min = Mod; } break; } @@ -687,11 +729,13 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // Since memset is 'accesses arguments' only, the AliasAnalysis base class // will handle it for the variable length case. if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) { - unsigned Len = LenCI->getZExtValue(); + uint64_t Len = LenCI->getZExtValue(); Value *Dest = II->getArgOperand(0); - if (isNoAlias(Dest, Len, P, Size)) + if (isNoAlias(Location(Dest, Len), Loc)) return NoModRef; } + // We know that memset doesn't load anything. 
+ Min = Mod; break; case Intrinsic::atomic_cmp_swap: case Intrinsic::atomic_swap: @@ -707,42 +751,49 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, case Intrinsic::atomic_load_umin: if (TD) { Value *Op1 = II->getArgOperand(0); - unsigned Op1Size = TD->getTypeStoreSize(Op1->getType()); - if (isNoAlias(Op1, Op1Size, P, Size)) + uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType()); + MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa); + if (isNoAlias(Location(Op1, Op1Size, Tag), Loc)) return NoModRef; } break; case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::invariant_start: { - unsigned PtrSize = + uint64_t PtrSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); - if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size)) + if (isNoAlias(Location(II->getArgOperand(1), + PtrSize, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) return NoModRef; break; } case Intrinsic::invariant_end: { - unsigned PtrSize = + uint64_t PtrSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(); - if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size)) + if (isNoAlias(Location(II->getArgOperand(2), + PtrSize, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) return NoModRef; break; } } // The AliasAnalysis base class has some smarts, lets use them. - return AliasAnalysis::getModRefInfo(CS, P, Size); + return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); } - /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction /// against another pointer. We know that V1 is a GEP, but we don't know -/// anything about V2. UnderlyingV1 is GEP1->getUnderlyingObject(), +/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD), /// UnderlyingV2 is the same for V2. /// AliasAnalysis::AliasResult -BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, - const Value *V2, unsigned V2Size, +BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo, const Value *UnderlyingV1, const Value *UnderlyingV2) { // If this GEP has been visited before, we're on a use-def cycle. @@ -759,8 +810,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, // out if the indexes to the GEP tell us anything about the derived pointer. if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, - UnderlyingV2, UnknownSize); + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, + UnderlyingV2, UnknownSize, 0); // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. @@ -782,7 +833,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, // to handle without it. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { assert(TD == 0 && - "DecomposeGEPExpression and getUnderlyingObject disagree!"); + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } @@ -800,7 +851,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, if (V1Size == UnknownSize && V2Size == UnknownSize) return MayAlias; - AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, V2, V2Size); + AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0, + V2, V2Size, V2TBAAInfo); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. 
// If V2 is known not to alias GEP base pointer, then the two values @@ -817,7 +869,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, // to handle without it. if (GEP1BasePtr != UnderlyingV1) { assert(TD == 0 && - "DecomposeGEPExpression and getUnderlyingObject disagree!"); + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } } @@ -831,6 +883,17 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty()) return MustAlias; + // If there is a difference between the pointers, but the difference is + // less than the size of the associated memory object, then we know + // that the objects are partially overlapping. + if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) { + if (GEP1BaseOffset >= 0 ? + (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) : + (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size && + GEP1BaseOffset != INT64_MIN)) + return PartialAlias; + } + // If we have a known constant offset, see if this offset is larger than the // access size being queried. If so, and if no variable indices can remove // pieces of this constant, then we know we have a no-alias. For example, @@ -850,8 +913,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, // If our known offset is bigger than the access size, we know we don't have // an alias. if (GEP1BaseOffset) { - if (GEP1BaseOffset >= (int64_t)V2Size || - GEP1BaseOffset <= -(int64_t)V1Size) + if (GEP1BaseOffset >= 0 ? + (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) : + (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size && + GEP1BaseOffset != INT64_MIN)) return NoAlias; } @@ -861,8 +926,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, /// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select /// instruction against another. AliasAnalysis::AliasResult -BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize, - const Value *V2, unsigned V2Size) { +BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, + const MDNode *SITBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { // If this select has been visited before, we're on a use-def cycle. // Such cycles are only valid when PHI nodes are involved or in unreachable // code. The visitPHI function catches cycles containing PHIs, but there @@ -875,13 +942,13 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize, if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) if (SI->getCondition() == SI2->getCondition()) { AliasResult Alias = - aliasCheck(SI->getTrueValue(), SISize, - SI2->getTrueValue(), V2Size); + aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo, + SI2->getTrueValue(), V2Size, V2TBAAInfo); if (Alias == MayAlias) return MayAlias; AliasResult ThisAlias = - aliasCheck(SI->getFalseValue(), SISize, - SI2->getFalseValue(), V2Size); + aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo, + SI2->getFalseValue(), V2Size, V2TBAAInfo); if (ThisAlias != Alias) return MayAlias; return Alias; @@ -890,7 +957,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize, // If both arms of the Select node NoAlias or MustAlias V2, then returns // NoAlias / MustAlias. Otherwise, returns MayAlias.
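// ---- Editor's sketch (not part of the patch) -----------------------------
// The constant-offset tests added to aliasGEP above, standalone. Off is the
// signed byte offset of access 1 relative to access 2; the two accesses are
// known to overlap when the offset is smaller than the size of whichever
// access starts first. As in the patch, INT64_MIN is excluded so the
// negation below is always well defined.
#include <stdint.h>
static const uint64_t IllustUnknownSize = ~(uint64_t)0;
inline bool knownOverlap(int64_t Off, uint64_t Size1, uint64_t Size2) {
  if (Off == 0 || Off == INT64_MIN)
    return false;  // same start handled elsewhere; INT64_MIN: give up
  if (Off > 0)     // access 1 starts Off bytes past access 2
    return Size2 != IllustUnknownSize && (uint64_t)Off < Size2;
  return Size1 != IllustUnknownSize && (uint64_t)-Off < Size1;
}
// ---- end of editor's sketch ----------------------------------------------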
AliasResult Alias = - aliasCheck(V2, V2Size, SI->getTrueValue(), SISize); + aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo); if (Alias == MayAlias) return MayAlias; @@ -900,7 +967,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize, Visited.erase(V2); AliasResult ThisAlias = - aliasCheck(V2, V2Size, SI->getFalseValue(), SISize); + aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo); if (ThisAlias != Alias) return MayAlias; return Alias; @@ -909,8 +976,10 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize, // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction // against another. AliasAnalysis::AliasResult -BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, - const Value *V2, unsigned V2Size) { +BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, + const MDNode *PNTBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { // The PHI node has already been visited, avoid recursion any further. if (!Visited.insert(PN)) return MayAlias; @@ -921,16 +990,16 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, if (const PHINode *PN2 = dyn_cast<PHINode>(V2)) if (PN2->getParent() == PN->getParent()) { AliasResult Alias = - aliasCheck(PN->getIncomingValue(0), PNSize, + aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)), - V2Size); + V2Size, V2TBAAInfo); if (Alias == MayAlias) return MayAlias; for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { AliasResult ThisAlias = - aliasCheck(PN->getIncomingValue(i), PNSize, + aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), - V2Size); + V2Size, V2TBAAInfo); if (ThisAlias != Alias) return MayAlias; } @@ -951,7 +1020,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, V1Srcs.push_back(PV1); } - AliasResult Alias = aliasCheck(V2, V2Size, V1Srcs[0], PNSize); + AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo, + V1Srcs[0], PNSize, PNTBAAInfo); // Early exit if the check of the first PHI source against V2 is MayAlias. // Other results are not possible. if (Alias == MayAlias) @@ -967,7 +1037,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, // don't need to assume that V2 is being visited recursively. Visited.erase(V2); - AliasResult ThisAlias = aliasCheck(V2, V2Size, V, PNSize); + AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, + V, PNSize, PNTBAAInfo); if (ThisAlias != Alias || ThisAlias == MayAlias) return MayAlias; } @@ -979,8 +1050,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, // such as array references. // AliasAnalysis::AliasResult -BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { +BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, + const MDNode *V1TBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. if (V1Size == 0 || V2Size == 0) @@ -997,8 +1070,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, return NoAlias; // Scalars cannot alias each other // Figure out what objects these things are pointing to if we can. 
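// ---- Editor's sketch (not part of the patch) -----------------------------
// The merge rule aliasSelect and aliasPHI use above, standalone: per-arm
// answers fold into a single answer, and any disagreement between arms (or
// any MayAlias arm) degrades the whole select/phi to MayAlias.
#include <vector>
enum IllustAR { IllustNoAlias, IllustMayAlias, IllustMustAlias };
inline IllustAR mergeArms(const std::vector<IllustAR> &Arms) {
  if (Arms.empty()) return IllustMayAlias;
  IllustAR Result = Arms[0];
  for (size_t i = 0, e = Arms.size(); i != e; ++i)
    if (Arms[i] == IllustMayAlias || Arms[i] != Result)
      return IllustMayAlias;
  return Result;
}
// ---- end of editor's sketch ----------------------------------------------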
- const Value *O1 = V1->getUnderlyingObject(); - const Value *O2 = V2->getUnderlyingObject(); + const Value *O1 = GetUnderlyingObject(V1, TD); + const Value *O2 = GetUnderlyingObject(V2, TD); // Null values in the default address space don't point to any object, so they // don't alias any other pointer. @@ -1059,25 +1132,39 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, std::swap(V1Size, V2Size); std::swap(O1, O2); } - if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) - return aliasGEP(GV1, V1Size, V2, V2Size, O1, O2); + if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { + AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2); + if (Result != MayAlias) return Result; + } if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { std::swap(V1, V2); std::swap(V1Size, V2Size); } - if (const PHINode *PN = dyn_cast<PHINode>(V1)) - return aliasPHI(PN, V1Size, V2, V2Size); + if (const PHINode *PN = dyn_cast<PHINode>(V1)) { + AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo, + V2, V2Size, V2TBAAInfo); + if (Result != MayAlias) return Result; + } if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { std::swap(V1, V2); std::swap(V1Size, V2Size); } - if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) - return aliasSelect(S1, V1Size, V2, V2Size); + if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { + AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo, + V2, V2Size, V2TBAAInfo); + if (Result != MayAlias) return Result; + } - return NoAA::alias(V1, V1Size, V2, V2Size); -} + // If both pointers are pointing into the same object and one of them + // is accessing the entire object, then the accesses must + // overlap in some way. + if (TD && O1 == O2) + if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) || + (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD))) + return PartialAlias; -// Make sure that anything that uses AliasAnalysis pulls in this file.
-DEFINING_FILE_FOR(BasicAliasAnalysis) + return AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); +} diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 617a362062fc..7bb063fbbbcf 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -25,7 +25,9 @@ using namespace llvm; namespace { struct CFGViewer : public FunctionPass { static char ID; // Pass identifcation, replacement for typeid - CFGViewer() : FunctionPass(ID) {} + CFGViewer() : FunctionPass(ID) { + initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F) { F.viewCFG(); @@ -41,12 +43,14 @@ namespace { } char CFGViewer::ID = 0; -INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true); +INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true) namespace { struct CFGOnlyViewer : public FunctionPass { static char ID; // Pass identifcation, replacement for typeid - CFGOnlyViewer() : FunctionPass(ID) {} + CFGOnlyViewer() : FunctionPass(ID) { + initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F) { F.viewCFGOnly(); @@ -63,13 +67,14 @@ namespace { char CFGOnlyViewer::ID = 0; INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only", - "View CFG of function (with no function bodies)", false, true); + "View CFG of function (with no function bodies)", false, true) namespace { struct CFGPrinter : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGPrinter() : FunctionPass(ID) {} - explicit CFGPrinter(char &pid) : FunctionPass(pid) {} + CFGPrinter() : FunctionPass(ID) { + initializeCFGPrinterPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F) { std::string Filename = "cfg." + F.getNameStr() + ".dot"; @@ -96,13 +101,15 @@ namespace { char CFGPrinter::ID = 0; INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file", - false, true); + false, true) namespace { struct CFGOnlyPrinter : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGOnlyPrinter() : FunctionPass(ID) {} - explicit CFGOnlyPrinter(char &pid) : FunctionPass(pid) {} + CFGOnlyPrinter() : FunctionPass(ID) { + initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F) { std::string Filename = "cfg." + F.getNameStr() + ".dot"; errs() << "Writing '" << Filename << "'..."; @@ -128,7 +135,7 @@ namespace { char CFGOnlyPrinter::ID = 0; INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only", "Print CFG of function to 'dot' file (with no function bodies)", - false, true); + false, true) /// viewCFG - This function is meant for use from the debugger. 
You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 6a2ab681d1ac..1a738fae837d 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -11,6 +11,8 @@ add_llvm_library(LLVMAnalysis ConstantFolding.cpp DbgInfoPrinter.cpp DebugInfo.cpp + DIBuilder.cpp + DominanceFrontier.cpp DomPrinter.cpp IVUsers.cpp InlineCost.cpp @@ -27,11 +29,15 @@ add_llvm_library(LLVMAnalysis LoopDependenceAnalysis.cpp LoopInfo.cpp LoopPass.cpp + MemDepPrinter.cpp MemoryBuiltins.cpp MemoryDependenceAnalysis.cpp ModuleDebugInfoPrinter.cpp + PathNumbering.cpp + PathProfileInfo.cpp + PathProfileVerifier.cpp + NoAliasAnalysis.cpp PHITransAddr.cpp - PointerTracking.cpp PostDominators.cpp ProfileEstimatorPass.cpp ProfileInfo.cpp @@ -39,6 +45,7 @@ add_llvm_library(LLVMAnalysis ProfileInfoLoaderPass.cpp ProfileVerifierPass.cpp RegionInfo.cpp + RegionPass.cpp RegionPrinter.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp @@ -50,4 +57,4 @@ add_llvm_library(LLVMAnalysis ValueTracking.cpp ) -target_link_libraries (LLVMAnalysis LLVMSupport) +add_subdirectory(IPA) diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 90eae20858fb..42a54d9d1eb3 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -95,6 +95,9 @@ bool llvm::PointerMayBeCaptured(const Value *V, case Instruction::Load: // Loading from a pointer does not cause it to be captured. break; + case Instruction::VAArg: + // "va-arg" from a pointer does not cause it to be captured. + break; case Instruction::Ret: if (ReturnCaptures) return true; diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 0bf7967e83b1..cd8d52c1c465 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/FEnv.h" #include <cerrno> #include <cmath> using namespace llvm; @@ -53,7 +54,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // vector so the code below can handle it uniformly. if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) { Constant *Ops = C; // don't take the address of C! - return FoldBitCast(ConstantVector::get(&Ops, 1), DestTy, TD); + return FoldBitCast(ConstantVector::get(Ops), DestTy, TD); } // If this is a bitcast from constant vector -> vector, fold it. @@ -166,7 +167,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, } } - return ConstantVector::get(Result.data(), Result.size()); + return ConstantVector::get(Result); } @@ -339,6 +340,13 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, return true; } + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + if (CE->getOpcode() == Instruction::IntToPtr && + CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + BytesLeft, TD); + } + // Otherwise, unknown initializer type. return false; } @@ -466,7 +474,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // If this load comes from anywhere in a constant global, and if the global // is all undef or zero, we know what it loads. 
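// ---- Editor's sketch (not part of the patch) -----------------------------
// The shortcut relied on below: if a load's address is somewhere inside a
// constant global whose initializer is all zeros, the loaded value is zero
// regardless of the exact offset or type. Toy model in plain C++:
#include <string.h>
#include <assert.h>
static const unsigned char ZeroGlobal[64] = { 0 };  // ~ zeroinitializer
inline long loadLongAt(unsigned Offset) {           // Offset + 8 <= 64
  long V;
  memcpy(&V, ZeroGlobal + Offset, sizeof(V));
  return V;                                         // always 0
}
int main() { assert(loadLongAt(13) == 0); return 0; }
// ---- end of editor's sketch ----------------------------------------------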
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getUnderlyingObject())){ + if (GlobalVariable *GV = + dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) { if (GV->isConstant() && GV->hasDefinitiveInitializer()) { const Type *ResTy = cast<PointerType>(C->getType())->getElementType(); if (GV->getInitializer()->isNullValue()) @@ -537,7 +546,7 @@ static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps, for (unsigned i = 1; i != NumOps; ++i) { if ((i == 1 || !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(), - reinterpret_cast<Value *const *>(Ops+1), + reinterpret_cast<Value *const *>(Ops+1), i-1))) && Ops[i]->getType() != IntPtrTy) { Any = true; @@ -567,16 +576,35 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, Constant *Ptr = Ops[0]; if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized()) return 0; - - unsigned BitWidth = - TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext())); + + const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' for (unsigned i = 1; i != NumOps; ++i) - if (!isa<ConstantInt>(Ops[i])) + if (!isa<ConstantInt>(Ops[i])) { + + // If this is "gep i8* Ptr, (sub 0, V)", fold this as: + // "inttoptr (sub (ptrtoint Ptr), V)" + if (NumOps == 2 && + cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]); + assert((CE == 0 || CE->getType() == IntPtrTy) && + "CastGEPIndices didn't canonicalize index types!"); + if (CE && CE->getOpcode() == Instruction::Sub && + CE->getOperand(0)->isNullValue()) { + Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); + Res = ConstantExpr::getSub(Res, CE->getOperand(1)); + Res = ConstantExpr::getIntToPtr(Res, ResultTy); + if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res)) + Res = ConstantFoldConstantExpression(ResCE, TD); + return Res; + } + } return 0; + } + unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); APInt Offset = APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), (Value**)Ops+1, NumOps-1)); @@ -609,10 +637,8 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, APInt BasePtr(BitWidth, 0); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) if (CE->getOpcode() == Instruction::IntToPtr) - if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) { - BasePtr = Base->getValue(); - BasePtr.zextOrTrunc(BitWidth); - } + if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) + BasePtr = Base->getValue().zextOrTrunc(BitWidth); if (Ptr->isNullValue() || BasePtr != 0) { Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr); return ConstantExpr::getIntToPtr(C, ResultTy); @@ -638,12 +664,19 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); + const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); if (ElemSize == 0) - return 0; - APInt NewIdx = Offset.udiv(ElemSize); - Offset -= NewIdx * ElemSize; - NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Ty->getContext()), - NewIdx)); + // The element size is 0. This may be [0 x Ty]*, so just use a zero + // index for this level and proceed to the next level to see if it can + // accommodate the offset. 
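// ---- Editor's sketch (not part of the patch) -----------------------------
// The index/remainder step used when turning a raw byte offset back into GEP
// indices: at each array level the index is offset / elemSize (rounding
// down) and the remainder is left for the inner levels; a zero-sized element
// type ([0 x Ty]) contributes index 0, as the new code below does.
#include <stdint.h>
#include <assert.h>
inline uint64_t stepIntoArray(uint64_t &Offset, uint64_t ElemSize) {
  if (ElemSize == 0) return 0;       // defer the whole offset inward
  uint64_t Idx = Offset / ElemSize;
  Offset -= Idx * ElemSize;
  return Idx;
}
int main() {
  uint64_t Off = 37;                 // byte offset into an i32 array
  assert(stepIntoArray(Off, 4) == 9 && Off == 1);
  return 0;
}
// ---- end of editor's sketch ----------------------------------------------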
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0)); + else { + // The element size is non-zero; divide the offset by the element + // size (rounding down), to compute the index at this level. + APInt NewIdx = Offset.udiv(ElemSize); + Offset -= NewIdx * ElemSize; + NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx)); + } Ty = ATy->getElementType(); } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { // Determine which field of the struct the offset points into. The @@ -687,27 +720,34 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // Constant Folding public APIs //===----------------------------------------------------------------------===// - -/// ConstantFoldInstruction - Attempt to constant fold the specified -/// instruction. If successful, the constant result is returned, if not, null -/// is returned. Note that this function can only fail when attempting to fold -/// instructions like loads and stores, which have no constant expression form. -/// +/// ConstantFoldInstruction - Try to constant fold the specified instruction. +/// If successful, the constant result is returned, if not, null is returned. +/// Note that this fails if not all of the operands are constant. Otherwise, +/// this function can only fail when attempting to fold instructions like loads +/// and stores, which have no constant expression form. Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { + // Handle PHI nodes quickly here... if (PHINode *PN = dyn_cast<PHINode>(I)) { - if (PN->getNumIncomingValues() == 0) - return UndefValue::get(PN->getType()); - - Constant *Result = dyn_cast<Constant>(PN->getIncomingValue(0)); - if (Result == 0) return 0; - - // Handle PHI nodes specially here... - for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) != Result && PN->getIncomingValue(i) != PN) - return 0; // Not all the same incoming constants... + Constant *CommonValue = 0; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PN->getIncomingValue(i); + // If the incoming value is undef then skip it. Note that while we could + // skip the value if it is equal to the phi node itself we choose not to + // because that would break the rule that constant folding only applies if + // all operands are constants. + if (isa<UndefValue>(Incoming)) + continue; + // If the incoming value is not a constant, or is a different constant to + // the one we saw previously, then give up. + Constant *C = dyn_cast<Constant>(Incoming); + if (!C || (CommonValue && C != CommonValue)) + return 0; + CommonValue = C; + } - // If we reach here, all incoming values are the same constant. - return Result; + // If we reach here, all incoming values are the same constant or undef. + return CommonValue ?
CommonValue : UndefValue::get(PN->getType()); } // Scan the operand list, checking to see if they are all constants, if so, @@ -725,7 +765,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { if (const LoadInst *LI = dyn_cast<LoadInst>(I)) return ConstantFoldLoadInst(LI, TD); - + + if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) + return ConstantExpr::getInsertValue( + cast<Constant>(IVI->getAggregateOperand()), + cast<Constant>(IVI->getInsertedValueOperand()), + IVI->idx_begin(), IVI->getNumIndices()); + + if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) + return ConstantExpr::getExtractValue( + cast<Constant>(EVI->getAggregateOperand()), + EVI->idx_begin(), EVI->getNumIndices()); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops.data(), Ops.size(), TD); } @@ -736,7 +787,8 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, const TargetData *TD) { SmallVector<Constant*, 8> Ops; - for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) { + for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); + i != e; ++i) { Constant *NewC = cast<Constant>(*i); // Recursively fold the ConstantExpr's operands. if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) @@ -1000,8 +1052,17 @@ llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::usub_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::ssub_with_overflow: + case Intrinsic::smul_with_overflow: case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: return true; default: return false; @@ -1039,10 +1100,10 @@ llvm::canConstantFoldCallTo(const Function *F) { static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, const Type *Ty) { - errno = 0; + sys::llvm_fenv_clearexcept(); V = NativeFP(V); - if (errno != 0) { - errno = 0; + if (sys::llvm_fenv_testexcept()) { + sys::llvm_fenv_clearexcept(); return 0; } @@ -1056,10 +1117,10 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V, double W, const Type *Ty) { - errno = 0; + sys::llvm_fenv_clearexcept(); V = NativeFP(V, W); - if (errno != 0) { - errno = 0; + if (sys::llvm_fenv_testexcept()) { + sys::llvm_fenv_clearexcept(); return 0; } @@ -1071,6 +1132,36 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), return 0; // dummy return to suppress warning } +/// ConstantFoldConvertToInt - Attempt to fold an SSE floating point to integer +/// conversion of a constant floating point. If roundTowardZero is false, the +/// default IEEE rounding is used (toward nearest, ties to even). This matches +/// the behavior of the non-truncating SSE instructions in the default rounding +/// mode. The desired integer type Ty is used to select how many bits are +/// available for the result. Returns null if the conversion cannot be +/// performed, otherwise returns the Constant value resulting from the +/// conversion.
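// ---- Editor's sketch (not part of the patch) -----------------------------
// The two rounding modes the function below chooses between: cvtss2si-style
// conversions round to nearest with ties to even; cvttss2si-style
// conversions truncate toward zero. Standalone, with the C99 math functions:
#include <math.h>
#include <fenv.h>
#include <assert.h>
int main() {
  fesetround(FE_TONEAREST);
  assert(nearbyint(2.5) == 2.0);   // ties go to the even neighbor
  assert(nearbyint(3.5) == 4.0);
  assert(trunc(2.7) == 2.0);       // truncation: toward zero
  assert(trunc(-2.7) == -2.0);
  return 0;
}
// ---- end of editor's sketch ----------------------------------------------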
+static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, + const Type *Ty) { + assert(Op && "Called with NULL operand"); + APFloat Val(Op->getValueAPF()); + + // All of these conversion intrinsics form an integer of at most 64bits. + unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth(); + assert(ResultWidth <= 64 && + "Can only constant fold conversions to 64 and 32 bit ints"); + + uint64_t UIntVal; + bool isExact = false; + APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero + : APFloat::rmNearestTiesToEven; + APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth, + /*isSigned=*/true, mode, + &isExact); + if (status != APFloat::opOK && status != APFloat::opInexact) + return 0; + return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true); +} + /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. Constant * @@ -1082,7 +1173,7 @@ llvm::ConstantFoldCall(Function *F, const Type *Ty = F->getReturnType(); if (NumOperands == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { - if (Name == "llvm.convert.to.fp16") { + if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) { APFloat Val(Op->getValueAPF()); bool lost = false; @@ -1093,6 +1184,13 @@ llvm::ConstantFoldCall(Function *F, if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; + + /// We only fold functions with finite arguments. Folding NaN and inf is + /// likely to be aborted with an exception anyway, and some host libms + /// have known errors raising exceptions. + if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity()) + return 0; + /// Currently APFloat versions of these functions do not exist, so we use /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == @@ -1133,8 +1231,8 @@ llvm::ConstantFoldCall(Function *F, return ConstantFoldFP(log, V, Ty); else if (Name == "log10" && V > 0) return ConstantFoldFP(log10, V, Ty); - else if (Name == "llvm.sqrt.f32" || - Name == "llvm.sqrt.f64") { + else if (F->getIntrinsicID() == Intrinsic::sqrt && + (Ty->isFloatTy() || Ty->isDoubleTy())) { if (V >= -0.0) return ConstantFoldFP(sqrt, V, Ty); else // Undefined @@ -1164,18 +1262,18 @@ llvm::ConstantFoldCall(Function *F, } return 0; } - - + if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { - if (Name.startswith("llvm.bswap")) + switch (F->getIntrinsicID()) { + case Intrinsic::bswap: return ConstantInt::get(F->getContext(), Op->getValue().byteSwap()); - else if (Name.startswith("llvm.ctpop")) + case Intrinsic::ctpop: return ConstantInt::get(Ty, Op->getValue().countPopulation()); - else if (Name.startswith("llvm.cttz")) + case Intrinsic::cttz: return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); - else if (Name.startswith("llvm.ctlz")) + case Intrinsic::ctlz: return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); - else if (Name == "llvm.convert.from.fp16") { + case Intrinsic::convert_from_fp16: { APFloat Val(Op->getValue()); bool lost = false; @@ -1183,24 +1281,44 @@ llvm::ConstantFoldCall(Function *F, Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost); // Conversion is always precise. 
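// ---- Editor's note (not part of the patch) --------------------------------
// The "- status = status;" / "+ (void)status;" change below swaps a
// self-assignment for the conventional cast-to-void idiom for a variable
// that only feeds an assert:
//   APFloat::opStatus status = Val.convert(...);
//   (void)status;                            // marks it intentionally unused
//   assert(status == APFloat::opOK && ...);  // compiled out with NDEBUG
// Self-assignment is flagged by newer compilers (e.g. -Wself-assign), which
// is presumably what motivated the change.
// ---- end of editor's note --------------------------------------------------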
- status = status; + (void)status; assert(status == APFloat::opOK && !lost && "Precision lost during fp16 constfolding"); return ConstantFP::get(F->getContext(), Val); } - return 0; + default: + return 0; + } } - + + if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0))) + return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty); + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: + if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0))) + return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty); + } + } + if (isa<UndefValue>(Operands[0])) { - if (Name.startswith("llvm.bswap")) + if (F->getIntrinsicID() == Intrinsic::bswap) return Operands[0]; return 0; } return 0; } - + if (NumOperands == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (!Ty->isFloatTy() && !Ty->isDoubleTy()) @@ -1223,11 +1341,11 @@ llvm::ConstantFoldCall(Function *F, if (Name == "atan2") return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { - if (Name == "llvm.powi.f32") + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) return ConstantFP::get(F->getContext(), APFloat((float)std::pow((float)Op1V, (int)Op2C->getZExtValue()))); - if (Name == "llvm.powi.f64") + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy()) return ConstantFP::get(F->getContext(), APFloat((double)std::pow((double)Op1V, (int)Op2C->getZExtValue()))); @@ -1240,42 +1358,37 @@ llvm::ConstantFoldCall(Function *F, if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { switch (F->getIntrinsicID()) { default: break; - case Intrinsic::uadd_with_overflow: { - Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. - Constant *Ops[] = { - Res, ConstantExpr::getICmp(CmpInst::ICMP_ULT, Res, Op1) // overflow. - }; - return ConstantStruct::get(F->getContext(), Ops, 2, false); - } - case Intrinsic::usub_with_overflow: { - Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: { + APInt Res; + bool Overflow; + switch (F->getIntrinsicID()) { + default: assert(0 && "Invalid case"); + case Intrinsic::sadd_with_overflow: + Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::uadd_with_overflow: + Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::ssub_with_overflow: + Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::usub_with_overflow: + Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::smul_with_overflow: + Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow); + break; + } Constant *Ops[] = { - Res, ConstantExpr::getICmp(CmpInst::ICMP_UGT, Res, Op1) // overflow. 
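// ---- Editor's sketch (not part of the patch) -----------------------------
// What the new with-overflow folding computes, in plain 32-bit integers:
// APInt::sadd_ov and friends return the wrapped result plus an overflow
// flag, and the folded intrinsic value is that {result, overflow} pair.
#include <stdint.h>
#include <assert.h>
struct IllustAddResult { int32_t Value; bool Overflow; };
inline IllustAddResult sadd_ov32(int32_t A, int32_t B) {
  IllustAddResult R;
  R.Value = (int32_t)((uint32_t)A + (uint32_t)B);  // two's-complement wrap
  // Signed overflow iff the operands share a sign that the result lacks.
  R.Overflow = ((A >= 0) == (B >= 0)) && ((R.Value >= 0) != (A >= 0));
  return R;
}
int main() {
  assert(sadd_ov32(INT32_MAX, 1).Overflow);        // wraps to INT32_MIN
  assert(!sadd_ov32(40, 2).Overflow && sadd_ov32(40, 2).Value == 42);
  return 0;
}
// ---- end of editor's sketch ----------------------------------------------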
+ ConstantInt::get(F->getContext(), Res), + ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow) }; return ConstantStruct::get(F->getContext(), Ops, 2, false); } - case Intrinsic::sadd_with_overflow: { - Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. - Constant *Overflow = ConstantExpr::getSelect( - ConstantExpr::getICmp(CmpInst::ICMP_SGT, - ConstantInt::get(Op1->getType(), 0), Op1), - ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op2), - ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op2)); // overflow. - - Constant *Ops[] = { Res, Overflow }; - return ConstantStruct::get(F->getContext(), Ops, 2, false); - } - case Intrinsic::ssub_with_overflow: { - Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. - Constant *Overflow = ConstantExpr::getSelect( - ConstantExpr::getICmp(CmpInst::ICMP_SGT, - ConstantInt::get(Op2->getType(), 0), Op2), - ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op1), - ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op1)); // overflow. - - Constant *Ops[] = { Res, Overflow }; - return ConstantStruct::get(F->getContext(), Ops, 2, false); - } } } @@ -1285,4 +1398,3 @@ llvm::ConstantFoldCall(Function *F, } return 0; } - diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp new file mode 100644 index 000000000000..c1072df72925 --- /dev/null +++ b/lib/Analysis/DIBuilder.cpp @@ -0,0 +1,801 @@ +//===--- DIBuilder.cpp - Debug Information Builder ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the DIBuilder. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Constants.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Dwarf.h" + +using namespace llvm; +using namespace llvm::dwarf; + +static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) { + assert((Tag & LLVMDebugVersionMask) == 0 && + "Tag too large for debug encoding!"); + return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion); +} + +DIBuilder::DIBuilder(Module &m) + : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {} + +/// CreateCompileUnit - A CompileUnit provides an anchor for all debugging +/// information generated during this instance of compilation. +void DIBuilder::CreateCompileUnit(unsigned Lang, StringRef Filename, + StringRef Directory, StringRef Producer, + bool isOptimized, StringRef Flags, + unsigned RunTimeVer) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + ConstantInt::get(Type::getInt32Ty(VMContext), Lang), + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + MDString::get(VMContext, Producer), + // Deprecate isMain field. + ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), + MDString::get(VMContext, Flags), + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) + }; + TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateFile - Create a file descriptor to hold debugging information +/// for a file. 
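// ---- Editor's sketch (not part of the patch) -----------------------------
// How the new DIBuilder entry points in this file compose, using only the
// signatures visible in the patch; the language constant, names, and sizes
// are illustrative. A front end creates the compile unit once, then hangs
// files, types, and declarations off it:
#include "llvm/Module.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
void emitSampleDebugInfo(Module &M) {
  DIBuilder DIB(M);
  DIB.CreateCompileUnit(dwarf::DW_LANG_C99, "a.c", "/tmp", "example-fe",
                        /*isOptimized=*/false, /*Flags=*/"", /*RunTimeVer=*/0);
  DIFile File = DIB.CreateFile("a.c", "/tmp");   // requires the CU above
  DIType IntTy = DIB.CreateBasicType("int", 32, 32, dwarf::DW_ATE_signed);
  DIB.CreateTypedef(IntTy, "myint", File, /*LineNo=*/1);
}
// ---- end of editor's sketch ----------------------------------------------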
+DIFile DIBuilder::CreateFile(StringRef Filename, StringRef Directory) { + assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_file_type), + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + TheCU + }; + return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateEnumerator - Create a single enumerator value. +DIEnumerator DIBuilder::CreateEnumerator(StringRef Name, uint64_t Val) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_enumerator), + MDString::get(VMContext, Name), + ConstantInt::get(Type::getInt64Ty(VMContext), Val) + }; + return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateBasicType - Create debugging information entry for a basic +/// type, e.g. 'char'. +DIType DIBuilder::CreateBasicType(StringRef Name, uint64_t SizeInBits, + uint64_t AlignInBits, + unsigned Encoding) { + // Basic types are encoded in DIBasicType format. Line number, filename, + // offset and flags are always empty here. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_base_type), + TheCU, + MDString::get(VMContext, Name), + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateQualifiedType - Create debugging information entry for a qualified +/// type, e.g. 'const int'. +DIType DIBuilder::CreateQualifiedType(unsigned Tag, DIType FromTy) { + // Qualified types are encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, Tag), + TheCU, + MDString::get(VMContext, StringRef()), // Empty name. + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + FromTy + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreatePointerType - Create debugging information entry for a pointer. +DIType DIBuilder::CreatePointerType(DIType PointeeTy, uint64_t SizeInBits, + uint64_t AlignInBits, StringRef Name) { + // Pointer types are encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), + TheCU, + MDString::get(VMContext, Name), + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + PointeeTy + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateReferenceType - Create debugging information entry for a reference. +DIType DIBuilder::CreateReferenceType(DIType RTy) { + // References are encoded in DIDerivedType format.
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_reference_type), + TheCU, + NULL, // Name + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + RTy + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateTypedef - Create debugging information entry for a typedef. +DIType DIBuilder::CreateTypedef(DIType Ty, StringRef Name, DIFile File, + unsigned LineNo) { + // typedefs are encoded in DIDerivedType format. + assert(Ty.Verify() && "Invalid typedef type!"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_typedef), + Ty.getContext(), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + Ty + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateFriend - Create debugging information entry for a 'friend'. +DIType DIBuilder::CreateFriend(DIType Ty, DIType FriendTy) { + // friends are encoded in DIDerivedType format. + assert(Ty.Verify() && "Invalid type!"); + assert(FriendTy.Verify() && "Invalid friend type!"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_friend), + Ty, + NULL, // Name + Ty.getFile(), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + FriendTy + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateInheritance - Create debugging information entry to establish +/// inheritance relationship between two types. +DIType DIBuilder::CreateInheritance(DIType Ty, DIType BaseTy, + uint64_t BaseOffset, unsigned Flags) { + // TAG_inheritance is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), + Ty, + NULL, // Name + Ty.getFile(), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + BaseTy + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateMemberType - Create debugging information entry for a member. +DIType DIBuilder::CreateMemberType(StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + File, // Or TheCU ? Ty ?
+ MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateClassType - Create debugging information entry for a class. +DIType DIBuilder::CreateClassType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType DerivedFrom, DIArray Elements, + MDNode *VTableHoder, MDNode *TemplateParams) { + // TAG_class_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_class_type), + Context, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom, + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + VTableHoder, + TemplateParams + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateTemplateTypeParameter - Create debugging information for template +/// type parameter. +DITemplateTypeParameter +DIBuilder::CreateTemplateTypeParameter(DIDescriptor Context, StringRef Name, + DIType Ty, MDNode *File, unsigned LineNo, + unsigned ColumnNo) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter), + Context, + MDString::get(VMContext, Name), + Ty, + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) + }; + return DITemplateTypeParameter(MDNode::get(VMContext, &Elts[0], + array_lengthof(Elts))); +} + +/// CreateTemplateValueParameter - Create debugging information for template +/// value parameter. +DITemplateValueParameter +DIBuilder::CreateTemplateValueParameter(DIDescriptor Context, StringRef Name, + DIType Ty, uint64_t Val, + MDNode *File, unsigned LineNo, + unsigned ColumnNo) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter), + Context, + MDString::get(VMContext, Name), + Ty, + ConstantInt::get(Type::getInt64Ty(VMContext), Val), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) + }; + return DITemplateValueParameter(MDNode::get(VMContext, &Elts[0], + array_lengthof(Elts))); +} + +/// CreateStructType - Create debugging information entry for a struct. +DIType DIBuilder::CreateStructType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + unsigned Flags, DIArray Elements, + unsigned RunTimeLang) { + // TAG_structure_type is encoded in DICompositeType format. 
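// ---- Editor's note (not part of the patch) --------------------------------
// The encoding pattern every Create* method here follows: operand 0 of the
// descriptor MDNode is the DWARF tag OR'd with LLVMDebugVersion (see
// GetTagConstant above), followed by a fixed, tag-specific operand list.
// Consumers recover the tag by masking the version back off, roughly:
//   unsigned Tag = cast<ConstantInt>(N->getOperand(0))->getZExtValue()
//                  & ~LLVMDebugVersionMask;
// ---- end of editor's note --------------------------------------------------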
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), + Context, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateUnionType - Create debugging information entry for an union. +DIType DIBuilder::CreateUnionType(DIDescriptor Scope, StringRef Name, + DIFile File, + unsigned LineNumber, uint64_t SizeInBits, + uint64_t AlignInBits, unsigned Flags, + DIArray Elements, unsigned RunTimeLang) { + // TAG_union_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_union_type), + Scope, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateSubroutineType - Create subroutine type. +DIType DIBuilder::CreateSubroutineType(DIFile File, DIArray ParameterTypes) { + // TAG_subroutine_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), + File, + MDString::get(VMContext, ""), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + ParameterTypes, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateEnumerationType - Create debugging information entry for an +/// enumeration. +DIType DIBuilder::CreateEnumerationType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, DIArray Elements) { + // TAG_enumeration_type is encoded in DICompositeType format. 
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), + Scope, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); + NMD->addOperand(Node); + return DIType(Node); +} + +/// CreateArrayType - Create debugging information entry for an array. +DIType DIBuilder::CreateArrayType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts) { + // TAG_array_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_array_type), + TheCU, + MDString::get(VMContext, ""), + TheCU, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), Size), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + Ty, + Subscripts, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateVectorType - Create debugging information entry for a vector. +DIType DIBuilder::CreateVectorType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts) { + // TAG_vector_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_vector_type), + TheCU, + MDString::get(VMContext, ""), + TheCU, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), Size), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + Ty, + Subscripts, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// CreateArtificialType - Create a new DIType with "artificial" flag set. +DIType DIBuilder::CreateArtificialType(DIType Ty) { + if (Ty.isArtificial()) + return Ty; + + SmallVector<Value *, 9> Elts; + MDNode *N = Ty; + assert (N && "Unexpected input DIType!"); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (Value *V = N->getOperand(i)) + Elts.push_back(V); + else + Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))); + } + + unsigned CurFlags = Ty.getFlags(); + CurFlags = CurFlags | DIType::FlagArtificial; + + // Flags are stored at this slot. + Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); + + return DIType(MDNode::get(VMContext, Elts.data(), Elts.size())); +} + +/// RetainType - Retain DIType in a module even if it is not referenced +/// through debug info anchors. 
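// ---- Editor's sketch (not part of the patch) -----------------------------
// The retention pattern RetainType (below) and the CreateGlobalVariable/
// CreateStaticVariable/CreateFunction methods share: descriptors that
// nothing references yet are parked under a named metadata node
// ("llvm.dbg.ty", "llvm.dbg.gv", "llvm.dbg.sp") so the module keeps them
// alive:
#include "llvm/Module.h"
#include "llvm/Metadata.h"
using namespace llvm;
void retainDescriptor(Module &M, MDNode *Node) {
  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
  NMD->addOperand(Node);  // survives even with no other uses
}
// ---- end of editor's sketch ----------------------------------------------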
+void DIBuilder::RetainType(DIType T) { + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty"); + NMD->addOperand(T); +} + +/// CreateUnspecifiedParameter - Create unspecified type descriptor +/// for the subroutine type. +DIDescriptor DIBuilder::CreateUnspecifiedParameter() { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) + }; + return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1)); +} + +/// CreateTemporaryType - Create a temporary forward-declared type. +DIType DIBuilder::CreateTemporaryType() { + // Give the temporary MDNode a tag. It doesn't matter what tag we + // use here as long as DIType accepts it. + Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + return DIType(Node); +} + +/// CreateTemporaryType - Create a temporary forward-declared type. +DIType DIBuilder::CreateTemporaryType(DIFile F) { + // Give the temporary MDNode a tag. It doesn't matter what tag we + // use here as long as DIType accepts it. + Value *Elts[] = { + GetTagConstant(VMContext, DW_TAG_base_type), + F.getCompileUnit(), + NULL, + F + }; + MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + return DIType(Node); +} + +/// GetOrCreateArray - Get a DIArray, create one if required. +DIArray DIBuilder::GetOrCreateArray(Value *const *Elements, unsigned NumElements) { + if (NumElements == 0) { + Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)); + return DIArray(MDNode::get(VMContext, &Null, 1)); + } + return DIArray(MDNode::get(VMContext, Elements, NumElements)); +} + +/// GetOrCreateSubrange - Create a descriptor for a value range. This +/// implicitly uniques the values returned. +DISubrange DIBuilder::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type), + ConstantInt::get(Type::getInt64Ty(VMContext), Lo), + ConstantInt::get(Type::getInt64Ty(VMContext), Hi) + }; + + return DISubrange(MDNode::get(VMContext, &Elts[0], 3)); +} + +/// CreateGlobalVariable - Create a new descriptor for the specified global. +DIGlobalVariable DIBuilder:: +CreateGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, + DIType Ty, bool isLocalToUnit, llvm::Value *Val) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_variable), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + TheCU, + MDString::get(VMContext, Name), + MDString::get(VMContext, Name), + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + Ty, + ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ + Val + }; + MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + // Create a named metadata so that we do not lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); + NMD->addOperand(Node); + return DIGlobalVariable(Node); +} + +/// CreateStaticVariable - Create a new descriptor for the specified static +/// variable.
+/// CreateStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+CreateStaticVariable(DIDescriptor Context, StringRef Name,
+                     StringRef LinkageName, DIFile F, unsigned LineNumber,
+                     DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+    Val
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+  NMD->addOperand(Node);
+  return DIGlobalVariable(Node);
+}
+
+/// CreateLocalVariable - Create a new descriptor for the specified local
+/// variable.
+DIVariable DIBuilder::CreateLocalVariable(unsigned Tag, DIDescriptor Scope,
+                                          StringRef Name, DIFile File,
+                                          unsigned LineNo, DIType Ty,
+                                          bool AlwaysPreserve, unsigned Flags) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, Tag),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  if (AlwaysPreserve) {
+    // The optimizer may remove local variables. If there is an interest
+    // in preserving variable info in such a situation then stash it in a
+    // named mdnode.
+    DISubprogram Fn(getDISubprogram(Scope));
+    StringRef FName = "fn";
+    if (Fn.getFunction())
+      FName = Fn.getFunction()->getName();
+    char One = '\1';
+    if (FName.startswith(StringRef(&One, 1)))
+      FName = FName.substr(1);
+    NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
+    FnLocals->addOperand(Node);
+  }
+  return DIVariable(Node);
+}
+
+/// CreateComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::CreateComplexVariable(unsigned Tag, DIDescriptor Scope,
+                                            StringRef Name, DIFile F,
+                                            unsigned LineNo,
+                                            DIType Ty, Value *const *Addr,
+                                            unsigned NumAddr) {
+  SmallVector<Value *, 15> Elts;
+  Elts.push_back(GetTagConstant(VMContext, Tag));
+  Elts.push_back(Scope);
+  Elts.push_back(MDString::get(VMContext, Name));
+  Elts.push_back(F);
+  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
+  Elts.push_back(Ty);
+  Elts.append(Addr, Addr+NumAddr);
+
+  return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}
+
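Similarly, a hedged sketch of describing an automatic local through the API above; DIB, SP, File and IntTy are again assumptions (SP being the enclosing DISubprogram scope):

    // Hypothetical frontend snippet for a local "x" declared on line 7.
    DIVariable DbgX =
        DIB.CreateLocalVariable(dwarf::DW_TAG_auto_variable, SP, "x", File,
                                /*LineNo=*/7, IntTy, /*AlwaysPreserve=*/true,
                                /*Flags=*/0);
    // AlwaysPreserve stashes the node in the function-specific
    // "llvm.dbg.lv.<function>" list so optimization cannot drop it silently.
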
+/// CreateFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::CreateFunction(DIDescriptor Context,
+                                       StringRef Name,
+                                       StringRef LinkageName,
+                                       DIFile File, unsigned LineNo,
+                                       DIType Ty,
+                                       bool isLocalToUnit, bool isDefinition,
+                                       unsigned Flags, bool isOptimized,
+                                       Function *Fn) {
+
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    Fn
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
+}
+
+/// CreateMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::CreateMethod(DIDescriptor Context,
+                                     StringRef Name,
+                                     StringRef LinkageName,
+                                     DIFile F,
+                                     unsigned LineNo, DIType Ty,
+                                     bool isLocalToUnit,
+                                     bool isDefinition,
+                                     unsigned VK, unsigned VIndex,
+                                     MDNode *VTableHolder,
+                                     unsigned Flags,
+                                     bool isOptimized,
+                                     Function *Fn) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+    ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+    VTableHolder,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    Fn
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
+}
+
+/// CreateNameSpace - This creates a new descriptor for a namespace
+/// with the specified parent scope.
+DINameSpace DIBuilder::CreateNameSpace(DIDescriptor Scope, StringRef Name,
+                                       DIFile File, unsigned LineNo) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+  };
+  return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+DILexicalBlock DIBuilder::CreateLexicalBlock(DIDescriptor Scope, DIFile File,
+                                             unsigned Line, unsigned Col) {
+  // Defeat MDNode uniquing for lexical blocks by using a unique id.
+ static unsigned int unique_id = 0; + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), + Scope, + ConstantInt::get(Type::getInt32Ty(VMContext), Line), + ConstantInt::get(Type::getInt32Ty(VMContext), Col), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) + }; + return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); +} + +/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. +Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, + Instruction *InsertBefore) { + assert(Storage && "no storage passed to dbg.declare"); + assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare"); + if (!DeclareFn) + DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); + + Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo }; + return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); +} + +/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. +Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, + BasicBlock *InsertAtEnd) { + assert(Storage && "no storage passed to dbg.declare"); + assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare"); + if (!DeclareFn) + DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); + + Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo }; + + // If this block already has a terminator then insert this intrinsic + // before the terminator. + if (TerminatorInst *T = InsertAtEnd->getTerminator()) + return CallInst::Create(DeclareFn, Args, Args+2, "", T); + else + return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd); +} + +/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, + DIVariable VarInfo, + Instruction *InsertBefore) { + assert(V && "no value passed to dbg.value"); + assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), + VarInfo }; + return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); +} + +/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. 
+Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, + DIVariable VarInfo, + BasicBlock *InsertAtEnd) { + assert(V && "no value passed to dbg.value"); + assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), + VarInfo }; + return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); +} + diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp index 056775060610..b23c3514d0bd 100644 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ b/lib/Analysis/DbgInfoPrinter.cpp @@ -20,6 +20,7 @@ #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Assembly/Writer.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/Passes.h" @@ -40,7 +41,9 @@ namespace { void printVariableDeclaration(const Value *V); public: static char ID; // Pass identification - PrintDbgInfo() : FunctionPass(ID), Out(errs()) {} + PrintDbgInfo() : FunctionPass(ID), Out(errs()) { + initializePrintDbgInfoPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -48,12 +51,124 @@ namespace { } }; char PrintDbgInfo::ID = 0; - INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo", - "Print debug info in human readable form", false, false); } +INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo", + "Print debug info in human readable form", false, false) + FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); } +/// Find the debug info descriptor corresponding to this global variable. +static Value *findDbgGlobalDeclare(GlobalVariable *V) { + const Module *M = V->getParent(); + NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"); + if (!NMD) + return 0; + + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); + if (!DIG.isGlobalVariable()) + continue; + if (DIGlobalVariable(DIG).getGlobal() == V) + return DIG; + } + return 0; +} + +/// Find the debug info descriptor corresponding to this function. +static Value *findDbgSubprogramDeclare(Function *V) { + const Module *M = V->getParent(); + NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"); + if (!NMD) + return 0; + + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); + if (!DIG.isSubprogram()) + continue; + if (DISubprogram(DIG).getFunction() == V) + return DIG; + } + return 0; +} + +/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any. +/// It looks through pointer casts too. 
+static const DbgDeclareInst *findDbgDeclare(const Value *V) {
+  V = V->stripPointerCasts();
+
+  if (!isa<Instruction>(V) && !isa<Argument>(V))
+    return 0;
+
+  const Function *F = NULL;
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    F = I->getParent()->getParent();
+  else if (const Argument *A = dyn_cast<Argument>(V))
+    F = A->getParent();
+
+  for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+    for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+         BI != BE; ++BI)
+      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+        if (DDI->getAddress() == V)
+          return DDI;
+
+  return 0;
+}
+
+static bool getLocationInfo(const Value *V, std::string &DisplayName,
+                            std::string &Type, unsigned &LineNo,
+                            std::string &File, std::string &Dir) {
+  DICompileUnit Unit;
+  DIType TypeD;
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
+    Value *DIGV = findDbgGlobalDeclare(GV);
+    if (!DIGV) return false;
+    DIGlobalVariable Var(cast<MDNode>(DIGV));
+
+    StringRef D = Var.getDisplayName();
+    if (!D.empty())
+      DisplayName = D;
+    LineNo = Var.getLineNumber();
+    Unit = Var.getCompileUnit();
+    TypeD = Var.getType();
+  } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){
+    Value *DIF = findDbgSubprogramDeclare(F);
+    if (!DIF) return false;
+    DISubprogram Var(cast<MDNode>(DIF));
+
+    StringRef D = Var.getDisplayName();
+    if (!D.empty())
+      DisplayName = D;
+    LineNo = Var.getLineNumber();
+    Unit = Var.getCompileUnit();
+    TypeD = Var.getType();
+  } else {
+    const DbgDeclareInst *DDI = findDbgDeclare(V);
+    if (!DDI) return false;
+    DIVariable Var(cast<MDNode>(DDI->getVariable()));
+
+    StringRef D = Var.getName();
+    if (!D.empty())
+      DisplayName = D;
+    LineNo = Var.getLineNumber();
+    Unit = Var.getCompileUnit();
+    TypeD = Var.getType();
+  }
+
+  StringRef T = TypeD.getName();
+  if (!T.empty())
+    Type = T;
+  StringRef F = Unit.getFilename();
+  if (!F.empty())
+    File = F;
+  StringRef D = Unit.getDirectory();
+  if (!D.empty())
+    Dir = D;
+  return true;
+}
+
void PrintDbgInfo::printVariableDeclaration(const Value *V) {
std::string DisplayName, File, Directory, Type;
unsigned LineNo;
@@ -63,8 +178,12 @@ void PrintDbgInfo::printVariableDeclaration(const Value *V) {
Out << "; ";
WriteAsOperand(Out, V, false, 0);
- Out << " is variable " << DisplayName
- << " of type " << Type << " declared at ";
+ if (isa<Function>(V))
+   Out << " is function " << DisplayName
+       << " of type " << Type << " declared at ";
+ else
+   Out << " is variable " << DisplayName
+       << " of type " << Type << " declared at ";
if (PrintDirectory)
Out << Directory << "/";
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 5ca89c658df6..9db1456edd05 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -109,7 +109,9 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const {
}
unsigned DIVariable::getNumAddrElements() const {
- return DbgNode->getNumOperands()-6;
+ if (getVersion() <= llvm::LLVMDebugVersion8)
+   return DbgNode->getNumOperands()-6;
+ return DbgNode->getNumOperands()-7;
}
@@ -197,6 +199,12 @@ bool DIDescriptor::isGlobal() const {
return isGlobalVariable();
}
+/// isUnspecifiedParameter - Return true if the specified tag is
+/// DW_TAG_unspecified_parameters.
+bool DIDescriptor::isUnspecifiedParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
+}
+
/// isScope - Return true if the specified tag is one of the scope
/// related tags.
bool DIDescriptor::isScope() const {
@@ -213,6 +221,18 @@ bool DIDescriptor::isScope() const {
  return false;
}
+/// isTemplateTypeParameter - Return true if the specified tag is
+/// DW_TAG_template_type_parameter.
+bool DIDescriptor::isTemplateTypeParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
+}
+
+/// isTemplateValueParameter - Return true if the specified tag is
+/// DW_TAG_template_value_parameter.
+bool DIDescriptor::isTemplateValueParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
bool DIDescriptor::isCompileUnit() const {
  return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
@@ -280,6 +300,26 @@ void DIType::replaceAllUsesWith(DIDescriptor &D) {
  }
}
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+  if (!DbgNode)
+    return;
+
+  // Since we use a TrackingVH for the node, it's easy for clients to
+  // manufacture legitimate situations where they want to
+  // replaceAllUsesWith() on something which, due to uniquing, has merged
+  // with the source. We shield clients from this detail by allowing a
+  // value to be replaced with replaceAllUsesWith() itself.
+  if (DbgNode != D) {
+    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    const MDNode *DN = D;
+    const Value *V = cast_or_null<Value>(DN);
+    Node->replaceAllUsesWith(const_cast<Value*>(V));
+    MDNode::deleteTemporary(Node);
+  }
+}
+
/// Verify - Verify that a compile unit is well formed.
bool DICompileUnit::Verify() const {
  if (!DbgNode)
@@ -297,9 +337,13 @@ bool DIType::Verify() const {
    return false;
  if (!getContext().Verify())
    return false;
-
- DICompileUnit CU = getCompileUnit();
- if (!CU.Verify())
+ unsigned Tag = getTag();
+ if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+     Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+     Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type
+     && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type
+     && Tag != dwarf::DW_TAG_enumeration_type
+     && getFilename().empty())
    return false;
  return true;
}
@@ -701,15 +745,13 @@ Constant *DIFactory::GetTagConstant(unsigned TAG) {
/// GetOrCreateArray - Create a descriptor for an array of descriptors.
/// This implicitly uniques the arrays created.
DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
- SmallVector<Value*, 16> Elts;
-
- if (NumTys == 0)
-   Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
- else
-   for (unsigned i = 0; i != NumTys; ++i)
-     Elts.push_back(Tys[i]);
+ if (NumTys == 0) {
+   Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
+   return DIArray(MDNode::get(VMContext, &Null, 1));
+ }
- return DIArray(MDNode::get(VMContext,Elts.data(), Elts.size()));
+ SmallVector<Value *, 16> Elts(Tys, Tys+NumTys);
+ return DIArray(MDNode::get(VMContext, Elts.data(), Elts.size()));
}
/// GetOrCreateSubrange - Create a descriptor for a value range. This
/// implicitly uniques the values returned.
@@ -724,7 +766,14 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
  return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
}
-
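As a concrete illustration of the two helpers above, describing a C array such as int A[10] would pair a [0, 9] subrange with a one-element descriptor array; Factory is an assumed, already-initialized DIFactory:

    // Hypothetical sketch; Factory is a DIFactory created elsewhere.
    DISubrange Range = Factory.GetOrCreateSubrange(0, 9);  // bounds of A[10]
    DIDescriptor Subs[] = { Range };
    DIArray SubsArray = Factory.GetOrCreateArray(Subs, 1);
    // SubsArray would then fill the subscript slot of a DW_TAG_array_type
    // composite built with CreateCompositeType.
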
+/// CreateUnspecifiedParameter - Create an unspecified type descriptor
+/// for the subroutine type.
+DIDescriptor DIFactory::CreateUnspecifiedParameter() {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_unspecified_parameters)
+  };
+  return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
+}
/// CreateCompileUnit - Create a new descriptor for the specified compile
/// unit. Note that this does not unique compile units within the module.
@@ -946,7 +995,6 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
  return DICompositeType(Node);
}
-
/// CreateTemporaryType - Create a temporary forward-declared type.
DIType DIFactory::CreateTemporaryType() {
  // Give the temporary MDNode a tag. It doesn't matter what tag we
@@ -958,6 +1006,19 @@ DIType DIFactory::CreateTemporaryType() {
  return DIType(Node);
}
+/// CreateTemporaryType - Create a temporary forward-declared type.
+DIType DIFactory::CreateTemporaryType(DIFile F) {
+  // Give the temporary MDNode a tag. It doesn't matter what tag we
+  // use here as long as DIType accepts it.
+  Value *Elts[] = {
+    GetTagConstant(DW_TAG_base_type),
+    F.getCompileUnit(),
+    NULL,
+    F
+  };
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  return DIType(Node);
+}
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
@@ -1011,7 +1072,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
                                         bool isDefinition,
                                         unsigned VK, unsigned VIndex,
                                         DIType ContainingType,
-                                        bool isArtificial,
+                                        unsigned Flags,
                                         bool isOptimized,
                                         Function *Fn) {
@@ -1030,7 +1091,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
    ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
    ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
    ContainingType,
-   ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial),
+   ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
    Fn
};
@@ -1064,7 +1125,7 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration){
    DeclNode->getOperand(11), // Virtuality
    DeclNode->getOperand(12), // VIndex
    DeclNode->getOperand(13), // Containing Type
-   DeclNode->getOperand(14), // isArtificial
+   DeclNode->getOperand(14), // Flags
    DeclNode->getOperand(15), // isOptimized
    SPDeclaration.getFunction()
};
@@ -1142,12 +1203,47 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
  return DIGlobalVariable(Node);
}
+/// fixupObjcLikeName - Replace the special characters used in typical
+/// Objective-C names with '.' in the given string.
+static void fixupObjcLikeName(std::string &Str) {
+  for (size_t i = 0, e = Str.size(); i < e; ++i) {
+    char C = Str[i];
+    if (C == '[' || C == ']' || C == ' ' || C == ':' || C == '+' ||
+        C == '(' || C == ')')
+      Str[i] = '.';
+  }
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NamedMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+  SmallString<32> Out;
+  if (FuncName.find('[') == StringRef::npos)
+    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
+                                        .toStringRef(Out));
+  std::string Name = FuncName;
+  fixupObjcLikeName(Name);
+  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
+                                      .toStringRef(Out));
+}
+
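To make the mangling above concrete, a hedged sketch using the two functions just shown (the selector string is hypothetical):

    // "-[Counter increment:]" contains '[', ' ', ':' and ']', all of which
    // fixupObjcLikeName rewrites to '.', so the per-function list ends up
    // keyed "llvm.dbg.lv.-.Counter.increment..".
    NamedMDNode *Locals =
        llvm::getOrInsertFnSpecificMDNode(M, "-[Counter increment:]");
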
+/// getFnSpecificMDNode - Return a NamedMDNode, if available, that is
+/// suitable to hold function specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
+  if (FuncName.find('[') == StringRef::npos)
+    return M.getNamedMetadata(Twine("llvm.dbg.lv.", FuncName));
+  std::string Name = FuncName;
+  fixupObjcLikeName(Name);
+  return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
+}
+
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
                                     StringRef Name,
                                     DIFile F,
                                     unsigned LineNo,
-                                    DIType Ty, bool AlwaysPreserve) {
+                                    DIType Ty, bool AlwaysPreserve,
+                                    unsigned Flags) {
  Value *Elts[] = {
    GetTagConstant(Tag),
    Context,
@@ -1155,8 +1251,9 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
    F,
    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
    Ty,
+   ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
  };
- MDNode *Node = MDNode::get(VMContext, &Elts[0], 6);
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], 7);
  if (AlwaysPreserve) {
    // The optimizer may remove local variables. If there is an interest
    // in preserving variable info in such a situation then stash it in a
@@ -1169,9 +1266,8 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
    if (FName.startswith(StringRef(&One, 1)))
      FName = FName.substr(1);
-   SmallString<32> Out;
-   NamedMDNode *FnLocals =
-     M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FName).toStringRef(Out));
+
+   NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
    FnLocals->addOperand(Node);
  }
  return DIVariable(Node);
@@ -1181,21 +1277,20 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
/// CreateComplexVariable - Create a new descriptor for the specified variable
/// which has a complex address expression for its address.
DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
-                                           const std::string &Name,
-                                           DIFile F,
+                                           StringRef Name, DIFile F,
                                            unsigned LineNo,
-                                           DIType Ty,
-                                           SmallVector<Value *, 9> &addr) {
- SmallVector<Value *, 9> Elts;
+                                           DIType Ty, Value *const *Addr,
+                                           unsigned NumAddr) {
+ SmallVector<Value *, 15> Elts;
  Elts.push_back(GetTagConstant(Tag));
  Elts.push_back(Context);
  Elts.push_back(MDString::get(VMContext, Name));
  Elts.push_back(F);
  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
  Elts.push_back(Ty);
- Elts.insert(Elts.end(), addr.begin(), addr.end());
+ Elts.append(Addr, Addr+NumAddr);
- return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size()));
+ return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
}
@@ -1309,6 +1404,14 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
  return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
}
+// RecordType - Record DIType in a module such that it is not lost even if
+// it is not referenced through debug info anchors.
+void DIFactory::RecordType(DIType T) {
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
+  NMD->addOperand(T);
+}
+
+
//===----------------------------------------------------------------------===//
// DebugInfoFinder implementations.
//===----------------------------------------------------------------------===//
@@ -1472,89 +1575,6 @@ bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
  return true;
}
-/// Find the debug info descriptor corresponding to this global variable.
-static Value *findDbgGlobalDeclare(GlobalVariable *V) { - const Module *M = V->getParent(); - NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"); - if (!NMD) - return 0; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); - if (!DIG.isGlobalVariable()) - continue; - if (DIGlobalVariable(DIG).getGlobal() == V) - return DIG; - } - return 0; -} - -/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any. -/// It looks through pointer casts too. -static const DbgDeclareInst *findDbgDeclare(const Value *V) { - V = V->stripPointerCasts(); - - if (!isa<Instruction>(V) && !isa<Argument>(V)) - return 0; - - const Function *F = NULL; - if (const Instruction *I = dyn_cast<Instruction>(V)) - F = I->getParent()->getParent(); - else if (const Argument *A = dyn_cast<Argument>(V)) - F = A->getParent(); - - for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end(); - BI != BE; ++BI) - if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) - if (DDI->getAddress() == V) - return DDI; - - return 0; -} - -bool llvm::getLocationInfo(const Value *V, std::string &DisplayName, - std::string &Type, unsigned &LineNo, - std::string &File, std::string &Dir) { - DICompileUnit Unit; - DIType TypeD; - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) { - Value *DIGV = findDbgGlobalDeclare(GV); - if (!DIGV) return false; - DIGlobalVariable Var(cast<MDNode>(DIGV)); - - StringRef D = Var.getDisplayName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } else { - const DbgDeclareInst *DDI = findDbgDeclare(V); - if (!DDI) return false; - DIVariable Var(cast<MDNode>(DDI->getVariable())); - - StringRef D = Var.getName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } - - StringRef T = TypeD.getName(); - if (!T.empty()) - Type = T; - StringRef F = Unit.getFilename(); - if (!F.empty()) - File = F; - StringRef D = Unit.getDirectory(); - if (!D.empty()) - Dir = D; - return true; -} - /// getDISubprogram - Find subprogram that is enclosing this scope. 
DISubprogram llvm::getDISubprogram(const MDNode *Scope) { DIDescriptor D(Scope); diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index 9f340942f2cc..cde431459d50 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -19,8 +19,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DomPrinter.h" - -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" #include "llvm/Analysis/PostDominators.h" @@ -86,74 +84,90 @@ namespace { struct DomViewer : public DOTGraphTraitsViewer<DominatorTree, false> { static char ID; - DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){} + DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){ + initializeDomViewerPass(*PassRegistry::getPassRegistry()); + } }; struct DomOnlyViewer : public DOTGraphTraitsViewer<DominatorTree, true> { static char ID; - DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){} + DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){ + initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry()); + } }; struct PostDomViewer : public DOTGraphTraitsViewer<PostDominatorTree, false> { static char ID; PostDomViewer() : - DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){} + DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){ + initializePostDomViewerPass(*PassRegistry::getPassRegistry()); + } }; struct PostDomOnlyViewer : public DOTGraphTraitsViewer<PostDominatorTree, true> { static char ID; PostDomOnlyViewer() : - DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){} + DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){ + initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry()); + } }; } // end anonymous namespace char DomViewer::ID = 0; INITIALIZE_PASS(DomViewer, "view-dom", - "View dominance tree of function", false, false); + "View dominance tree of function", false, false) char DomOnlyViewer::ID = 0; INITIALIZE_PASS(DomOnlyViewer, "view-dom-only", "View dominance tree of function (with no function bodies)", - false, false); + false, false) char PostDomViewer::ID = 0; INITIALIZE_PASS(PostDomViewer, "view-postdom", - "View postdominance tree of function", false, false); + "View postdominance tree of function", false, false) char PostDomOnlyViewer::ID = 0; INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only", "View postdominance tree of function " "(with no function bodies)", - false, false); + false, false) namespace { struct DomPrinter : public DOTGraphTraitsPrinter<DominatorTree, false> { static char ID; - DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {} + DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) { + initializeDomPrinterPass(*PassRegistry::getPassRegistry()); + } }; struct DomOnlyPrinter : public DOTGraphTraitsPrinter<DominatorTree, true> { static char ID; - DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {} + DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) { + initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } }; struct PostDomPrinter : public DOTGraphTraitsPrinter<PostDominatorTree, false> { static char ID; PostDomPrinter() : - DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {} + DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) { + initializePostDomPrinterPass(*PassRegistry::getPassRegistry()); + } }; struct 
PostDomOnlyPrinter : public DOTGraphTraitsPrinter<PostDominatorTree, true> { static char ID; PostDomOnlyPrinter() : - DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {} + DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) { + initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } }; } // end anonymous namespace @@ -162,24 +176,24 @@ struct PostDomOnlyPrinter char DomPrinter::ID = 0; INITIALIZE_PASS(DomPrinter, "dot-dom", "Print dominance tree of function to 'dot' file", - false, false); + false, false) char DomOnlyPrinter::ID = 0; INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only", "Print dominance tree of function to 'dot' file " "(with no function bodies)", - false, false); + false, false) char PostDomPrinter::ID = 0; INITIALIZE_PASS(PostDomPrinter, "dot-postdom", "Print postdominance tree of function to 'dot' file", - false, false); + false, false) char PostDomOnlyPrinter::ID = 0; INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only", "Print postdominance tree of function to 'dot' file " "(with no function bodies)", - false, false); + false, false) // Create methods available outside of this file, to use them // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp new file mode 100644 index 000000000000..6de4e1e1d7de --- /dev/null +++ b/lib/Analysis/DominanceFrontier.cpp @@ -0,0 +1,137 @@ +//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +char DominanceFrontier::ID = 0; +INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier", + "Dominance Frontier Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(DominanceFrontier, "domfrontier", + "Dominance Frontier Construction", true, true) + +namespace { + class DFCalculateWorkObject { + public: + DFCalculateWorkObject(BasicBlock *B, BasicBlock *P, + const DomTreeNode *N, + const DomTreeNode *PN) + : currentBB(B), parentBB(P), Node(N), parentNode(PN) {} + BasicBlock *currentBB; + BasicBlock *parentBB; + const DomTreeNode *Node; + const DomTreeNode *parentNode; + }; +} + +const DominanceFrontier::DomSetType & +DominanceFrontier::calculate(const DominatorTree &DT, + const DomTreeNode *Node) { + BasicBlock *BB = Node->getBlock(); + DomSetType *Result = NULL; + + std::vector<DFCalculateWorkObject> workList; + SmallPtrSet<BasicBlock *, 32> visited; + + workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL)); + do { + DFCalculateWorkObject *currentW = &workList.back(); + assert (currentW && "Missing work object."); + + BasicBlock *currentBB = currentW->currentBB; + BasicBlock *parentBB = currentW->parentBB; + const DomTreeNode *currentNode = currentW->Node; + const DomTreeNode *parentNode = currentW->parentNode; + assert (currentBB && "Invalid work object. Missing current Basic Block"); + assert (currentNode && "Invalid work object. Missing current Node"); + DomSetType &S = Frontiers[currentBB]; + + // Visit each block only once. 
+    if (visited.count(currentBB) == 0) {
+      visited.insert(currentBB);
+
+      // Loop over CFG successors to calculate DFlocal[currentNode]
+      for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+           SI != SE; ++SI) {
+        // Does Node immediately dominate this successor?
+        if (DT[*SI]->getIDom() != currentNode)
+          S.insert(*SI);
+      }
+    }
+
+    // At this point, S is DFlocal. Now we union in DFup's of our children...
+    // Loop through and visit the nodes that Node immediately dominates (Node's
+    // children in the IDomTree)
+    bool visitChild = false;
+    for (DomTreeNode::const_iterator NI = currentNode->begin(),
+         NE = currentNode->end(); NI != NE; ++NI) {
+      DomTreeNode *IDominee = *NI;
+      BasicBlock *childBB = IDominee->getBlock();
+      if (visited.count(childBB) == 0) {
+        workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+                                                 IDominee, currentNode));
+        visitChild = true;
+      }
+    }
+
+    // If all children have already been visited, or there are no children
+    // at all, pop this block from the workList.
+    if (!visitChild) {
+
+      if (!parentBB) {
+        Result = &S;
+        break;
+      }
+
+      DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
+      DomSetType &parentSet = Frontiers[parentBB];
+      for (; CDFI != CDFE; ++CDFI) {
+        if (!DT.properlyDominates(parentNode, DT[*CDFI]))
+          parentSet.insert(*CDFI);
+      }
+      workList.pop_back();
+    }
+
+  } while (!workList.empty());
+
+  return *Result;
+}
+
+void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    OS << " DomFrontier for BB ";
+    if (I->first)
+      WriteAsOperand(OS, I->first, false);
+    else
+      OS << " <<exit node>>";
+    OS << " is:\t";
+
+    const std::set<BasicBlock*> &BBs = I->second;
+
+    for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+         I != E; ++I) {
+      OS << ' ';
+      if (*I)
+        WriteAsOperand(OS, *I, false);
+      else
+        OS << "<<exit node>>";
+    }
+    OS << "\n";
+  }
+}
+
+void DominanceFrontierBase::dump() const {
+  print(dbgs());
+}
+
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 007ad228ae56..8ffef29870ae 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -3,4 +3,5 @@ add_llvm_library(LLVMipa
CallGraphSCCPass.cpp
FindUsedTypes.cpp
GlobalsModRef.cpp
+ IPA.cpp
)
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index b3635283fda5..690c4b4b6f1a 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -43,7 +43,9 @@ class BasicCallGraph : public ModulePass, public CallGraph {
public:
static char ID; // Class identification, replacement for typeinfo
BasicCallGraph() : ModulePass(ID), Root(0),
- ExternalCallingNode(0), CallsExternalNode(0) {}
+ ExternalCallingNode(0), CallsExternalNode(0) {
+   initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
+ }
// runOnModule - Compute the call graph for the specified module.
virtual bool runOnModule(Module &M) {
@@ -171,9 +173,9 @@ private:
} //End anonymous namespace
-static RegisterAnalysisGroup<CallGraph> X("Call Graph");
+INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
- "Basic CallGraph Construction", false, true, true);
+ "Basic CallGraph Construction", false, true, true)
char CallGraph::ID = 0;
char BasicCallGraph::ID = 0;
@@ -228,6 +230,21 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
return F;
}
+/// spliceFunction - Replace the function represented by this node by another.
+/// This does not rescan the body of the function, so it is suitable when +/// splicing the body of the old function to the new while also updating all +/// callers from old to new. +/// +void CallGraph::spliceFunction(const Function *From, const Function *To) { + assert(FunctionMap.count(From) && "No CallGraphNode for function!"); + assert(!FunctionMap.count(To) && + "Pointing CallGraphNode at a function that already exists"); + FunctionMapTy::iterator I = FunctionMap.find(From); + I->second->F = const_cast<Function*>(To); + FunctionMap[To] = I->second; + FunctionMap.erase(I); +} + // getOrInsertFunction - This method is identical to calling operator[], but // it will insert a new CallGraphNode for the specified function if one does // not already exist. @@ -274,7 +291,6 @@ void CallGraphNode::removeCallEdgeFor(CallSite CS) { } } - // removeAnyCallEdgeTo - This method removes any call edges from this node to // the specified callee function. This takes more time to execute than // removeCallEdgeTo, so it should not be used unless necessary. diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index b7a27cb288d9..725ab72f5595 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -582,7 +582,6 @@ namespace { public: static char ID; - PrintCallGraphPass() : CallGraphSCCPass(ID), Out(dbgs()) {} PrintCallGraphPass(const std::string &B, raw_ostream &o) : CallGraphSCCPass(ID), Banner(B), Out(o) {} diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp index 8eed9d6f68bc..06ae34cfd989 100644 --- a/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/lib/Analysis/IPA/FindUsedTypes.cpp @@ -24,7 +24,7 @@ using namespace llvm; char FindUsedTypes::ID = 0; INITIALIZE_PASS(FindUsedTypes, "print-used-types", - "Find Used Types", false, true); + "Find Used Types", false, true) // IncorporateType - Incorporate one type and all of its subtypes into the // collection of used types. diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 6759b0afdce3..116aaf418ea0 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" @@ -88,7 +89,9 @@ namespace { public: static char ID; - GlobalsModRef() : ModulePass(ID) {} + GlobalsModRef() : ModulePass(ID) { + initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M) { InitializeAliasAnalysis(this); // set up super class @@ -106,10 +109,9 @@ namespace { //------------------------------------------------ // Implement the AliasAnalysis API // - AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size); + AliasResult alias(const Location &LocA, const Location &LocB); ModRefResult getModRefInfo(ImmutableCallSite CS, - const Value *P, unsigned Size); + const Location &Loc); ModRefResult getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { return AliasAnalysis::getModRefInfo(CS1, CS2); @@ -119,32 +121,38 @@ namespace { /// called from the specified call site. The call site may be null in which /// case the most generic behavior of this function should be returned. 
ModRefBehavior getModRefBehavior(const Function *F) { + ModRefBehavior Min = UnknownModRefBehavior; + if (FunctionRecord *FR = getFunctionInfo(F)) { if (FR->FunctionEffect == 0) - return DoesNotAccessMemory; + Min = DoesNotAccessMemory; else if ((FR->FunctionEffect & Mod) == 0) - return OnlyReadsMemory; + Min = OnlyReadsMemory; } - return AliasAnalysis::getModRefBehavior(F); + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); } /// getModRefBehavior - Return the behavior of the specified function if /// called from the specified call site. The call site may be null in which /// case the most generic behavior of this function should be returned. ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { - const Function* F = CS.getCalledFunction(); - if (!F) return AliasAnalysis::getModRefBehavior(CS); - if (FunctionRecord *FR = getFunctionInfo(F)) { - if (FR->FunctionEffect == 0) - return DoesNotAccessMemory; - else if ((FR->FunctionEffect & Mod) == 0) - return OnlyReadsMemory; - } - return AliasAnalysis::getModRefBehavior(CS); + ModRefBehavior Min = UnknownModRefBehavior; + + if (const Function* F = CS.getCalledFunction()) + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + Min = DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + Min = OnlyReadsMemory; + } + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); } virtual void deleteValue(Value *V); virtual void copyValue(Value *From, Value *To); + virtual void addEscapingUse(Use &U); /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it @@ -177,9 +185,13 @@ namespace { } char GlobalsModRef::ID = 0; -INITIALIZE_AG_PASS(GlobalsModRef, AliasAnalysis, +INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", "Simple mod/ref analysis for globals", - false, true, false); + false, true, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, + "globalsmodref-aa", "Simple mod/ref analysis for globals", + false, true, false) Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } @@ -314,7 +326,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { continue; // Check the value being stored. - Value *Ptr = SI->getOperand(0)->getUnderlyingObject(); + Value *Ptr = GetUnderlyingObject(SI->getOperand(0)); if (isMalloc(Ptr)) { // Okay, easy case. @@ -476,11 +488,11 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { /// other is some random pointer, we know there cannot be an alias, because the /// address of the global isn't taken. AliasAnalysis::AliasResult -GlobalsModRef::alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { +GlobalsModRef::alias(const Location &LocA, + const Location &LocB) { // Get the base object these pointers point to. - const Value *UV1 = V1->getUnderlyingObject(); - const Value *UV2 = V2->getUnderlyingObject(); + const Value *UV1 = GetUnderlyingObject(LocA.Ptr); + const Value *UV2 = GetUnderlyingObject(LocB.Ptr); // If either of the underlying values is a global, they may be non-addr-taken // globals, which we can answer queries about. 
@@ -528,17 +540,18 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size,
if ((GV1 || GV2) && GV1 != GV2)
return NoAlias;
- return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+ return AliasAnalysis::alias(LocA, LocB);
}
AliasAnalysis::ModRefResult
GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
-                            const Value *P, unsigned Size) {
+                            const Location &Loc) {
unsigned Known = ModRef;
// If we are asking for mod/ref info of a direct call with a pointer to a
// global we are tracking, return information if we have it.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject()))
+ if (const GlobalValue *GV =
+       dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
if (GV->hasLocalLinkage())
if (const Function *F = CS.getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
@@ -547,7 +560,7 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
if (Known == NoModRef)
return NoModRef; // No need to query other mod/ref analyses
- return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, P, Size));
+ return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
}
@@ -584,3 +597,13 @@ void GlobalsModRef::deleteValue(Value *V) {
void GlobalsModRef::copyValue(Value *From, Value *To) {
AliasAnalysis::copyValue(From, To);
}
+
+void GlobalsModRef::addEscapingUse(Use &U) {
+  // For the purposes of this analysis, it is conservatively correct to treat
+  // a newly escaping value equivalently to a deleted one. We could perhaps
+  // be more precise by processing the new use and attempting to update our
+  // saved analysis results to accommodate it.
+  deleteValue(U);
+
+  AliasAnalysis::addEscapingUse(U);
+}
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
new file mode 100644
index 000000000000..0ba2e04c6302
--- /dev/null
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -0,0 +1,29 @@
+//===-- IPA.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the IPA library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeIPA - Initialize all passes linked into the IPA library.
+void llvm::initializeIPA(PassRegistry &Registry) { + initializeBasicCallGraphPass(Registry); + initializeCallGraphAnalysisGroup(Registry); + initializeFindUsedTypesPass(Registry); + initializeGlobalsModRefPass(Registry); +} + +void LLVMInitializeIPA(LLVMPassRegistryRef R) { + initializeIPA(*unwrap(R)); +} diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index cdf667ad6eed..c8382186df3a 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Assembly/Writer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -28,7 +29,13 @@ using namespace llvm; char IVUsers::ID = 0; -INITIALIZE_PASS(IVUsers, "iv-users", "Induction Variable Users", false, true); +INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", + "Induction Variable Users", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_END(IVUsers, "iv-users", + "Induction Variable Users", false, true) Pass *llvm::createIVUsersPass() { return new IVUsers(); @@ -143,7 +150,8 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { } IVUsers::IVUsers() - : LoopPass(ID) { + : LoopPass(ID) { + initializeIVUsersPass(*PassRegistry::getPassRegistry()); } void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 3e550f35c255..47f91cfc3bed 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -16,97 +16,8 @@ #include "llvm/CallingConv.h" #include "llvm/IntrinsicInst.h" #include "llvm/ADT/SmallPtrSet.h" -using namespace llvm; - -// CountCodeReductionForConstant - Figure out an approximation for how many -// instructions will be constant folded if the specified value is constant. -// -unsigned InlineCostAnalyzer::FunctionInfo:: -CountCodeReductionForConstant(Value *V) { - unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - User *U = *UI; - if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { - // We will be able to eliminate all but one of the successors. - const TerminatorInst &TI = cast<TerminatorInst>(*U); - const unsigned NumSucc = TI.getNumSuccessors(); - unsigned Instrs = 0; - for (unsigned I = 0; I != NumSucc; ++I) - Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)]; - // We don't know which blocks will be eliminated, so use the average size. - Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; - } else if (CallInst *CI = dyn_cast<CallInst>(U)) { - // Turning an indirect call into a direct call is a BIG win - if (CI->getCalledValue() == V) - Reduction += InlineConstants::IndirectCallBonus; - } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { - // Turning an indirect call into a direct call is a BIG win - if (II->getCalledValue() == V) - Reduction += InlineConstants::IndirectCallBonus; - } else { - // Figure out if this instruction will be removed due to simple constant - // propagation. - Instruction &Inst = cast<Instruction>(*U); - - // We can't constant propagate instructions which have effects or - // read memory. - // - // FIXME: It would be nice to capture the fact that a load from a - // pointer-to-constant-global is actually a *really* good thing to zap. 
- // Unfortunately, we don't know the pointer that may get propagated here, - // so we can't make this decision. - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || - isa<AllocaInst>(Inst)) - continue; - - bool AllOperandsConstant = true; - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) - if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { - AllOperandsConstant = false; - break; - } - if (AllOperandsConstant) { - // We will get to remove this instruction... - Reduction += InlineConstants::InstrCost; - - // And any other instructions that use it which become constants - // themselves. - Reduction += CountCodeReductionForConstant(&Inst); - } - } - } - return Reduction; -} - -// CountCodeReductionForAlloca - Figure out an approximation of how much smaller -// the function will be if it is inlined into a context where an argument -// becomes an alloca. -// -unsigned InlineCostAnalyzer::FunctionInfo:: - CountCodeReductionForAlloca(Value *V) { - if (!V->getType()->isPointerTy()) return 0; // Not a pointer - unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *I = cast<Instruction>(*UI); - if (isa<LoadInst>(I) || isa<StoreInst>(I)) - Reduction += InlineConstants::InstrCost; - else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - // If the GEP has variable indices, we won't be able to do much with it. - if (GEP->hasAllConstantIndices()) - Reduction += CountCodeReductionForAlloca(GEP); - } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { - // Track pointer through bitcasts. - Reduction += CountCodeReductionForAlloca(BCI); - } else { - // If there is some other strange instruction, we're not going to be able - // to do much if we inline this. - return 0; - } - } - - return Reduction; -} +using namespace llvm; /// callIsSmall - If a call is likely to lower to a single target instruction, /// or is otherwise deemed small return true. @@ -160,6 +71,12 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { // variables as volatile if they are live across a setjmp call, and they // probably won't do this in callers. if (const Function *F = CS.getCalledFunction()) { + // If a function is both internal and has a single use, then it is + // extremely likely to get inlined in the future (it was probably + // exposed by an interleaved devirtualization pass). + if (F->hasInternalLinkage() && F->hasOneUse()) + ++NumInlineCandidates; + if (F->isDeclaration() && (F->getName() == "setjmp" || F->getName() == "_setjmp")) callsSetJmp = true; @@ -226,6 +143,86 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; } +// CountCodeReductionForConstant - Figure out an approximation for how many +// instructions will be constant folded if the specified value is constant. +// +unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + User *U = *UI; + if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { + // We will be able to eliminate all but one of the successors. + const TerminatorInst &TI = cast<TerminatorInst>(*U); + const unsigned NumSucc = TI.getNumSuccessors(); + unsigned Instrs = 0; + for (unsigned I = 0; I != NumSucc; ++I) + Instrs += NumBBInsts[TI.getSuccessor(I)]; + // We don't know which blocks will be eliminated, so use the average size. 
+ Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; + } else { + // Figure out if this instruction will be removed due to simple constant + // propagation. + Instruction &Inst = cast<Instruction>(*U); + + // We can't constant propagate instructions which have effects or + // read memory. + // + // FIXME: It would be nice to capture the fact that a load from a + // pointer-to-constant-global is actually a *really* good thing to zap. + // Unfortunately, we don't know the pointer that may get propagated here, + // so we can't make this decision. + if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || + isa<AllocaInst>(Inst)) + continue; + + bool AllOperandsConstant = true; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) + if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { + AllOperandsConstant = false; + break; + } + + if (AllOperandsConstant) { + // We will get to remove this instruction... + Reduction += InlineConstants::InstrCost; + + // And any other instructions that use it which become constants + // themselves. + Reduction += CountCodeReductionForConstant(&Inst); + } + } + } + return Reduction; +} + +// CountCodeReductionForAlloca - Figure out an approximation of how much smaller +// the function will be if it is inlined into a context where an argument +// becomes an alloca. +// +unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { + if (!V->getType()->isPointerTy()) return 0; // Not a pointer + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + Instruction *I = cast<Instruction>(*UI); + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + Reduction += InlineConstants::InstrCost; + else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If the GEP has variable indices, we won't be able to do much with it. + if (GEP->hasAllConstantIndices()) + Reduction += CountCodeReductionForAlloca(GEP); + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + // Track pointer through bitcasts. + Reduction += CountCodeReductionForAlloca(BCI); + } else { + // If there is some other strange instruction, we're not going to be able + // to do much if we inline this. + return 0; + } + } + + return Reduction; +} + /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. void CodeMetrics::analyzeFunction(Function *F) { @@ -245,76 +242,246 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { if (Metrics.NumRets==1) --Metrics.NumInsts; - // Don't bother calculating argument weights if we are never going to inline - // the function anyway. - if (NeverInline()) - return; - // Check out all of the arguments to the function, figuring out how much // code can be eliminated if one of the arguments is a constant. 
ArgumentWeights.reserve(F->arg_size()); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I), - CountCodeReductionForAlloca(I))); + ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I), + Metrics.CountCodeReductionForAlloca(I))); } /// NeverInline - returns true if the function should never be inlined into /// any caller -bool InlineCostAnalyzer::FunctionInfo::NeverInline() -{ +bool InlineCostAnalyzer::FunctionInfo::NeverInline() { return (Metrics.callsSetJmp || Metrics.isRecursive || Metrics.containsIndirectBr); +} +// getSpecializationBonus - The heuristic used to determine the per-call +// performance boost for using a specialization of Callee with argument +// specializedArgNo replaced by a constant. +int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, + SmallVectorImpl<unsigned> &SpecializedArgNos) +{ + if (Callee->mayBeOverridden()) + return 0; + + int Bonus = 0; + // If this function uses the coldcc calling convention, prefer not to + // specialize it. + if (Callee->getCallingConv() == CallingConv::Cold) + Bonus -= InlineConstants::ColdccPenalty; + + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + unsigned ArgNo = 0; + unsigned i = 0; + for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end(); + I != E; ++I, ++ArgNo) + if (ArgNo == SpecializedArgNos[i]) { + ++i; + Bonus += CountBonusForConstant(I); + } + + // Calls usually take a long time, so they make the specialization gain + // smaller. + Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; + + return Bonus; } -// getInlineCost - The heuristic used to determine if we should inline the -// function call or not. -// -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - SmallPtrSet<const Function*, 16> &NeverInline) { - return getInlineCost(CS, CS.getCalledFunction(), NeverInline); + +// ConstantFunctionBonus - Figure out how much of a bonus we can get for +// possibly devirtualizing a function. We'll subtract the size of the function +// we may wish to inline from the indirect call bonus providing a limit on +// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize +// inlining because we decide we don't want to give a bonus for +// devirtualizing. +int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) { + + // This could just be NULL. + if (!C) return 0; + + Function *F = dyn_cast<Function>(C); + if (!F) return 0; + + int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); + return (Bonus > 0) ? 0 : Bonus; } -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - Function *Callee, - SmallPtrSet<const Function*, 16> &NeverInline) { - Instruction *TheCall = CS.getInstruction(); - Function *Caller = TheCall->getParent()->getParent(); - bool isDirectCall = CS.getCalledFunction() == Callee; +// CountBonusForConstant - Figure out an approximation for how much per-call +// performance boost we can expect if the specified value is constant. 
+int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
+  unsigned Bonus = 0;
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+    User *U = *UI;
+    if (CallInst *CI = dyn_cast<CallInst>(U)) {
+      // Turning an indirect call into a direct call is a BIG win
+      if (CI->getCalledValue() == V)
+        Bonus += ConstantFunctionBonus(CallSite(CI), C);
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+      // Turning an indirect call into a direct call is a BIG win
+      if (II->getCalledValue() == V)
+        Bonus += ConstantFunctionBonus(CallSite(II), C);
+    }
+    // FIXME: Eliminating conditional branches and switches should
+    // also yield a per-call performance boost.
+    else {
+      // Figure out the bonuses that will accrue due to simple constant
+      // propagation.
+      Instruction &Inst = cast<Instruction>(*U);
 
-  // Don't inline functions which can be redefined at link-time to mean
-  // something else.  Don't inline functions marked noinline or call sites
-  // marked noinline.
-  if (Callee->mayBeOverridden() ||
-      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
-      CS.isNoInline())
-    return llvm::InlineCost::getNever();
+      // We can't constant propagate instructions which have effects or
+      // read memory.
+      //
+      // FIXME: It would be nice to capture the fact that a load from a
+      // pointer-to-constant-global is actually a *really* good thing to zap.
+      // Unfortunately, we don't know the pointer that may get propagated here,
+      // so we can't make this decision.
+      if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+          isa<AllocaInst>(Inst))
+        continue;
 
+      bool AllOperandsConstant = true;
+      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+          AllOperandsConstant = false;
+          break;
+        }
+
+      if (AllOperandsConstant)
+        Bonus += CountBonusForConstant(&Inst);
+    }
+  }
+
+  return Bonus;
+}
+
+int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
+
   // InlineCost - This value measures how good of an inline candidate this call
   // site is to inline. A lower inline cost makes it more likely for the call
   // to be inlined. This value may go negative.
   //
   int InlineCost = 0;
 
+  // Compute any size reductions we can expect due to arguments being passed
+  // into the function.
+  //
+  unsigned ArgNo = 0;
+  CallSite::arg_iterator I = CS.arg_begin();
+  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+       FI != FE; ++I, ++FI, ++ArgNo) {
+
+    // If an alloca is passed in, inlining this function is likely to allow
+    // significant future optimization possibilities (like scalar promotion, and
+    // scalarization), so encourage the inlining of the function.
+    //
+    if (isa<AllocaInst>(I))
+      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
+
+    // If this is a constant being passed into the function, use the argument
+    // weights calculated for the callee to determine how much will be folded
+    // away with this information.
+    else if (isa<Constant>(I))
+      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+  }
+
+  // Each argument passed in has a cost at both the caller and the callee
+  // sides. Measurements show that each argument costs about the same as an
+  // instruction.
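// ---- Editorial aside (illustration only, not part of the r126079 import) ----
// getInlineSize flattened into plain arithmetic, including the per-argument
// term subtracted just below. InstrCost = 5 is stated in the comments; the
// CallPenalty of 25 and every metric here are assumed values for the example.
#include <cstdio>

int main() {
  const int InstrCost = 5, CallPenalty = 25;
  int NumInsts = 40, NumCalls = 2;  // hypothetical callee metrics
  int NumArgs = 3;                  // arguments at the call site
  int ArgWeights = 35;              // folding credit for constant/alloca args

  int Cost = 0;
  Cost -= ArgWeights;               // argument-driven folding:  -35
  Cost -= NumArgs * InstrCost;      // per-argument savings:     -15
  Cost += NumCalls * CallPenalty;   // calls shrink the gain:    +50
  Cost += NumInsts * InstrCost;     // raw callee size:         +200
  std::printf("inline size = %d\n", Cost);  // 200, weighed against a threshold
  return 0;
}
// ---- end editorial aside ----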
+ InlineCost -= (CS.arg_size() * InlineConstants::InstrCost); + + // Now that we have considered all of the factors that make the call site more + // likely to be inlined, look at factors that make us not want to inline it. + + // Calls usually take a long time, so they make the inlining gain smaller. + InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; + + // Look at the size of the callee. Each instruction counts as 5. + InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; + + return InlineCost; +} + +int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + + bool isDirectCall = CS.getCalledFunction() == Callee; + Instruction *TheCall = CS.getInstruction(); + int Bonus = 0; + // If there is only one call of the function, and it has internal linkage, // make it almost guaranteed to be inlined. // if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) - InlineCost += InlineConstants::LastCallToStaticBonus; - - // If this function uses the coldcc calling convention, prefer not to inline - // it. - if (Callee->getCallingConv() == CallingConv::Cold) - InlineCost += InlineConstants::ColdccPenalty; + Bonus += InlineConstants::LastCallToStaticBonus; // If the instruction after the call, or if the normal destination of the // invoke is an unreachable instruction, the function is noreturn. As such, // there is little point in inlining this. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { if (isa<UnreachableInst>(II->getNormalDest()->begin())) - InlineCost += InlineConstants::NoreturnPenalty; + Bonus += InlineConstants::NoreturnPenalty; } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) - InlineCost += InlineConstants::NoreturnPenalty; + Bonus += InlineConstants::NoreturnPenalty; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (Callee->getCallingConv() == CallingConv::Cold) + Bonus += InlineConstants::ColdccPenalty; + // Add to the inline quality for properties that make the call valuable to + // inline. This includes factors that indicate that the result of inlining + // the function will be optimizable. Currently this just looks at arguments + // passed into the function. + // + CallSite::arg_iterator I = CS.arg_begin(); + for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); + FI != FE; ++I, ++FI) + // Compute any constant bonus due to inlining we want to give here. + if (isa<Constant>(I)) + Bonus += CountBonusForConstant(FI, cast<Constant>(I)); + + return Bonus; +} + +// getInlineCost - The heuristic used to determine if we should inline the +// function call or not. +// +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, + SmallPtrSet<const Function*, 16> &NeverInline) { + return getInlineCost(CS, CS.getCalledFunction(), NeverInline); +} + +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, + Function *Callee, + SmallPtrSet<const Function*, 16> &NeverInline) { + Instruction *TheCall = CS.getInstruction(); + Function *Caller = TheCall->getParent()->getParent(); + + // Don't inline functions which can be redefined at link-time to mean + // something else. Don't inline functions marked noinline or call sites + // marked noinline. 
+  if (Callee->mayBeOverridden() ||
+      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+      CS.isNoInline())
+    return llvm::InlineCost::getNever();
+
   // Get information about the callee.
   FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
@@ -353,46 +520,45 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
     return InlineCost::getNever();
   }
 
-  // Add to the inline quality for properties that make the call valuable to
-  // inline.  This includes factors that indicate that the result of inlining
-  // the function will be optimizable.  Currently this just looks at arguments
-  // passed into the function.
+  // InlineCost - This value measures how good of an inline candidate this call
+  // site is to inline. A lower inline cost makes it more likely for the call
+  // to be inlined. This value may go negative due to the fact that bonuses
+  // are negative numbers.
   //
-  unsigned ArgNo = 0;
-  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
-       I != E; ++I, ++ArgNo) {
-    // Each argument passed in has a cost at both the caller and the callee
-    // sides.  Measurements show that each argument costs about the same as an
-    // instruction.
-    InlineCost -= InlineConstants::InstrCost;
+  int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
+  return llvm::InlineCost::get(InlineCost);
+}
 
-    // If an alloca is passed in, inlining this function is likely to allow
-    // significant future optimization possibilities (like scalar promotion, and
-    // scalarization), so encourage the inlining of the function.
-    //
-    if (isa<AllocaInst>(I)) {
-      if (ArgNo < CalleeFI->ArgumentWeights.size())
-        InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
-
-      // If this is a constant being passed into the function, use the argument
-      // weights calculated for the callee to determine how much will be folded
-      // away with this information.
-    } else if (isa<Constant>(I)) {
-      if (ArgNo < CalleeFI->ArgumentWeights.size())
-        InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
-    }
-  }
+// getSpecializationCost - The heuristic used to determine the code-size
+// impact of creating a specialized version of Callee with argument
+// SpecializedArgNo replaced by a constant.
+InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
+               SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+  // Don't specialize functions which can be redefined at link-time to mean
+  // something else.
+  if (Callee->mayBeOverridden())
+    return llvm::InlineCost::getNever();
 
-  // Now that we have considered all of the factors that make the call site more
-  // likely to be inlined, look at factors that make us not want to inline it.
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
 
-  // Calls usually take a long time, so they make the inlining gain smaller.
-  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+  int Cost = 0;
+
+  // Look at the original size of the callee. Each instruction counts as 5.
+  Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
 
-  // Look at the size of the callee. Each instruction counts as 5.
-  InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+  // Offset that with the amount of code that can be constant-folded
+  // away with the given arguments replaced by constants.
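// ---- Editorial aside (illustration only, not part of the r126079 import) ----
// getSpecializationCost in miniature: start from the callee's raw size, then
// subtract the constant-folding credit of each specialized argument, which is
// exactly what the loop below does with ArgumentWeights. Numbers are made up.
#include <cstdio>
#include <vector>

int main() {
  const int InstrCost = 5;                       // "each instruction counts as 5"
  int NumInsts = 60;                             // hypothetical callee size
  std::vector<int> ConstantWeight = {40, 5, 0};  // per-argument folding credit
  std::vector<unsigned> SpecializedArgNos = {0, 1};

  int Cost = NumInsts * InstrCost;               // original size: 300
  for (unsigned ArgNo : SpecializedArgNos)
    Cost -= ConstantWeight[ArgNo];               // folding offsets the copy
  std::printf("specialization cost = %d\n", Cost);  // 300 - 45 = 255
  return 0;
}
// ---- end editorial aside ----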
+ for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(), + ae = SpecializedArgNos.end(); an != ae; ++an) + Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight; - return llvm::InlineCost::get(InlineCost); + return llvm::InlineCost::get(Cost); } // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index dcbcac005a2f..3b385d26ba3c 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -51,7 +51,9 @@ namespace { } public: static char ID; // Pass identification, replacement for typeid - InstCount() : FunctionPass(ID) {} + InstCount() : FunctionPass(ID) { + initializeInstCountPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); @@ -65,7 +67,7 @@ namespace { char InstCount::ID = 0; INITIALIZE_PASS(InstCount, "instcount", - "Counts the various types of Instructions", false, true); + "Counts the various types of Instructions", false, true) FunctionPass *llvm::createInstCountPass() { return new InstCount(); } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 24cd3433a2ca..a2f9862383fd 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -8,179 +8,1267 @@ //===----------------------------------------------------------------------===// // // This file implements routines for folding instructions into simpler forms -// that do not require creating new instructions. For example, this does -// constant folding, and can handle identities like (X&0)->0. +// that do not require creating new instructions. This does constant folding +// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either +// returning a constant ("and i32 %x, 0" -> "0") or an already existing value +// ("and i32 %x, %x" -> "%x"). All operands are assumed to have already been +// simplified: This is usually true and assuming it simplifies the logic (if +// they have not been simplified then results are correct but maybe suboptimal). // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "instsimplify" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Support/ValueHandle.h" -#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Target/TargetData.h" using namespace llvm; using namespace llvm::PatternMatch; +enum { RecursionLimit = 3 }; + +STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumFactor , "Number of factorizations"); +STATISTIC(NumReassoc, "Number of reassociations"); + +static Value *SimplifyAndInst(Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); +static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); +static Value *SimplifyOrInst(Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); +static Value *SimplifyXorInst(Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); + +/// ValueDominatesPHI - Does the given value dominate the specified phi node? 
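// ---- Editorial aside (illustration only, not part of the r126079 import) ----
// The flavor of fold the rewritten file comment describes, modeled on plain
// integers: return a constant or an already existing value, never a new
// instruction. simplifyAnd and its parameters are toy stand-ins, not LLVM API.
#include <cassert>
#include <optional>

std::optional<int> simplifyAnd(int LHS, int RHS, bool SameValue) {
  if (RHS == 0)  return 0;      // "and i32 %x, 0"  -> "0"
  if (RHS == -1) return LHS;    // "and i32 %x, -1" -> "%x"
  if (SameValue) return LHS;    // "and i32 %x, %x" -> "%x"
  return std::nullopt;          // no simplification found
}

int main() {
  assert(*simplifyAnd(7, 0, false) == 0);
  assert(*simplifyAnd(7, -1, false) == 7);
  assert(!simplifyAnd(7, 3, false));  // nothing to fold
}
// ---- end editorial aside ----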
+static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + // Arguments and constants dominate all instructions. + return true; + + // If we have a DominatorTree then do a precise test. + if (DT) + return DT->dominates(I, P); + + // Otherwise, if the instruction is in the entry block, and is not an invoke, + // then it obviously dominates all phi nodes. + if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() && + !isa<InvokeInst>(I)) + return true; + + return false; +} + +/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning +/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is +/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS. +/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". +/// Returns the simplified value, or null if no simplification was performed. +static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, + unsigned OpcToExpand, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + // Check whether the expression has the form "(A op' B) op C". + if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS)) + if (Op0->getOpcode() == OpcodeToExpand) { + // It does! Try turning it into "(A op C) op' (B op C)". + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; + // Do "A op C" and "B op C" both simplify? + if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) { + // They do! Return "L op' R" if it simplifies or is already available. + // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. + if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand) + && L == B && R == A)) { + ++NumExpand; + return LHS; + } + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT, + MaxRecurse)) { + ++NumExpand; + return V; + } + } + } + + // Check whether the expression has the form "A op (B op' C)". + if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS)) + if (Op1->getOpcode() == OpcodeToExpand) { + // It does! Try turning it into "(A op B) op' (A op C)". + Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); + // Do "A op B" and "A op C" both simplify? + if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) { + // They do! Return "L op' R" if it simplifies or is already available. + // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. + if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand) + && L == C && R == B)) { + ++NumExpand; + return RHS; + } + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT, + MaxRecurse)) { + ++NumExpand; + return V; + } + } + } + + return 0; +} + +/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term +/// using the operation OpCodeToExtract. For example, when Opcode is Add and +/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)". +/// Returns the simplified value, or null if no simplification was performed. 
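// ---- Editorial check (illustration only, not part of the r126079 import) ----
// ExpandBinOp above, and FactorizeBinOp and SimplifyAssociativeBinOp below,
// only commit a rewrite when the pieces themselves simplify, so they never
// grow code. Brute-force checks of three identities they rely on:
// distributivity of mul over add, the And-over-Or factorization
// (X | Y) & (X | ~Y) == X, and the reassociation ((X & Y) & X) == (X & Y).
#include <cassert>
#include <cstdint>

int main() {
  for (int a = -8; a <= 8; ++a)
    for (int b = -8; b <= 8; ++b)
      for (int c = -8; c <= 8; ++c)
        assert(a * (b + c) == a * b + a * c);      // expand A*(B+C)

  for (int x = 0; x < 256; ++x)
    for (int y = 0; y < 256; ++y) {
      uint8_t X = uint8_t(x), Y = uint8_t(y);
      assert(((X | Y) & (X | uint8_t(~Y))) == X);  // factor out X; Y & ~Y == 0
      assert(((X & Y) & X) == (X & Y));            // reassociate to (X & X) & Y
    }
  return 0;
}
// ---- end editorial check ----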
+static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, + unsigned OpcToExtract, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract; + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + + if (!Op0 || Op0->getOpcode() != OpcodeToExtract || + !Op1 || Op1->getOpcode() != OpcodeToExtract) + return 0; + + // The expression has the form "(A op' B) op (C op' D)". + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); + Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); + + // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)". + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) { + Value *DD = A == C ? D : C; + // Form "A op' (B op DD)" if it simplifies completely. + // Does "B op DD" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) { + // It does! Return "A op' V" if it simplifies or is already available. + // If V equals B then "A op' V" is just the LHS. If V equals DD then + // "A op' V" is just the RHS. + if (V == B || V == DD) { + ++NumFactor; + return V == B ? LHS : RHS; + } + // Otherwise return "A op' V" if it simplifies. + if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) { + ++NumFactor; + return W; + } + } + } + + // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)". + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) { + Value *CC = B == D ? C : D; + // Form "(A op CC) op' B" if it simplifies completely.. + // Does "A op CC" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) { + // It does! Return "V op' B" if it simplifies or is already available. + // If V equals A then "V op' B" is just the LHS. If V equals CC then + // "V op' B" is just the RHS. + if (V == A || V == CC) { + ++NumFactor; + return V == A ? LHS : RHS; + } + // Otherwise return "V op' B" if it simplifies. + if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) { + ++NumFactor; + return W; + } + } + } + + return 0; +} + +/// SimplifyAssociativeBinOp - Generic simplifications for associative binary +/// operations. Returns the simpler value, or null if none was found. +static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, + const TargetData *TD, + const DominatorTree *DT, + unsigned MaxRecurse) { + Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc; + assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); + + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + + // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = RHS; + + // Does "B op C" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) { + // It does! 
Return "A op V" if it simplifies or is already available. + // If V equals B then "A op V" is just the LHS. + if (V == B) return LHS; + // Otherwise return "A op V" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = LHS; + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "A op B" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) { + // It does! Return "V op C" if it simplifies or is already available. + // If V equals B then "V op C" is just the RHS. + if (V == B) return RHS; + // Otherwise return "V op C" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + // The remaining transforms require commutativity as well as associativity. + if (!Instruction::isCommutative(Opcode)) + return 0; + + // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = RHS; + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) { + // It does! Return "V op B" if it simplifies or is already available. + // If V equals A then "V op B" is just the LHS. + if (V == A) return LHS; + // Otherwise return "V op B" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = LHS; + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) { + // It does! Return "B op V" if it simplifies or is already available. + // If V equals C then "B op V" is just the RHS. + if (V == C) return RHS; + // Otherwise return "B op V" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + return 0; +} + +/// ThreadBinOpOverSelect - In the case of a binary operation with a select +/// instruction as an operand, try to simplify the binop by seeing whether +/// evaluating it on both branches of the select results in the same value. +/// Returns the common value if so, otherwise returns null. +static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, + const TargetData *TD, + const DominatorTree *DT, + unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + SelectInst *SI; + if (isa<SelectInst>(LHS)) { + SI = cast<SelectInst>(LHS); + } else { + assert(isa<SelectInst>(RHS) && "No select instruction operand!"); + SI = cast<SelectInst>(RHS); + } + + // Evaluate the BinOp on the true and false branches of the select. + Value *TV; + Value *FV; + if (SI == LHS) { + TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse); + FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse); + } else { + TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse); + FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse); + } + + // If they simplified to the same value, then return the common value. 
+ // If they both failed to simplify then return null. + if (TV == FV) + return TV; + + // If one branch simplified to undef, return the other one. + if (TV && isa<UndefValue>(TV)) + return FV; + if (FV && isa<UndefValue>(FV)) + return TV; + + // If applying the operation did not change the true and false select values, + // then the result of the binop is the select itself. + if (TV == SI->getTrueValue() && FV == SI->getFalseValue()) + return SI; + + // If one branch simplified and the other did not, and the simplified + // value is equal to the unsimplified one, return the simplified value. + // For example, select (cond, X, X & Z) & Z -> X & Z. + if ((FV && !TV) || (TV && !FV)) { + // Check that the simplified value has the form "X op Y" where "op" is the + // same as the original operation. + Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV); + if (Simplified && Simplified->getOpcode() == Opcode) { + // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS". + // We already know that "op" is the same as for the simplified value. See + // if the operands match too. If so, return the simplified value. + Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue(); + Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS; + Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch; + if (Simplified->getOperand(0) == UnsimplifiedLHS && + Simplified->getOperand(1) == UnsimplifiedRHS) + return Simplified; + if (Simplified->isCommutative() && + Simplified->getOperand(1) == UnsimplifiedLHS && + Simplified->getOperand(0) == UnsimplifiedRHS) + return Simplified; + } + } + + return 0; +} + +/// ThreadCmpOverSelect - In the case of a comparison with a select instruction, +/// try to simplify the comparison by seeing whether both branches of the select +/// result in the same value. Returns the common value if so, otherwise returns +/// null. +static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, const TargetData *TD, + const DominatorTree *DT, + unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + // Make sure the select is on the LHS. + if (!isa<SelectInst>(LHS)) { + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!"); + SelectInst *SI = cast<SelectInst>(LHS); + + // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. + // Does "cmp TV, RHS" simplify? + if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT, + MaxRecurse)) { + // It does! Does "cmp FV, RHS" simplify? + if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT, + MaxRecurse)) { + // It does! If they simplified to the same value, then use it as the + // result of the original comparison. + if (TCmp == FCmp) + return TCmp; + Value *Cond = SI->getCondition(); + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". 
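// ---- Editorial check (illustration only, not part of the r126079 import) ----
// The three boolean identities ThreadCmpOverSelect leans on when the arms
// fold to constants: if the false arm's compare is false the result is
// Cond && TCmp; if the true arm's compare is true it is Cond || FCmp; and if
// the arms fold to true/false respectively it is !Cond. Exhaustive check.
#include <cassert>

int main() {
  for (int Cond = 0; Cond <= 1; ++Cond)
    for (int TCmp = 0; TCmp <= 1; ++TCmp)
      for (int FCmp = 0; FCmp <= 1; ++FCmp) {
        int Sel = Cond ? TCmp : FCmp;  // cmp(select(Cond, TV, FV), RHS)
        if (FCmp == 0)              assert(Sel == (Cond && TCmp));
        if (TCmp == 1)              assert(Sel == (Cond || FCmp));
        if (FCmp == 1 && TCmp == 0) assert(Sel == !Cond);
      }
}
// ---- end editorial check ----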
+      if (match(TCmp, m_One()))
+        if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+          return V;
+      // Finally, if the false value simplified to true and the true value to
+      // false, then the result of the compare is equal to "!Cond".
+      if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+        if (Value *V =
+            SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+                            TD, DT, MaxRecurse))
+          return V;
+    }
+  }
+
+  return 0;
+}
+
+/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
+/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
+/// it on the incoming phi values yields the same result for every value. If so
+/// returns the common value, otherwise returns null.
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
+                                 const TargetData *TD, const DominatorTree *DT,
+                                 unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  PHINode *PI;
+  if (isa<PHINode>(LHS)) {
+    PI = cast<PHINode>(LHS);
+    // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+    if (!ValueDominatesPHI(RHS, PI, DT))
+      return 0;
+  } else {
+    assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
+    PI = cast<PHINode>(RHS);
+    // Bail out if LHS and the phi may be mutually interdependent due to a loop.
+    if (!ValueDominatesPHI(LHS, PI, DT))
+      return 0;
+  }
+
+  // Evaluate the BinOp on the incoming phi values.
+  Value *CommonValue = 0;
+  for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+    Value *Incoming = PI->getIncomingValue(i);
+    // If the incoming value is the phi node itself, it can safely be skipped.
+    if (Incoming == PI) continue;
+    Value *V = PI == LHS ?
+      SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
+      SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+    // If the operation failed to simplify, or simplified to a different value
+    // from the one seen previously, then give up.
+    if (!V || (CommonValue && V != CommonValue))
+      return 0;
+    CommonValue = V;
+  }
+
+  return CommonValue;
+}
+
+/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction,
+/// try to simplify the comparison by seeing whether comparing with all of the
+/// incoming phi values yields the same result every time. If so returns the
+/// common result, otherwise returns null.
+static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  // Make sure the phi is on the LHS.
+  if (!isa<PHINode>(LHS)) {
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+  assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
+  PHINode *PI = cast<PHINode>(LHS);
+
+  // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+  if (!ValueDominatesPHI(RHS, PI, DT))
+    return 0;
+
+  // Evaluate the compare on the incoming phi values.
+  Value *CommonValue = 0;
+  for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+    Value *Incoming = PI->getIncomingValue(i);
+    // If the incoming value is the phi node itself, it can safely be skipped.
+    if (Incoming == PI) continue;
+    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+    // If the operation failed to simplify, or simplified to a different value
+    // from the one seen previously, then give up.
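// ---- Editorial aside (illustration only, not part of the r126079 import) ----
// The phi-threading loop in miniature: simplify against every incoming value
// and succeed only if all answers coincide. The self-reference skip
// ("Incoming == PI") has no analogue for plain ints, so it is omitted here.
#include <optional>
#include <vector>

std::optional<int> threadOverPhi(const std::vector<int> &Incoming,
                                 std::optional<int> (*Simplify)(int)) {
  std::optional<int> Common;
  for (int V : Incoming) {
    std::optional<int> R = Simplify(V);
    if (!R || (Common && *R != *Common))
      return std::nullopt;       // one arm resisted, or the answers differ
    Common = R;
  }
  return Common;
}

int main() {
  auto MaskFold = +[](int V) -> std::optional<int> {
    return V & 0;                // X & 0 -> 0 for every incoming value
  };
  std::vector<int> Incoming = {3, 5, 9};
  return threadOverPhi(Incoming, MaskFold) == std::optional<int>(0) ? 0 : 1;
}
// ---- end editorial aside ----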
+ if (!V || (CommonValue && V != CommonValue)) + return 0; + CommonValue = V; + } + + return CommonValue; +} + /// SimplifyAddInst - Given operands for an Add, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD) { +static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops, 2, TD); } - + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X + undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // X + 0 -> X + if (match(Op1, m_Zero())) + return Op0; + + // X + (Y - X) -> Y + // (Y - X) + X -> Y + // Eg: X + -X -> 0 + Value *Y = 0; + if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) || + match(Op0, m_Sub(m_Value(Y), m_Specific(Op1)))) + return Y; + + // X + ~X -> -1 since ~X = -X-1 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getAllOnesValue(Op0->getType()); + + /// i1 add -> xor. + if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1)) + return V; + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // Mul distributes over Add. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul, + TD, DT, MaxRecurse)) + return V; + + // Threading Add over selects and phi nodes is pointless, so don't bother. + // Threading over the select in "A + select(cond, B, C)" means evaluating + // "A+B" and "A+C" and seeing if they are equal; but they are equal if and + // only if B and C are equal. If B and C are equal then (since we assume + // that operands have already been simplified) "select(cond, B, C)" should + // have been simplified to the common value of B and C already. Analysing + // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly + // for threading over phi nodes. + + return 0; +} + +Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); +} + +/// SimplifySubInst - Given operands for a Sub, see if we can +/// fold the result. If not, this returns null. 
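// ---- Editorial check (illustration only, not part of the r126079 import) ----
// Two facts the Add folds above rely on, under two's complement (guaranteed
// by C++20, and true on mainstream targets before that): ~X == -X - 1, hence
// X + ~X == -1; and 1-bit addition is exactly xor, which justifies the
// "i1 add -> xor" rewrite.
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x = -1000; x <= 1000; ++x) {
    assert(~x == -x - 1);      // the identity behind "X + ~X -> -1"
    assert(x + ~x == -1);
  }
  for (unsigned a = 0; a <= 1; ++a)
    for (unsigned b = 0; b <= 1; ++b)
      assert(((a + b) & 1u) == (a ^ b));  // i1 add -> xor
}
// ---- end editorial check ----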
+static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(), + Ops, 2, TD); + } + + // X - undef -> undef + // undef - X -> undef + if (match(Op0, m_Undef()) || match(Op1, m_Undef())) + return UndefValue::get(Op0->getType()); + + // X - 0 -> X + if (match(Op1, m_Zero())) + return Op0; + + // X - X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // (X*2) - X -> X + // (X<<1) - X -> X + Value *X = 0; + if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) || + match(Op0, m_Shl(m_Specific(Op1), m_One()))) + return Op1; + + // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. + // For example, (X + Y) - Y -> X; (Y + X) - Y -> X + Value *Y = 0, *Z = Op1; + if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z + // See if "V === Y - Z" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1)) + // It does! Now see if "X + V" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + // See if "V === X - Z" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1)) + // It does! Now see if "Y + V" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + } + + // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies. + // For example, X - (X + 1) -> -1 + X = Op0; + if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z) + // See if "V === X - Y" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1)) + // It does! Now see if "V - Z" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + // See if "V === X - Z" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1)) + // It does! Now see if "V - Y" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + } + + // Z - (X - Y) -> (Z - X) + Y if everything simplifies. + // For example, X - (X - Y) -> Y. + Z = Op0; + if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y) + // See if "V === Z - X" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1)) + // It does! Now see if "V + Y" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + + // Mul distributes over Sub. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul, + TD, DT, MaxRecurse)) + return V; + + // i1 sub -> xor. + if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1)) + return V; + + // Threading Sub over selects and phi nodes is pointless, so don't bother. 
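// ---- Editorial check (illustration only, not part of the r126079 import) ----
// The Sub reassociations above at work: (X + Y) - Y -> X, and
// X - (X + 1) -> -1 via "X - (Y + Z) -> (X - Y) - Z". With unsigned
// wraparound these hold for all values; -1 is the all-ones bit pattern.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 1000; ++x) {
    assert(x - (x + 1) == UINT32_MAX);  // X - (X + 1) -> -1 (all ones)
    for (uint32_t y = 0; y < 16; ++y)
      assert((x + y) - y == x);         // (X + Y) - Y -> X
  }
}
// ---- end editorial check ----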
+ // Threading over the select in "A - select(cond, B, C)" means evaluating + // "A-B" and "A-C" and seeing if they are equal; but they are equal if and + // only if B and C are equal. If B and C are equal then (since we assume + // that operands have already been simplified) "select(cond, B, C)" should + // have been simplified to the common value of B and C already. Analysing + // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly + // for threading over phi nodes. + + return 0; +} + +Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); +} + +/// SimplifyMulInst - Given operands for a Mul, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(), + Ops, 2, TD); + } + // Canonicalize the constant to the RHS. std::swap(Op0, Op1); } - - if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - // X + undef -> undef - if (isa<UndefValue>(Op1C)) - return Op1C; - - // X + 0 --> X - if (Op1C->isNullValue()) - return Op0; - } - - // FIXME: Could pull several more out of instcombine. + + // X * undef -> 0 + if (match(Op1, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // X * 0 -> 0 + if (match(Op1, m_Zero())) + return Op1; + + // X * 1 -> X + if (match(Op1, m_One())) + return Op0; + + // (X / Y) * Y -> X if the division is exact. + Value *X = 0, *Y = 0; + if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y + (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y) + BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1); + if (Div->isExact()) + return X; + } + + // i1 mul -> and. + if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1)) + return V; + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // Mul distributes over Add. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add, + TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + } + } + + bool isSigned = Opcode == Instruction::SDiv; + + // X / undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // undef / X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // 0 / X -> 0, we don't need to preserve faults! + if (match(Op0, m_Zero())) + return Op0; + + // X / 1 -> X + if (match(Op1, m_One())) + return Op0; + + if (Op0->getType()->isIntegerTy(1)) + // It can't be division by zero, hence it must be division by one. + return Op0; + + // X / X -> 1 + if (Op0 == Op1) + return ConstantInt::get(Op0->getType(), 1); + + // (X * Y) / Y -> X if the multiplication does not overflow. + Value *X = 0, *Y = 0; + if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { + if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 + BinaryOperator *Mul = cast<BinaryOperator>(Op0); + // If the Mul knows it does not overflow, then we are good to go. + if ((isSigned && Mul->hasNoSignedWrap()) || + (!isSigned && Mul->hasNoUnsignedWrap())) + return X; + // If X has the form X = A / Y then X * Y cannot overflow. + if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X)) + if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y) + return X; + } + + // (X rem Y) / Y -> 0 + if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) || + (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1))))) + return Constant::getNullValue(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifySDivInst - Given operands for an SDiv, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyUDivInst - Given operands for a UDiv, see if we can +/// fold the result. If not, this returns null. 
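// ---- Editorial counterexample (illustration only, not part of the import) ----
// Why SimplifyDiv's (X * Y) / Y -> X fold above demands a no-wrap multiply:
// on 8-bit values, 16 * 16 wraps to 0, and 0 / 16 == 0, not 16. The
// (X rem Y) / Y -> 0 fold, by contrast, needs no side condition.
#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 16, Y = 16;
  uint8_t Prod = uint8_t(X * Y);  // 256 wraps to 0 in i8
  assert(Prod / Y == 0);          // (X * Y) / Y == 0 != X when the mul wraps

  uint8_t A = 7, B = 3;
  assert((A % B) / B == 0);       // (X rem Y) / Y -> 0 always holds
}
// ---- end editorial counterexample ----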
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit); +} + +static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *, + const DominatorTree *, unsigned) { + // undef / X -> undef (the undef could be a snan). + if (match(Op0, m_Undef())) + return Op0; + + // X / undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + return 0; +} + +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + } + } + + // 0 shift by X -> 0 + if (match(Op0, m_Zero())) + return Op0; + + // X shift by 0 -> X + if (match(Op1, m_Zero())) + return Op0; + + // X shift by undef -> undef because it may shift by the bitwidth. + if (match(Op1, m_Undef())) + return Op1; + + // Shifting by the bitwidth or more is undefined. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) + if (CI->getValue().getLimitedValue() >= + Op0->getType()->getScalarSizeInBits()) + return UndefValue::get(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifyShlInst - Given operands for an Shl, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // undef << X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // (X >> A) << A -> X + Value *X; + if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) && + cast<PossiblyExactOperator>(Op0)->isExact()) + return X; + return 0; +} + +Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); +} + +/// SimplifyLShrInst - Given operands for an LShr, see if we can +/// fold the result. If not, this returns null. 
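// ---- Editorial aside (illustration only, not part of the r126079 import) ----
// "Shifting by the bitwidth or more is undefined" holds in C++ too, so a
// host-side model of SimplifyShift must test the amount before shifting.
// modelShl is a toy: nullopt stands in for the IR-level undef result, since
// C++ has no undef value.
#include <cstdint>
#include <optional>

std::optional<uint32_t> modelShl(uint32_t X, uint32_t Amt) {
  if (Amt >= 32) return std::nullopt;  // models "fold to undef"; avoids UB here
  if (Amt == 0)  return X;             // X shift by 0 -> X
  if (X == 0)    return X;             // 0 shift by X -> 0
  return X << Amt;                     // now a well-defined shift
}

int main() {
  return (modelShl(1, 3) == std::optional<uint32_t>(8u) &&
          !modelShl(1, 32)) ? 0 : 1;
}
// ---- end editorial aside ----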
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // undef >>l X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // (X << A) >> A -> X + Value *X; + if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && + cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap()) + return X; + return 0; } +Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit); +} + +/// SimplifyAShrInst - Given operands for an AShr, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // all ones >>a X -> all ones + if (match(Op0, m_AllOnes())) + return Op0; + + // undef >>a X -> all ones + if (match(Op0, m_Undef())) + return Constant::getAllOnesValue(Op0->getType()); + + // (X << A) >> A -> X + Value *X; + if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && + cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) + return X; + + return 0; +} + +Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit); +} + /// SimplifyAndInst - Given operands for an And, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD) { +static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), Ops, 2, TD); } - + // Canonicalize the constant to the RHS. std::swap(Op0, Op1); } - + // X & undef -> 0 - if (isa<UndefValue>(Op1)) + if (match(Op1, m_Undef())) return Constant::getNullValue(Op0->getType()); - + // X & X = X if (Op0 == Op1) return Op0; - - // X & <0,0> = <0,0> - if (isa<ConstantAggregateZero>(Op1)) + + // X & 0 = 0 + if (match(Op1, m_Zero())) return Op1; - - // X & <-1,-1> = X - if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) - if (CP->isAllOnesValue()) - return Op0; - - if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) { - // X & 0 = 0 - if (Op1CI->isZero()) - return Op1CI; - // X & -1 = X - if (Op1CI->isAllOnesValue()) - return Op0; - } - + + // X & -1 = X + if (match(Op1, m_AllOnes())) + return Op0; + // A & ~A = ~A & A = 0 - Value *A, *B; - if ((match(Op0, m_Not(m_Value(A))) && A == Op1) || - (match(Op1, m_Not(m_Value(A))) && A == Op0)) + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) return Constant::getNullValue(Op0->getType()); - + // (A | ?) & A = A + Value *A = 0, *B = 0; if (match(Op0, m_Or(m_Value(A), m_Value(B))) && (A == Op1 || B == Op1)) return Op1; - + // A & (A | ?) = A if (match(Op1, m_Or(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) return Op0; - + + // Try some generic simplifications for associative operations. 
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // And distributes over Or. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or, + TD, DT, MaxRecurse)) + return V; + + // And distributes over Xor. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor, + TD, DT, MaxRecurse)) + return V; + + // Or distributes over And. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or, + TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + return 0; } +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit); +} + /// SimplifyOrInst - Given operands for an Or, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD) { +static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), Ops, 2, TD); } - + // Canonicalize the constant to the RHS. std::swap(Op0, Op1); } - + // X | undef -> -1 - if (isa<UndefValue>(Op1)) + if (match(Op1, m_Undef())) return Constant::getAllOnesValue(Op0->getType()); - + // X | X = X if (Op0 == Op1) return Op0; - // X | <0,0> = X - if (isa<ConstantAggregateZero>(Op1)) + // X | 0 = X + if (match(Op1, m_Zero())) return Op0; - - // X | <-1,-1> = <-1,-1> - if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) - if (CP->isAllOnesValue()) - return Op1; - - if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) { - // X | 0 = X - if (Op1CI->isZero()) - return Op0; - // X | -1 = -1 - if (Op1CI->isAllOnesValue()) - return Op1CI; - } - + + // X | -1 = -1 + if (match(Op1, m_AllOnes())) + return Op1; + // A | ~A = ~A | A = -1 - Value *A, *B; - if ((match(Op0, m_Not(m_Value(A))) && A == Op1) || - (match(Op1, m_Not(m_Value(A))) && A == Op0)) + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Op0->getType()); - + // (A & ?) | A = A + Value *A = 0, *B = 0; if (match(Op0, m_And(m_Value(A), m_Value(B))) && (A == Op1 || B == Op1)) return Op1; - + // A | (A & ?) = A if (match(Op1, m_And(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) return Op0; - + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // Or distributes over And. Try some generic simplifications based on this. 
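// ---- Editorial check (illustration only, not part of the r126079 import) ----
// The absorption laws used by both the And and the Or simplifications above,
// (A | ?) & A -> A and (A & ?) | A -> A, verified exhaustively on 8 bits.
#include <cassert>
#include <cstdint>

int main() {
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      uint8_t A = uint8_t(a), B = uint8_t(b);
      assert(uint8_t((A | B) & A) == A);  // (A | ?) & A -> A
      assert(uint8_t((A & B) | A) == A);  // (A & ?) | A -> A
    }
}
// ---- end editorial check ----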
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, + TD, DT, MaxRecurse)) + return V; + + // And distributes over Or. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And, + TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + return 0; } +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyXorInst - Given operands for a Xor, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // A ^ undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // A ^ 0 = A + if (match(Op1, m_Zero())) + return Op0; + + // A ^ A = 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // A ^ ~A = ~A ^ A = -1 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getAllOnesValue(Op0->getType()); + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // And distributes over Xor. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And, + TD, DT, MaxRecurse)) + return V; + + // Threading Xor over selects and phi nodes is pointless, so don't bother. + // Threading over the select in "A ^ select(cond, B, C)" means evaluating + // "A^B" and "A^C" and seeing if they are equal; but they are equal if and + // only if B and C are equal. If B and C are equal then (since we assume + // that operands have already been simplified) "select(cond, B, C)" should + // have been simplified to the common value of B and C already. Analysing + // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly + // for threading over phi nodes. + + return 0; +} + +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit); +} static const Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } - /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result. If not, this returns null. 
-Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD) { +static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); - + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); @@ -189,70 +1277,400 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, std::swap(LHS, RHS); Pred = CmpInst::getSwappedPredicate(Pred); } - - // ITy - This is the return type of the compare we're considering. - const Type *ITy = GetCompareTy(LHS); - + + const Type *ITy = GetCompareTy(LHS); // The return type. + const Type *OpTy = LHS->getType(); // The operand type. + // icmp X, X -> true/false // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false // because X could be 0. if (LHS == RHS || isa<UndefValue>(RHS)) return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); - - // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value - // addresses never equal each other! We already know that Op0 != Op1. - if ((isa<GlobalValue>(LHS) || isa<AllocaInst>(LHS) || - isa<ConstantPointerNull>(LHS)) && - (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) || - isa<ConstantPointerNull>(RHS))) - return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); - - // See if we are doing a comparison with a constant. - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { - // If we have an icmp le or icmp ge instruction, turn it into the - // appropriate icmp lt or icmp gt instruction. This allows us to rely on - // them being folded in the code below. + + // Special case logic when the operands have i1 type. + if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() && + cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) { switch (Pred) { default: break; + case ICmpInst::ICMP_EQ: + // X == 1 -> X + if (match(RHS, m_One())) + return LHS; + break; + case ICmpInst::ICMP_NE: + // X != 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_UGT: + // X >u 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_UGE: + // X >=u 1 -> X + if (match(RHS, m_One())) + return LHS; + break; + case ICmpInst::ICMP_SLT: + // X <s 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_SLE: + // X <=s -1 -> X + if (match(RHS, m_One())) + return LHS; + break; + } + } + + // icmp <alloca*>, <global/alloca*/null> - Different stack variables have + // different addresses, and what's more the address of a stack variable is + // never null or equal to the address of a global. Note that generalizing + // to the case where LHS is a global variable address or null is pointless, + // since if both LHS and RHS are constants then we already constant folded + // the compare, and if only one of them is then we moved it to RHS already. + if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) || + isa<ConstantPointerNull>(RHS))) + // We already know that LHS != RHS. + return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + + // If we are comparing with zero then try hard since this is a common case.
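Before the zero-comparison code below, one of the new i1 folds in concrete form (sketch only, not from the patch; X is an assumed i1 Value* and Ctx its LLVMContext):

    // "icmp eq i1 %x, true" -> %x, via the ICMP_EQ case above.
    Value *V = SimplifyICmpInst(ICmpInst::ICMP_EQ, X,
                                ConstantInt::getTrue(Ctx), TD, DT);
    // When the fold fires, V is X itself and no compare is needed.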
+ if (match(RHS, m_Zero())) { + bool LHSKnownNonNegative, LHSKnownNegative; + switch (Pred) { + default: + assert(false && "Unknown ICmp predicate!"); + case ICmpInst::ICMP_ULT: + return ConstantInt::getFalse(LHS->getContext()); + case ICmpInst::ICMP_UGE: + return ConstantInt::getTrue(LHS->getContext()); + case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (CI->isMaxValue(false)) // A <=u MAX -> TRUE - return ConstantInt::getTrue(CI->getContext()); + if (isKnownNonZero(LHS, TD)) + return ConstantInt::getFalse(LHS->getContext()); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGT: + if (isKnownNonZero(LHS, TD)) + return ConstantInt::getTrue(LHS->getContext()); + break; + case ICmpInst::ICMP_SLT: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + if (LHSKnownNegative) + return ConstantInt::getTrue(LHS->getContext()); + if (LHSKnownNonNegative) + return ConstantInt::getFalse(LHS->getContext()); break; case ICmpInst::ICMP_SLE: - if (CI->isMaxValue(true)) // A <=s MAX -> TRUE - return ConstantInt::getTrue(CI->getContext()); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + if (LHSKnownNegative) + return ConstantInt::getTrue(LHS->getContext()); + if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + return ConstantInt::getFalse(LHS->getContext()); + break; + case ICmpInst::ICMP_SGE: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + if (LHSKnownNegative) + return ConstantInt::getFalse(LHS->getContext()); + if (LHSKnownNonNegative) + return ConstantInt::getTrue(LHS->getContext()); + break; + case ICmpInst::ICMP_SGT: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + if (LHSKnownNegative) + return ConstantInt::getFalse(LHS->getContext()); + if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + return ConstantInt::getTrue(LHS->getContext()); + break; + } + } + + // See if we are doing a comparison with a constant integer. + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + switch (Pred) { + default: break; + case ICmpInst::ICMP_UGT: + if (CI->isMaxValue(false)) // A >u MAX -> FALSE + return ConstantInt::getFalse(CI->getContext()); break; case ICmpInst::ICMP_UGE: if (CI->isMinValue(false)) // A >=u MIN -> TRUE return ConstantInt::getTrue(CI->getContext()); break; + case ICmpInst::ICMP_ULT: + if (CI->isMinValue(false)) // A <u MIN -> FALSE + return ConstantInt::getFalse(CI->getContext()); + break; + case ICmpInst::ICMP_ULE: + if (CI->isMaxValue(false)) // A <=u MAX -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + case ICmpInst::ICMP_SGT: + if (CI->isMaxValue(true)) // A >s MAX -> FALSE + return ConstantInt::getFalse(CI->getContext()); + break; case ICmpInst::ICMP_SGE: if (CI->isMinValue(true)) // A >=s MIN -> TRUE return ConstantInt::getTrue(CI->getContext()); break; + case ICmpInst::ICMP_SLT: + if (CI->isMinValue(true)) // A <s MIN -> FALSE + return ConstantInt::getFalse(CI->getContext()); + break; + case ICmpInst::ICMP_SLE: + if (CI->isMaxValue(true)) // A <=s MAX -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + } + } + + // Compare of cast, for example (zext X) != 0 -> X != 0 + if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) { + Instruction *LI = cast<CastInst>(LHS); + Value *SrcOp = LI->getOperand(0); + const Type *SrcTy = SrcOp->getType(); + const Type *DstTy = LI->getType(); + + // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input + // if the integer type is the same size as the pointer type. 
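The zero-comparison block above leans on two ValueTracking queries; in isolation they look like this (sketch, assuming V and TD are in scope):

    bool KnownNonNegative, KnownNegative;
    ComputeSignBit(V, KnownNonNegative, KnownNegative, TD);
    // "icmp slt V, 0" folds to true if KnownNegative and to false if
    // KnownNonNegative; isKnownNonZero(V, TD) settles eq/ne against zero.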
+ if (MaxRecurse && TD && isa<PtrToIntInst>(LI) && + TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // Transfer the cast to the constant. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, + ConstantExpr::getIntToPtr(RHSC, SrcTy), + TD, DT, MaxRecurse-1)) + return V; + } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) { + if (RI->getOperand(0)->getType() == SrcTy) + // Compare without the cast. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), + TD, DT, MaxRecurse-1)) + return V; + } + } + + if (isa<ZExtInst>(LHS)) { + // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the + // same type. + if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) { + if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) + // Compare X and Y. Note that signed predicates become unsigned. + if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), + SrcOp, RI->getOperand(0), TD, DT, + MaxRecurse-1)) + return V; + } + // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended + // too. If not, then try to deduce the result of the comparison. + else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DstTy. + Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); + Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy); + + // If the re-extended constant didn't change then this is effectively + // also a case of comparing two zero-extended values. + if (RExt == CI && MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), + SrcOp, Trunc, TD, DT, MaxRecurse-1)) + return V; + + // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit + // there. Use this to work out the result of the comparison. + if (RExt != CI) { + switch (Pred) { + default: + assert(false && "Unknown ICmp predicate!"); + // LHS <u RHS. + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return ConstantInt::getFalse(CI->getContext()); + + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return ConstantInt::getTrue(CI->getContext()); + + // LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS + // is non-negative then LHS <s RHS. + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return CI->getValue().isNegative() ? + ConstantInt::getTrue(CI->getContext()) : + ConstantInt::getFalse(CI->getContext()); + + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return CI->getValue().isNegative() ? + ConstantInt::getFalse(CI->getContext()) : + ConstantInt::getTrue(CI->getContext()); + } + } + } + } + + if (isa<SExtInst>(LHS)) { + // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the + // same type. + if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) { + if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) + // Compare X and Y. Note that the predicate does not change. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), + TD, DT, MaxRecurse-1)) + return V; + } + // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended + // too. If not, then try to deduce the result of the comparison. + else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); + Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy); + + // If the re-extended constant didn't change then this is effectively + // also a case of comparing two sign-extended values. + if (RExt == CI && MaxRecurse) + if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT, + MaxRecurse-1)) + return V; + + // Otherwise the upper bits of LHS are all equal, while RHS has varying + // bits there. Use this to work out the result of the comparison. + if (RExt != CI) { + switch (Pred) { + default: + assert(false && "Unknown ICmp predicate!"); + case ICmpInst::ICMP_EQ: + return ConstantInt::getFalse(CI->getContext()); + case ICmpInst::ICMP_NE: + return ConstantInt::getTrue(CI->getContext()); + + // If RHS is non-negative then LHS <s RHS. If RHS is negative then + // LHS >s RHS. + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return CI->getValue().isNegative() ? + ConstantInt::getTrue(CI->getContext()) : + ConstantInt::getFalse(CI->getContext()); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return CI->getValue().isNegative() ? + ConstantInt::getFalse(CI->getContext()) : + ConstantInt::getTrue(CI->getContext()); + + // If LHS is non-negative then LHS <u RHS. If LHS is negative then + // LHS >u RHS. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // Comparison is true iff the LHS <s 0. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, + Constant::getNullValue(SrcTy), + TD, DT, MaxRecurse-1)) + return V; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + // Comparison is true iff the LHS >=s 0. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, + Constant::getNullValue(SrcTy), + TD, DT, MaxRecurse-1)) + return V; + break; + } + } + } } } - - + + // Special logic for binary operators. + BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS); + if (MaxRecurse && (LBO || RBO)) { + // Analyze the case when either LHS or RHS is an add instruction. + Value *A = 0, *B = 0, *C = 0, *D = 0; + // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null). + bool NoLHSWrapProblem = false, NoRHSWrapProblem = false; + if (LBO && LBO->getOpcode() == Instruction::Add) { + A = LBO->getOperand(0); B = LBO->getOperand(1); + NoLHSWrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap()); + } + if (RBO && RBO->getOpcode() == Instruction::Add) { + C = RBO->getOperand(0); D = RBO->getOperand(1); + NoRHSWrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap()); + } + + // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. + if ((A == RHS || B == RHS) && NoLHSWrapProblem) + if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A, + Constant::getNullValue(RHS->getType()), + TD, DT, MaxRecurse-1)) + return V; + + // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + if ((C == LHS || D == LHS) && NoRHSWrapProblem) + if (Value *V = SimplifyICmpInst(Pred, + Constant::getNullValue(LHS->getType()), + C == LHS ? D : C, TD, DT, MaxRecurse-1)) + return V; + + // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. 
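The add folds above in concrete form (illustration only, with IR shown in comments; the (X+Y) vs. (X+Z) variant follows below):

    // %s = add nuw i32 %x, %y
    // %c = icmp ult i32 %s, %x   ; icmp (X+Y), X with no unsigned wrap
    //   -> icmp ult i32 %y, 0    ; by the rule above
    //   -> false                 ; nothing is unsigned-less-than zero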
+ if (A && C && (A == C || A == D || B == C || B == D) && + NoLHSWrapProblem && NoRHSWrapProblem) { + // Determine Y and Z in the form icmp (X+Y), (X+Z). + Value *Y = (A == C || A == D) ? B : A; + Value *Z = (C == A || C == B) ? D : C; + if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1)) + return V; + } + } + + // If the comparison is with the result of a select instruction, check whether + // comparing with either branch of the select always yields the same value. + if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + return V; + + // If the comparison is with the result of a phi instruction, check whether + // doing the compare with each incoming phi value yields a common result. + if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + return V; + return 0; } +Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); +} + /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD) { +static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); - + // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); Pred = CmpInst::getSwappedPredicate(Pred); } - + // Fold trivial predicates. if (Pred == FCmpInst::FCMP_FALSE) return ConstantInt::get(GetCompareTy(LHS), 0); @@ -269,7 +1687,7 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (CmpInst::isFalseWhenEqual(Pred)) return ConstantInt::get(GetCompareTy(LHS), 0); } - + // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // If the constant is a nan, see if we can fold the comparison based on it. @@ -310,23 +1728,40 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } } - + + // If the comparison is with the result of a select instruction, check whether + // comparing with either branch of the select always yields the same value. + if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + return V; + + // If the comparison is with the result of a phi instruction, check whether + // doing the compare with each incoming phi value yields a common result. + if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + return V; + return 0; } +Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); +} + /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold /// the result. If not, this returns null. 
Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, - const TargetData *TD) { + const TargetData *TD, const DominatorTree *) { // select true, X, Y -> X // select false, X, Y -> Y if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal)) return CB->getZExtValue() ? TrueVal : FalseVal; - + // select C, X, X -> X if (TrueVal == FalseVal) return TrueVal; - + if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X return FalseVal; if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X @@ -336,98 +1771,249 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, return TrueVal; return FalseVal; } - - - + return 0; } - /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps, - const TargetData *TD) { + const TargetData *TD, const DominatorTree *) { + // The type of the GEP pointer operand. + const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()); + // getelementptr P -> P. if (NumOps == 1) return Ops[0]; - // TODO. - //if (isa<UndefValue>(Ops[0])) - // return UndefValue::get(GEP.getType()); + if (isa<UndefValue>(Ops[0])) { + // Compute the (pointer) type returned by the GEP instruction. + const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1], + NumOps-1); + const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace()); + return UndefValue::get(GEPTy); + } - // getelementptr P, 0 -> P. - if (NumOps == 2) + if (NumOps == 2) { + // getelementptr P, 0 -> P. if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1])) if (C->isZero()) return Ops[0]; - + // getelementptr P, N -> P if P points to a type of zero size. + if (TD) { + const Type *Ty = PtrTy->getElementType(); + if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0) + return Ops[0]; + } + } + // Check to see if this is constant foldable. for (unsigned i = 0; i != NumOps; ++i) if (!isa<Constant>(Ops[i])) return 0; - + return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), (Constant *const*)Ops+1, NumOps-1); } +/// SimplifyPHINode - See if we can fold the given phi. If not, returns null. +static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) { + // If all of the PHI's incoming values are the same then replace the PHI node + // with the common value. + Value *CommonValue = 0; + bool HasUndefInput = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PN->getIncomingValue(i); + // If the incoming value is the phi node itself, it can safely be skipped. + if (Incoming == PN) continue; + if (isa<UndefValue>(Incoming)) { + // Remember that we saw an undef value, but otherwise ignore them. + HasUndefInput = true; + continue; + } + if (CommonValue && Incoming != CommonValue) + return 0; // Not the same, bail out. + CommonValue = Incoming; + } + + // If CommonValue is null then all of the incoming values were either undef or + // equal to the phi node itself. + if (!CommonValue) + return UndefValue::get(PN->getType()); + + // If we have a PHI node like phi(X, undef, X), where X is defined by some + // instruction, we cannot return X as the result of the PHI node unless it + // dominates the PHI block. + if (HasUndefInput) + return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0; + + return CommonValue; +} + //=== Helper functions for higher up the class hierarchy. /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can /// fold the result. 
If not, this returns null. -Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD) { +static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { switch (Opcode) { - case Instruction::And: return SimplifyAndInst(LHS, RHS, TD); - case Instruction::Or: return SimplifyOrInst(LHS, RHS, TD); + case Instruction::Add: + return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, + TD, DT, MaxRecurse); + case Instruction::Sub: + return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, + TD, DT, MaxRecurse); + case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse); + case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::Shl: + return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, + TD, DT, MaxRecurse); + case Instruction::LShr: + return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); + case Instruction::AShr: + return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); + case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse); + case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse); default: if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) { Constant *COps[] = {CLHS, CRHS}; return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD); } + + // If the operation is associative, try some generic simplifications. + if (Instruction::isAssociative(Opcode)) + if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) + if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) + if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse)) + return V; + return 0; } } +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit); +} + /// SimplifyCmpInst - Given operands for a CmpInst, see if we can /// fold the result. 
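Before SimplifyCmpInst, a note on the SimplifyBinOp dispatch above: it gives callers one generic hook for all opcodes (sketch of hypothetical use; Opcode, L and R are assumed to be in scope):

    // Try to fold "L Opcode R" without materializing an instruction.
    if (Value *V = SimplifyBinOp(Opcode, L, R, TD, DT))
      return V;  // reuse an existing value instead of emitting a new op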
-Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD) { +static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) - return SimplifyICmpInst(Predicate, LHS, RHS, TD); - return SimplifyFCmpInst(Predicate, LHS, RHS, TD); + return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); } +Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); +} /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. -Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) { +Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, + const DominatorTree *DT) { + Value *Result; + switch (I->getOpcode()) { default: - return ConstantFoldInstruction(I, TD); + Result = ConstantFoldInstruction(I, TD); + break; case Instruction::Add: - return SimplifyAddInst(I->getOperand(0), I->getOperand(1), - cast<BinaryOperator>(I)->hasNoSignedWrap(), - cast<BinaryOperator>(I)->hasNoUnsignedWrap(), TD); + Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), + TD, DT); + break; + case Instruction::Sub: + Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), + TD, DT); + break; + case Instruction::Mul: + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::SDiv: + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::UDiv: + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::FDiv: + Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::Shl: + Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), + TD, DT); + break; + case Instruction::LShr: + Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->isExact(), + TD, DT); + break; + case Instruction::AShr: + Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->isExact(), + TD, DT); + break; case Instruction::And: - return SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD); + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; case Instruction::Or: - return SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD); + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::Xor: + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; case Instruction::ICmp: - return SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD); + Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD, DT); + break; case Instruction::FCmp: - return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD); + Result = 
SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD, DT); + break; case Instruction::Select: - return SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), TD); + Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), + I->getOperand(2), TD, DT); + break; case Instruction::GetElementPtr: { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); - return SimplifyGEPInst(&Ops[0], Ops.size(), TD); + Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT); + break; } + case Instruction::PHI: + Result = SimplifyPHINode(cast<PHINode>(I), DT); + break; } + + /// If called on unreachable code, the above logic may report that the + /// instruction simplified to itself. Make life easier for users by + /// detecting that case here, returning a safe value instead. + return Result == I ? UndefValue::get(I->getType()) : Result; } /// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then @@ -437,15 +2023,16 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) { /// simplifies and deletes scalar operations, it does not change the CFG. /// void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, - const TargetData *TD) { + const TargetData *TD, + const DominatorTree *DT) { assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!"); - + // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that // we can know if it gets deleted out from under us or replaced in a // recursive simplification. WeakVH FromHandle(From); WeakVH ToHandle(To); - + while (!From->use_empty()) { // Update the instruction to use the new value. Use &TheUse = From->use_begin().getUse(); @@ -460,27 +2047,26 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, // Sanity check to make sure 'User' doesn't dangle across // SimplifyInstruction. AssertingVH<> UserHandle(User); - - SimplifiedVal = SimplifyInstruction(User, TD); + + SimplifiedVal = SimplifyInstruction(User, TD, DT); if (SimplifiedVal == 0) continue; } - + // Recursively simplify this user to the new value. - ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD); + ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT); From = dyn_cast_or_null<Instruction>((Value*)FromHandle); To = ToHandle; - + assert(ToHandle && "To value deleted by recursive simplification?"); - + // If the recursive simplification ended up revisiting and deleting // 'From' then we're done. if (From == 0) return; } - + // If 'From' has value handles referring to it, do a real RAUW to update them. 
From->replaceAllUsesWith(To); - + From->eraseFromParent(); } - diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp index 1c9e14884316..2e259b147b8b 100644 --- a/lib/Analysis/IntervalPartition.cpp +++ b/lib/Analysis/IntervalPartition.cpp @@ -17,7 +17,7 @@ using namespace llvm; char IntervalPartition::ID = 0; INITIALIZE_PASS(IntervalPartition, "intervals", - "Interval Partition Construction", true, true); + "Interval Partition Construction", true, true) //===----------------------------------------------------------------------===// // IntervalPartition Implementation diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index e32dbc444713..9e7da6ce2de9 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -14,8 +14,10 @@ #define DEBUG_TYPE "lazy-value-info" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CFG.h" @@ -26,11 +28,14 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include <map> +#include <set> +#include <stack> using namespace llvm; char LazyValueInfo::ID = 0; INITIALIZE_PASS(LazyValueInfo, "lazy-value-info", - "Lazy Value Information Analysis", false, true); + "Lazy Value Information Analysis", false, true) namespace llvm { FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); } @@ -50,18 +55,18 @@ namespace llvm { namespace { class LVILatticeVal { enum LatticeValueTy { - /// undefined - This LLVM Value has no known value yet. + /// undefined - This Value has no known value yet. undefined, - /// constant - This LLVM Value has a specific constant value. + /// constant - This Value has a specific constant value. constant, - /// notconstant - This LLVM value is known to not have the specified value. + /// notconstant - This Value is known to not have the specified value. notconstant, - /// constantrange + /// constantrange - The Value falls within this range. constantrange, - /// overdefined - This instruction is not known to be constant, and we know + /// overdefined - This value is not known to be constant, and we know that /// it has a value. overdefined }; @@ -77,17 +82,13 @@ public: static LVILatticeVal get(Constant *C) { LVILatticeVal Res; - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) - Res.markConstantRange(ConstantRange(CI->getValue(), CI->getValue()+1)); - else if (!isa<UndefValue>(C)) + if (!isa<UndefValue>(C)) Res.markConstant(C); return Res; } static LVILatticeVal getNot(Constant *C) { LVILatticeVal Res; - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) - Res.markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue())); - else + if (!isa<UndefValue>(C)) Res.markNotConstant(C); return Res; } @@ -129,32 +130,34 @@ public: /// markConstant - Return true if this is a change in status. 
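One consequence of the markConstant change just below: integer constants are now canonicalized into singleton ranges instead of being stored in the constant state (sketch; a 32-bit width is assumed):

    ConstantRange R(APInt(32, 4));  // the singleton range [4, 5)
    // markConstant(ConstantInt 4) forwards to markConstantRange(R), so an
    // i32 constant lands in the constantrange state, not in constant.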
bool markConstant(Constant *V) { - if (isConstant()) { - assert(getConstant() == V && "Marking constant with different value"); + assert(V && "Marking constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return markConstantRange(ConstantRange(CI->getValue())); + if (isa<UndefValue>(V)) return false; - } - + + assert((!isConstant() || getConstant() == V) && + "Marking constant with different value"); assert(isUndefined()); Tag = constant; - assert(V && "Marking constant with NULL"); Val = V; return true; } /// markNotConstant - Return true if this is a change in status. bool markNotConstant(Constant *V) { - if (isNotConstant()) { - assert(getNotConstant() == V && "Marking !constant with different value"); + assert(V && "Marking !constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue())); + if (isa<UndefValue>(V)) return false; - } - - if (isConstant()) - assert(getConstant() != V && "Marking not constant with different value"); - else - assert(isUndefined()); + assert((!isConstant() || getConstant() != V) && + "Marking !constant with same value as existing constant"); + assert((!isNotConstant() || getNotConstant() == V) && + "Marking !constant with different value"); + assert(isUndefined() || isConstant()); Tag = notconstant; - assert(V && "Marking constant with NULL"); Val = V; return true; } @@ -185,63 +188,81 @@ public: if (RHS.isUndefined() || isOverdefined()) return false; if (RHS.isOverdefined()) return markOverdefined(); - if (RHS.isNotConstant()) { - if (isNotConstant()) { - if (getNotConstant() != RHS.getNotConstant() || - isa<ConstantExpr>(getNotConstant()) || - isa<ConstantExpr>(RHS.getNotConstant())) - return markOverdefined(); - return false; - } else if (isConstant()) { - if (getConstant() == RHS.getNotConstant() || - isa<ConstantExpr>(RHS.getNotConstant()) || - isa<ConstantExpr>(getConstant())) + if (isUndefined()) { + Tag = RHS.Tag; + Val = RHS.Val; + Range = RHS.Range; + return true; + } + + if (isConstant()) { + if (RHS.isConstant()) { + if (Val == RHS.Val) + return false; + return markOverdefined(); + } + + if (RHS.isNotConstant()) { + if (Val == RHS.Val) return markOverdefined(); - return markNotConstant(RHS.getNotConstant()); - } else if (isConstantRange()) { + + // Unless we can prove that the two Constants are different, we must + // move to overdefined. + // FIXME: use TargetData for smarter constant folding. + if (ConstantInt *Res = dyn_cast<ConstantInt>( + ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, + getConstant(), + RHS.getNotConstant()))) + if (Res->isOne()) + return markNotConstant(RHS.getNotConstant()); + return markOverdefined(); } - - assert(isUndefined() && "Unexpected lattice"); - return markNotConstant(RHS.getNotConstant()); + + // RHS is a ConstantRange, LHS is a non-integer Constant. + + // FIXME: consider the case where RHS is a range [1, 0) and LHS is + // a function. The correct result is to pick up RHS. + + return markOverdefined(); } - - if (RHS.isConstantRange()) { - if (isConstantRange()) { - ConstantRange NewR = Range.unionWith(RHS.getConstantRange()); - if (NewR.isFullSet()) + + if (isNotConstant()) { + if (RHS.isConstant()) { + if (Val == RHS.Val) return markOverdefined(); - else - return markConstantRange(NewR); - } else if (!isUndefined()) { + + // Unless we can prove that the two Constants are different, we must + // move to overdefined. + // FIXME: use TargetData for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>( + ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, + getNotConstant(), + RHS.getConstant()))) + if (Res->isOne()) + return false; + return markOverdefined(); } - - assert(isUndefined() && "Unexpected lattice"); - return markConstantRange(RHS.getConstantRange()); - } - - // RHS must be a constant, we must be undef, constant, or notconstant. - assert(!isConstantRange() && - "Constant and ConstantRange cannot be merged."); - - if (isUndefined()) - return markConstant(RHS.getConstant()); - - if (isConstant()) { - if (getConstant() != RHS.getConstant()) + + if (RHS.isNotConstant()) { + if (Val == RHS.Val) + return false; return markOverdefined(); - return false; + } + + return markOverdefined(); } - // If we are known "!=4" and RHS is "==5", stay at "!=4". - if (getNotConstant() == RHS.getConstant() || - isa<ConstantExpr>(getNotConstant()) || - isa<ConstantExpr>(RHS.getConstant())) + assert(isConstantRange() && "New LVILattice type?"); + if (!RHS.isConstantRange()) return markOverdefined(); - return false; + + ConstantRange NewR = Range.unionWith(RHS.getConstantRange()); + if (NewR.isFullSet()) + return markOverdefined(); + return markConstantRange(NewR); } - }; } // end anonymous namespace. @@ -267,49 +288,136 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { //===----------------------------------------------------------------------===// namespace { + /// LVIValueHandle - A callback value handle updates the cache when + /// values are erased. + class LazyValueInfoCache; + struct LVIValueHandle : public CallbackVH { + LazyValueInfoCache *Parent; + + LVIValueHandle(Value *V, LazyValueInfoCache *P) + : CallbackVH(V), Parent(P) { } + + void deleted(); + void allUsesReplacedWith(Value *V) { + deleted(); + } + }; +} + +namespace llvm { + template<> + struct DenseMapInfo<LVIValueHandle> { + typedef DenseMapInfo<Value*> PointerInfo; + static inline LVIValueHandle getEmptyKey() { + return LVIValueHandle(PointerInfo::getEmptyKey(), + static_cast<LazyValueInfoCache*>(0)); + } + static inline LVIValueHandle getTombstoneKey() { + return LVIValueHandle(PointerInfo::getTombstoneKey(), + static_cast<LazyValueInfoCache*>(0)); + } + static unsigned getHashValue(const LVIValueHandle &Val) { + return PointerInfo::getHashValue(Val); + } + static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) { + return LHS == RHS; + } + }; + + template<> + struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > { + typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy; + typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo; + typedef DenseMapInfo<Value*> BPointerInfo; + static inline PairTy getEmptyKey() { + return std::make_pair(APointerInfo::getEmptyKey(), + BPointerInfo::getEmptyKey()); + } + static inline PairTy getTombstoneKey() { + return std::make_pair(APointerInfo::getTombstoneKey(), + BPointerInfo::getTombstoneKey()); + } + static unsigned getHashValue( const PairTy &Val) { + return APointerInfo::getHashValue(Val.first) ^ + BPointerInfo::getHashValue(Val.second); + } + static bool isEqual(const PairTy &LHS, const PairTy &RHS) { + return APointerInfo::isEqual(LHS.first, RHS.first) && + BPointerInfo::isEqual(LHS.second, RHS.second); + } + }; +} + +namespace { /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries.
class LazyValueInfoCache { - public: - /// BlockCacheEntryTy - This is a computed lattice value at the end of the - /// specified basic block for a Value* that depends on context. - typedef std::pair<AssertingVH<BasicBlock>, LVILatticeVal> BlockCacheEntryTy; - /// ValueCacheEntryTy - This is all of the cached block information for /// exactly one Value*. The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy; - private: - /// LVIValueHandle - A callback value handle update the cache when - /// values are erased. - struct LVIValueHandle : public CallbackVH { + /// ValueCache - This is all of the cached information for all values, + /// mapped from Value* to key information. + DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache; + + /// OverDefinedCache - This tracks, on a per-block basis, the set of + /// values that are over-defined at the end of that block. This is required + /// for cache updating. + typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; + DenseSet<OverDefinedPairTy> OverDefinedCache; + + /// BlockValueStack - This stack holds the state of the value solver + /// during a query. It basically emulates the callstack of the naive + /// recursive value lookup process. + std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack; + + friend struct LVIValueHandle; + + /// OverDefinedCacheUpdater - A helper object that ensures that the + /// OverDefinedCache is updated whenever solveBlockValue returns. + struct OverDefinedCacheUpdater { LazyValueInfoCache *Parent; + Value *Val; + BasicBlock *BB; + LVILatticeVal &BBLV; - LVIValueHandle(Value *V, LazyValueInfoCache *P) - : CallbackVH(V), Parent(P) { } + OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV, + LazyValueInfoCache *P) + : Parent(P), Val(V), BB(B), BBLV(LV) { } - void deleted(); - void allUsesReplacedWith(Value* V) { - deleted(); - } - - LVIValueHandle &operator=(Value *V) { - return *this = LVIValueHandle(V, Parent); + bool markResult(bool changed) { + if (changed && BBLV.isOverdefined()) + Parent->OverDefinedCache.insert(std::make_pair(BB, Val)); + return changed; } }; + - /// ValueCache - This is all of the cached information for all values, - /// mapped from Value* to key information. - std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache; + + LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); + bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, + LVILatticeVal &Result); + bool hasBlockValue(Value *Val, BasicBlock *BB); + + // These methods process one work item and may add more. A false value + // returned means that the work item was not completely processed and must + // be revisited after going through the new items. + bool solveBlockValue(Value *Val, BasicBlock *BB); + bool solveBlockValueNonLocal(LVILatticeVal &BBLV, + Value *Val, BasicBlock *BB); + bool solveBlockValuePHINode(LVILatticeVal &BBLV, + PHINode *PN, BasicBlock *BB); + bool solveBlockValueConstantRange(LVILatticeVal &BBLV, + Instruction *BBI, BasicBlock *BB); + + void solve(); - /// OverDefinedCache - This tracks, on a per-block basis, the set of - /// values that are over-defined at the end of that block. This is required - /// for cache updating. 
- std::set<std::pair<AssertingVH<BasicBlock>, Value*> > OverDefinedCache; + ValueCacheEntryTy &lookup(Value *V) { + return ValueCache[LVIValueHandle(V, this)]; + } public: - /// getValueInBlock - This is the query interface to determine the lattice /// value for the specified Value* at the end of the specified block. LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB); @@ -335,199 +443,112 @@ namespace { }; } // end anonymous namespace -//===----------------------------------------------------------------------===// -// LVIQuery Impl -//===----------------------------------------------------------------------===// - -namespace { - /// LVIQuery - This is a transient object that exists while a query is - /// being performed. - /// - /// TODO: Reuse LVIQuery instead of recreating it for every query, this avoids - /// reallocation of the densemap on every query. - class LVIQuery { - typedef LazyValueInfoCache::BlockCacheEntryTy BlockCacheEntryTy; - typedef LazyValueInfoCache::ValueCacheEntryTy ValueCacheEntryTy; - - /// This is the current value being queried for. - Value *Val; - - /// This is a pointer to the owning cache, for recursive queries. - LazyValueInfoCache &Parent; - - /// This is all of the cached information about this value. - ValueCacheEntryTy &Cache; - - /// This tracks, for each block, what values are overdefined. - std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &OverDefinedCache; - - /// NewBlocks - This is a mapping of the new BasicBlocks which have been - /// added to cache but that are not in sorted order. - DenseSet<BasicBlock*> NewBlockInfo; - - public: - - LVIQuery(Value *V, LazyValueInfoCache &P, - ValueCacheEntryTy &VC, - std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &ODC) - : Val(V), Parent(P), Cache(VC), OverDefinedCache(ODC) { - } - - ~LVIQuery() { - // When the query is done, insert the newly discovered facts into the - // cache in sorted order. - if (NewBlockInfo.empty()) return; - - for (DenseSet<BasicBlock*>::iterator I = NewBlockInfo.begin(), - E = NewBlockInfo.end(); I != E; ++I) { - if (Cache[*I].isOverdefined()) - OverDefinedCache.insert(std::make_pair(*I, Val)); - } - } - - LVILatticeVal getBlockValue(BasicBlock *BB); - LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB); - - private: - LVILatticeVal getCachedEntryForBlock(BasicBlock *BB); - }; -} // end anonymous namespace - -void LazyValueInfoCache::LVIValueHandle::deleted() { - for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator +void LVIValueHandle::deleted() { + typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; + + SmallVector<OverDefinedPairTy, 4> ToErase; + for (DenseSet<OverDefinedPairTy>::iterator I = Parent->OverDefinedCache.begin(), E = Parent->OverDefinedCache.end(); - I != E; ) { - std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I; - ++I; - if (tmp->second == getValPtr()) - Parent->OverDefinedCache.erase(tmp); + I != E; ++I) { + if (I->second == getValPtr()) + ToErase.push_back(*I); } + for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + Parent->OverDefinedCache.erase(*I); + // This erasure deallocates *this, so it MUST happen after we're done // using any and all members of *this. 
Parent->ValueCache.erase(*this); } void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { - for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator - I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ) { - std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I; - ++I; - if (tmp->first == BB) - OverDefinedCache.erase(tmp); + SmallVector<OverDefinedPairTy, 4> ToErase; + for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), + E = OverDefinedCache.end(); I != E; ++I) { + if (I->first == BB) + ToErase.push_back(*I); } + + for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + OverDefinedCache.erase(*I); - for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator + for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) I->second.erase(BB); } -/// getCachedEntryForBlock - See if we already have a value for this block. If -/// so, return it, otherwise create a new entry in the Cache map to use. -LVILatticeVal LVIQuery::getCachedEntryForBlock(BasicBlock *BB) { - NewBlockInfo.insert(BB); - return Cache[BB]; +void LazyValueInfoCache::solve() { + while (!BlockValueStack.empty()) { + std::pair<BasicBlock*, Value*> &e = BlockValueStack.top(); + if (solveBlockValue(e.second, e.first)) + BlockValueStack.pop(); + } +} + +bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) { + // If already a constant, there is nothing to compute. + if (isa<Constant>(Val)) + return true; + + LVIValueHandle ValHandle(Val, this); + if (!ValueCache.count(ValHandle)) return false; + return ValueCache[ValHandle].count(BB); +} + +LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(Val)) + return LVILatticeVal::get(VC); + + return lookup(Val)[BB]; } -LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { - // See if we already have a value for this block. - LVILatticeVal BBLV = getCachedEntryForBlock(BB); +bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { + if (isa<Constant>(Val)) + return true; + + ValueCacheEntryTy &Cache = lookup(Val); + LVILatticeVal &BBLV = Cache[BB]; + // OverDefinedCacheUpdater is a helper object that will update + // the OverDefinedCache for us when this method exits. Make sure to + // call markResult on it as we exit, passing a bool to indicate if the + // cache needs updating, i.e. if we have solved a new value or not. + OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this); + // If we've already computed this block's value, return it. if (!BBLV.isUndefined()) { DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n'); - return BBLV; + + // Since we're reusing a cached value here, we don't need to update the + // OverDefinedCache. The cache will have been properly updated + // whenever the cached value was inserted. + ODCacheUpdater.markResult(false); + return true; } // Otherwise, this is the first time we're seeing this block. Reset the // lattice value to overdefined, so that cycles will terminate and be // conservatively correct. BBLV.markOverdefined(); - Cache[BB] = BBLV; Instruction *BBI = dyn_cast<Instruction>(Val); if (BBI == 0 || BBI->getParent() != BB) { - LVILatticeVal Result; // Start Undefined. - - // If this is a pointer, and there's a load from that pointer in this BB, - // then we know that the pointer can't be NULL.
- bool NotNull = false; - if (Val->getType()->isPointerTy()) { - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){ - LoadInst *L = dyn_cast<LoadInst>(BI); - if (L && L->getPointerAddressSpace() == 0 && - L->getPointerOperand()->getUnderlyingObject() == - Val->getUnderlyingObject()) { - NotNull = true; - break; - } - } - } - - unsigned NumPreds = 0; - // Loop over all of our predecessors, merging what we know from them into - // result. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - Result.mergeIn(getEdgeValue(*PI, BB)); - - // If we hit overdefined, exit early. The BlockVals entry is already set - // to overdefined. - if (Result.isOverdefined()) { - DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined because of pred.\n"); - // If we previously determined that this is a pointer that can't be null - // then return that rather than giving up entirely. - if (NotNull) { - const PointerType *PTy = cast<PointerType>(Val->getType()); - Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); - } - - return Result; - } - ++NumPreds; - } - - - // If this is the entry block, we must be asking about an argument. The - // value is overdefined. - if (NumPreds == 0 && BB == &BB->getParent()->front()) { - assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); - Result.markOverdefined(); - return Result; - } - - // Return the merged value, which is more precise than 'overdefined'. - assert(!Result.isOverdefined()); - return Cache[BB] = Result; + return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB)); } - - // If this value is defined by an instruction in this block, we have to - // process it here somehow or return overdefined. + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { - LVILatticeVal Result; // Start Undefined. - - // Loop over all of our predecessors, merging what we know from them into - // result. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - Value* PhiVal = PN->getIncomingValueForBlock(*PI); - Result.mergeIn(Parent.getValueOnEdge(PhiVal, *PI, BB)); - - // If we hit overdefined, exit early. The BlockVals entry is already set - // to overdefined. - if (Result.isOverdefined()) { - DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined because of pred.\n"); - return Result; - } - } - - // Return the merged value, which is more precise than 'overdefined'. - assert(!Result.isOverdefined()); - return Cache[BB] = Result; + return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB)); } - assert(Cache[BB].isOverdefined() && "Recursive query changed our cache?"); + if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) { + BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); + return ODCacheUpdater.markResult(true); + } // We can only analyze the definitions of certain classes of instructions // (integral binops and casts at the moment), so bail if this isn't one. @@ -536,10 +557,10 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { !BBI->getType()->isIntegerTy()) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because inst def found.\n"); - Result.markOverdefined(); - return Result; + BBLV.markOverdefined(); + return ODCacheUpdater.markResult(true); } - + // FIXME: We're currently limited to binops with a constant RHS. This should // be improved. 
BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI); @@ -547,34 +568,177 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because inst def found.\n"); - Result.markOverdefined(); - return Result; - } + BBLV.markOverdefined(); + return ODCacheUpdater.markResult(true); + } + + return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB)); +} + +static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { + if (LoadInst *L = dyn_cast<LoadInst>(I)) { + return L->getPointerAddressSpace() == 0 && + GetUnderlyingObject(L->getPointerOperand()) == + GetUnderlyingObject(Ptr); + } + if (StoreInst *S = dyn_cast<StoreInst>(I)) { + return S->getPointerAddressSpace() == 0 && + GetUnderlyingObject(S->getPointerOperand()) == + GetUnderlyingObject(Ptr); + } + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + if (MI->isVolatile()) return false; + if (MI->getAddressSpace() != 0) return false; + + // FIXME: check whether it has a value range that excludes zero? + ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength()); + if (!Len || Len->isZero()) return false; + + if (MI->getRawDest() == Ptr || MI->getDest() == Ptr) + return true; + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) + return MTI->getRawSource() == Ptr || MTI->getSource() == Ptr; + } + return false; +} + +bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, + Value *Val, BasicBlock *BB) { + LVILatticeVal Result; // Start Undefined. + + // If this is a pointer, and there's a load from that pointer in this BB, + // then we know that the pointer can't be NULL. + bool NotNull = false; + if (Val->getType()->isPointerTy()) { + if (isa<AllocaInst>(Val)) { + NotNull = true; + } else { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){ + if (InstructionDereferencesPointer(BI, Val)) { + NotNull = true; + break; + } + } + } + } + + // If this is the entry block, we must be asking about an argument. The + // value is overdefined. + if (BB == &BB->getParent()->getEntryBlock()) { + assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); + if (NotNull) { + const PointerType *PTy = cast<PointerType>(Val->getType()); + Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + } else { + Result.markOverdefined(); + } + BBLV = Result; + return true; + } + + // Loop over all of our predecessors, merging what we know from them into + // result. + bool EdgesMissing = false; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + LVILatticeVal EdgeResult; + EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult); + if (EdgesMissing) + continue; + Result.mergeIn(EdgeResult); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + // If we previously determined that this is a pointer that can't be null + // then return that rather than giving up entirely. + if (NotNull) { + const PointerType *PTy = cast<PointerType>(Val->getType()); + Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + } + + BBLV = Result; + return true; + } + } + if (EdgesMissing) + return false; + + // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined()); + BBLV = Result; + return true; +} + +bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, + PHINode *PN, BasicBlock *BB) { + LVILatticeVal Result; // Start Undefined. + + // Loop over all of our predecessors, merging what we know from them into + // result. + bool EdgesMissing = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PhiBB = PN->getIncomingBlock(i); + Value *PhiVal = PN->getIncomingValue(i); + LVILatticeVal EdgeResult; + EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult); + if (EdgesMissing) + continue; + + Result.mergeIn(EdgeResult); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + + BBLV = Result; + return true; + } + } + if (EdgesMissing) + return false; + + // Return the merged value, which is more precise than 'overdefined'. + assert(!Result.isOverdefined() && "Possible PHI in entry block?"); + BBLV = Result; + return true; +} + +bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, + Instruction *BBI, + BasicBlock *BB) { // Figure out the range of the LHS. If that fails, bail. - LVILatticeVal LHSVal = Parent.getValueInBlock(BBI->getOperand(0), BB); + if (!hasBlockValue(BBI->getOperand(0), BB)) { + BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0))); + return false; + } + + LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); if (!LHSVal.isConstantRange()) { - Result.markOverdefined(); - return Result; + BBLV.markOverdefined(); + return true; } - ConstantInt *RHS = 0; ConstantRange LHSRange = LHSVal.getConstantRange(); ConstantRange RHSRange(1); const IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); if (isa<BinaryOperator>(BBI)) { - RHS = dyn_cast<ConstantInt>(BBI->getOperand(1)); - if (!RHS) { - Result.markOverdefined(); - return Result; + if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) { + RHSRange = ConstantRange(RHS->getValue()); + } else { + BBLV.markOverdefined(); + return true; } - - RHSRange = ConstantRange(RHS->getValue(), RHS->getValue()+1); } - + // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. + LVILatticeVal Result; switch (BBI->getOpcode()) { case Instruction::Add: Result.markConstantRange(LHSRange.add(RHSRange)); @@ -606,6 +770,12 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { case Instruction::BitCast: Result.markConstantRange(LHSRange); break; + case Instruction::And: + Result.markConstantRange(LHSRange.binaryAnd(RHSRange)); + break; + case Instruction::Or: + Result.markConstantRange(LHSRange.binaryOr(RHSRange)); + break; // Unhandled instructions are overdefined. default: @@ -615,12 +785,19 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { break; } - return Cache[BB] = Result; + BBLV = Result; + return true; } - /// getEdgeValue - This method attempts to infer more complex -LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) { +bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, + BasicBlock *BBTo, LVILatticeVal &Result) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(Val)) { + Result = LVILatticeVal::get(VC); + return true; + } + // TODO: Handle more complex conditionals. 
If (v == 0 || v2 < 1) is false, we // know that v != 0. if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { @@ -634,9 +811,11 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) { // If V is the condition of the branch itself, then we know exactly what // it is. - if (BI->getCondition() == Val) - return LVILatticeVal::get(ConstantInt::get( + if (BI->getCondition() == Val) { + Result = LVILatticeVal::get(ConstantInt::get( Type::getInt1Ty(Val->getContext()), isTrueDest)); + return true; + } // If the condition of the branch is an equality comparison, we may be // able to infer the value. @@ -647,30 +826,40 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) { // We know that V has the RHS constant if this is a true SETEQ or // false SETNE. if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) - return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1))); - return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1))); + Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1))); + else + Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1))); + return true; } - + if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) { // Calculate the range of values that would satisfy the comparison. ConstantRange CmpRange(CI->getValue(), CI->getValue()+1); ConstantRange TrueValues = ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); - + // If we're interested in the false dest, invert the condition. if (!isTrueDest) TrueValues = TrueValues.inverse(); // Figure out the possible values of the query BEFORE this branch. - LVILatticeVal InBlock = getBlockValue(BBFrom); - if (!InBlock.isConstantRange()) - return LVILatticeVal::getRange(TrueValues); - + if (!hasBlockValue(Val, BBFrom)) { + BlockValueStack.push(std::make_pair(BBFrom, Val)); + return false; + } + + LVILatticeVal InBlock = getBlockValue(Val, BBFrom); + if (!InBlock.isConstantRange()) { + Result = LVILatticeVal::getRange(TrueValues); + return true; + } + // Find all potential values that satisfy both the input and output // conditions. ConstantRange PossibleValues = TrueValues.intersectWith(InBlock.getConstantRange()); - - return LVILatticeVal::getRange(PossibleValues); + + Result = LVILatticeVal::getRange(PossibleValues); + return true; } } } @@ -682,9 +871,8 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) { if (SI->getCondition() == Val) { // We don't know anything in the default case. if (SI->getDefaultDest() == BBTo) { - LVILatticeVal Result; Result.markOverdefined(); - return Result; + return true; } // We only know something if there is exactly one value that goes from @@ -697,51 +885,48 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) { EdgeVal = SI->getCaseValue(i); } assert(EdgeVal && "Missing successor?"); - if (NumEdges == 1) - return LVILatticeVal::get(EdgeVal); + if (NumEdges == 1) { + Result = LVILatticeVal::get(EdgeVal); + return true; + } } } // Otherwise see if the value is known in the block. 
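The two hunks above do all of their reasoning on ConstantRange: solveBlockValueConstantRange pushes ranges through transfer functions such as add(), and getEdgeValue intersects the makeICmpRegion() region for the taken branch with what was already known in the predecessor. A minimal sketch of both steps, using a toy non-wrapping Interval in place of ConstantRange (the real class works on APInts modulo 2^N and supports wrapped ranges; this sketch does not):

#include <algorithm>
#include <cstdint>
#include <cstdio>

struct Interval {
  int64_t Lo, Hi; // inclusive bounds, Lo <= Hi

  // Transfer function for Instruction::Add when both ranges are known.
  Interval add(const Interval &R) const { return {Lo + R.Lo, Hi + R.Hi}; }

  // Edge refinement: values satisfying the branch AND known in the block.
  Interval intersectWith(const Interval &R) const {
    return {std::max(Lo, R.O > R.Lo ? R.Lo : R.Lo), std::min(Hi, R.Hi)};
  }
};

int main() {
  Interval InBlock    = {0, 100};        // known before the branch
  Interval TrueValues = {INT64_MIN, 9};  // region where "V < 10" holds
  Interval OnEdge = TrueValues.intersectWith(InBlock);
  Interval Sum = OnEdge.add({5, 5});     // a ConstantInt RHS is a 1-element range
  std::printf("edge [%lld,%lld], sum [%lld,%lld]\n",
              (long long)OnEdge.Lo, (long long)OnEdge.Hi,
              (long long)Sum.Lo, (long long)Sum.Hi); // [0,9], [5,14]
}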
- return getBlockValue(BBFrom); + if (hasBlockValue(Val, BBFrom)) { + Result = getBlockValue(Val, BBFrom); + return true; + } + BlockValueStack.push(std::make_pair(BBFrom, Val)); + return false; } - -//===----------------------------------------------------------------------===// -// LazyValueInfoCache Impl -//===----------------------------------------------------------------------===// - LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) { - // If already a constant, there is nothing to compute. - if (Constant *VC = dyn_cast<Constant>(V)) - return LVILatticeVal::get(VC); - DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); - LVILatticeVal Result = LVIQuery(V, *this, - ValueCache[LVIValueHandle(V, this)], - OverDefinedCache).getBlockValue(BB); - + BlockValueStack.push(std::make_pair(BB, V)); + solve(); + LVILatticeVal Result = getBlockValue(V, BB); + DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } LVILatticeVal LazyValueInfoCache:: getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) { - // If already a constant, there is nothing to compute. - if (Constant *VC = dyn_cast<Constant>(V)) - return LVILatticeVal::get(VC); - DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); - LVILatticeVal Result = - LVIQuery(V, *this, ValueCache[LVIValueHandle(V, this)], - OverDefinedCache).getEdgeValue(FromBB, ToBB); - + LVILatticeVal Result; + if (!getEdgeValue(V, FromBB, ToBB, Result)) { + solve(); + bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result); + (void)WasFastQuery; + assert(WasFastQuery && "More work to do after problem solved?"); + } + DEBUG(dbgs() << " Result = " << Result << "\n"); - return Result; } @@ -761,8 +946,8 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, worklist.push_back(OldSucc); DenseSet<Value*> ClearSet; - for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator - I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) { + for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), + E = OverDefinedCache.end(); I != E; ++I) { if (I->first == OldSucc) ClearSet.insert(I->second); } @@ -779,17 +964,17 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, if (ToUpdate == NewSucc) continue; bool changed = false; - for (DenseSet<Value*>::iterator I = ClearSet.begin(),E = ClearSet.end(); + for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end(); I != E; ++I) { // If a value was marked overdefined in OldSucc, and is here too... - std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator OI = + DenseSet<OverDefinedPairTy>::iterator OI = OverDefinedCache.find(std::make_pair(ToUpdate, *I)); if (OI == OverDefinedCache.end()) continue; // Remove it from the caches. ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)]; ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); - + assert(CI != Entry.end() && "Couldn't find entry to update?"); Entry.erase(CI); OverDefinedCache.erase(OI); @@ -798,7 +983,7 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, // blocks successors too. 
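getValueInBlock and getValueOnEdge above no longer recurse through LVIQuery; they push the query onto BlockValueStack and let solve() iterate until nothing is missing. A standalone sketch of that driver shape, with integers standing in for BasicBlock*/Value* and a made-up dependency map (names are illustrative only):

#include <cstdio>
#include <map>
#include <stack>
#include <utility>
#include <vector>

typedef std::pair<int, int> Work; // (block, value) stand-ins

static std::map<Work, bool> Cache;              // plays the role of hasBlockValue()
static std::map<Work, std::vector<Work> > Deps; // prerequisites per item

// Try to solve one item; on missing inputs, push them and report false --
// the same return-code shape as solveBlockValue*/getEdgeValue above.
static bool solveOne(std::stack<Work> &Stack, const Work &W) {
  bool Missing = false;
  const std::vector<Work> &Pre = Deps[W];
  for (size_t i = 0; i != Pre.size(); ++i)
    if (!Cache[Pre[i]]) {
      Stack.push(Pre[i]); // like BlockValueStack.push(std::make_pair(BB, V))
      Missing = true;
    }
  if (Missing)
    return false;
  Cache[W] = true; // all inputs available: record this block value
  return true;
}

static void solve(std::stack<Work> &Stack) {
  while (!Stack.empty()) {
    Work W = Stack.top();
    if (solveOne(Stack, W))
      Stack.pop(); // resolved; otherwise prerequisites were pushed, retry
  }
}

int main() {
  Deps[Work(2, 1)].push_back(Work(1, 1)); // value in block 2 needs block 1 first
  std::stack<Work> Stack;
  Stack.push(Work(2, 1)); // like getValueInBlock(V, BB)
  solve(Stack);
  std::printf("solved: %d\n", (int)Cache[Work(2, 1)]); // prints 1
}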
changed = true; } - + if (!changed) continue; worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); @@ -838,7 +1023,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { if (Result.isConstant()) return Result.getConstant(); - else if (Result.isConstantRange()) { + if (Result.isConstantRange()) { ConstantRange CR = Result.getConstantRange(); if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); @@ -854,7 +1039,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, if (Result.isConstant()) return Result.getConstant(); - else if (Result.isConstantRange()) { + if (Result.isConstantRange()) { ConstantRange CR = Result.getConstantRange(); if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); @@ -874,7 +1059,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, Constant *Res = 0; if (Result.isConstant()) { Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD); - if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res)) + if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res)) return ResCI->isZero() ? False : True; return Unknown; } @@ -899,13 +1084,12 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, } // Handle more complex predicates. - ConstantRange RHS(CI->getValue(), CI->getValue()+1); - ConstantRange TrueValues = ConstantRange::makeICmpRegion(Pred, RHS); - if (CR.intersectWith(TrueValues).isEmptySet()) - return False; - else if (TrueValues.contains(CR)) + ConstantRange TrueValues = + ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); + if (TrueValues.contains(CR)) return True; - + if (TrueValues.inverse().contains(CR)) + return False; return Unknown; } @@ -932,7 +1116,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, } void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, - BasicBlock* NewSucc) { + BasicBlock *NewSucc) { if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc); } diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index 7f51202ecb55..efb722bb97c4 100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -21,7 +21,7 @@ using namespace llvm; // Register this pass... char LibCallAliasAnalysis::ID = 0; INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa", - "LibCall Alias Analysis", false, true, false); + "LibCall Alias Analysis", false, true, false) FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { return new LibCallAliasAnalysis(LCI); @@ -43,8 +43,8 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { /// vs the specified pointer/size. AliasAnalysis::ModRefResult LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, - ImmutableCallSite CS, const Value *P, - unsigned Size) { + ImmutableCallSite CS, + const Location &Loc) { // If we have a function, check to see what kind of mod/ref effects it // has. Start by including any info globally known about the function. AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior; @@ -64,9 +64,9 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, if (FI->DetailsType == LibCallFunctionInfo::DoesNot) { // Find out if the pointer refers to a known location. 
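The getPredicateOnEdge rewrite above answers a comparison in three ways: True when the edge's value range is contained in the satisfying region, False when it is contained in the region's inverse, Unknown otherwise. A sketch of that decision over toy non-wrapping intervals; the inverse test is replaced here by a disjointness check, which is only equivalent for unwrapped ranges:

#include <cstdint>
#include <cstdio>

struct Interval {
  int64_t Lo, Hi; // inclusive
  bool contains(const Interval &R) const { return Lo <= R.Lo && R.Hi <= Hi; }
};

enum Tristate { False, True, Unknown };

static Tristate evaluate(const Interval &TrueValues, const Interval &CR) {
  if (TrueValues.contains(CR)) return True;
  // Stand-in for TrueValues.inverse().contains(CR): CR entirely outside.
  if (CR.Hi < TrueValues.Lo || CR.Lo > TrueValues.Hi) return False;
  return Unknown;
}

int main() {
  Interval TrueValues = {0, 9}; // values for which the predicate holds
  std::printf("%d %d %d\n",
              evaluate(TrueValues, {2, 5}),    // 1 (True)
              evaluate(TrueValues, {20, 30}),  // 0 (False)
              evaluate(TrueValues, {5, 15}));  // 2 (Unknown)
}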
for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { - const LibCallLocationInfo &Loc = + const LibCallLocationInfo &LocInfo = LCI->getLocationInfo(Details[i].LocationID); - LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size); + LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); if (Res != LibCallLocationInfo::Yes) continue; // If we find a match against a location that we 'do not' interact with, @@ -85,9 +85,9 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, // Find out if the pointer refers to a known location. bool NoneMatch = true; for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { - const LibCallLocationInfo &Loc = + const LibCallLocationInfo &LocInfo = LCI->getLocationInfo(Details[i].LocationID); - LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size); + LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); if (Res == LibCallLocationInfo::No) continue; // If we don't know if this pointer points to the location, then we have to @@ -118,7 +118,7 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, // AliasAnalysis::ModRefResult LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const Value *P, unsigned Size) { + const Location &Loc) { ModRefResult MRInfo = ModRef; // If this is a direct call to a function that LCI knows about, get the @@ -126,12 +126,12 @@ LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS, if (LCI) { if (const Function *F = CS.getCalledFunction()) { if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) { - MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, P, Size)); + MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc)); if (MRInfo == NoModRef) return NoModRef; } } } // The AliasAnalysis base class has some smarts, lets use them. - return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, P, Size)); + return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc)); } diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index a9d972435f5f..fc7edc0525f9 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -70,7 +70,7 @@ namespace { void visitCallSite(CallSite CS); void visitMemoryReference(Instruction &I, Value *Ptr, - unsigned Size, unsigned Align, + uint64_t Size, unsigned Align, const Type *Ty, unsigned Flags); void visitCallInst(CallInst &I); @@ -108,7 +108,9 @@ namespace { raw_string_ostream MessagesStr; static char ID; // Pass identification, replacement for typeid - Lint() : FunctionPass(ID), MessagesStr(Messages) {} + Lint() : FunctionPass(ID), MessagesStr(Messages) { + initializeLintPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); @@ -129,12 +131,6 @@ namespace { } } - void WriteType(const Type *T) { - if (!T) return; - MessagesStr << ' '; - WriteTypeSymbolic(MessagesStr, T, Mod); - } - // CheckFailed - A check failed, so print out the condition and the message // that failed. This provides a nice place to put a breakpoint if you want // to see why something is not correct. 
@@ -147,27 +143,16 @@ namespace { WriteValue(V3); WriteValue(V4); } - - void CheckFailed(const Twine &Message, const Value *V1, - const Type *T2, const Value *V3 = 0) { - MessagesStr << Message.str() << "\n"; - WriteValue(V1); - WriteType(T2); - WriteValue(V3); - } - - void CheckFailed(const Twine &Message, const Type *T1, - const Type *T2 = 0, const Type *T3 = 0) { - MessagesStr << Message.str() << "\n"; - WriteType(T1); - WriteType(T2); - WriteType(T3); - } }; } char Lint::ID = 0; -INITIALIZE_PASS(Lint, "lint", "Statically lint-checks LLVM IR", false, true); +INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", + false, true) // Assert - We know that cond should be true, if not print an error message. #define Assert(C, M) \ @@ -208,7 +193,8 @@ void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); - visitMemoryReference(I, Callee, ~0u, 0, 0, MemRef::Callee); + visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, + 0, 0, MemRef::Callee); if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { Assert1(CS.getCallingConv() == F->getCallingConv(), @@ -240,15 +226,17 @@ void Lint::visitCallSite(CallSite CS) { "Undefined behavior: Call argument type mismatches " "callee parameter type", &I); - // Check that noalias arguments don't alias other arguments. The - // AliasAnalysis API isn't expressive enough for what we really want - // to do. Known partial overlap is not distinguished from the case - // where nothing is known. + // Check that noalias arguments don't alias other arguments. This is + // not fully precise because we don't know the sizes of the dereferenced + // memory regions. if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) - for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) { - Assert1(AI == BI || AA->alias(*AI, *BI) != AliasAnalysis::MustAlias, - "Unusual: noalias argument aliases another argument", &I); - } + for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) + if (AI != BI && (*BI)->getType()->isPointerTy()) { + AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI); + Assert1(Result != AliasAnalysis::MustAlias && + Result != AliasAnalysis::PartialAlias, + "Unusual: noalias argument aliases another argument", &I); + } // Check that an sret argument points to valid memory. if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { @@ -281,15 +269,17 @@ void Lint::visitCallSite(CallSite CS) { case Intrinsic::memcpy: { MemCpyInst *MCI = cast<MemCpyInst>(&I); // TODO: If the size is known, use it. - visitMemoryReference(I, MCI->getDest(), ~0u, MCI->getAlignment(), 0, + visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize, + MCI->getAlignment(), 0, MemRef::Write); - visitMemoryReference(I, MCI->getSource(), ~0u, MCI->getAlignment(), 0, + visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize, + MCI->getAlignment(), 0, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API // isn't expressive enough for what we really want to do. Known partial // overlap is not distinguished from the case where nothing is known. 
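Both checks above reduce to the same question: can two pointer arguments definitely overlap? A self-contained sketch of the pairwise scan Lint now performs for noalias arguments, with byte ranges standing in for alias-analysis queries (full overlap plays the role of MustAlias, partial overlap of the new PartialAlias; all names are illustrative):

#include <cstdio>

struct Arg { long Begin, End; bool NoAlias; }; // [Begin, End) byte range

static bool overlaps(const Arg &A, const Arg &B) {
  return A.Begin < B.End && B.Begin < A.End;
}

int main() {
  Arg Args[] = { {0, 16, true}, {8, 24, false}, {32, 48, false} };
  const int N = sizeof(Args) / sizeof(Args[0]);
  for (int AI = 0; AI < N; ++AI) {
    if (!Args[AI].NoAlias) continue; // only noalias args are checked
    for (int BI = 0; BI < N; ++BI)
      if (AI != BI && overlaps(Args[AI], Args[BI]))
        std::printf("Unusual: noalias argument %d aliases argument %d\n",
                    AI, BI);
  }
}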
- unsigned Size = 0; + uint64_t Size = 0; if (const ConstantInt *Len = dyn_cast<ConstantInt>(findValue(MCI->getLength(), /*OffsetOk=*/false))) @@ -303,16 +293,19 @@ void Lint::visitCallSite(CallSite CS) { case Intrinsic::memmove: { MemMoveInst *MMI = cast<MemMoveInst>(&I); // TODO: If the size is known, use it. - visitMemoryReference(I, MMI->getDest(), ~0u, MMI->getAlignment(), 0, + visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize, + MMI->getAlignment(), 0, MemRef::Write); - visitMemoryReference(I, MMI->getSource(), ~0u, MMI->getAlignment(), 0, + visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize, + MMI->getAlignment(), 0, MemRef::Read); break; } case Intrinsic::memset: { MemSetInst *MSI = cast<MemSetInst>(&I); // TODO: If the size is known, use it. - visitMemoryReference(I, MSI->getDest(), ~0u, MSI->getAlignment(), 0, + visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize, + MSI->getAlignment(), 0, MemRef::Write); break; } @@ -322,24 +315,26 @@ void Lint::visitCallSite(CallSite CS) { "Undefined behavior: va_start called in a non-varargs function", &I); - visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, - MemRef::Read | MemRef::Write); + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: - visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Write); - visitMemoryReference(I, CS.getArgument(1), ~0u, 0, 0, MemRef::Read); + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Write); + visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read); break; case Intrinsic::vaend: - visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, - MemRef::Read | MemRef::Write); + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read | MemRef::Write); break; case Intrinsic::stackrestore: // Stackrestore doesn't read or write memory, but it sets the // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. - visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, - MemRef::Read | MemRef::Write); + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read | MemRef::Write); break; } } @@ -368,7 +363,7 @@ void Lint::visitReturnInst(ReturnInst &I) { // TODO: Check that the reference is in bounds. // TODO: Check readnone/readonly function attributes. void Lint::visitMemoryReference(Instruction &I, - Value *Ptr, unsigned Size, unsigned Align, + Value *Ptr, uint64_t Size, unsigned Align, const Type *Ty, unsigned Flags) { // If no memory is being referenced, it doesn't matter if the pointer // is valid. @@ -512,12 +507,13 @@ void Lint::visitAllocaInst(AllocaInst &I) { } void Lint::visitVAArgInst(VAArgInst &I) { - visitMemoryReference(I, I.getOperand(0), ~0u, 0, 0, + visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0, MemRef::Read | MemRef::Write); } void Lint::visitIndirectBrInst(IndirectBrInst &I) { - visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee); + visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0, + MemRef::Branchee); Assert1(I.getNumDestinations() != 0, "Undefined behavior: indirectbr with no destinations", &I); @@ -571,7 +567,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // TODO: Look through eliminable cast pairs. // TODO: Look through calls with unique return values. 
// TODO: Look through vector insert/extract/shuffle. - V = OffsetOk ? V->getUnderlyingObject() : V->stripPointerCasts(); + V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts(); if (LoadInst *L = dyn_cast<LoadInst>(V)) { BasicBlock::iterator BBI = L; BasicBlock *BB = L->getParent(); @@ -587,8 +583,9 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, BBI = BB->end(); } } else if (PHINode *PN = dyn_cast<PHINode>(V)) { - if (Value *W = PN->hasConstantValue(DT)) - return findValueImpl(W, OffsetOk, Visited); + if (Value *W = PN->hasConstantValue()) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); } else if (CastInst *CI = dyn_cast<CastInst>(V)) { if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) : Type::getInt64Ty(V->getContext()))) @@ -620,9 +617,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Value *W = SimplifyInstruction(Inst, TD)) - if (W != Inst) - return findValueImpl(W, OffsetOk, Visited); + if (Value *W = SimplifyInstruction(Inst, TD, DT)) + return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (Value *W = ConstantFoldConstantExpression(CE, TD)) if (W != V) diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp index 0225f4fa2548..a0e603419f57 100644 --- a/lib/Analysis/LiveValues.cpp +++ b/lib/Analysis/LiveValues.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LiveValues.h" +#include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" using namespace llvm; @@ -22,10 +23,16 @@ namespace llvm { } char LiveValues::ID = 0; -INITIALIZE_PASS(LiveValues, "live-values", - "Value Liveness Analysis", false, true); - -LiveValues::LiveValues() : FunctionPass(ID) {} +INITIALIZE_PASS_BEGIN(LiveValues, "live-values", + "Value Liveness Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LiveValues, "live-values", + "Value Liveness Analysis", false, true) + +LiveValues::LiveValues() : FunctionPass(ID) { + initializeLiveValuesPass(*PassRegistry::getPassRegistry()); +} void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 2ba1d86cdb40..2ea27fb62fcb 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -49,7 +49,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { /// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and /// bitcasts to get back to the underlying object being addressed, keeping /// track of the offset in bytes from the GEPs relative to the result. -/// This is closely related to Value::getUnderlyingObject but is located +/// This is closely related to GetUnderlyingObject but is located /// here to avoid making VMCore depend on TargetData. static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, uint64_t &ByteOffset, @@ -166,7 +166,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; // If we're using alias analysis to disambiguate get the size of *Ptr. 
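Several hunks above swap Value::getUnderlyingObject() for the freestanding GetUnderlyingObject(). Conceptually it strips address computations (GEPs, bitcasts) until it reaches the allocation or an unknown base, and two distinct identified bases cannot alias. A toy sketch of that walk, with a made-up PtrExpr node in place of llvm::Value:

#include <cstdio>

struct PtrExpr {
  const PtrExpr *Base; // non-null for GEP/bitcast-like nodes
  const char *Name;    // set on roots (alloca, global, argument)
};

static const PtrExpr *getUnderlyingObject(const PtrExpr *P) {
  while (P->Base) P = P->Base; // follow GEPs/bitcasts to the root
  return P;
}

int main() {
  PtrExpr A  = {nullptr, "%a"}; // an alloca
  PtrExpr G1 = {&A, nullptr};   // getelementptr %a, ...
  PtrExpr G2 = {&G1, nullptr};  // bitcast of the GEP
  PtrExpr B  = {nullptr, "%b"}; // a different alloca
  std::printf("%s vs %s\n", getUnderlyingObject(&G2)->Name,
              getUnderlyingObject(&B)->Name); // %a vs %b: distinct bases
}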
- unsigned AccessSize = 0; + uint64_t AccessSize = 0; if (AA) { const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); AccessSize = AA->getTypeStoreSize(AccessTy); diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp index 82c02dcd1342..c1afe8fbd618 100644 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/lib/Analysis/LoopDependenceAnalysis.cpp @@ -27,6 +27,8 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Instructions.h" #include "llvm/Operator.h" #include "llvm/Support/Allocator.h" @@ -46,8 +48,12 @@ LoopPass *llvm::createLoopDependenceAnalysisPass() { return new LoopDependenceAnalysis(); } -INITIALIZE_PASS(LoopDependenceAnalysis, "lda", - "Loop Dependence Analysis", false, true); +INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda", + "Loop Dependence Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda", + "Loop Dependence Analysis", false, true) char LoopDependenceAnalysis::ID = 0; //===----------------------------------------------------------------------===// @@ -86,8 +92,8 @@ static Value *GetPointerOperand(Value *I) { static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA, const Value *A, const Value *B) { - const Value *aObj = A->getUnderlyingObject(); - const Value *bObj = B->getUnderlyingObject(); + const Value *aObj = GetUnderlyingObject(A); + const Value *bObj = GetUnderlyingObject(B); return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()), bObj, AA->getTypeStoreSize(bObj->getType())); } @@ -128,7 +134,7 @@ void LoopDependenceAnalysis::getLoops(const SCEV *S, DenseSet<const Loop*>* Loops) const { // Refactor this into an SCEVVisitor, if efficiency becomes a concern. for (const Loop *L = this->L; L != 0; L = L->getParentLoop()) - if (!S->isLoopInvariant(L)) + if (!SE->isLoopInvariant(S, L)) Loops->insert(L); } @@ -217,6 +223,7 @@ LoopDependenceAnalysis::analysePair(DependencePair *P) const { switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) { case AliasAnalysis::MayAlias: + case AliasAnalysis::PartialAlias: // We can not analyse objects if we do not know about their aliasing. DEBUG(dbgs() << "---> [?] 
may alias\n"); return Unknown; diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 46219d1b6f55..05831402f409 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -38,7 +38,9 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), cl::desc("Verify loop info (time consuming)")); char LoopInfo::ID = 0; -INITIALIZE_PASS(LoopInfo, "loops", "Natural Loop Information", true, true); +INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true) //===----------------------------------------------------------------------===// // Loop implementation @@ -48,15 +50,18 @@ INITIALIZE_PASS(LoopInfo, "loops", "Natural Loop Information", true, true); /// bool Loop::isLoopInvariant(Value *V) const { if (Instruction *I = dyn_cast<Instruction>(V)) - return isLoopInvariant(I); + return !contains(I); return true; // All non-instructions are loop invariant } -/// isLoopInvariant - Return true if the specified instruction is -/// loop-invariant. -/// -bool Loop::isLoopInvariant(Instruction *I) const { - return !contains(I); +/// hasLoopInvariantOperands - Return true if all the operands of the +/// specified instruction are loop invariant. +bool Loop::hasLoopInvariantOperands(Instruction *I) const { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!isLoopInvariant(I->getOperand(i))) + return false; + + return true; } /// makeLoopInvariant - If the given value is an instruciton inside of the @@ -105,6 +110,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt)) return false; + // Hoist. I->moveBefore(InsertPt); Changed = true; @@ -192,7 +198,7 @@ Value *Loop::getTripCount() const { /// getSmallConstantTripCount - Returns the trip count of this loop as a /// normal unsigned value, if possible. Returns 0 if the trip count is unknown -/// of not constant. Will also return 0 if the trip count is very large +/// or not constant. Will also return 0 if the trip count is very large /// (>= 2^32) unsigned Loop::getSmallConstantTripCount() const { Value* TripCount = this->getTripCount(); diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 15d4db8f5f98..8e1a7bfef699 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -30,7 +30,6 @@ private: public: static char ID; - PrintLoopPass() : LoopPass(ID), Out(dbgs()) {} PrintLoopPass(const std::string &B, raw_ostream &o) : LoopPass(ID), Banner(B), Out(o) {} diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp new file mode 100644 index 000000000000..64d215c37cc7 --- /dev/null +++ b/lib/Analysis/MemDepPrinter.cpp @@ -0,0 +1,167 @@ +//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/LLVMContext.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" +using namespace llvm; + +namespace { + struct MemDepPrinter : public FunctionPass { + const Function *F; + + typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag; + typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep; + typedef SmallSetVector<Dep, 4> DepSet; + typedef DenseMap<const Instruction *, DepSet> DepSetMap; + DepSetMap Deps; + + static char ID; // Pass identifcation, replacement for typeid + MemDepPrinter() : FunctionPass(ID) { + initializeMemDepPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + void print(raw_ostream &OS, const Module * = 0) const; + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<MemoryDependenceAnalysis>(); + AU.setPreservesAll(); + } + + virtual void releaseMemory() { + Deps.clear(); + F = 0; + } + }; +} + +char MemDepPrinter::ID = 0; +INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps", + "Print MemDeps of function", false, true) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps", + "Print MemDeps of function", false, true) + +FunctionPass *llvm::createMemDepPrinter() { + return new MemDepPrinter(); +} + +bool MemDepPrinter::runOnFunction(Function &F) { + this->F = &F; + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>(); + + // All this code uses non-const interfaces because MemDep is not + // const-friendly, though nothing is actually modified. + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + Instruction *Inst = &*I; + + if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory()) + continue; + + MemDepResult Res = MDA.getDependency(Inst); + if (!Res.isNonLocal()) { + assert(Res.isClobber() != Res.isDef() && + "Local dep should be def or clobber!"); + Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(), + Res.isClobber()), + static_cast<BasicBlock *>(0))); + } else if (CallSite CS = cast<Value>(Inst)) { + const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = + MDA.getNonLocalCallDependency(CS); + + DepSet &InstDeps = Deps[Inst]; + for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator + I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { + const MemDepResult &Res = I->getResult(); + assert(Res.isClobber() != Res.isDef() && + "Resolved non-local call dep should be def or clobber!"); + InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), + Res.isClobber()), + I->getBB())); + } + } else { + SmallVector<NonLocalDepResult, 4> NLDI; + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + // FIXME: Volatile is not handled properly here. + AliasAnalysis::Location Loc = AA.getLocation(LI); + MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(), + LI->getParent(), NLDI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // FIXME: Volatile is not handled properly here. 
+ AliasAnalysis::Location Loc = AA.getLocation(SI); + MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI); + } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) { + AliasAnalysis::Location Loc = AA.getLocation(VI); + MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI); + } else { + llvm_unreachable("Unknown memory instruction!"); + } + + DepSet &InstDeps = Deps[Inst]; + for (SmallVectorImpl<NonLocalDepResult>::const_iterator + I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { + const MemDepResult &Res = I->getResult(); + assert(Res.isClobber() != Res.isDef() && + "Resolved non-local pointer dep should be def or clobber!"); + InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), + Res.isClobber()), + I->getBB())); + } + } + } + + return false; +} + +void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { + for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) { + const Instruction *Inst = &*I; + + DepSetMap::const_iterator DI = Deps.find(Inst); + if (DI == Deps.end()) + continue; + + const DepSet &InstDeps = DI->second; + + for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end(); + I != E; ++I) { + const Instruction *DepInst = I->first.getPointer(); + bool isClobber = I->first.getInt(); + const BasicBlock *DepBB = I->second; + + OS << " " << (isClobber ? "Clobber" : " Def"); + if (DepBB) { + OS << " in block "; + WriteAsOperand(OS, DepBB, /*PrintType=*/false, M); + } + OS << " from: "; + if (DepInst == Inst) + OS << "<unspecified>"; + else + DepInst->print(OS); + OS << "\n"; + } + + Inst->print(OS); + OS << "\n\n"; + } +} diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index d18d5ce0ea4c..35043bddfaf6 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -19,15 +19,18 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/PredIteratorCache.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetData.h" using namespace llvm; STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); @@ -46,11 +49,15 @@ STATISTIC(NumCacheCompleteNonLocalPtr, char MemoryDependenceAnalysis::ID = 0; // Register this pass... 
-INITIALIZE_PASS(MemoryDependenceAnalysis, "memdep", - "Memory Dependence Analysis", false, true); +INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", + "Memory Dependence Analysis", false, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", + "Memory Dependence Analysis", false, true) MemoryDependenceAnalysis::MemoryDependenceAnalysis() : FunctionPass(ID), PredCache(0) { + initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry()); } MemoryDependenceAnalysis::~MemoryDependenceAnalysis() { } @@ -77,6 +84,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); + TD = getAnalysisIfAvailable<TargetData>(); if (PredCache == 0) PredCache.reset(new PredIteratorCache()); return false; @@ -92,11 +100,79 @@ static void RemoveFromReverseMap(DenseMap<Instruction*, InstIt = ReverseMap.find(Inst); assert(InstIt != ReverseMap.end() && "Reverse map out of sync?"); bool Found = InstIt->second.erase(Val); - assert(Found && "Invalid reverse map!"); Found=Found; + assert(Found && "Invalid reverse map!"); (void)Found; if (InstIt->second.empty()) ReverseMap.erase(InstIt); } +/// GetLocation - If the given instruction references a specific memory +/// location, fill in Loc with the details, otherwise set Loc.Ptr to null. +/// Return a ModRefInfo value describing the general behavior of the +/// instruction. +static +AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, + AliasAnalysis::Location &Loc, + AliasAnalysis *AA) { + if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + if (LI->isVolatile()) { + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; + } + Loc = AA->getLocation(LI); + return AliasAnalysis::Ref; + } + + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->isVolatile()) { + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; + } + Loc = AA->getLocation(SI); + return AliasAnalysis::Mod; + } + + if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { + Loc = AA->getLocation(V); + return AliasAnalysis::ModRef; + } + + if (const CallInst *CI = isFreeCall(Inst)) { + // calls to free() deallocate the entire structure + Loc = AliasAnalysis::Location(CI->getArgOperand(0)); + return AliasAnalysis::Mod; + } + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + Loc = AliasAnalysis::Location(II->getArgOperand(1), + cast<ConstantInt>(II->getArgOperand(0)) + ->getZExtValue(), + II->getMetadata(LLVMContext::MD_tbaa)); + // These intrinsics don't really modify the memory, but returning Mod + // will allow them to be handled conservatively. + return AliasAnalysis::Mod; + case Intrinsic::invariant_end: + Loc = AliasAnalysis::Location(II->getArgOperand(2), + cast<ConstantInt>(II->getArgOperand(1)) + ->getZExtValue(), + II->getMetadata(LLVMContext::MD_tbaa)); + // These intrinsics don't really modify the memory, but returning Mod + // will allow them to be handled conservatively. + return AliasAnalysis::Mod; + default: + break; + } + + // Otherwise, just do the coarse-grained thing that always works. 
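The new GetLocation() helper above centralizes the per-instruction special cases: it fills in the memory Location an instruction touches and returns a coarse mod/ref summary, so callers stop duplicating the load/store/vaarg/free logic. A sketch of that shape with toy types only; the fixed sizes below are placeholders, where the real code derives sizes from TargetData and the volatile and intrinsic cases are omitted:

#include <cstdio>

enum Kind { Load, Store, VAArg, Other };
enum ModRef { NoModRef = 0, Ref = 1, Mod = 2, ModRefBoth = 3 };

struct Location { const void *Ptr; unsigned long long Size; };

static ModRef getLocation(Kind K, const void *Ptr, Location &Loc) {
  switch (K) {
  case Load:  Loc = {Ptr, 4};     return Ref;        // reads Loc
  case Store: Loc = {Ptr, 4};     return Mod;        // writes Loc
  case VAArg: Loc = {Ptr, ~0ULL}; return ModRefBoth; // unknown size
  default:    Loc = {nullptr, 0}; return NoModRef;   // no memory touched
  }
}

int main() {
  int X;
  Location Loc;
  ModRef MR = getLocation(Store, &X, Loc);
  std::printf("mod=%d ref=%d size=%llu\n",
              (MR & Mod) != 0, (MR & Ref) != 0, Loc.Size);
}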
+ if (Inst->mayWriteToMemory()) + return AliasAnalysis::ModRef; + if (Inst->mayReadFromMemory()) + return AliasAnalysis::Ref; + return AliasAnalysis::NoModRef; +} /// getCallSiteDependencyFrom - Private helper for finding the local /// dependencies of a call site. @@ -108,19 +184,16 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, Instruction *Inst = --ScanIt; // If this inst is a memory op, get the pointer it accessed - Value *Pointer = 0; - uint64_t PointerSize = 0; - if (StoreInst *S = dyn_cast<StoreInst>(Inst)) { - Pointer = S->getPointerOperand(); - PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType()); - } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { - Pointer = V->getOperand(0); - PointerSize = AA->getTypeStoreSize(V->getType()); - } else if (const CallInst *CI = isFreeCall(Inst)) { - Pointer = CI->getArgOperand(0); - // calls to free() erase the entire structure - PointerSize = ~0ULL; - } else if (CallSite InstCS = cast<Value>(Inst)) { + AliasAnalysis::Location Loc; + AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); + if (Loc.Ptr) { + // A simple instruction. + if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef) + return MemDepResult::getClobber(Inst); + continue; + } + + if (CallSite InstCS = cast<Value>(Inst)) { // Debug intrinsics don't cause dependences. if (isa<DbgInfoIntrinsic>(Inst)) continue; // If these two calls do not interfere, look past it. @@ -128,23 +201,17 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, case AliasAnalysis::NoModRef: // If the two calls are the same, return InstCS as a Def, so that // CS can be found redundant and eliminated. - if (isReadOnlyCall && InstCS.onlyReadsMemory() && + if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) && CS.getInstruction()->isIdenticalToWhenDefined(Inst)) return MemDepResult::getDef(Inst); // Otherwise if the two calls don't interact (e.g. InstCS is readnone) // keep scanning. - continue; + break; default: return MemDepResult::getClobber(Inst); } - } else { - // Non-memory instruction. - continue; } - - if (AA->getModRefInfo(CS, Pointer, PointerSize) != AliasAnalysis::NoModRef) - return MemDepResult::getClobber(Inst); } // No dependence found. If this is the entry block of the function, it is a @@ -155,10 +222,11 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, } /// getPointerDependencyFrom - Return the instruction on which a memory -/// location depends. If isLoad is true, this routine ignore may-aliases with -/// read-only operations. +/// location depends. If isLoad is true, this routine ignores may-aliases with +/// read-only operations. If isLoad is false, this routine ignores may-aliases +/// with reads from read-only locations. MemDepResult MemoryDependenceAnalysis:: -getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, +getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB) { Value *InvariantTag = 0; @@ -175,8 +243,8 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - // Debug intrinsics don't cause dependences. - if (isa<DbgInfoIntrinsic>(Inst)) continue; + // Debug intrinsics don't (and can't) cause dependences. + if (isa<DbgInfoIntrinsic>(II)) continue; // If we pass an invariant-end marker, then we've just entered an // invariant region and can start ignoring dependencies. 
@@ -184,43 +252,53 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point. - AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(2), MemPtr); - if (R == AliasAnalysis::MustAlias) { + AliasAnalysis::AliasResult R = + AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc); + if (R == AliasAnalysis::MustAlias) InvariantTag = II->getArgOperand(0); - continue; - } - + + continue; + } + // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. - } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start) { // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point. - AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(1), MemPtr); + AliasAnalysis::AliasResult R = + AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc); if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(II); + continue; } } // If we're querying on a load and we're in an invariant region, we're done // at this point. Nothing a load depends on can live in an invariant region. + // + // FIXME: this will prevent us from returning load/load must-aliases, so GVN + // won't remove redundant loads. if (isLoad && InvariantTag) continue; // Values depend on loads if the pointers are must aliased. This means that // a load depends on another must aliased load from the same value. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - Value *Pointer = LI->getPointerOperand(); - uint64_t PointerSize = AA->getTypeStoreSize(LI->getType()); + AliasAnalysis::Location LoadLoc = AA->getLocation(LI); // If we found a pointer, check if it could be the same as our pointer. - AliasAnalysis::AliasResult R = - AA->alias(Pointer, PointerSize, MemPtr, MemSize); + AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); if (R == AliasAnalysis::NoAlias) continue; // May-alias loads don't depend on each other without a dependence. - if (isLoad && R == AliasAnalysis::MayAlias) + if (isLoad && R != AliasAnalysis::MustAlias) continue; + + // Stores don't alias loads from read-only memory. + if (!isLoad && AA->pointsToConstantMemory(LoadLoc)) + continue; + // Stores depend on may and must aliased loads, loads depend on must-alias // loads. return MemDepResult::getDef(Inst); @@ -234,23 +312,21 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. - if (AA->getModRefInfo(SI, MemPtr, MemSize) == AliasAnalysis::NoModRef) + if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef) continue; // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. - Value *Pointer = SI->getPointerOperand(); - uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType()); + AliasAnalysis::Location StoreLoc = AA->getLocation(SI); // If we found a pointer, check if it could be the same as our pointer. 
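The store handling in this hunk changes the default: only a MustAlias result is reported as a Def (the store produces exactly the queried bytes), while MayAlias and the new PartialAlias both fall through to Clobber. A tiny sketch of that classification with stand-in enums:

#include <cstdio>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };
enum DepKind { KeepScanning, Def, Clobber };

static DepKind classifyStore(AliasResult R) {
  if (R == NoAlias)   return KeepScanning; // no overlap: keep walking back
  if (R == MustAlias) return Def;          // exact match: a definition
  return Clobber;                          // May/PartialAlias: conservative
}

int main() {
  std::printf("%d %d %d\n",
              classifyStore(NoAlias),       // 0
              classifyStore(MustAlias),     // 1
              classifyStore(PartialAlias)); // 2
}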
- AliasAnalysis::AliasResult R = - AA->alias(Pointer, PointerSize, MemPtr, MemSize); + AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); if (R == AliasAnalysis::NoAlias) continue; - if (R == AliasAnalysis::MayAlias) - return MemDepResult::getClobber(Inst); - return MemDepResult::getDef(Inst); + if (R == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); + return MemDepResult::getClobber(Inst); } // If this is an allocation, and if we know that the accessed pointer is to @@ -263,7 +339,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // need to continue scanning until the malloc call. if (isa<AllocaInst>(Inst) || (isa<CallInst>(Inst) && extractMallocCall(Inst))) { - Value *AccessPtr = MemPtr->getUnderlyingObject(); + const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); if (AccessPtr == Inst || AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias) @@ -272,7 +348,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, } // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. - switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) { + switch (AA->getModRefInfo(Inst, MemLoc)) { case AliasAnalysis::NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; @@ -322,9 +398,6 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { BasicBlock *QueryParent = QueryInst->getParent(); - Value *MemPtr = 0; - uint64_t MemSize = 0; - // Do the scan. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { // No dependence found. If this is the entry block of the function, it is a @@ -333,65 +406,25 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { LocalCache = MemDepResult::getNonLocal(); else LocalCache = MemDepResult::getClobber(QueryInst); - } else if (StoreInst *SI = dyn_cast<StoreInst>(QueryInst)) { - // If this is a volatile store, don't mess around with it. Just return the - // previous instruction as a clobber. - if (SI->isVolatile()) - LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); - else { - MemPtr = SI->getPointerOperand(); - MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType()); - } - } else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) { - // If this is a volatile load, don't mess around with it. Just return the - // previous instruction as a clobber. - if (LI->isVolatile()) - LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); - else { - MemPtr = LI->getPointerOperand(); - MemSize = AA->getTypeStoreSize(LI->getType()); - } - } else if (const CallInst *CI = isFreeCall(QueryInst)) { - MemPtr = CI->getArgOperand(0); - // calls to free() erase the entire structure, not just a field. - MemSize = ~0UL; - } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { - int IntrinsicID = 0; // Intrinsic IDs start at 1. 
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst); - if (II) - IntrinsicID = II->getIntrinsicID(); - - switch (IntrinsicID) { - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: - MemPtr = II->getArgOperand(1); - MemSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); - break; - case Intrinsic::invariant_end: - MemPtr = II->getArgOperand(2); - MemSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(); - break; - default: + } else { + AliasAnalysis::Location MemLoc; + AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); + if (MemLoc.Ptr) { + // If we can do a pointer scan, make it happen. + bool isLoad = !(MR & AliasAnalysis::Mod); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst)) + isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end; + + LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, + QueryParent); + } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { CallSite QueryCS(QueryInst); bool isReadOnly = AA->onlyReadsMemory(QueryCS); LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, QueryParent); - break; - } - } else { - // Non-memory instruction. - LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); - } - - // If we need to do a pointer scan, make it happen. - if (MemPtr) { - bool isLoad = !QueryInst->mayWriteToMemory(); - if (IntrinsicInst *II = dyn_cast<MemoryUseIntrinsic>(QueryInst)) { - isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end; - } - LocalCache = getPointerDependencyFrom(MemPtr, MemSize, isLoad, ScanPos, - QueryParent); + } else + // Non-memory instruction. + LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); } // Remember the result! @@ -565,31 +598,27 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { /// own block. /// void MemoryDependenceAnalysis:: -getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB, +getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, + BasicBlock *FromBB, SmallVectorImpl<NonLocalDepResult> &Result) { - assert(Pointer->getType()->isPointerTy() && + assert(Loc.Ptr->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); - // We know that the pointer value is live into FromBB find the def/clobbers - // from presecessors. - const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType(); - uint64_t PointeeSize = AA->getTypeStoreSize(EltTy); - - PHITransAddr Address(Pointer, TD); + PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD); // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI // translation. DenseMap<BasicBlock*, Value*> Visited; - if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB, + if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB, Result, Visited, true)) return; Result.clear(); Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getClobber(FromBB->begin()), - Pointer)); + const_cast<Value *>(Loc.Ptr))); } /// GetNonLocalInfoForBlock - Compute the memdep value for BB with @@ -597,7 +626,7 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB, /// lookup (which may use dirty cache info if available). If we do a lookup, /// add the result to the cache. 
MemDepResult MemoryDependenceAnalysis:: -GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, +GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { @@ -631,15 +660,14 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, ScanPos = ExistingResult->getResult().getInst(); // Eliminating the dirty entry from 'Cache', so update the reverse info. - ValueIsLoadPair CacheKey(Pointer, isLoad); + ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); } else { ++NumUncacheNonLocalPtr; } // Scan the block for the dependency. - MemDepResult Dep = getPointerDependencyFrom(Pointer, PointeeSize, isLoad, - ScanPos, BB); + MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. @@ -658,7 +686,7 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, // update MemDep when we remove instructions. Instruction *Inst = Dep.getInst(); assert(Inst && "Didn't depend on anything?"); - ValueIsLoadPair CacheKey(Pointer, isLoad); + ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); ReverseNonLocalPtrDeps[Inst].insert(CacheKey); return Dep; } @@ -712,7 +740,8 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, /// not compute dependence information for some reason. This should be treated /// as a clobber dependence on the first instruction in the predecessor block. bool MemoryDependenceAnalysis:: -getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize, +getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, + const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl<NonLocalDepResult> &Result, DenseMap<BasicBlock*, Value*> &Visited, @@ -720,14 +749,68 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize, // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); - - std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo = - &NonLocalPointerDeps[CacheKey]; - NonLocalDepInfo *Cache = &CacheInfo->second; + + // Set up a temporary NLPI value. If the map doesn't yet have an entry for + // CacheKey, this value will be inserted as the associated value. Otherwise, + // it'll be ignored, and we'll have to check to see if the cached size and + // tbaa tag are consistent with the current query. + NonLocalPointerInfo InitialNLPI; + InitialNLPI.Size = Loc.Size; + InitialNLPI.TBAATag = Loc.TBAATag; + + // Get the NLPI for CacheKey, inserting one into the map if it doesn't + // already have one. + std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = + NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); + NonLocalPointerInfo *CacheInfo = &Pair.first->second; + + // If we already have a cache entry for this CacheKey, we may need to do some + // work to reconcile the cache entry and the current query. + if (!Pair.second) { + if (CacheInfo->Size < Loc.Size) { + // The query's Size is greater than the cached one. Throw out the + // cached data and procede with the query at the greater size. 
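The non-local pointer cache entry now remembers the Size (and TBAA tag) it was computed for, and the code above reconciles mismatches: a wider query throws out the cached deps, a narrower one is conservatively re-asked at the cached width so both share one entry. A standalone sketch of the size rule (the TBAA-tag rule is analogous and omitted); CacheEntry is a toy type:

#include <cstdio>

struct CacheEntry { unsigned long long Size; bool Valid; };

// Returns the size the query should actually run at.
static unsigned long long reconcile(CacheEntry &E,
                                    unsigned long long QuerySize) {
  if (!E.Valid) { E = {QuerySize, true}; return QuerySize; }
  if (E.Size < QuerySize) {
    E = {QuerySize, true}; // drop stale data, rebuild at the greater size
    return QuerySize;
  }
  return E.Size; // narrower query: answered conservatively at cached size
}

int main() {
  CacheEntry E = {0, false};
  std::printf("%llu\n", reconcile(E, 4)); // first query: 4
  std::printf("%llu\n", reconcile(E, 8)); // wider: cache rebuilt at 8
  std::printf("%llu\n", reconcile(E, 4)); // narrower: answered at 8
}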
+ CacheInfo->Pair = BBSkipFirstBlockPair(); + CacheInfo->Size = Loc.Size; + for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), + DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); + CacheInfo->NonLocalDeps.clear(); + } else if (CacheInfo->Size > Loc.Size) { + // This query's Size is less than the cached one. Conservatively restart + // the query using the greater size. + return getNonLocalPointerDepFromBB(Pointer, + Loc.getWithNewSize(CacheInfo->Size), + isLoad, StartBB, Result, Visited, + SkipFirstBlock); + } + + // If the query's TBAATag is inconsistent with the cached one, + // conservatively throw out the cached data and restart the query with + // no tag if needed. + if (CacheInfo->TBAATag != Loc.TBAATag) { + if (CacheInfo->TBAATag) { + CacheInfo->Pair = BBSkipFirstBlockPair(); + CacheInfo->TBAATag = 0; + for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), + DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); + CacheInfo->NonLocalDeps.clear(); + } + if (Loc.TBAATag) + return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(), + isLoad, StartBB, Result, Visited, + SkipFirstBlock); + } + } + + NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps; // If we have valid cached information for exactly the block we are // investigating, just return it with no recomputation. - if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { + if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { // We have a fully cached result for this query then we can just return the // cached results and populate the visited set. However, we have to verify // that we don't already have conflicting results for these blocks. Check @@ -763,9 +846,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize, // than its valid cache info. If empty, the result will be valid cache info, // otherwise it isn't. if (Cache->empty()) - CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); + CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); else - CacheInfo->first = BBSkipFirstBlockPair(); + CacheInfo->Pair = BBSkipFirstBlockPair(); SmallVector<BasicBlock*, 32> Worklist; Worklist.push_back(StartBB); @@ -790,8 +873,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize, // Get the dependency info for Pointer in BB. If we have cached // information, we will use it, otherwise we compute it. DEBUG(AssertSorted(*Cache, NumSortedEntries)); - MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize, - isLoad, BB, Cache, + MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. @@ -888,7 +970,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize, // queries. Mark this in NonLocalPointerDeps by setting the // BBSkipFirstBlockPair pointer to null. This requires reuse of the // cached value to do more work but not miss the phi trans failure. 
-      NonLocalPointerDeps[CacheKey].first = BBSkipFirstBlockPair();
+      NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
+      NLPI.Pair = BBSkipFirstBlockPair();
       continue;
     }
@@ -899,21 +982,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,

       // If we have a problem phi translating, fall through to the code below
       // to handle the failure condition.
-      if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred,
+      if (getNonLocalPointerDepFromBB(PredPointer,
+                                      Loc.getWithNewPtr(PredPointer.getAddr()),
+                                      isLoad, Pred,
                                       Result, Visited))
         goto PredTranslationFailure;
     }

     // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
     CacheInfo = &NonLocalPointerDeps[CacheKey];
-    Cache = &CacheInfo->second;
+    Cache = &CacheInfo->NonLocalDeps;
     NumSortedEntries = Cache->size();

     // Since we did phi translation, the "Cache" set won't contain all of the
     // results for the query. This is ok (we can still use it to accelerate
     // specific block queries) but we can't do the fastpath "return all
     // results from the set". Clear out the indicator for this.
-    CacheInfo->first = BBSkipFirstBlockPair();
+    CacheInfo->Pair = BBSkipFirstBlockPair();
     SkipFirstBlock = false;
     continue;

@@ -922,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
     if (Cache == 0) {
       // Refresh the CacheInfo/Cache pointer if it got invalidated.
       CacheInfo = &NonLocalPointerDeps[CacheKey];
-      Cache = &CacheInfo->second;
+      Cache = &CacheInfo->NonLocalDeps;
       NumSortedEntries = Cache->size();
     }

@@ -930,7 +1015,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
     // results for the query. This is ok (we can still use it to accelerate
     // specific block queries) but we can't do the fastpath "return all
     // results from the set". Clear out the indicator for this.
-    CacheInfo->first = BBSkipFirstBlockPair();
+    CacheInfo->Pair = BBSkipFirstBlockPair();

     // If *nothing* works, mark the pointer as being clobbered by the first
     // instruction in this block.
@@ -972,7 +1057,7 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {

   // Remove all of the entries in the BB->val map. This involves removing
   // instructions from the reverse map.
-  NonLocalDepInfo &PInfo = It->second.second;
+  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;

   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
     Instruction *Target = PInfo[i].getResult().getInst();
@@ -1143,10 +1228,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
     assert(P.getPointer() != RemInst &&
            "Already removed NonLocalPointerDeps info for RemInst");

-    NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second;
+    NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;

     // The cache is not valid for any specific block anymore.
-    NonLocalPointerDeps[P].first = BBSkipFirstBlockPair();
+    NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();

     // Update any entries for RemInst to use the instruction after it.
for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); @@ -1192,7 +1277,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), E = NonLocalPointerDeps.end(); I != E; ++I) { assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); - const NonLocalDepInfo &Val = I->second.second; + const NonLocalDepInfo &Val = I->second.NonLocalDeps; for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end(); II != E; ++II) assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index 2cc1c2aa005c..e7e999cebeb9 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -30,7 +30,9 @@ namespace { DebugInfoFinder Finder; public: static char ID; // Pass identification, replacement for typeid - ModuleDebugInfoPrinter() : ModulePass(ID) {} + ModuleDebugInfoPrinter() : ModulePass(ID) { + initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -43,7 +45,7 @@ namespace { char ModuleDebugInfoPrinter::ID = 0; INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo", - "Decodes module-level debug info", false, true); + "Decodes module-level debug info", false, true) ModulePass *llvm::createModuleDebugInfoPrinterPass() { return new ModuleDebugInfoPrinter(); diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp new file mode 100644 index 000000000000..101c2d5b0285 --- /dev/null +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -0,0 +1,88 @@ +//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the default implementation of the Alias Analysis interface +// that simply returns "I don't know" for all queries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +namespace { + /// NoAA - This class implements the -no-aa pass, which always returns "I + /// don't know" for alias queries. NoAA is unlike other alias analysis + /// implementations, in that it does not chain to a previous analysis. As + /// such it doesn't follow many of the rules that other alias analyses must. + /// + struct NoAA : public ImmutablePass, public AliasAnalysis { + static char ID; // Class identification, replacement for typeinfo + NoAA() : ImmutablePass(ID) { + initializeNoAAPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + } + + virtual void initializePass() { + // Note: NoAA does not call InitializeAliasAnalysis because it's + // special and does not support chaining. 
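    // For contrast, a chaining AA implementation would normally begin with
    // the usual pattern (a sketch of the convention, not code from this pass):
    //
    //   InitializeAliasAnalysis(this);  // latch onto the next AA in the chain
    //
    // NoAA terminates the analysis-group chain instead, so every query
    // bottoms out here with the conservative answers below.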
+ TD = getAnalysisIfAvailable<TargetData>(); + } + + virtual AliasResult alias(const Location &LocA, const Location &LocB) { + return MayAlias; + } + + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + return UnknownModRefBehavior; + } + virtual ModRefBehavior getModRefBehavior(const Function *F) { + return UnknownModRefBehavior; + } + + virtual bool pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + return false; + } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + return ModRef; + } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return ModRef; + } + + virtual void deleteValue(Value *V) {} + virtual void copyValue(Value *From, Value *To) {} + virtual void addEscapingUse(Use &U) {} + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + }; +} // End of anonymous namespace + +// Register this pass... +char NoAA::ID = 0; +INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", + "No Alias Analysis (always returns 'may' alias)", + true, true, true) + +ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 8e4fa03f2134..93da5a48518d 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,22 +12,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; static bool CanPHITrans(Instruction *Inst) { if (isa<PHINode>(Inst) || - isa<BitCastInst>(Inst) || isa<GetElementPtrInst>(Inst)) return true; - + + if (isa<CastInst>(Inst) && + Inst->isSafeToSpeculativelyExecute()) + return true; + if (Inst->getOpcode() == Instruction::Add && isa<ConstantInt>(Inst->getOperand(1))) return true; - + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst)) // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); @@ -50,7 +55,7 @@ static bool VerifySubExpr(Value *Expr, // If this is a non-instruction value, there is nothing to do. Instruction *I = dyn_cast<Instruction>(Expr); if (I == 0) return true; - + // If it's an instruction, it is either in Tmp or its operands recursively // are. SmallVectorImpl<Instruction*>::iterator Entry = @@ -59,16 +64,17 @@ static bool VerifySubExpr(Value *Expr, InstInputs.erase(Entry); return true; } - + // If it isn't in the InstInputs list it is a subexpr incorporated into the // address. Sanity check that it is phi translatable. if (!CanPHITrans(I)) { - errs() << "Non phi translatable instruction found in PHITransAddr, either " - "something is missing from InstInputs or CanPHITrans is wrong:\n"; + errs() << "Non phi translatable instruction found in PHITransAddr:\n"; errs() << *I << '\n'; + llvm_unreachable("Either something is missing from InstInputs or " + "CanPHITrans is wrong."); return false; } - + // Validate the operands of the instruction. 
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (!VerifySubExpr(I->getOperand(i), InstInputs)) @@ -82,19 +88,20 @@ static bool VerifySubExpr(Value *Expr, /// returns false. bool PHITransAddr::Verify() const { if (Addr == 0) return true; - - SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end()); - + + SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end()); + if (!VerifySubExpr(Addr, Tmp)) return false; - + if (!Tmp.empty()) { - errs() << "PHITransAddr inconsistent, contains extra instructions:\n"; + errs() << "PHITransAddr contains extra instructions:\n"; for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n"; + llvm_unreachable("This is unexpected."); return false; } - + // a-ok. return true; } @@ -111,11 +118,11 @@ bool PHITransAddr::IsPotentiallyPHITranslatable() const { } -static void RemoveInstInputs(Value *V, +static void RemoveInstInputs(Value *V, SmallVectorImpl<Instruction*> &InstInputs) { Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return; - + // If the instruction is in the InstInputs list, remove it. SmallVectorImpl<Instruction*>::iterator Entry = std::find(InstInputs.begin(), InstInputs.end(), I); @@ -123,9 +130,9 @@ static void RemoveInstInputs(Value *V, InstInputs.erase(Entry); return; } - + assert(!isa<PHINode>(I) && "Error, removing something that isn't an input"); - + // Otherwise, it must have instruction inputs itself. Zap them recursively. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i))) @@ -139,7 +146,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // If this is a non-instruction value, it can't require PHI translation. Instruction *Inst = dyn_cast<Instruction>(V); if (Inst == 0) return V; - + // Determine whether 'Inst' is an input to our PHI translatable expression. bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); @@ -156,16 +163,16 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // In either case, the instruction itself isn't an input any longer. InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst)); - + // If this is a PHI, go ahead and translate it. if (PHINode *PN = dyn_cast<PHINode>(Inst)) return AddAsInput(PN->getIncomingValueForBlock(PredBB)); - + // If this is a non-phi value, and it is analyzable, we can incorporate it // into the expression by making all instruction operands be inputs. if (!CanPHITrans(Inst)) return 0; - + // All instruction operands are now inputs (and of course, they may also be // defined in this block, so they may need to be phi translated themselves. for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) @@ -176,31 +183,34 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // Ok, it must be an intermediate result (either because it started that way // or because we just incorporated it into the expression). See if its // operands need to be phi translated, and if so, reconstruct it. 
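  // A hypothetical IR fragment for the cast case handled first below (the
  // value names are invented for illustration):
  //
  //   Pred:   %p = ...
  //   CurBB:  %a = phi i8* [ %p, %Pred ], ...
  //           %c = bitcast i8* %a to i32*
  //
  // Translating %c into %Pred first translates %a to %p, then searches %p's
  // users for an existing cast of the same opcode and type whose block
  // dominates %Pred; if none is available, this subexpression fails to
  // translate.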
- - if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB, DT); + + if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { + if (!Cast->isSafeToSpeculativelyExecute()) return 0; + Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); if (PHIIn == 0) return 0; - if (PHIIn == BC->getOperand(0)) - return BC; - + if (PHIIn == Cast->getOperand(0)) + return Cast; + // Find an available version of this cast. - + // Constants are trivial to find. if (Constant *C = dyn_cast<Constant>(PHIIn)) - return AddAsInput(ConstantExpr::getBitCast(C, BC->getType())); - - // Otherwise we have to see if a bitcasted version of the incoming pointer + return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(), + C, Cast->getType())); + + // Otherwise we have to see if a casted version of the incoming pointer // is available. If so, we can use it, otherwise we have to fail. for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); UI != E; ++UI) { - if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) - if (BCI->getType() == BC->getType() && - (!DT || DT->dominates(BCI->getParent(), PredBB))) - return BCI; + if (CastInst *CastI = dyn_cast<CastInst>(*UI)) + if (CastI->getOpcode() == Cast->getOpcode() && + CastI->getType() == Cast->getType() && + (!DT || DT->dominates(CastI->getParent(), PredBB))) + return CastI; } return 0; } - + // Handle getelementptr with at least one PHI translatable operand. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { SmallVector<Value*, 8> GEPOps; @@ -208,22 +218,22 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); if (GEPOp == 0) return 0; - + AnyChanged |= GEPOp != GEP->getOperand(i); GEPOps.push_back(GEPOp); } - + if (!AnyChanged) return GEP; - + // Simplify the GEP to handle 'gep x, 0' -> x etc. - if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) { + if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); - + return AddAsInput(V); } - + // Scan to see if we have this GEP available. Value *APHIOp = GEPOps[0]; for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); @@ -245,7 +255,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } return 0; } - + // Handle add with a constant RHS. if (Inst->getOpcode() == Instruction::Add && isa<ConstantInt>(Inst->getOperand(1))) { @@ -253,10 +263,10 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, Constant *RHS = cast<ConstantInt>(Inst->getOperand(1)); bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); - + Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); if (LHS == 0) return 0; - + // If the PHI translated LHS is an add of a constant, fold the immediates. if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) if (BOp->getOpcode() == Instruction::Add) @@ -264,16 +274,16 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, LHS = BOp->getOperand(0); RHS = ConstantExpr::getAdd(RHS, CI); isNSW = isNUW = false; - + // If the old 'LHS' was an input, add the new 'LHS' as an input. 
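        // Worked example with invented values: translating "add i64 %l, 4"
        // where %l itself translates to "add i64 %y, 8" folds the immediates
        // into "add i64 %y, 12"; nsw/nuw are dropped because the
        // reassociation may introduce wrapping the original flags did not
        // promise.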
if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { RemoveInstInputs(BOp, InstInputs); AddAsInput(LHS); } } - + // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); @@ -283,7 +293,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // If we didn't modify the add, just return it. if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1)) return Inst; - + // Otherwise, see if we have this add available somewhere. for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); UI != E; ++UI) { @@ -294,10 +304,10 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, (!DT || DT->dominates(BO->getParent(), PredBB))) return BO; } - + return 0; } - + // Otherwise, we failed. return 0; } @@ -335,13 +345,13 @@ PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree &DT, SmallVectorImpl<Instruction*> &NewInsts) { unsigned NISize = NewInsts.size(); - + // Attempt to PHI translate with insertion. Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts); - + // If successful, return the new value. if (Addr) return Addr; - + // If not, destroy any intermediate instructions inserted. while (NewInsts.size() != NISize) NewInsts.pop_back_val()->eraseFromParent(); @@ -367,21 +377,23 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // If we don't have an available version of this value, it must be an // instruction. Instruction *Inst = cast<Instruction>(InVal); - - // Handle bitcast of PHI translatable value. - if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - Value *OpVal = InsertPHITranslatedSubExpr(BC->getOperand(0), + + // Handle cast of PHI translatable value. + if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { + if (!Cast->isSafeToSpeculativelyExecute()) return 0; + Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), CurBB, PredBB, DT, NewInsts); if (OpVal == 0) return 0; - - // Otherwise insert a bitcast at the end of PredBB. - BitCastInst *New = new BitCastInst(OpVal, InVal->getType(), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); + + // Otherwise insert a cast at the end of PredBB. + CastInst *New = CastInst::Create(Cast->getOpcode(), + OpVal, InVal->getType(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); NewInsts.push_back(New); return New; } - + // Handle getelementptr with at least one PHI operand. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { SmallVector<Value*, 8> GEPOps; @@ -392,8 +404,8 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, if (OpVal == 0) return 0; GEPOps.push_back(OpVal); } - - GetElementPtrInst *Result = + + GetElementPtrInst *Result = GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), InVal->getName()+".phi.trans.insert", PredBB->getTerminator()); @@ -401,12 +413,12 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, NewInsts.push_back(Result); return Result; } - + #if 0 // FIXME: This code works, but it is unclear that we actually want to insert // a big chain of computation in order to make a value available in a block. // This needs to be evaluated carefully to consider its cost trade offs. - + // Handle add with a constant RHS. 
 if (Inst->getOpcode() == Instruction::Add &&
     isa<ConstantInt>(Inst->getOperand(1))) {
@@ -414,7 +426,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
     Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
                                               CurBB, PredBB, DT, NewInsts);
     if (OpVal == 0) return 0;
-
+
     BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
                                            InVal->getName()+".phi.trans.insert",
                                                     PredBB->getTerminator());
@@ -424,6 +436,6 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
     return Res;
   }
 #endif
-
+
   return 0;
 }
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
new file mode 100644
index 000000000000..5d3f6bbc7b6e
--- /dev/null
+++ b/lib/Analysis/PathNumbering.cpp
@@ -0,0 +1,525 @@
+//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Ball-Larus path numbers uniquely identify paths through a directed acyclic
+// graph (DAG) [Ball96]. For a CFG, backedges are removed and replaced by
+// phony edges to obtain a DAG, and thus the unique path numbers [Ball96].
+//
+// The purpose of this analysis is to enumerate the edges in a CFG in order
+// to obtain paths from path numbers in a convenient manner. As described in
+// [Ball96], edges can be enumerated such that, given a path number, the
+// corresponding path can be obtained by following the CFG and updating the
+// path number.
+//
+// [Ball96]
+//  T. Ball and J. R. Larus. "Efficient Path Profiling."
+//  International Symposium on Microarchitecture, pages 46-57, 1996.
+//  http://portal.acm.org/citation.cfm?id=243857
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "ball-larus-numbering"
+
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <map>
+#include <queue>
+#include <set>
+#include <stack>
+#include <string>
+#include <utility>
+#include <vector>
+#include <sstream>
+
+using namespace llvm;
+
+// Are we enabling early termination?
+static cl::opt<bool> ProcessEarlyTermination(
+  "path-profile-early-termination", cl::Hidden,
+  cl::desc("In path profiling, insert extra instrumentation to account for "
+           "unexpected function termination."));
+
+// Returns the basic block for the BallLarusNode
+BasicBlock* BallLarusNode::getBlock() {
+  return(_basicBlock);
+}
+
+// Returns the number of paths to the exit starting at the node.
+unsigned BallLarusNode::getNumberPaths() {
+  return(_numberPaths);
+}
+
+// Sets the number of paths to the exit starting at the node.
+void BallLarusNode::setNumberPaths(unsigned numberPaths) {
+  _numberPaths = numberPaths;
+}
+
+// Gets the NodeColor used in graph algorithms.
+BallLarusNode::NodeColor BallLarusNode::getColor() {
+  return(_color);
+}
+
+// Sets the NodeColor used in graph algorithms.
+void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
+  _color = color;
+}
+
+// Returns an iterator over predecessor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::predBegin() {
+  return(_predEdges.begin());
+}
+
+// Returns the end sentinel for the predecessor iterator.
+BLEdgeIterator BallLarusNode::predEnd() {
+  return(_predEdges.end());
+}
+
+// Returns the number of predecessor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberPredEdges() {
+  return(_predEdges.size());
+}
+
+// Returns an iterator over successor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::succBegin() {
+  return(_succEdges.begin());
+}
+
+// Returns the end sentinel for the successor iterator.
+BLEdgeIterator BallLarusNode::succEnd() {
+  return(_succEdges.end());
+}
+
+// Returns the number of successor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberSuccEdges() {
+  return(_succEdges.size());
+}
+
+// Add an edge to the predecessor list.
+void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
+  _predEdges.push_back(edge);
+}
+
+// Remove an edge from the predecessor list.
+void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
+  removeEdge(_predEdges, edge);
+}
+
+// Add an edge to the successor list.
+void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
+  _succEdges.push_back(edge);
+}
+
+// Remove an edge from the successor list.
+void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
+  removeEdge(_succEdges, edge);
+}
+
+// Returns the name of the BasicBlock being represented. If BasicBlock
+// is null then returns "<null>". If BasicBlock has no name, then
+// "<unnamed>" is returned. Intended for use with debug output.
+std::string BallLarusNode::getName() {
+  std::stringstream name;
+
+  if(getBlock() != NULL) {
+    if(getBlock()->hasName()) {
+      std::string tempName(getBlock()->getName());
+      name << tempName.c_str() << " (" << _uid << ")";
+    } else
+      name << "<unnamed> (" << _uid << ")";
+  } else
+    name << "<null> (" << _uid << ")";
+
+  return name.str();
+}
+
+// Removes an edge from an edgeVector. Used by removePredEdge and
+// removeSuccEdge.
+void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
+  // TODO: Avoid linear scan by using a set instead
+  for(BLEdgeIterator i = v.begin(),
+        end = v.end();
+      i != end;
+      ++i) {
+    if((*i) == e) {
+      v.erase(i);
+      break;
+    }
+  }
+}
+
+// Returns the source node of this edge.
+BallLarusNode* BallLarusEdge::getSource() const {
+  return(_source);
+}
+
+// Returns the target node of this edge.
+BallLarusNode* BallLarusEdge::getTarget() const {
+  return(_target);
+}
+
+// Gets the type of the edge.
+BallLarusEdge::EdgeType BallLarusEdge::getType() const {
+  return _edgeType;
+}
+
+// Sets the type of the edge.
+void BallLarusEdge::setType(EdgeType type) {
+  _edgeType = type;
+}
+
+// Returns the weight of this edge. Used to decode path numbers to sequences
+// of basic blocks.
+unsigned BallLarusEdge::getWeight() {
+  return(_weight);
+}
+
+// Sets the weight of the edge. Used during path numbering.
+void BallLarusEdge::setWeight(unsigned weight) {
+  _weight = weight;
+}
+
+// Gets the phony edge originating at the root.
+BallLarusEdge* BallLarusEdge::getPhonyRoot() {
+  return _phonyRoot;
+}
+
+// Sets the phony edge originating at the root.
+void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
+  _phonyRoot = phonyRoot;
+}
+
+// Gets the phony edge terminating at the exit.
+BallLarusEdge* BallLarusEdge::getPhonyExit() {
+  return _phonyExit;
+}
+
+// Sets the phony edge terminating at the exit.
+void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
+  _phonyExit = phonyExit;
+}
+
+// Gets the associated real edge if this is a phony edge.
+BallLarusEdge* BallLarusEdge::getRealEdge() {
+  return _realEdge;
+}
+
+// Sets the associated real edge if this is a phony edge.
+void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
+  _realEdge = realEdge;
+}
+
+// Returns the duplicate number of the edge.
+unsigned BallLarusEdge::getDuplicateNumber() {
+  return(_duplicateNumber);
+}
+
+// Initialization that requires virtual functions which are not fully
+// functional in the constructor.
+void BallLarusDag::init() {
+  BLBlockNodeMap inDag;
+  std::stack<BallLarusNode*> dfsStack;
+
+  _root = addNode(&(_function.getEntryBlock()));
+  _exit = addNode(NULL);
+
+  // start search from root
+  dfsStack.push(getRoot());
+
+  // dfs to add each bb into the dag
+  while(dfsStack.size())
+    buildNode(inDag, dfsStack);
+
+  // put in the final edge
+  addEdge(getExit(),getRoot(),0);
+}
+
+// Frees all memory associated with the DAG.
+BallLarusDag::~BallLarusDag() {
+  for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
+      ++edge)
+    delete (*edge);
+
+  for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
+      ++node)
+    delete (*node);
+}
+
+// Calculate the path numbers by assigning edge increments as prescribed
+// in Ball-Larus path profiling.
+void BallLarusDag::calculatePathNumbers() {
+  BallLarusNode* node;
+  std::queue<BallLarusNode*> bfsQueue;
+  bfsQueue.push(getExit());
+
+  while(bfsQueue.size() > 0) {
+    node = bfsQueue.front();
+
+    DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
+
+    bfsQueue.pop();
+    unsigned prevPathNumber = node->getNumberPaths();
+    calculatePathNumbersFrom(node);
+
+    // Check for DAG splitting
+    if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
+      // Add new phony edge from the split-node to the DAG's exit
+      BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
+      exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+
+      // Counters to handle the possibility of a multi-graph
+      BasicBlock* oldTarget = 0;
+      unsigned duplicateNumber = 0;
+
+      // Iterate through each successor edge, adding phony edges
+      for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+           succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
+
+        if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
+          // is this edge a duplicate?
+          if( oldTarget != (*succ)->getTarget()->getBlock() )
+            duplicateNumber = 0;
+
+          // create the new phony edge: root -> succ
+          BallLarusEdge* rootEdge =
+            addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
+          rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+          rootEdge->setRealEdge(*succ);
+
+          // split on this edge and reference its exit/root phony edges
+          (*succ)->setType(BallLarusEdge::SPLITEDGE);
+          (*succ)->setPhonyRoot(rootEdge);
+          (*succ)->setPhonyExit(exitEdge);
+          (*succ)->setWeight(0);
+        }
+      }
+
+      calculatePathNumbersFrom(node);
+    }
+
+    DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
+          << node->getNumberPaths() << ".\n");
+
+    if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
+      DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
+      for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
+          pred != end; pred++) {
+        if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
+            (*pred)->getType() == BallLarusEdge::SPLITEDGE )
+          continue;
+
+        BallLarusNode* nextNode = (*pred)->getSource();
+        // not yet visited?
+        if(nextNode->getNumberPaths() == 0)
+          bfsQueue.push(nextNode);
+      }
+    }
+  }
+
+  DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
+}
+
+// Returns the number of paths for the DAG.
+unsigned BallLarusDag::getNumberOfPaths() {
+  return(getRoot()->getNumberPaths());
+}
+
+// Returns the root (i.e. entry) node for the DAG.
+BallLarusNode* BallLarusDag::getRoot() {
+  return _root;
+}
+
+// Returns the exit node for the DAG.
+BallLarusNode* BallLarusDag::getExit() {
+  return _exit;
+}
+
+// Returns the function for the DAG.
+Function& BallLarusDag::getFunction() {
+  return(_function);
+}
+
+// Clears the node colors.
+void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
+  for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
+    (*nodeIt)->setColor(color);
+}
+
+// Processes one node and its immediate edges for building the DAG.
+void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
+  BallLarusNode* currentNode = dfsStack.top();
+  BasicBlock* currentBlock = currentNode->getBlock();
+
+  if(currentNode->getColor() != BallLarusNode::WHITE) {
+    // we have already visited this node
+    dfsStack.pop();
+    currentNode->setColor(BallLarusNode::BLACK);
+  } else {
+    // are there any external procedure calls?
+    if( ProcessEarlyTermination ) {
+      for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
+           bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
+           bbCurrent++ ) {
+        Instruction& instr = *bbCurrent;
+        if( instr.getOpcode() == Instruction::Call ) {
+          BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
+          callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
+          break;
+        }
+      }
+    }
+
+    TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
+    if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
+       || isa<UnwindInst>(terminator))
+      addEdge(currentNode, getExit(),0);
+
+    currentNode->setColor(BallLarusNode::GRAY);
+    inDag[currentBlock] = currentNode;
+
+    BasicBlock* oldSuccessor = 0;
+    unsigned duplicateNumber = 0;
+
+    // iterate through this node's successors
+    for(succ_iterator successor = succ_begin(currentBlock),
+        succEnd = succ_end(currentBlock); successor != succEnd;
+        oldSuccessor = *successor, ++successor ) {
+      BasicBlock* succBB = *successor;
+
+      // is this edge a duplicate?
+      if (oldSuccessor == succBB)
+        duplicateNumber++;
+      else
+        duplicateNumber = 0;
+
+      buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
+    }
+  }
+}
+
+// Process an edge in the CFG for DAG building.
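// (For orientation, a hypothetical diamond CFG as numbered by
// calculatePathNumbersFrom below:
//
//          A          numberPaths: exit = 1, B = 1, C = 1, A = 2
//         / \         edge increments: A->B = 0, A->C = 1,
//        B   C                         B->exit = 0, C->exit = 0
//         \ /
//         exit        path number 0 = A,B,exit; path number 1 = A,C,exit
//
// Summing the increments along a root-to-exit path yields that path's
// unique number, which is what the instrumentation accumulates at run time.)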
+void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
+                             dfsStack, BallLarusNode* currentNode,
+                             BasicBlock* succBB, unsigned duplicateCount) {
+  BallLarusNode* succNode = inDag[succBB];
+
+  if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
+    // visited node and forward edge
+    addEdge(currentNode, succNode, duplicateCount);
+  } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
+    // visited node and back edge
+    DEBUG(dbgs() << "Backedge detected.\n");
+    addBackedge(currentNode, succNode, duplicateCount);
+  } else {
+    BallLarusNode* childNode;
+    // not visited node and forward edge
+    if(succNode) // an unvisited node that is a child of a gray node
+      childNode = succNode;
+    else { // an unvisited node that is a child of an unvisited node
+      childNode = addNode(succBB);
+      inDag[succBB] = childNode;
+    }
+    addEdge(currentNode, childNode, duplicateCount);
+    dfsStack.push(childNode);
+  }
+}
+
+// The weight on each edge is the increment required along any path that
+// contains that edge.
+void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
+  if(node == getExit())
+    // The exit node is the base case
+    node->setNumberPaths(1);
+  else {
+    unsigned sumPaths = 0;
+    BallLarusNode* succNode;
+
+    for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+        succ != end; succ++) {
+      if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
+          (*succ)->getType() == BallLarusEdge::SPLITEDGE )
+        continue;
+
+      (*succ)->setWeight(sumPaths);
+      succNode = (*succ)->getTarget();
+
+      if( !succNode->getNumberPaths() )
+        return;
+      sumPaths += succNode->getNumberPaths();
+    }
+
+    node->setNumberPaths(sumPaths);
+  }
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call delete on each
+// pointer created.
+BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
+  return( new BallLarusNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call delete on each
+// pointer created.
+BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
+                                        BallLarusNode* target,
+                                        unsigned duplicateCount) {
+  return( new BallLarusEdge(source, target, duplicateCount) );
+}
+
+// Proxy to node's constructor. Updates the DAG state.
+BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
+  BallLarusNode* newNode = createNode(BB);
+  _nodes.push_back(newNode);
+  return( newNode );
+}
+
+// Proxy to edge's constructor. Updates the DAG state.
+BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
+                                     BallLarusNode* target,
+                                     unsigned duplicateCount) {
+  BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
+  _edges.push_back(newEdge);
+  source->addSuccEdge(newEdge);
+  target->addPredEdge(newEdge);
+  return(newEdge);
+}
+
+// Adds a backedge with its phony edges. Updates the DAG state.
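// (For a hypothetical loop backedge latch->header, the result is:
//
//   latch --BACKEDGE--> header         kept, but skipped when numbering
//   root  --BACKEDGE_PHONY--> header   phony stand-in from the root
//   latch --BACKEDGE_PHONY--> exit     phony stand-in to the exit
//
// Both phony edges record the original edge via setRealEdge so a decoded
// path can be mapped back onto the real CFG.)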
+void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target, + unsigned duplicateCount) { + BallLarusEdge* childEdge = addEdge(source, target, duplicateCount); + childEdge->setType(BallLarusEdge::BACKEDGE); + + childEdge->setPhonyRoot(addEdge(getRoot(), target,0)); + childEdge->setPhonyExit(addEdge(source, getExit(),0)); + + childEdge->getPhonyRoot()->setRealEdge(childEdge); + childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY); + + childEdge->getPhonyExit()->setRealEdge(childEdge); + childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY); + _backEdges.push_back(childEdge); +} diff --git a/lib/Analysis/PathProfileInfo.cpp b/lib/Analysis/PathProfileInfo.cpp new file mode 100644 index 000000000000..b361d3f4fa94 --- /dev/null +++ b/lib/Analysis/PathProfileInfo.cpp @@ -0,0 +1,434 @@ +//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface used by optimizers to load path profiles, +// and provides a loader pass which reads a path profile file. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "path-profile-info" + +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/Analysis/PathProfileInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <cstdio> + +using namespace llvm; + +// command line option for loading path profiles +static cl::opt<std::string> +PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"), + cl::value_desc("filename"), + cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden); + +namespace { + class PathProfileLoaderPass : public ModulePass, public PathProfileInfo { + public: + PathProfileLoaderPass() : ModulePass(ID) { } + ~PathProfileLoaderPass(); + + // this pass doesn't change anything (only loads information) + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + // the full name of the loader pass + virtual const char* getPassName() const { + return "Path Profiling Information Loader"; + } + + // required since this pass implements multiple inheritance + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &PathProfileInfo::ID) + return (PathProfileInfo*)this; + return this; + } + + // entry point to run the pass + bool runOnModule(Module &M); + + // pass identification + static char ID; + + private: + // make a reference table to refer to function by number + void buildFunctionRefs(Module &M); + + // process argument info of a program from the input file + void handleArgumentInfo(); + + // process path number information from the input file + void handlePathInfo(); + + // array of references to the functions in the module + std::vector<Function*> _functions; + + // path profile file handle + FILE* _file; + + // path profile file name + std::string _filename; + }; +} + +// register PathLoader +char PathProfileLoaderPass::ID = 0; + +INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information", + NoPathProfileInfo) +INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo, + "path-profile-loader", + 
"Load path profile information from file", + false, true, false) + +char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID; + +// link PathLoader as a pass, and make it available as an optimisation +ModulePass *llvm::createPathProfileLoaderPass() { + return new PathProfileLoaderPass; +} + +// ---------------------------------------------------------------------------- +// PathEdge implementation +// +ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target, + unsigned duplicateNumber) + : _source(source), _target(target), _duplicateNumber(duplicateNumber) {} + +// ---------------------------------------------------------------------------- +// Path implementation +// + +ProfilePath::ProfilePath (unsigned int number, unsigned int count, + double countStdDev, PathProfileInfo* ppi) + : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {} + +double ProfilePath::getFrequency() const { + return 100 * double(_count) / + double(_ppi->_functionPathCounts[_ppi->_currentFunction]); +} + +static BallLarusEdge* getNextEdge (BallLarusNode* node, + unsigned int pathNumber) { + BallLarusEdge* best = 0; + + for( BLEdgeIterator next = node->succBegin(), + end = node->succEnd(); next != end; next++ ) { + if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges + (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges + (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber + (!best || (best->getWeight() < (*next)->getWeight())) ) // best one? + best = *next; + } + + return best; +} + +ProfilePathEdgeVector* ProfilePath::getPathEdges() const { + BallLarusNode* currentNode = _ppi->_currentDag->getRoot (); + unsigned int increment = _number; + ProfilePathEdgeVector* pev = new ProfilePathEdgeVector; + + while (currentNode != _ppi->_currentDag->getExit()) { + BallLarusEdge* next = getNextEdge(currentNode, increment); + + increment -= next->getWeight(); + + if( next->getType() != BallLarusEdge::BACKEDGE_PHONY && + next->getType() != BallLarusEdge::SPLITEDGE_PHONY && + next->getTarget() != _ppi->_currentDag->getExit() ) + pev->push_back(ProfilePathEdge( + next->getSource()->getBlock(), + next->getTarget()->getBlock(), + next->getDuplicateNumber())); + + if( next->getType() == BallLarusEdge::BACKEDGE_PHONY && + next->getTarget() == _ppi->_currentDag->getExit() ) + pev->push_back(ProfilePathEdge( + next->getRealEdge()->getSource()->getBlock(), + next->getRealEdge()->getTarget()->getBlock(), + next->getDuplicateNumber())); + + if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY && + next->getSource() == _ppi->_currentDag->getRoot() ) + pev->push_back(ProfilePathEdge( + next->getRealEdge()->getSource()->getBlock(), + next->getRealEdge()->getTarget()->getBlock(), + next->getDuplicateNumber())); + + // set the new node + currentNode = next->getTarget(); + } + + return pev; +} + +ProfilePathBlockVector* ProfilePath::getPathBlocks() const { + BallLarusNode* currentNode = _ppi->_currentDag->getRoot (); + unsigned int increment = _number; + ProfilePathBlockVector* pbv = new ProfilePathBlockVector; + + while (currentNode != _ppi->_currentDag->getExit()) { + BallLarusEdge* next = getNextEdge(currentNode, increment); + increment -= next->getWeight(); + + // add block to the block list if it is a real edge + if( next->getType() == BallLarusEdge::NORMAL) + pbv->push_back (currentNode->getBlock()); + // make the back edge the last edge since we are at the end + else if( next->getTarget() == _ppi->_currentDag->getExit() ) { + pbv->push_back 
(currentNode->getBlock());
+      pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
+    }
+
+    // set the new node
+    currentNode = next->getTarget();
+  }
+
+  return pbv;
+}
+
+BasicBlock* ProfilePath::getFirstBlockInPath() const {
+  BallLarusNode* root = _ppi->_currentDag->getRoot();
+  BallLarusEdge* edge = getNextEdge(root, _number);
+
+  if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+               edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
+    return edge->getTarget()->getBlock();
+
+  return root->getBlock();
+}
+
+// ----------------------------------------------------------------------------
+// PathProfileInfo implementation
+//
+
+// Pass identification
+char llvm::PathProfileInfo::ID = 0;
+
+PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
+}
+
+PathProfileInfo::~PathProfileInfo() {
+  if (_currentDag)
+    delete _currentDag;
+}
+
+// set the function for which paths are currently being processed
+void PathProfileInfo::setCurrentFunction(Function* F) {
+  // Make sure it exists
+  if (!F) return;
+
+  if (_currentDag)
+    delete _currentDag;
+
+  _currentFunction = F;
+  _currentDag = new BallLarusDag(*F);
+  _currentDag->init();
+  _currentDag->calculatePathNumbers();
+}
+
+// get the function for which paths are currently being processed
+Function* PathProfileInfo::getCurrentFunction() const {
+  return _currentFunction;
+}
+
+// get the entry block of the function
+BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
+  return _currentDag->getRoot()->getBlock();
+}
+
+// return the path based on its number
+ProfilePath* PathProfileInfo::getPath(unsigned int number) {
+  return _functionPaths[_currentFunction][number];
+}
+
+// return the number of paths which a function may potentially execute
+unsigned int PathProfileInfo::getPotentialPathCount() {
+  return _currentDag ? _currentDag->getNumberOfPaths() : 0;
+}
+
+// return an iterator for the beginning of a function's executed paths
+ProfilePathIterator PathProfileInfo::pathBegin() {
+  return _functionPaths[_currentFunction].begin();
+}
+
+// return an iterator for the end of a function's executed paths
+ProfilePathIterator PathProfileInfo::pathEnd() {
+  return _functionPaths[_currentFunction].end();
+}
+
+// returns the total number of paths run in the function
+unsigned int PathProfileInfo::pathsRun() {
+  return _currentFunction ?
_functionPaths[_currentFunction].size() : 0;
+}
+
+// ----------------------------------------------------------------------------
+// PathLoader implementation
+//
+
+// remove all generated paths
+PathProfileLoaderPass::~PathProfileLoaderPass() {
+  for( FunctionPathIterator funcNext = _functionPaths.begin(),
+       funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
+    for( ProfilePathIterator pathNext = funcNext->second.begin(),
+         pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
+      delete pathNext->second;
+}
+
+// entry point of the pass; this loads and parses a file
+bool PathProfileLoaderPass::runOnModule(Module &M) {
+  // get the filename and setup the module's function references
+  _filename = PathProfileInfoFilename;
+  buildFunctionRefs (M);
+
+  if (!(_file = fopen(_filename.c_str(), "rb"))) {
+    errs () << "error: input file '" << _filename << "' does not exist.\n";
+    return false;
+  }
+
+  ProfilingType profType;
+
+  while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
+    switch (profType) {
+    case ArgumentInfo:
+      handleArgumentInfo ();
+      break;
+    case PathInfo:
+      handlePathInfo ();
+      break;
+    default:
+      errs () << "error: bad path profiling file syntax, " << profType << "\n";
+      fclose (_file);
+      return false;
+    }
+  }
+
+  fclose (_file);
+
+  return true;
+}
+
+// create a reference table for functions defined in the path profile file
+void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
+  _functions.push_back(0); // make the 0 index a null pointer
+
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+    if (F->isDeclaration())
+      continue;
+    _functions.push_back(F);
+  }
+}
+
+// handle command-line argument info in the output file
+void PathProfileLoaderPass::handleArgumentInfo() {
+  // get the argument list's length
+  unsigned savedArgsLength;
+  if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
+    errs() << "warning: argument info header/data mismatch\n";
+    return;
+  }
+
+  // allocate a buffer, and get the arguments
+  char* args = new char[savedArgsLength+1];
+  if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
+    errs() << "warning: argument info header/data mismatch\n";
+
+  args[savedArgsLength] = '\0';
+  argList = std::string(args);
+  delete [] args; // cleanup dynamic string
+
+  // byte alignment
+  if (savedArgsLength & 3)
+    fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
+}
+
+// Handle path profile information in the output file
+void PathProfileLoaderPass::handlePathInfo () {
+  // get the number of functions in this profile
+  unsigned functionCount;
+  if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
+    errs() << "warning: path info header/data mismatch\n";
+    return;
+  }
+
+  // gather path information for each function
+  for (unsigned i = 0; i < functionCount; i++) {
+    PathProfileHeader pathHeader;
+    if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
+      errs() << "warning: bad header for path function info\n";
+      break;
+    }
+
+    Function* f = _functions[pathHeader.fnNumber];
+
+    // dynamically allocate a table to store path numbers
+    PathProfileTableEntry* pathTable =
+      new PathProfileTableEntry[pathHeader.numEntries];
+
+    if( fread(pathTable, sizeof(PathProfileTableEntry),
+              pathHeader.numEntries, _file) != pathHeader.numEntries) {
+      delete [] pathTable;
+      errs() << "warning: path function info header/data mismatch\n";
+      return;
+    }
+
+    // Build a new path for the current function
+    unsigned int totalPaths = 0;
+    for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
+ totalPaths += pathTable[j].pathCounter; + _functionPaths[f][pathTable[j].pathNumber] + = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter, + 0, this); + } + + _functionPathCounts[f] = totalPaths; + + delete [] pathTable; + } +} + +//===----------------------------------------------------------------------===// +// NoProfile PathProfileInfo implementation +// + +namespace { + struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo { + static char ID; // Class identification, replacement for typeinfo + NoPathProfileInfo() : ImmutablePass(ID) { + initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry()); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &PathProfileInfo::ID) + return (PathProfileInfo*)this; + return this; + } + + virtual const char *getPassName() const { + return "NoPathProfileInfo"; + } + }; +} // End of anonymous namespace + +char NoPathProfileInfo::ID = 0; +// Register this pass... +INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile", + "No Path Profile Information", false, true, true) + +ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); } diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp new file mode 100644 index 000000000000..c54977314207 --- /dev/null +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -0,0 +1,207 @@ +//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This verifier derives an edge profile file from current path profile +// information +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "path-profile-verifier" + +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/Analysis/PathProfileInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#include <stdio.h> + +using namespace llvm; + +namespace { + class PathProfileVerifier : public ModulePass { + private: + bool runOnModule(Module &M); + + public: + static char ID; // Pass identification, replacement for typeid + PathProfileVerifier() : ModulePass(ID) { + initializePathProfileVerifierPass(*PassRegistry::getPassRegistry()); + } + + + virtual const char *getPassName() const { + return "Path Profiler Verifier"; + } + + // The verifier requires the path profile and edge profile. 
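    // (In runOnModule below, each CFG edge is keyed by a (source block,
    // target block, duplicate number) triple; sketched with hypothetical
    // blocks:
    //
    //   arrayMap[0][entry][0]    -> counter index for the function-entry edge
    //   arrayMap[bb][succ][dup]  -> counter index for bb->succ, where dup
    //                               disambiguates repeated successors, e.g.
    //                               two switch cases to the same block.)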
+    virtual void getAnalysisUsage(AnalysisUsage& AU) const;
+  };
+}
+
+static cl::opt<std::string>
+EdgeProfileFilename("path-profile-verifier-file",
+  cl::init("edgefrompath.llvmprof.out"),
+  cl::value_desc("filename"),
+  cl::desc("Edge profile file generated by -path-profile-verifier"),
+  cl::Hidden);
+
+char PathProfileVerifier::ID = 0;
+INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
+                "Compare the path profile derived edge profile against the "
+                "edge profile.", true, true)
+
+ModulePass *llvm::createPathProfileVerifierPass() {
+  return new PathProfileVerifier();
+}
+
+// The verifier requires the path profile and edge profile.
+void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
+  AU.addRequired<PathProfileInfo>();
+  AU.addPreserved<PathProfileInfo>();
+}
+
+typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
+typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
+typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
+
+// the verifier iterates through each path to gather the total
+// number of edge frequencies
+bool PathProfileVerifier::runOnModule (Module &M) {
+  PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
+
+  // set up a data structure mapping path edges to indices into an
+  // array of edge counters
+  NestedBlockToIndexMap arrayMap;
+  unsigned i = 0;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+
+    arrayMap[0][F->begin()][0] = i++;
+
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      TerminatorInst *TI = BB->getTerminator();
+
+      unsigned duplicate = 0;
+      BasicBlock* prev = 0;
+      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
+           prev = TI->getSuccessor(s), ++s) {
+        if (prev == TI->getSuccessor(s))
+          duplicate++;
+        else duplicate = 0;
+
+        arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
+      }
+    }
+  }
+
+  std::vector<unsigned> edgeArray(i);
+
+  // iterate through each path and increment the edge counters as needed
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+
+    pathProfileInfo.setCurrentFunction(F);
+
+    DEBUG(dbgs() << "function '" << F->getName() << "' ran "
+          << pathProfileInfo.pathsRun()
+          << "/" << pathProfileInfo.getPotentialPathCount()
+          << " potential paths\n");
+
+    for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
+         endPath = pathProfileInfo.pathEnd();
+         nextPath != endPath; nextPath++ ) {
+      ProfilePath* currentPath = nextPath->second;
+
+      ProfilePathEdgeVector* pev = currentPath->getPathEdges();
+      DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
+            << currentPath->getCount() << "\n");
+      // setup the entry edge (normally path profiling doesn't care about this)
+      if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
+        edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
+          += currentPath->getCount();
+
+      for( ProfilePathEdgeIterator nextEdge = pev->begin(),
+           endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
+        if (nextEdge != pev->begin())
+          DEBUG(dbgs() << " :: ");
+
+        BasicBlock* source = nextEdge->getSource();
+        BasicBlock* target = nextEdge->getTarget();
+        unsigned duplicateNumber = nextEdge->getDuplicateNumber();
+        DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber
+              << "}--> " << target->getNameStr());
+
+        // Ensure all the referenced edges exist
+        // TODO: make this a separate function
+        if( !arrayMap.count(source) ) {
+          errs() << "  error [" << F->getNameStr() <<
"()]: source '" + << source->getNameStr() + << "' does not exist in the array map.\n"; + } else if( !arrayMap[source].count(target) ) { + errs() << " error [" << F->getNameStr() << "()]: target '" + << target->getNameStr() + << "' does not exist in the array map.\n"; + } else if( !arrayMap[source][target].count(duplicateNumber) ) { + errs() << " error [" << F->getNameStr() << "()]: edge " + << source->getNameStr() << " -> " << target->getNameStr() + << " duplicate number " << duplicateNumber + << " does not exist in the array map.\n"; + } else { + edgeArray[arrayMap[source][target][duplicateNumber]] + += currentPath->getCount(); + } + } + + DEBUG(errs() << "\n"); + + delete pev; + } + } + + std::string errorInfo; + std::string filename = EdgeProfileFilename; + + // Open a handle to the file + FILE* edgeFile = fopen(filename.c_str(),"wb"); + + if (!edgeFile) { + errs() << "error: unable to open file '" << filename << "' for output.\n"; + return false; + } + + errs() << "Generating edge profile '" << filename << "' ...\n"; + + // write argument info + unsigned type = ArgumentInfo; + unsigned num = pathProfileInfo.argList.size(); + int zeros = 0; + + fwrite(&type,sizeof(unsigned),1,edgeFile); + fwrite(&num,sizeof(unsigned),1,edgeFile); + fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile); + if (num&3) + fwrite(&zeros, 1, 4-(num&3), edgeFile); + + type = EdgeInfo; + num = edgeArray.size(); + fwrite(&type,sizeof(unsigned),1,edgeFile); + fwrite(&num,sizeof(unsigned),1,edgeFile); + + // write each edge to the file + for( std::vector<unsigned>::iterator s = edgeArray.begin(), + e = edgeArray.end(); s != e; s++) + fwrite(&*s, sizeof (unsigned), 1, edgeFile); + + fclose (edgeFile); + + return true; +} diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp deleted file mode 100644 index 07f46824700a..000000000000 --- a/lib/Analysis/PointerTracking.cpp +++ /dev/null @@ -1,316 +0,0 @@ -//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements tracking of pointer bounds. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/PointerTracking.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Constants.h" -#include "llvm/Module.h" -#include "llvm/Value.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/InstIterator.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" -using namespace llvm; - -char PointerTracking::ID = 0; -PointerTracking::PointerTracking() : FunctionPass(ID) {} - -bool PointerTracking::runOnFunction(Function &F) { - predCache.clear(); - assert(analyzing.empty()); - FF = &F; - TD = getAnalysisIfAvailable<TargetData>(); - SE = &getAnalysis<ScalarEvolution>(); - LI = &getAnalysis<LoopInfo>(); - DT = &getAnalysis<DominatorTree>(); - return false; -} - -void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredTransitive<DominatorTree>(); - AU.addRequiredTransitive<LoopInfo>(); - AU.addRequiredTransitive<ScalarEvolution>(); - AU.setPreservesAll(); -} - -bool PointerTracking::doInitialization(Module &M) { - const Type *PTy = Type::getInt8PtrTy(M.getContext()); - - // Find calloc(i64, i64) or calloc(i32, i32). - callocFunc = M.getFunction("calloc"); - if (callocFunc) { - const FunctionType *Ty = callocFunc->getFunctionType(); - - std::vector<const Type*> args, args2; - args.push_back(Type::getInt64Ty(M.getContext())); - args.push_back(Type::getInt64Ty(M.getContext())); - args2.push_back(Type::getInt32Ty(M.getContext())); - args2.push_back(Type::getInt32Ty(M.getContext())); - const FunctionType *Calloc1Type = - FunctionType::get(PTy, args, false); - const FunctionType *Calloc2Type = - FunctionType::get(PTy, args2, false); - if (Ty != Calloc1Type && Ty != Calloc2Type) - callocFunc = 0; // Give up - } - - // Find realloc(i8*, i64) or realloc(i8*, i32). - reallocFunc = M.getFunction("realloc"); - if (reallocFunc) { - const FunctionType *Ty = reallocFunc->getFunctionType(); - std::vector<const Type*> args, args2; - args.push_back(PTy); - args.push_back(Type::getInt64Ty(M.getContext())); - args2.push_back(PTy); - args2.push_back(Type::getInt32Ty(M.getContext())); - - const FunctionType *Realloc1Type = - FunctionType::get(PTy, args, false); - const FunctionType *Realloc2Type = - FunctionType::get(PTy, args2, false); - if (Ty != Realloc1Type && Ty != Realloc2Type) - reallocFunc = 0; // Give up - } - return false; -} - -// Calculates the number of elements allocated for pointer P, -// the type of the element is stored in Ty. -const SCEV *PointerTracking::computeAllocationCount(Value *P, - const Type *&Ty) const { - Value *V = P->stripPointerCasts(); - if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - Value *arraySize = AI->getArraySize(); - Ty = AI->getAllocatedType(); - // arraySize elements of type Ty. - return SE->getSCEV(arraySize); - } - - if (CallInst *CI = extractMallocCall(V)) { - Value *arraySize = getMallocArraySize(CI, TD); - const Type* AllocTy = getMallocAllocatedType(CI); - if (!AllocTy || !arraySize) return SE->getCouldNotCompute(); - Ty = AllocTy; - // arraySize elements of type Ty. 
- return SE->getSCEV(arraySize); - } - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { - if (GV->hasDefinitiveInitializer()) { - Constant *C = GV->getInitializer(); - if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { - Ty = ATy->getElementType(); - return SE->getConstant(Type::getInt32Ty(P->getContext()), - ATy->getNumElements()); - } - } - Ty = GV->getType(); - return SE->getConstant(Type::getInt32Ty(P->getContext()), 1); - //TODO: implement more tracking for globals - } - - if (CallInst *CI = dyn_cast<CallInst>(V)) { - CallSite CS(CI); - Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); - const Loop *L = LI->getLoopFor(CI->getParent()); - if (F == callocFunc) { - Ty = Type::getInt8Ty(P->getContext()); - // calloc allocates arg0*arg1 bytes. - return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)), - SE->getSCEV(CS.getArgument(1))), - L); - } else if (F == reallocFunc) { - Ty = Type::getInt8Ty(P->getContext()); - // realloc allocates arg1 bytes. - return SE->getSCEVAtScope(CS.getArgument(1), L); - } - } - - return SE->getCouldNotCompute(); -} - -Value *PointerTracking::computeAllocationCountValue(Value *P, const Type *&Ty) const -{ - Value *V = P->stripPointerCasts(); - if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - Ty = AI->getAllocatedType(); - // arraySize elements of type Ty. - return AI->getArraySize(); - } - - if (CallInst *CI = extractMallocCall(V)) { - Ty = getMallocAllocatedType(CI); - if (!Ty) - return 0; - Value *arraySize = getMallocArraySize(CI, TD); - if (!arraySize) { - Ty = Type::getInt8Ty(P->getContext()); - return CI->getArgOperand(0); - } - // arraySize elements of type Ty. - return arraySize; - } - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { - if (GV->hasDefinitiveInitializer()) { - Constant *C = GV->getInitializer(); - if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { - Ty = ATy->getElementType(); - return ConstantInt::get(Type::getInt32Ty(P->getContext()), - ATy->getNumElements()); - } - } - Ty = cast<PointerType>(GV->getType())->getElementType(); - return ConstantInt::get(Type::getInt32Ty(P->getContext()), 1); - //TODO: implement more tracking for globals - } - - if (CallInst *CI = dyn_cast<CallInst>(V)) { - CallSite CS(CI); - Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); - if (F == reallocFunc) { - Ty = Type::getInt8Ty(P->getContext()); - // realloc allocates arg1 bytes. - return CS.getArgument(1); - } - } - - return 0; -} - -// Calculates the number of elements of type Ty allocated for P. -const SCEV *PointerTracking::computeAllocationCountForType(Value *P, - const Type *Ty) - const { - const Type *elementTy; - const SCEV *Count = computeAllocationCount(P, elementTy); - if (isa<SCEVCouldNotCompute>(Count)) - return Count; - if (elementTy == Ty) - return Count; - - if (!TD) // need TargetData from this point forward - return SE->getCouldNotCompute(); - - uint64_t elementSize = TD->getTypeAllocSize(elementTy); - uint64_t wantSize = TD->getTypeAllocSize(Ty); - if (elementSize == wantSize) - return Count; - if (elementSize % wantSize) //fractional counts not possible - return SE->getCouldNotCompute(); - return SE->getMulExpr(Count, SE->getConstant(Count->getType(), - elementSize/wantSize)); -} - -const SCEV *PointerTracking::getAllocationElementCount(Value *V) const { - // We only deal with pointers. 
- const PointerType *PTy = cast<PointerType>(V->getType()); - return computeAllocationCountForType(V, PTy->getElementType()); -} - -const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const { - return computeAllocationCountForType(V, Type::getInt8Ty(V->getContext())); -} - -// Helper for isLoopGuardedBy that checks the swapped and inverted predicate too -enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L, - Predicate Pred, - const SCEV *A, - const SCEV *B) const { - if (SE->isLoopEntryGuardedByCond(L, Pred, A, B)) - return AlwaysTrue; - Pred = ICmpInst::getSwappedPredicate(Pred); - if (SE->isLoopEntryGuardedByCond(L, Pred, B, A)) - return AlwaysTrue; - - Pred = ICmpInst::getInversePredicate(Pred); - if (SE->isLoopEntryGuardedByCond(L, Pred, B, A)) - return AlwaysFalse; - Pred = ICmpInst::getSwappedPredicate(Pred); - if (SE->isLoopEntryGuardedByCond(L, Pred, A, B)) - return AlwaysTrue; - return Unknown; -} - -enum SolverResult PointerTracking::checkLimits(const SCEV *Offset, - const SCEV *Limit, - BasicBlock *BB) -{ - //FIXME: merge implementation - return Unknown; -} - -void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base, - const SCEV *&Limit, - const SCEV *&Offset) const -{ - Pointer = Pointer->stripPointerCasts(); - Base = Pointer->getUnderlyingObject(); - Limit = getAllocationSizeInBytes(Base); - if (isa<SCEVCouldNotCompute>(Limit)) { - Base = 0; - Offset = Limit; - return; - } - - Offset = SE->getMinusSCEV(SE->getSCEV(Pointer), SE->getSCEV(Base)); - if (isa<SCEVCouldNotCompute>(Offset)) { - Base = 0; - Limit = Offset; - } -} - -void PointerTracking::print(raw_ostream &OS, const Module* M) const { - // Calling some PT methods may cause caches to be updated, however - // this should be safe for the same reason its safe for SCEV. 
- PointerTracking &PT = *const_cast<PointerTracking*>(this); - for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) { - if (!I->getType()->isPointerTy()) - continue; - Value *Base; - const SCEV *Limit, *Offset; - getPointerOffset(&*I, Base, Limit, Offset); - if (!Base) - continue; - - if (Base == &*I) { - const SCEV *S = getAllocationElementCount(Base); - OS << *Base << " ==> " << *S << " elements, "; - OS << *Limit << " bytes allocated\n"; - continue; - } - OS << &*I << " -- base: " << *Base; - OS << " offset: " << *Offset; - - enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent()); - switch (res) { - case AlwaysTrue: - OS << " always safe\n"; - break; - case AlwaysFalse: - OS << " always unsafe\n"; - break; - case Unknown: - OS << " <<unknown>>\n"; - break; - } - } -} - -INITIALIZE_PASS(PointerTracking, "pointertracking", - "Track pointer bounds", false, true); diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index cbe8d1867e4f..3f0deab9ea87 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Analysis/DominatorInternals.h" using namespace llvm; @@ -29,7 +30,7 @@ using namespace llvm; char PostDominatorTree::ID = 0; char PostDominanceFrontier::ID = 0; INITIALIZE_PASS(PostDominatorTree, "postdomtree", - "Post-Dominator Tree Construction", true, true); + "Post-Dominator Tree Construction", true, true) bool PostDominatorTree::runOnFunction(Function &F) { DT->recalculate(F); @@ -53,8 +54,11 @@ FunctionPass* llvm::createPostDomTree() { // PostDominanceFrontier Implementation //===----------------------------------------------------------------------===// -INITIALIZE_PASS(PostDominanceFrontier, "postdomfrontier", - "Post-Dominance Frontier Construction", true, true); +INITIALIZE_PASS_BEGIN(PostDominanceFrontier, "postdomfrontier", + "Post-Dominance Frontier Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(PostDominanceFrontier, "postdomfrontier", + "Post-Dominance Frontier Construction", true, true) const DominanceFrontier::DomSetType & PostDominanceFrontier::calculate(const PostDominatorTree &DT, diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index ecc0a1845307..667ee1cc348a 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -39,7 +39,8 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo explicit ProfileEstimatorPass(const double execcount = 0) - : FunctionPass(ID), ExecCount(execcount) { + : FunctionPass(ID), ExecCount(execcount) { + initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry()); if (execcount == 0) ExecCount = LoopWeight; } @@ -72,8 +73,11 @@ namespace { } // End of anonymous namespace char ProfileEstimatorPass::ID = 0; -INITIALIZE_AG_PASS(ProfileEstimatorPass, ProfileInfo, "profile-estimator", - "Estimate profiling information", false, true, false); +INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator", + "Estimate profiling information", false, true, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator", + "Estimate profiling information", false, true, false) namespace llvm { char &ProfileEstimatorPassID = ProfileEstimatorPass::ID; @@ 
-319,6 +323,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) { FunctionInformation.erase(&F); BlockInformation[&F].clear(); EdgeInformation[&F].clear(); + BBToVisit.clear(); // Mark all blocks as to visit. for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi) diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index fc7f28662c01..36f211e858d2 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -24,8 +24,12 @@ #include <limits> using namespace llvm; +namespace llvm { + template<> char ProfileInfoT<Function,BasicBlock>::ID = 0; +} + // Register the ProfileInfo interface, providing a nice name to refer to. -static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information"); +INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo) namespace llvm { @@ -44,9 +48,6 @@ ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() { } template<> -char ProfileInfoT<Function,BasicBlock>::ID = 0; - -template<> char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0; template<> @@ -888,7 +889,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { FI = Unvisited.begin(), FE = Unvisited.end(); while(FI != FE && !FoundPath) { const BasicBlock *BB = *FI; ++FI; - const BasicBlock *Dest; + const BasicBlock *Dest = 0; Path P; bool BackEdgeFound = false; for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); @@ -1076,7 +1077,9 @@ raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, con namespace { struct NoProfileInfo : public ImmutablePass, public ProfileInfo { static char ID; // Class identification, replacement for typeinfo - NoProfileInfo() : ImmutablePass(ID) {} + NoProfileInfo() : ImmutablePass(ID) { + initializeNoProfileInfoPass(*PassRegistry::getPassRegistry()); + } /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it @@ -1097,6 +1100,6 @@ namespace { char NoProfileInfo::ID = 0; // Register this pass... 
INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
-                   "No Profile Information", false, true, true);
+                   "No Profile Information", false, true, true)
 
ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index d325b574e848..098079bcffc4 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -46,6 +46,7 @@ namespace {
    static char ID; // Class identification, replacement for typeinfo
    explicit LoaderPass(const std::string &filename = "")
      : ModulePass(ID), Filename(filename) {
+      initializeLoaderPassPass(*PassRegistry::getPassRegistry());
      if (filename.empty()) Filename = ProfileInfoFilename;
    }
@@ -80,7 +81,7 @@ namespace {
 
char LoaderPass::ID = 0;
INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
-              "Load profile information from llvmprof.out", false, true, false);
+              "Load profile information from llvmprof.out", false, true, false)
 
char &llvm::ProfileLoaderPassID = LoaderPass::ID;
 
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 3f01b2d592bc..a01751849c51 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -60,10 +60,12 @@ namespace llvm {
    static char ID; // Class identification, replacement for typeinfo
 
    explicit ProfileVerifierPassT () : FunctionPass(ID) {
+      initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
      DisableAssertions = ProfileVerifierDisableAssertions;
    }
    explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
                                              DisableAssertions(da) {
+      initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
    }
 
    void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -287,7 +289,7 @@ namespace llvm {
          i != ie; ++i) {
        if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
          FType *F = CI->getCalledFunction();
-          if (F && (F->getNameStr() == "_setjmp")) {
+          if (F && (F->getName() == "_setjmp")) {
            isSetJmpTarget = true; break;
          }
        }
@@ -366,8 +368,11 @@ namespace llvm {
 
char ProfileVerifierPassT<FType, BType>::ID = 0;
}
 
-INITIALIZE_PASS(ProfileVerifierPass, "profile-verifier",
-                "Verify profiling information", false, true);
+INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
+                "Verify profiling information", false, true)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
+                "Verify profiling information", false, true)
 
namespace llvm {
  FunctionPass *createProfileVerifierPass() {
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index abc057a773a9..e2f6a8bf5d9a 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -16,8 +16,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Assembly/Writer.h"
 
#define DEBUG_TYPE "region"
#include "llvm/Support/Debug.h"
@@ -45,7 +45,7 @@ STATISTIC(numSimpleRegions, "The # of simple regions");
 
/// PrintStyle - Print region in different ways.
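// ---------------------------------------------------------------------------
// Aside: the change repeated throughout this import replaces the old
// semicolon-terminated INITIALIZE_PASS(...) registration with an explicit
// INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_DEPENDENCY / INITIALIZE_PASS_END
// chain, and has each constructor self-initialize through the PassRegistry.
// A sketch of the whole pattern for a hypothetical pass; MyCount, its
// command-line name, and initializeMyCountPass are illustrative names, not
// existing LLVM API:
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
using namespace llvm;

// Normally declared centrally (e.g. InitializePasses.h); the macros below
// provide the definition.
namespace llvm { void initializeMyCountPass(PassRegistry &); }

namespace {
  struct MyCount : public FunctionPass {
    static char ID;
    MyCount() : FunctionPass(ID) {
      // Self-registration: make sure this pass's PassInfo exists before use.
      initializeMyCountPass(*PassRegistry::getPassRegistry());
    }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();  // mirrored by the DEPENDENCY macro
      AU.setPreservesAll();
    }
    virtual bool runOnFunction(Function &) { return false; }
  };
}
char MyCount::ID = 0;

// Note: no trailing semicolons; these macros now expand to definitions.
INITIALIZE_PASS_BEGIN(MyCount, "my-count", "Example pass", false, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(MyCount, "my-count", "Example pass", false, true)
// ---------------------------------------------------------------------------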
enum PrintStyle { PrintNone, PrintBB, PrintRN }; -cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden, +static cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden, cl::desc("style of printing regions"), cl::values( clEnumValN(PrintNone, "none", "print no details"), @@ -72,6 +72,15 @@ Region::~Region() { delete *I; } +void Region::replaceEntry(BasicBlock *BB) { + entry.setPointer(BB); +} + +void Region::replaceExit(BasicBlock *BB) { + assert(exit && "No exit to replace!"); + exit = BB; +} + bool Region::contains(const BasicBlock *B) const { BasicBlock *BB = const_cast<BasicBlock*>(B); @@ -125,41 +134,49 @@ Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { return outermostLoopInRegion(L); } -bool Region::isSimple() const { - bool isSimple = true; - bool found = false; - - BasicBlock *entry = getEntry(), *exit = getExit(); - - // TopLevelRegion - if (!exit) - return false; +BasicBlock *Region::getEnteringBlock() const { + BasicBlock *entry = getEntry(); + BasicBlock *Pred; + BasicBlock *enteringBlock = 0; for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; ++PI) { - BasicBlock *Pred = *PI; + Pred = *PI; if (DT->getNode(Pred) && !contains(Pred)) { - if (found) { - isSimple = false; - break; - } - found = true; + if (enteringBlock) + return 0; + + enteringBlock = Pred; } } - found = false; + return enteringBlock; +} + +BasicBlock *Region::getExitingBlock() const { + BasicBlock *exit = getExit(); + BasicBlock *Pred; + BasicBlock *exitingBlock = 0; + + if (!exit) + return 0; for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; - ++PI) - if (contains(*PI)) { - if (found) { - isSimple = false; - break; - } - found = true; + ++PI) { + Pred = *PI; + if (contains(Pred)) { + if (exitingBlock) + return 0; + + exitingBlock = Pred; } + } - return isSimple; + return exitingBlock; +} + +bool Region::isSimple() const { + return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock(); } std::string Region::getNameStr() const { @@ -311,13 +328,38 @@ void Region::transferChildrenTo(Region *To) { children.clear(); } -void Region::addSubRegion(Region *SubRegion) { +void Region::addSubRegion(Region *SubRegion, bool moveChildren) { assert(SubRegion->parent == 0 && "SubRegion already has a parent!"); + assert(std::find(begin(), end(), SubRegion) == children.end() + && "Subregion already exists!"); + SubRegion->parent = this; - // Set up the region node. 
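// ---------------------------------------------------------------------------
// Aside: getEnteringBlock and getExitingBlock above both implement the same
// rule -- "return the unique predecessor on one side of the region boundary,
// or null if there is more than one" -- and the new isSimple() is just the
// conjunction of the two plus the not-top-level check. A standalone model of
// the rule, with a toy Node standing in for BasicBlock (illustrative, not
// LLVM types):
#include <cstddef>
#include <vector>

struct Node { std::vector<Node*> preds; bool inRegion; };

// Unique predecessor of the region's entry that lies outside it, else null.
static Node *uniqueOutsidePred(const Node &Entry) {
  Node *Found = 0;
  for (size_t i = 0, e = Entry.preds.size(); i != e; ++i) {
    Node *P = Entry.preds[i];
    if (!P->inRegion) {
      if (Found)
        return 0;          // a second entering edge: not a simple region
      Found = P;
    }
  }
  return Found;
}
// The exit-side check is the dual: the unique predecessor of the exit block
// that lies *inside* the region.
// ---------------------------------------------------------------------------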
- assert(std::find(children.begin(), children.end(), SubRegion) == children.end() - && "Node already exist!"); children.push_back(SubRegion); + + if (!moveChildren) + return; + + assert(SubRegion->children.size() == 0 + && "SubRegions that contain children are not supported"); + + for (element_iterator I = element_begin(), E = element_end(); I != E; ++I) + if (!(*I)->isSubRegion()) { + BasicBlock *BB = (*I)->getNodeAs<BasicBlock>(); + + if (SubRegion->contains(BB)) + RI->setRegionFor(BB, SubRegion); + } + + std::vector<Region*> Keep; + for (iterator I = begin(), E = end(); I != E; ++I) + if (SubRegion->contains(*I) && *I != SubRegion) { + SubRegion->children.push_back(*I); + (*I)->parent = SubRegion; + } else + Keep.push_back(*I); + + children.clear(); + children.insert(children.begin(), Keep.begin(), Keep.end()); } @@ -339,6 +381,38 @@ unsigned Region::getDepth() const { return Depth; } +Region *Region::getExpandedRegion() const { + unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors(); + + if (NumSuccessors == 0) + return NULL; + + for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); + PI != PE; ++PI) + if (!DT->dominates(getEntry(), *PI)) + return NULL; + + Region *R = RI->getRegionFor(exit); + + if (R->getEntry() != exit) { + if (exit->getTerminator()->getNumSuccessors() == 1) + return new Region(getEntry(), *succ_begin(exit), RI, DT); + else + return NULL; + } + + while (R->getParent() && R->getParent()->getEntry() == exit) + R = R->getParent(); + + if (!DT->dominates(getEntry(), R->getExit())) + for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); + PI != PE; ++PI) + if (!DT->dominates(R->getExit(), *PI)) + return NULL; + + return new Region(getEntry(), R->getExit(), RI, DT); +} + void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const { if (print_tree) OS.indent(level*2) << "[" << level << "] " << getNameStr(); @@ -376,6 +450,11 @@ void Region::dump() const { } void Region::clearNodeCache() { + // Free the cached nodes. + for (BBNodeMapT::iterator I = BBNodeMap.begin(), + IE = BBNodeMap.end(); I != IE; ++I) + delete I->second; + BBNodeMap.clear(); for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI) (*RI)->clearNodeCache(); @@ -592,6 +671,7 @@ void RegionInfo::releaseMemory() { } RegionInfo::RegionInfo() : FunctionPass(ID) { + initializeRegionInfoPass(*PassRegistry::getPassRegistry()); TopLevelRegion = 0; } @@ -654,11 +734,14 @@ Region *RegionInfo::getRegionFor(BasicBlock *BB) const { return I != BBtoRegion.end() ? 
I->second : 0;
}
 
+void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
+  BBtoRegion[BB] = R;
+}
+
Region *RegionInfo::operator[](BasicBlock *BB) const {
  return getRegionFor(BB);
}
 
-
BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
  BasicBlock *Exit = NULL;
 
@@ -733,9 +816,28 @@ RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
  return ret;
}
 
+void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB)
+{
+  Region *R = getRegionFor(OldBB);
+
+  setRegionFor(NewBB, R);
+
+  while (R->getEntry() == OldBB && !R->isTopLevelRegion()) {
+    R->replaceEntry(NewBB);
+    R = R->getParent();
+  }
+
+  setRegionFor(OldBB, R);
+}
+
char RegionInfo::ID = 0;
-INITIALIZE_PASS(RegionInfo, "regions",
-                "Detect single entry single exit regions", true, true);
+INITIALIZE_PASS_BEGIN(RegionInfo, "regions",
+                "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(RegionInfo, "regions",
+                "Detect single entry single exit regions", true, true)
 
// Create methods available outside of this file, to use them in
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
new file mode 100644
index 000000000000..3269dcc63d5e
--- /dev/null
+++ b/lib/Analysis/RegionPass.cpp
@@ -0,0 +1,275 @@
+//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RegionPass and RGPassManager. All region optimization
+// and transformation passes are derived from RegionPass. RGPassManager is
+// responsible for managing RegionPasses.
+// Most of this code was copied from LoopPass.cpp.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/Timer.h"
+
+#define DEBUG_TYPE "regionpassmgr"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// RGPassManager
+//
+
+char RGPassManager::ID = 0;
+
+RGPassManager::RGPassManager(int Depth)
+  : FunctionPass(ID), PMDataManager(Depth) {
+  skipThisRegion = false;
+  redoThisRegion = false;
+  RI = NULL;
+  CurrentRegion = NULL;
+}
+
+// Recurse through all subregions, adding every region into RQ.
+static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) {
+  RQ.push_back(R);
+  for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
+    addRegionIntoQueue(*I, RQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+  Info.addRequired<RegionInfo>();
+  Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution.  Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool RGPassManager::runOnFunction(Function &F) {
+  RI = &getAnalysis<RegionInfo>();
+  bool Changed = false;
+
+  // Collect inherited analysis from Module level pass manager.
+  populateInheritedAnalysis(TPM->activeStack);
+
+  addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+
+  if (RQ.empty()) // No regions, skip calling finalizers
+    return false;
+
+  // Initialization
+  for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
+       I != E; ++I) {
+    Region *R = *I;
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      RegionPass *RP = (RegionPass *)getContainedPass(Index);
+      Changed |= RP->doInitialization(R, *this);
+    }
+  }
+
+  // Walk Regions
+  while (!RQ.empty()) {
+
+    CurrentRegion  = RQ.back();
+    skipThisRegion = false;
+    redoThisRegion = false;
+
+    // Run all passes on the current Region.
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      RegionPass *P = (RegionPass*)getContainedPass(Index);
+
+      dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+                   CurrentRegion->getNameStr());
+      dumpRequiredSet(P);
+
+      initializeAnalysisImpl(P);
+
+      {
+        PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
+
+        TimeRegion PassTimer(getPassTimer(P));
+        Changed |= P->runOnRegion(CurrentRegion, *this);
+      }
+
+      if (Changed)
+        dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+                     skipThisRegion ? "<deleted>" :
+                                    CurrentRegion->getNameStr());
+      dumpPreservedSet(P);
+
+      if (!skipThisRegion) {
+        // Manually check that this region is still healthy. This is done
+        // instead of relying on RegionInfo::verifyRegion since RegionInfo
+        // is a function pass and it's really expensive to verify every
+        // Region in the function every time. That level of checking can be
+        // enabled with the -verify-region-info option.
+        {
+          TimeRegion PassTimer(getPassTimer(P));
+          CurrentRegion->verifyRegion();
+        }
+
+        // Then call the regular verifyAnalysis functions.
+        verifyPreservedAnalysis(P);
+      }
+
+      removeNotPreservedAnalysis(P);
+      recordAvailableAnalysis(P);
+      removeDeadPasses(P,
+                       skipThisRegion ? "<deleted>" :
+                                      CurrentRegion->getNameStr(),
+                       ON_REGION_MSG);
+
+      if (skipThisRegion)
+        // Do not run other passes on this region.
+        break;
+    }
+
+    // If the region was deleted, release all the region passes. This frees up
+    // some memory, and avoids trouble with the pass manager trying to call
+    // verifyAnalysis on them.
+    if (skipThisRegion)
+      for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+        Pass *P = getContainedPass(Index);
+        freePass(P, "<deleted>", ON_REGION_MSG);
+      }
+
+    // Pop the region from queue after running all passes.
+    RQ.pop_back();
+
+    if (redoThisRegion)
+      RQ.push_back(CurrentRegion);
+
+    // Free all region nodes created in region passes.
+    RI->clearNodeCache();
+  }
+
+  // Finalization
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    RegionPass *P = (RegionPass*)getContainedPass(Index);
+    Changed |= P->doFinalization();
+  }
+
+  // Print the region tree after all passes.
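// ---------------------------------------------------------------------------
// Aside: addRegionIntoQueue fills RQ in pre-order (each region before its
// children), and the walk above consumes RQ.back() first, so the deepest,
// most recently pushed regions run before their parents. A toy model of that
// ordering; Rgn is an illustrative stand-in for Region:
#include <deque>
#include <iostream>
#include <vector>

struct Rgn { const char *Name; std::vector<Rgn*> Children; };

static void enqueue(Rgn *R, std::deque<Rgn*> &Q) {
  Q.push_back(R);                              // parent first...
  for (size_t i = 0; i != R->Children.size(); ++i)
    enqueue(R->Children[i], Q);                // ...then its whole subtree
}

int main() {
  Rgn C = { "C" };
  Rgn B = { "B", std::vector<Rgn*>(1, &C) };
  Rgn A = { "A", std::vector<Rgn*>(1, &B) };   // A contains B contains C
  std::deque<Rgn*> Q;
  enqueue(&A, Q);                              // Q = [A, B, C]
  while (!Q.empty()) {
    std::cout << Q.back()->Name << '\n';       // prints C, then B, then A
    Q.pop_back();
  }
  return 0;
}
// ---------------------------------------------------------------------------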
+  DEBUG(
+    dbgs() << "\nRegion tree of function " << F.getName()
+           << " after all region passes:\n";
+    RI->dump();
+    dbgs() << "\n";
+  );
+
+  return Changed;
+}
+
+/// Print passes managed by this manager
+void RGPassManager::dumpPassStructure(unsigned Offset) {
+  errs().indent(Offset*2) << "Region Pass Manager\n";
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    Pass *P = getContainedPass(Index);
+    P->dumpPassStructure(Offset + 1);
+    dumpLastUses(P, Offset+1);
+  }
+}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// PrintRegionPass
+class PrintRegionPass : public RegionPass {
+private:
+  std::string Banner;
+  raw_ostream &Out;       // raw_ostream to print on.
+
+public:
+  static char ID;
+  PrintRegionPass() : RegionPass(ID), Out(dbgs()) {}
+  PrintRegionPass(const std::string &B, raw_ostream &o)
+    : RegionPass(ID), Banner(B), Out(o) {}
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+
+  virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+    Out << Banner;
+    for (Region::block_iterator I = R->block_begin(), E = R->block_end();
+         I != E; ++I)
+      (*I)->getEntry()->print(Out);
+
+    return false;
+  }
+};
+
+char PrintRegionPass::ID = 0;
+}  //end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// RegionPass
+
+// Check if this pass is suitable for the current RGPassManager, if
+// available. This pass P is not suitable for a RGPassManager if P
+// is not preserving higher level analysis info used by other
+// RGPassManager passes. In such a case, pop RGPassManager from the
+// stack. This will force assignPassManager() to create a new
+// RGPassManager as expected.
+void RegionPass::preparePassManager(PMStack &PMS) {
+
+  // Find RGPassManager
+  while (!PMS.empty() &&
+         PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+    PMS.pop();
+
+
+  // If this pass is destroying high level information that is used
+  // by other passes that are managed by this RGPassManager, then do not
+  // insert this pass into the current RGPassManager. Use a new one.
+  if (PMS.top()->getPassManagerType() == PMT_RegionPassManager &&
+      !PMS.top()->preserveHigherLevelAnalysis(this))
+    PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void RegionPass::assignPassManager(PMStack &PMS,
+                                   PassManagerType PreferredType) {
+  // Find RGPassManager
+  while (!PMS.empty() &&
+         PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+    PMS.pop();
+
+  RGPassManager *RGPM;
+
+  // Create new Region Pass Manager if it does not exist.
+  if (PMS.top()->getPassManagerType() == PMT_RegionPassManager)
+    RGPM = (RGPassManager*)PMS.top();
+  else {
+
+    assert (!PMS.empty() && "Unable to create Region Pass Manager");
+    PMDataManager *PMD = PMS.top();
+
+    // [1] Create new Region Pass Manager
+    RGPM = new RGPassManager(PMD->getDepth() + 1);
+    RGPM->populateInheritedAnalysis(PMS);
+
+    // [2] Set up new manager's top level manager
+    PMTopLevelManager *TPM = PMD->getTopLevelManager();
+    TPM->addIndirectPassManager(RGPM);
+
+    // [3] Assign manager to manage this new manager.
This may create + // and push new managers into PMS + TPM->schedulePass(RGPM); + + // [4] Push new manager into PMS + PMS.push(RGPM); + } + + RGPM->add(this); +} + +/// Get the printer pass +Pass *RegionPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintRegionPass(Banner, O); +} diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index fee5c1bae976..0cf0f9050504 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -121,35 +121,41 @@ namespace { struct RegionViewer : public DOTGraphTraitsViewer<RegionInfo, false> { static char ID; - RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){} + RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){ + initializeRegionViewerPass(*PassRegistry::getPassRegistry()); + } }; - char RegionViewer::ID = 0; -INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", - true, true); struct RegionOnlyViewer : public DOTGraphTraitsViewer<RegionInfo, true> { static char ID; - RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID){} + RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) { + initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); + } }; - char RegionOnlyViewer::ID = 0; -INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", - "View regions of function (with no function bodies)", - true, true); struct RegionPrinter : public DOTGraphTraitsPrinter<RegionInfo, false> { static char ID; RegionPrinter() : - DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {} + DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) { + initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); + } }; +char RegionPrinter::ID = 0; } //end anonymous namespace -char RegionPrinter::ID = 0; INITIALIZE_PASS(RegionPrinter, "dot-regions", - "Print regions of function to 'dot' file", true, true); + "Print regions of function to 'dot' file", true, true) + +INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", + true, true) + +INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", + "View regions of function (with no function bodies)", + true, true) namespace { @@ -157,7 +163,9 @@ struct RegionOnlyPrinter : public DOTGraphTraitsPrinter<RegionInfo, true> { static char ID; RegionOnlyPrinter() : - DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {} + DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) { + initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } }; } @@ -166,7 +174,7 @@ char RegionOnlyPrinter::ID = 0; INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", "Print regions of function to 'dot' file " "(with no function bodies)", - true, true); + true, true) FunctionPass* llvm::createRegionViewerPass() { return new RegionViewer(); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index b892d85f9f4a..62244ccb3a03 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -69,6 +69,7 @@ #include "llvm/Operator.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" @@ -103,8 +104,12 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, "derived loop"), cl::init(100)); -INITIALIZE_PASS(ScalarEvolution, "scalar-evolution", - "Scalar Evolution Analysis", false, true); 
+INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", + "Scalar Evolution Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", + "Scalar Evolution Analysis", false, true) char ScalarEvolution::ID = 0; //===----------------------------------------------------------------------===// @@ -115,13 +120,139 @@ char ScalarEvolution::ID = 0; // Implementation of the SCEV class. // -SCEV::~SCEV() {} - void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } +void SCEV::print(raw_ostream &OS) const { + switch (getSCEVType()) { + case scConstant: + WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false); + return; + case scTruncate: { + const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this); + const SCEV *Op = Trunc->getOperand(); + OS << "(trunc " << *Op->getType() << " " << *Op << " to " + << *Trunc->getType() << ")"; + return; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this); + const SCEV *Op = ZExt->getOperand(); + OS << "(zext " << *Op->getType() << " " << *Op << " to " + << *ZExt->getType() << ")"; + return; + } + case scSignExtend: { + const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this); + const SCEV *Op = SExt->getOperand(); + OS << "(sext " << *Op->getType() << " " << *Op << " to " + << *SExt->getType() << ")"; + return; + } + case scAddRecExpr: { + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this); + OS << "{" << *AR->getOperand(0); + for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) + OS << ",+," << *AR->getOperand(i); + OS << "}<"; + if (AR->hasNoUnsignedWrap()) + OS << "nuw><"; + if (AR->hasNoSignedWrap()) + OS << "nsw><"; + WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false); + OS << ">"; + return; + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); + const char *OpStr = 0; + switch (NAry->getSCEVType()) { + case scAddExpr: OpStr = " + "; break; + case scMulExpr: OpStr = " * "; break; + case scUMaxExpr: OpStr = " umax "; break; + case scSMaxExpr: OpStr = " smax "; break; + } + OS << "("; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + OS << **I; + if (llvm::next(I) != E) + OS << OpStr; + } + OS << ")"; + return; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this); + OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; + return; + } + case scUnknown: { + const SCEVUnknown *U = cast<SCEVUnknown>(this); + const Type *AllocTy; + if (U->isSizeOf(AllocTy)) { + OS << "sizeof(" << *AllocTy << ")"; + return; + } + if (U->isAlignOf(AllocTy)) { + OS << "alignof(" << *AllocTy << ")"; + return; + } + + const Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo)) { + OS << "offsetof(" << *CTy << ", "; + WriteAsOperand(OS, FieldNo, false); + OS << ")"; + return; + } + + // Otherwise just print it normally. 
+ WriteAsOperand(OS, U->getValue(), false); + return; + } + case scCouldNotCompute: + OS << "***COULDNOTCOMPUTE***"; + return; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); +} + +const Type *SCEV::getType() const { + switch (getSCEVType()) { + case scConstant: + return cast<SCEVConstant>(this)->getType(); + case scTruncate: + case scZeroExtend: + case scSignExtend: + return cast<SCEVCastExpr>(this)->getType(); + case scAddRecExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + return cast<SCEVNAryExpr>(this)->getType(); + case scAddExpr: + return cast<SCEVAddExpr>(this)->getType(); + case scUDivExpr: + return cast<SCEVUDivExpr>(this)->getType(); + case scUnknown: + return cast<SCEVUnknown>(this)->getType(); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return 0; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return 0; +} + bool SCEV::isZero() const { if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) return SC->getValue()->isZero(); @@ -143,30 +274,6 @@ bool SCEV::isAllOnesValue() const { SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} -bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return false; -} - -const Type *SCEVCouldNotCompute::getType() const { - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return 0; -} - -bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const { - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return false; -} - -bool SCEVCouldNotCompute::hasOperand(const SCEV *) const { - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return false; -} - -void SCEVCouldNotCompute::print(raw_ostream &OS) const { - OS << "***COULDNOTCOMPUTE***"; -} - bool SCEVCouldNotCompute::classof(const SCEV *S) { return S->getSCEVType() == scCouldNotCompute; } @@ -192,24 +299,10 @@ ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { return getConstant(ConstantInt::get(ITy, V, isSigned)); } -const Type *SCEVConstant::getType() const { return V->getType(); } - -void SCEVConstant::print(raw_ostream &OS) const { - WriteAsOperand(OS, V, false); -} - SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, unsigned SCEVTy, const SCEV *op, const Type *ty) : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} -bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { - return Op->dominates(BB, DT); -} - -bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { - return Op->properlyDominates(BB, DT); -} - SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { @@ -218,10 +311,6 @@ SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, "Cannot truncate non-integer value!"); } -void SCEVTruncateExpr::print(raw_ostream &OS) const { - OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; -} - SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { @@ -230,10 +319,6 @@ SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, "Cannot zero extend non-integer value!"); } -void SCEVZeroExtendExpr::print(raw_ostream &OS) const { - OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; -} - 
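// ---------------------------------------------------------------------------
// Aside: the print() and getType() bodies removed around here were virtual
// methods spread across the SCEV subclasses; this import folds them into
// single functions that switch on getSCEVType() (see the new SCEV::print and
// SCEV::getType earlier in this diff). The shape of that refactor on a toy
// two-kind hierarchy; all names here are illustrative:
#include <iostream>

struct Expr {
  enum Kind { KConst, KAdd };
  Kind K;
  explicit Expr(Kind K) : K(K) {}
};

// One switch replaces N virtual overrides: every kind is handled in one
// place, and a newly added kind surfaces as a compiler warning here.
static void printExpr(const Expr &E, std::ostream &OS) {
  switch (E.K) {
  case Expr::KConst: OS << "const"; return;
  case Expr::KAdd:   OS << "add";   return;
  }
  OS << "<unknown Expr kind>";
}

int main() {
  printExpr(Expr(Expr::KAdd), std::cout);   // prints "add"
  std::cout << '\n';
  return 0;
}
// ---------------------------------------------------------------------------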
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { @@ -242,139 +327,9 @@ SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, "Cannot sign extend non-integer value!"); } -void SCEVSignExtendExpr::print(raw_ostream &OS) const { - OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; -} - -void SCEVCommutativeExpr::print(raw_ostream &OS) const { - const char *OpStr = getOperationStr(); - OS << "("; - for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) { - OS << **I; - if (llvm::next(I) != E) - OS << OpStr; - } - OS << ")"; -} - -bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { - for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) - if (!(*I)->dominates(BB, DT)) - return false; - return true; -} - -bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { - for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) - if (!(*I)->properlyDominates(BB, DT)) - return false; - return true; -} - -bool SCEVNAryExpr::isLoopInvariant(const Loop *L) const { - for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) - if (!(*I)->isLoopInvariant(L)) - return false; - return true; -} - -// hasComputableLoopEvolution - N-ary expressions have computable loop -// evolutions iff they have at least one operand that varies with the loop, -// but that all varying operands are computable. -bool SCEVNAryExpr::hasComputableLoopEvolution(const Loop *L) const { - bool HasVarying = false; - for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) { - const SCEV *S = *I; - if (!S->isLoopInvariant(L)) { - if (S->hasComputableLoopEvolution(L)) - HasVarying = true; - else - return false; - } - } - return HasVarying; -} - -bool SCEVNAryExpr::hasOperand(const SCEV *O) const { - for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) { - const SCEV *S = *I; - if (O == S || S->hasOperand(O)) - return true; - } - return false; -} - -bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { - return LHS->dominates(BB, DT) && RHS->dominates(BB, DT); -} - -bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { - return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT); -} - -void SCEVUDivExpr::print(raw_ostream &OS) const { - OS << "(" << *LHS << " /u " << *RHS << ")"; -} - -const Type *SCEVUDivExpr::getType() const { - // In most cases the types of LHS and RHS will be the same, but in some - // crazy cases one or the other may be a pointer. ScalarEvolution doesn't - // depend on the type for correctness, but handling types carefully can - // avoid extra casts in the SCEVExpander. The LHS is more likely to be - // a pointer type than the RHS, so use the RHS' type here. - return RHS->getType(); -} - -bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const { - // Add recurrences are never invariant in the function-body (null loop). - if (!QueryLoop) - return false; - - // This recurrence is variant w.r.t. QueryLoop if QueryLoop contains L. - if (QueryLoop->contains(L)) - return false; - - // This recurrence is invariant w.r.t. QueryLoop if L contains QueryLoop. - if (L->contains(QueryLoop)) - return true; - - // This recurrence is variant w.r.t. QueryLoop if any of its operands - // are variant. - for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) - if (!(*I)->isLoopInvariant(QueryLoop)) - return false; - - // Otherwise it's loop-invariant. 
- return true; -} - -bool -SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { - return DT->dominates(L->getHeader(), BB) && - SCEVNAryExpr::dominates(BB, DT); -} - -bool -SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { - // This uses a "dominates" query instead of "properly dominates" query because - // the instruction which produces the addrec's value is a PHI, and a PHI - // effectively properly dominates its entire containing block. - return DT->dominates(L->getHeader(), BB) && - SCEVNAryExpr::properlyDominates(BB, DT); -} - -void SCEVAddRecExpr::print(raw_ostream &OS) const { - OS << "{" << *Operands[0]; - for (unsigned i = 1, e = NumOperands; i != e; ++i) - OS << ",+," << *Operands[i]; - OS << "}<"; - WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); - OS << ">"; -} - void SCEVUnknown::deleted() { - // Clear this SCEVUnknown from ValuesAtScopes. - SE->ValuesAtScopes.erase(this); + // Clear this SCEVUnknown from various maps. + SE->forgetMemoizedResults(this); // Remove this SCEVUnknown from the uniquing map. SE->UniqueSCEVs.RemoveNode(this); @@ -384,8 +339,8 @@ void SCEVUnknown::deleted() { } void SCEVUnknown::allUsesReplacedWith(Value *New) { - // Clear this SCEVUnknown from ValuesAtScopes. - SE->ValuesAtScopes.erase(this); + // Clear this SCEVUnknown from various maps. + SE->forgetMemoizedResults(this); // Remove this SCEVUnknown from the uniquing map. SE->UniqueSCEVs.RemoveNode(this); @@ -396,32 +351,6 @@ void SCEVUnknown::allUsesReplacedWith(Value *New) { setValPtr(New); } -bool SCEVUnknown::isLoopInvariant(const Loop *L) const { - // All non-instruction values are loop invariant. All instructions are loop - // invariant if they are not contained in the specified loop. - // Instructions are never considered invariant in the function body - // (null loop) because they are defined within the "loop". - if (Instruction *I = dyn_cast<Instruction>(getValue())) - return L && !L->contains(I); - return true; -} - -bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const { - if (Instruction *I = dyn_cast<Instruction>(getValue())) - return DT->dominates(I->getParent(), BB); - return true; -} - -bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { - if (Instruction *I = dyn_cast<Instruction>(getValue())) - return DT->properlyDominates(I->getParent(), BB); - return true; -} - -const Type *SCEVUnknown::getType() const { - return getValue()->getType(); -} - bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) @@ -486,30 +415,6 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { return false; } -void SCEVUnknown::print(raw_ostream &OS) const { - const Type *AllocTy; - if (isSizeOf(AllocTy)) { - OS << "sizeof(" << *AllocTy << ")"; - return; - } - if (isAlignOf(AllocTy)) { - OS << "alignof(" << *AllocTy << ")"; - return; - } - - const Type *CTy; - Constant *FieldNo; - if (isOffsetOf(CTy, FieldNo)) { - OS << "offsetof(" << *CTy << ", "; - WriteAsOperand(OS, FieldNo, false); - OS << ")"; - return; - } - - // Otherwise just print it normally. 
- WriteAsOperand(OS, getValue(), false); -} - //===----------------------------------------------------------------------===// // SCEV Utilities //===----------------------------------------------------------------------===// @@ -914,6 +819,36 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) return getTruncateOrZeroExtend(SZ->getOperand(), Ty); + // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can + // eliminate all the truncates. + if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) { + SmallVector<const SCEV *, 4> Operands; + bool hasTrunc = false; + for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) { + const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty); + hasTrunc = isa<SCEVTruncateExpr>(S); + Operands.push_back(S); + } + if (!hasTrunc) + return getAddExpr(Operands, false, false); + UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. + } + + // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can + // eliminate all the truncates. + if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) { + SmallVector<const SCEV *, 4> Operands; + bool hasTrunc = false; + for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) { + const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty); + hasTrunc = isa<SCEVTruncateExpr>(S); + Operands.push_back(S); + } + if (!hasTrunc) + return getMulExpr(Operands, false, false); + UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. + } + // If the input value is a chrec scev, truncate the chrec's operands. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { SmallVector<const SCEV *, 4> Operands; @@ -965,6 +900,19 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // zext(trunc(x)) --> zext(x) or x or trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) { + // It's possible the bits taken off by the truncate were all zero bits. If + // so, we should be able to simplify this further. + const SCEV *X = ST->getOperand(); + ConstantRange CR = getUnsignedRange(X); + unsigned TruncBits = getTypeSizeInBits(ST->getType()); + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( + CR.zextOrTrunc(NewBits))) + return getTruncateOrZeroExtend(X, Ty); + } + // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can zero extend all of the // operands (often constants). This allows analysis of something like @@ -1089,6 +1037,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) return getSignExtendExpr(SS->getOperand(), Ty); + // sext(zext(x)) --> zext(x) + if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) + return getZeroExtendExpr(SZ->getOperand(), Ty); + // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. FoldingSetNodeID ID; @@ -1098,6 +1050,23 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // If the input value is provably positive, build a zext instead. 
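// ---------------------------------------------------------------------------
// Aside: the zext(trunc(x)) fold above (and the sext(trunc(x)) fold just
// below) ask a range question: over every value x can take, does truncating
// and then re-extending round-trip losslessly? Simplified here to a
// contiguous, non-wrapping unsigned range [Lo, Hi] and an i32 -> i8 -> i32
// round trip; ConstantRange also handles wrapped ranges, which this sketch
// does not:
#include <cassert>
#include <cstdint>

static bool zextOfTruncIsIdentity(uint32_t Lo, uint32_t Hi) {
  // trunc-to-i8-then-zext preserves every value in [Lo, Hi] exactly when
  // the whole range already fits in 8 unsigned bits.
  return Lo <= Hi && Hi <= 0xFFu;
}

int main() {
  assert(zextOfTruncIsIdentity(0, 200));    // [0,200] fits in i8: fold to x
  assert(!zextOfTruncIsIdentity(0, 300));   // 300 needs bits above bit 7
  return 0;
}
// ---------------------------------------------------------------------------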
+ if (isKnownNonNegative(Op)) + return getZeroExtendExpr(Op, Ty); + + // sext(trunc(x)) --> sext(x) or x or trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) { + // It's possible the bits taken off by the truncate were all sign bits. If + // so, we should be able to simplify this further. + const SCEV *X = ST->getOperand(); + ConstantRange CR = getSignedRange(X); + unsigned TruncBits = getTypeSizeInBits(ST->getType()); + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).signExtend(NewBits).contains( + CR.sextOrTrunc(NewBits))) + return getTruncateOrSignExtend(X, Ty); + } + // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the // operands (often constants). This allows analysis of something like @@ -1639,7 +1608,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (Ops[i]->isLoopInvariant(AddRecLoop)) { + if (isLoopInvariant(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -1711,7 +1680,6 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // already have one, otherwise create a new one. FoldingSetNodeID ID; ID.AddInteger(scAddExpr); - ID.AddInteger(Ops.size()); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = 0; @@ -1846,7 +1814,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (Ops[i]->isLoopInvariant(AddRecLoop)) { + if (isLoopInvariant(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -1917,7 +1885,6 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // already have one, otherwise create a new one. FoldingSetNodeID ID; ID.AddInteger(scMulExpr); - ID.AddInteger(Ops.size()); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = 0; @@ -2066,6 +2033,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, for (unsigned i = 1, e = Operands.size(); i != e; ++i) assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && "SCEVAddRecExpr operand types don't match!"); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + assert(isLoopInvariant(Operands[i], L) && + "SCEVAddRecExpr operand is not loop-invariant!"); #endif if (Operands.back()->isZero()) { @@ -2106,7 +2076,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, // requirement. bool AllInvariant = true; for (unsigned i = 0, e = Operands.size(); i != e; ++i) - if (!Operands[i]->isLoopInvariant(L)) { + if (!isLoopInvariant(Operands[i], L)) { AllInvariant = false; break; } @@ -2114,7 +2084,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, NestedOperands[0] = getAddRecExpr(Operands, L); AllInvariant = true; for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) - if (!NestedOperands[i]->isLoopInvariant(NestedLoop)) { + if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { AllInvariant = false; break; } @@ -2131,7 +2101,6 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, // already have one, otherwise create a new one. 
FoldingSetNodeID ID; ID.AddInteger(scAddRecExpr); - ID.AddInteger(Operands.size()); for (unsigned i = 0, e = Operands.size(); i != e; ++i) ID.AddPointer(Operands[i]); ID.AddPointer(L); @@ -2242,7 +2211,6 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // already have one, otherwise create a new one. FoldingSetNodeID ID; ID.AddInteger(scSMaxExpr); - ID.AddInteger(Ops.size()); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = 0; @@ -2347,7 +2315,6 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // already have one, otherwise create a new one. FoldingSetNodeID ID; ID.AddInteger(scUMaxExpr); - ID.AddInteger(Ops.size()); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = 0; @@ -2543,24 +2510,24 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { return getMinusSCEV(AllOnes, V); } -/// getMinusSCEV - Return a SCEV corresponding to LHS - RHS. -/// -const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, - const SCEV *RHS) { +/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1, +/// and thus the HasNUW and HasNSW bits apply to the resultant add, not +/// whether the sub would have overflowed. +const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, + bool HasNUW, bool HasNSW) { // Fast path: X - X --> 0. if (LHS == RHS) return getConstant(LHS->getType(), 0); // X - Y --> X + -Y - return getAddExpr(LHS, getNegativeSCEV(RHS)); + return getAddExpr(LHS, getNegativeSCEV(RHS), HasNUW, HasNSW); } /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the /// input value to the specified type. If the type must be extended, it is zero /// extended. const SCEV * -ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, - const Type *Ty) { +ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && @@ -2714,9 +2681,11 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { + const SCEV *Old = It->second; + // Short-circuit the def-use traversal if the symbolic name // ceases to appear in expressions. - if (It->second != SymName && !It->second->hasOperand(SymName)) + if (Old != SymName && !hasOperand(Old, SymName)) continue; // SCEVUnknown for a PHI either means that it has an unrecognized @@ -2727,9 +2696,9 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { // updates on its own when it gets to that point. In the third, we do // want to forget the SCEVUnknown. if (!isa<PHINode>(I) || - !isa<SCEVUnknown>(It->second) || - (I != PN && It->second == SymName)) { - ValuesAtScopes.erase(It->second); + !isa<SCEVUnknown>(Old) || + (I != PN && Old == SymName)) { + forgetMemoizedResults(Old); ValueExprMap.erase(It); } } @@ -2801,7 +2770,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // This is not a valid addrec if the step amount is varying each // loop iteration, but is not itself an addrec in this loop. 
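// ---------------------------------------------------------------------------
// Aside: the new getMinusSCEV comment above is subtle: SCEV has no subtract
// node, so LHS - RHS is built as LHS + (-1 * RHS), and any HasNUW/HasNSW
// flags describe that add. An 8-bit worked example of why the distinction
// matters, in plain C++ with no LLVM types:
#include <cstdint>
#include <iostream>

int main() {
  uint8_t A = 5, B = 10;
  uint8_t NegB = (uint8_t)(0u - B);      // -10 mod 256 == 246
  uint8_t Sum  = (uint8_t)(A + NegB);    // 5 + 246 == 251, no carry out
  // As a subtraction, 5 - 10 wraps below zero in 8-bit unsigned math; yet
  // the add SCEV actually forms, 5 + 246, stays within 8 bits. A no-wrap
  // bit on the result is therefore a claim about the add, not about
  // whether the original subtraction overflowed.
  std::cout << (unsigned)Sum << '\n';    // prints 251 == (uint8_t)(5 - 10)
  return 0;
}
// ---------------------------------------------------------------------------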
- if (Accum->isLoopInvariant(L) || + if (isLoopInvariant(Accum, L) || (isa<SCEVAddRecExpr>(Accum) && cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { bool HasNUW = false; @@ -2814,6 +2783,23 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { HasNUW = true; if (OBO->hasNoSignedWrap()) HasNSW = true; + } else if (const GEPOperator *GEP = + dyn_cast<GEPOperator>(BEValueV)) { + // If the increment is a GEP, then we know it won't perform a + // signed overflow, because the address space cannot be + // wrapped around. + // + // NOTE: This isn't strictly true, because you could have an + // object straddling the 2G address boundary in a 32-bit address + // space (for example). We really want to model this as a "has + // no signed/unsigned wrap" where the base pointer is treated as + // unsigned and the increment is known to not have signed + // wrapping. + // + // This is a highly theoretical concern though, and this is good + // enough for all cases we know of at this point. :) + // + HasNSW |= GEP->isInBounds(); } const SCEV *StartVal = getSCEV(StartValueV); @@ -2822,7 +2808,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // Since the no-wrap flags are on the increment, they apply to the // post-incremented value as well. - if (Accum->isLoopInvariant(L)) + if (isLoopInvariant(Accum, L)) (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, HasNUW, HasNSW); @@ -2867,17 +2853,9 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = PN->hasConstantValue(DT)) { - bool AllSameLoop = true; - Loop *PNLoop = LI->getLoopFor(PN->getParent()); - for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) { - AllSameLoop = false; - break; - } - if (AllSameLoop) + if (Value *V = SimplifyInstruction(PN, TD, DT)) + if (LI->replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); - } // If it's not a loop phi, we can't handle it yet. return getUnknown(PN); @@ -2892,6 +2870,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { // Add expression, because the Instruction may be guarded by control flow // and the no-overflow bits may not be valid for the expression in any // context. + bool isInBounds = GEP->isInBounds(); const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); Value *Base = GEP->getOperand(0); @@ -2920,7 +2899,8 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); // Multiply the index by the element size to compute the element offset. - const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize); + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, /*NUW*/ false, + /*NSW*/ isInBounds); // Add the element offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, LocalOffset); @@ -2931,7 +2911,8 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { const SCEV *BaseS = getSCEV(Base); // Add the total offset from all the GEP indices to the base. 
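// For instance (hypothetical IR, not part of this patch): for
//   getelementptr inbounds double* %p, i64 %i
// the per-index offset is (8 * %i)<nsw> and the result below is
// (%p + 8 * %i)<nsw>, since an inbounds GEP cannot wrap the address space
// in the signed sense.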
- return getAddExpr(BaseS, TotalOffset); + return getAddExpr(BaseS, TotalOffset, /*NUW*/ false, + /*NSW*/ isInBounds); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3019,9 +3000,13 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { /// ConstantRange ScalarEvolution::getUnsignedRange(const SCEV *S) { + // See if we've computed this range already. + DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S); + if (I != UnsignedRanges.end()) + return I->second; if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) - return ConstantRange(C->getValue()->getValue()); + return setUnsignedRange(C, ConstantRange(C->getValue()->getValue())); unsigned BitWidth = getTypeSizeInBits(S->getType()); ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); @@ -3038,49 +3023,52 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { ConstantRange X = getUnsignedRange(Add->getOperand(0)); for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) X = X.add(getUnsignedRange(Add->getOperand(i))); - return ConservativeResult.intersectWith(X); + return setUnsignedRange(Add, ConservativeResult.intersectWith(X)); } if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { ConstantRange X = getUnsignedRange(Mul->getOperand(0)); for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) X = X.multiply(getUnsignedRange(Mul->getOperand(i))); - return ConservativeResult.intersectWith(X); + return setUnsignedRange(Mul, ConservativeResult.intersectWith(X)); } if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { ConstantRange X = getUnsignedRange(SMax->getOperand(0)); for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) X = X.smax(getUnsignedRange(SMax->getOperand(i))); - return ConservativeResult.intersectWith(X); + return setUnsignedRange(SMax, ConservativeResult.intersectWith(X)); } if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { ConstantRange X = getUnsignedRange(UMax->getOperand(0)); for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) X = X.umax(getUnsignedRange(UMax->getOperand(i))); - return ConservativeResult.intersectWith(X); + return setUnsignedRange(UMax, ConservativeResult.intersectWith(X)); } if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { ConstantRange X = getUnsignedRange(UDiv->getLHS()); ConstantRange Y = getUnsignedRange(UDiv->getRHS()); - return ConservativeResult.intersectWith(X.udiv(Y)); + return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y))); } if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { ConstantRange X = getUnsignedRange(ZExt->getOperand()); - return ConservativeResult.intersectWith(X.zeroExtend(BitWidth)); + return setUnsignedRange(ZExt, + ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); } if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { ConstantRange X = getUnsignedRange(SExt->getOperand()); - return ConservativeResult.intersectWith(X.signExtend(BitWidth)); + return setUnsignedRange(SExt, + ConservativeResult.intersectWith(X.signExtend(BitWidth))); } if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { ConstantRange X = getUnsignedRange(Trunc->getOperand()); - return ConservativeResult.intersectWith(X.truncate(BitWidth)); + return setUnsignedRange(Trunc, + ConservativeResult.intersectWith(X.truncate(BitWidth))); } if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { @@ -3120,19 +3108,20 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { ConstantRange ExtEndRange = 
EndRange.zextOrTrunc(BitWidth*2+1); if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != ExtEndRange) - return ConservativeResult; + return setUnsignedRange(AddRec, ConservativeResult); APInt Min = APIntOps::umin(StartRange.getUnsignedMin(), EndRange.getUnsignedMin()); APInt Max = APIntOps::umax(StartRange.getUnsignedMax(), EndRange.getUnsignedMax()); if (Min.isMinValue() && Max.isMaxValue()) - return ConservativeResult; - return ConservativeResult.intersectWith(ConstantRange(Min, Max+1)); + return setUnsignedRange(AddRec, ConservativeResult); + return setUnsignedRange(AddRec, + ConservativeResult.intersectWith(ConstantRange(Min, Max+1))); } } - return ConservativeResult; + return setUnsignedRange(AddRec, ConservativeResult); } if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { @@ -3141,20 +3130,25 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); if (Ones == ~Zeros + 1) - return ConservativeResult; - return ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); + return setUnsignedRange(U, ConservativeResult); + return setUnsignedRange(U, + ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1))); } - return ConservativeResult; + return setUnsignedRange(S, ConservativeResult); } /// getSignedRange - Determine the signed range for a particular SCEV. /// ConstantRange ScalarEvolution::getSignedRange(const SCEV *S) { + // See if we've computed this range already. + DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S); + if (I != SignedRanges.end()) + return I->second; if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) - return ConstantRange(C->getValue()->getValue()); + return setSignedRange(C, ConstantRange(C->getValue()->getValue())); unsigned BitWidth = getTypeSizeInBits(S->getType()); ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); @@ -3171,49 +3165,52 @@ ScalarEvolution::getSignedRange(const SCEV *S) { ConstantRange X = getSignedRange(Add->getOperand(0)); for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) X = X.add(getSignedRange(Add->getOperand(i))); - return ConservativeResult.intersectWith(X); + return setSignedRange(Add, ConservativeResult.intersectWith(X)); } if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { ConstantRange X = getSignedRange(Mul->getOperand(0)); for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) X = X.multiply(getSignedRange(Mul->getOperand(i))); - return ConservativeResult.intersectWith(X); + return setSignedRange(Mul, ConservativeResult.intersectWith(X)); } if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { ConstantRange X = getSignedRange(SMax->getOperand(0)); for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) X = X.smax(getSignedRange(SMax->getOperand(i))); - return ConservativeResult.intersectWith(X); + return setSignedRange(SMax, ConservativeResult.intersectWith(X)); } if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { ConstantRange X = getSignedRange(UMax->getOperand(0)); for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) X = X.umax(getSignedRange(UMax->getOperand(i))); - return ConservativeResult.intersectWith(X); + return setSignedRange(UMax, ConservativeResult.intersectWith(X)); } if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { ConstantRange X = getSignedRange(UDiv->getLHS()); ConstantRange Y = getSignedRange(UDiv->getRHS()); - return ConservativeResult.intersectWith(X.udiv(Y)); + return 
setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y))); } if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { ConstantRange X = getSignedRange(ZExt->getOperand()); - return ConservativeResult.intersectWith(X.zeroExtend(BitWidth)); + return setSignedRange(ZExt, + ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); } if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { ConstantRange X = getSignedRange(SExt->getOperand()); - return ConservativeResult.intersectWith(X.signExtend(BitWidth)); + return setSignedRange(SExt, + ConservativeResult.intersectWith(X.signExtend(BitWidth))); } if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { ConstantRange X = getSignedRange(Trunc->getOperand()); - return ConservativeResult.intersectWith(X.truncate(BitWidth)); + return setSignedRange(Trunc, + ConservativeResult.intersectWith(X.truncate(BitWidth))); } if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { @@ -3263,34 +3260,35 @@ ScalarEvolution::getSignedRange(const SCEV *S) { ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1); if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != ExtEndRange) - return ConservativeResult; + return setSignedRange(AddRec, ConservativeResult); APInt Min = APIntOps::smin(StartRange.getSignedMin(), EndRange.getSignedMin()); APInt Max = APIntOps::smax(StartRange.getSignedMax(), EndRange.getSignedMax()); if (Min.isMinSignedValue() && Max.isMaxSignedValue()) - return ConservativeResult; - return ConservativeResult.intersectWith(ConstantRange(Min, Max+1)); + return setSignedRange(AddRec, ConservativeResult); + return setSignedRange(AddRec, + ConservativeResult.intersectWith(ConstantRange(Min, Max+1))); } } - return ConservativeResult; + return setSignedRange(AddRec, ConservativeResult); } if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. if (!U->getValue()->getType()->isIntegerTy() && !TD) - return ConservativeResult; + return setSignedRange(U, ConservativeResult); unsigned NS = ComputeNumSignBits(U->getValue(), TD); if (NS == 1) - return ConservativeResult; - return ConservativeResult.intersectWith( + return setSignedRange(U, ConservativeResult); + return setSignedRange(U, ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), - APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)); + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1))); } - return ConservativeResult; + return setSignedRange(S, ConservativeResult); } /// createSCEV - We know that there is no SCEV for the specified value. @@ -3458,8 +3456,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // If C is a single bit, it may be in the sign-bit position // before the zero-extend. In this case, represent the xor // using an add, which is equivalent, and re-apply the zext. - APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize); - if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() && + APInt Trunc = CI->getValue().trunc(Z0TySize); + if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && Trunc.isSignBit()) return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), UTy); @@ -3699,58 +3697,61 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // backedge-taken count, which could result in infinite recursion. 
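// For example (hypothetical): computing the backedge-taken count of %loop
// may indirectly re-enter getBackedgeTakenInfo for the same loop; the
// re-entrant query then sees the CouldNotCompute placeholder inserted below
// and stops instead of recursing.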
std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); - if (Pair.second) { - BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L); - if (BECount.Exact != getCouldNotCompute()) { - assert(BECount.Exact->isLoopInvariant(L) && - BECount.Max->isLoopInvariant(L) && - "Computed backedge-taken count isn't loop invariant for loop!"); - ++NumTripCountsComputed; + if (!Pair.second) + return Pair.first->second; + BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L); + if (BECount.Exact != getCouldNotCompute()) { + assert(isLoopInvariant(BECount.Exact, L) && + isLoopInvariant(BECount.Max, L) && + "Computed backedge-taken count isn't loop invariant for loop!"); + ++NumTripCountsComputed; + + // Update the value in the map. + Pair.first->second = BECount; + } else { + if (BECount.Max != getCouldNotCompute()) // Update the value in the map. Pair.first->second = BECount; - } else { - if (BECount.Max != getCouldNotCompute()) - // Update the value in the map. - Pair.first->second = BECount; - if (isa<PHINode>(L->getHeader()->begin())) - // Only count loops that have phi nodes as not being computable. - ++NumTripCountsNotComputed; - } - - // Now that we know more about the trip count for this loop, forget any - // existing SCEV values for PHI nodes in this loop since they are only - // conservative estimates made without the benefit of trip count - // information. This is similar to the code in forgetLoop, except that - // it handles SCEVUnknown PHI nodes specially. - if (BECount.hasAnyInfo()) { - SmallVector<Instruction *, 16> Worklist; - PushLoopPHIs(L, Worklist); - - SmallPtrSet<Instruction *, 8> Visited; - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - if (!Visited.insert(I)) continue; - - ValueExprMapType::iterator It = - ValueExprMap.find(static_cast<Value *>(I)); - if (It != ValueExprMap.end()) { - // SCEVUnknown for a PHI either means that it has an unrecognized - // structure, or it's a PHI that's in the progress of being computed - // by createNodeForPHI. In the former case, additional loop trip - // count information isn't going to change anything. In the later - // case, createNodeForPHI will perform the necessary updates on its - // own when it gets to that point. - if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) { - ValuesAtScopes.erase(It->second); - ValueExprMap.erase(It); - } - if (PHINode *PN = dyn_cast<PHINode>(I)) - ConstantEvolutionLoopExitValue.erase(PN); + if (isa<PHINode>(L->getHeader()->begin())) + // Only count loops that have phi nodes as not being computable. + ++NumTripCountsNotComputed; + } + + // Now that we know more about the trip count for this loop, forget any + // existing SCEV values for PHI nodes in this loop since they are only + // conservative estimates made without the benefit of trip count + // information. This is similar to the code in forgetLoop, except that + // it handles SCEVUnknown PHI nodes specially. 
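// For example (hypothetical): once an exact trip count is known for %loop,
// an exit value that previously had to stay a conservative estimate may now
// fold to something precise, so the stale mapping is evicted here and
// rebuilt lazily on the next getSCEV query.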
+ if (BECount.hasAnyInfo()) { + SmallVector<Instruction *, 16> Worklist; + PushLoopPHIs(L, Worklist); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = + ValueExprMap.find(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + const SCEV *Old = It->second; + + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, or it's a PHI that's in the process of being computed + // by createNodeForPHI. In the former case, additional loop trip + // count information isn't going to change anything. In the latter + // case, createNodeForPHI will perform the necessary updates on its + // own when it gets to that point. + if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) { + forgetMemoizedResults(Old); + ValueExprMap.erase(It); } - - PushDefUseChildren(I, Worklist); + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); } + + PushDefUseChildren(I, Worklist); } } return Pair.first->second; @@ -3774,7 +3775,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) { ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { - ValuesAtScopes.erase(It->second); + forgetMemoizedResults(It->second); ValueExprMap.erase(It); if (PHINode *PN = dyn_cast<PHINode>(I)) ConstantEvolutionLoopExitValue.erase(PN); @@ -3782,6 +3783,11 @@ void ScalarEvolution::forgetLoop(const Loop *L) { PushDefUseChildren(I, Worklist); } + + // Forget all contained loops too, to avoid dangling entries in the + // ValuesAtScopes map. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + forgetLoop(*I); } /// forgetValue - This method should be called by the client when it has @@ -3802,7 +3808,7 @@ void ScalarEvolution::forgetValue(Value *V) { ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { - ValuesAtScopes.erase(It->second); + forgetMemoizedResults(It->second); ValueExprMap.erase(It); if (PHINode *PN = dyn_cast<PHINode>(I)) ConstantEvolutionLoopExitValue.erase(PN); @@ -4016,6 +4022,105 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); } +static const SCEVAddRecExpr * +isSimpleUnwrappingAddRec(const SCEV *S, const Loop *L) { + const SCEVAddRecExpr *SA = dyn_cast<SCEVAddRecExpr>(S); + + // The SCEV must be an addrec of this loop. + if (!SA || SA->getLoop() != L || !SA->isAffine()) + return 0; + + // The SCEV must be known to not wrap in some way to be interesting. + if (!SA->hasNoUnsignedWrap() && !SA->hasNoSignedWrap()) + return 0; + + // The stride must be a constant so that we know if it is striding up or down. + if (!isa<SCEVConstant>(SA->getOperand(1))) + return 0; + return SA; +} + +/// getMinusSCEVForExitTest - When considering a loop with an "x != y" exit +/// test, we turn this into a computation that evaluates x-y != 0, and this +/// function returns the expression to use for x-y. We know and take +/// advantage of the fact that this subtraction is only being used in a +/// comparison against zero.
+/// +static const SCEV *getMinusSCEVForExitTest(const SCEV *LHS, const SCEV *RHS, + const Loop *L, ScalarEvolution &SE) { + // If either LHS or RHS is an AddRec SCEV (of this loop) that is known to not + // wrap (either NSW or NUW), then we know that the value will either become + // the other one (and thus the loop terminates), that the loop will terminate + // through some other exit condition first, or that the loop has undefined + // behavior. This information is useful when the addrec has a stride that is + // != 1 or -1, because it means we can't "miss" the exit value. + // + // In any of these three cases, it is safe to turn the exit condition into a + // "counting down" AddRec (to zero) by subtracting the two inputs as normal, + // but since we know that the "end cannot be missed" we can force the + // resulting AddRec to be a NUW addrec. Since it is counting down, this means + // that the AddRec *cannot* pass zero. + + // See if LHS and RHS are addrecs we can handle. + const SCEVAddRecExpr *LHSA = isSimpleUnwrappingAddRec(LHS, L); + const SCEVAddRecExpr *RHSA = isSimpleUnwrappingAddRec(RHS, L); + + // If neither addrec is interesting, just return a minus. + if (RHSA == 0 && LHSA == 0) + return SE.getMinusSCEV(LHS, RHS); + + // If only one of LHS and RHS is an AddRec of this loop, make sure it is LHS. + if (RHSA && LHSA == 0) { + // Safe because a-b === b-a for comparisons against zero. + std::swap(LHS, RHS); + std::swap(LHSA, RHSA); + } + + // Handle the case when only one is advancing in a non-overflowing way. + if (RHSA == 0) { + // If RHS is loop varying, then we can't predict when LHS will cross it. + if (!SE.isLoopInvariant(RHS, L)) + return SE.getMinusSCEV(LHS, RHS); + + // If LHS has a positive stride, then we compute RHS-LHS, because the loop + // is counting up until it crosses RHS (which must be larger than LHS). If + // it is negative, we compute LHS-RHS because we're counting down to RHS. + const ConstantInt *Stride = + cast<SCEVConstant>(LHSA->getOperand(1))->getValue(); + if (Stride->getValue().isNegative()) + std::swap(LHS, RHS); + + return SE.getMinusSCEV(RHS, LHS, true /*HasNUW*/); + } + + // If both LHS and RHS are interesting, we have something like: + // a+i*4 != b+i*8. + const ConstantInt *LHSStride = + cast<SCEVConstant>(LHSA->getOperand(1))->getValue(); + const ConstantInt *RHSStride = + cast<SCEVConstant>(RHSA->getOperand(1))->getValue(); + + // If the strides are equal, then this is just a (complex) loop invariant + // comparison of a and b. + if (LHSStride == RHSStride) + return SE.getMinusSCEV(LHSA->getStart(), RHSA->getStart()); + + // If the signs of the strides differ, then the negative stride is counting + // down to the positive stride. + if (LHSStride->getValue().isNegative() != RHSStride->getValue().isNegative()){ + if (RHSStride->getValue().isNegative()) + std::swap(LHS, RHS); + } else { + // If LHS's stride is smaller than RHS's stride, then "b" must be less than + // "a", and "b" (RHS) is counting up (catching up) to LHS. This is true + // whether the strides are positive or negative. + if (RHSStride->getValue().slt(LHSStride->getValue())) + std::swap(LHS, RHS); + } + + return SE.getMinusSCEV(LHS, RHS, true /*HasNUW*/); +} + /// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
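// Worked example for the ICMP_NE path below (hypothetical loop, not from
// this patch):
//   for (i = a; i != b; i += 4)   // increment known not to wrap (nsw/nuw)
// getMinusSCEVForExitTest sees LHS = {a,+,4}, a simple non-wrapping addrec
// with positive stride, and a loop-invariant RHS = b, so it returns
// b - {a,+,4} = {b-a,+,-4}<nuw>. Counting down without unsigned wrap, this
// value cannot step past zero, letting HowFarToZero conclude an exact trip
// count of (b-a)/4.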
@@ -4050,7 +4155,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, // At this point, we would like to compute how many iterations of the // loop the predicate will return true for these inputs. - if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) { + if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) { // If there is a loop-invariant, force it into the RHS. std::swap(LHS, RHS); Cond = ICmpInst::getSwappedPredicate(Cond); @@ -4075,7 +4180,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L); + BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEVForExitTest(LHS, RHS, L, + *this), L); if (BTI.hasAnyInfo()) return BTI; break; } @@ -4212,7 +4318,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( // We can only recognize very limited forms of loop index expressions, in // particular, only affine AddRec's like {C1,+,C2}. const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx); - if (!IdxExpr || !IdxExpr->isAffine() || IdxExpr->isLoopInvariant(L) || + if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) || !isa<SCEVConstant>(IdxExpr->getOperand(0)) || !isa<SCEVConstant>(IdxExpr->getOperand(1))) return getCouldNotCompute(); @@ -4686,7 +4792,7 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, // bit width during computations. APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D APInt Mod(BW + 1, 0); - Mod.set(BW - Mult2); // Mod = N / D + Mod.setBit(BW - Mult2); // Mod = N / D APInt I = AD.multiplicativeInverse(Mod); // 4. Compute the minimum unsigned root of the equation: @@ -4778,58 +4884,26 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { if (!AddRec || AddRec->getLoop() != L) return getCouldNotCompute(); - if (AddRec->isAffine()) { - // If this is an affine expression, the execution count of this branch is - // the minimum unsigned root of the following equation: - // - // Start + Step*N = 0 (mod 2^BW) - // - // equivalent to: - // - // Step*N = -Start (mod 2^BW) - // - // where BW is the common bit width of Start and Step. - - // Get the initial value for the loop. - const SCEV *Start = getSCEVAtScope(AddRec->getStart(), - L->getParentLoop()); - const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), - L->getParentLoop()); - - if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) { - // For now we handle only constant steps. - - // First, handle unitary steps. - if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so: - return getNegativeSCEV(Start); // N = -Start (as unsigned) - if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so: - return Start; // N = Start (as unsigned) - - // Then, try to solve the above equation provided that Start is constant. - if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) - return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), - -StartC->getValue()->getValue(), - *this); - } - } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { - // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of - // the quadratic equation to solve it. - std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec, - *this); + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of + // the quadratic equation to solve it. 
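// e.g. (hypothetical): the quadratic addrec {-2,+,0,+,2} evaluates to
// i*i - i - 2 at iteration i, whose smallest non-negative root is i == 2,
// so the analysis below would report that the value reaches zero at
// iteration 2.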
+ if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { + std::pair<const SCEV *,const SCEV *> Roots = + SolveQuadraticEquation(AddRec, *this); const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); - if (R1) { + if (R1 && R2) { #if 0 dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1 << " sol#2: " << *R2 << "\n"; #endif // Pick the smallest positive root value. if (ConstantInt *CB = - dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, - R1->getValue(), R2->getValue()))) { + dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT, + R1->getValue(), + R2->getValue()))) { if (CB->getZExtValue() == false) std::swap(R1, R2); // R1 is the minimum root now. - + // We can only use this value if the chrec ends up with an exact zero // value at this index. When solving for "X*X != 5", for example, we // should not accept a root of 2. @@ -4838,8 +4912,54 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { return R1; // We found a quadratic root! } } + return getCouldNotCompute(); } + // Otherwise we can only handle this if it is affine. + if (!AddRec->isAffine()) + return getCouldNotCompute(); + + // If this is an affine expression, the execution count of this branch is + // the minimum unsigned root of the following equation: + // + // Start + Step*N = 0 (mod 2^BW) + // + // equivalent to: + // + // Step*N = -Start (mod 2^BW) + // + // where BW is the common bit width of Start and Step. + + // Get the initial value for the loop. + const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); + const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); + + // If the AddRec is NUW, then (in an unsigned sense) it cannot be counting up + // to wrap to 0, it must be counting down to equal 0. Also, while counting + // down, it cannot "miss" 0 (which would cause it to wrap), regardless of what + // the stride is. As such, NUW addrec's will always become zero in + // "start / -stride" steps, and we know that the division is exact. + if (AddRec->hasNoUnsignedWrap()) + // FIXME: We really want an "isexact" bit for udiv. + return getUDivExpr(Start, getNegativeSCEV(Step)); + + // For now we handle only constant steps. + const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); + if (StepC == 0) + return getCouldNotCompute(); + + // First, handle unitary steps. + if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so: + return getNegativeSCEV(Start); // N = -Start (as unsigned) + + if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so: + return Start; // N = Start (as unsigned) + + // Then, try to solve the above equation provided that Start is constant. + if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) + return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), + -StartC->getValue()->getValue(), + *this); return getCouldNotCompute(); } @@ -4939,7 +5059,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // as both operands could be addrecs loop-invariant in each other's loop. 
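// For instance (hypothetical): with n invariant in L, "n > {0,+,1}<L>" is
// canonicalized below to "{0,+,1}<L> < n" by swapping both the operands and
// the predicate, so later code only has to handle the addrec-on-LHS form.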
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) { const Loop *L = AR->getLoop(); - if (LHS->isLoopInvariant(L) && LHS->properlyDominates(L->getHeader(), DT)) { + if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) { std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); Changed = true; @@ -5159,13 +5279,13 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, trivially_true: // Return 0 == 0. - LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0); + LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); Pred = ICmpInst::ICMP_EQ; return true; trivially_false: // Return 0 != 0. - LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0); + LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); Pred = ICmpInst::ICMP_NE; return true; } @@ -5556,7 +5676,7 @@ ScalarEvolution::BackedgeTakenInfo ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned) { // Only handle: "ADDREC < LoopInvariant". - if (!RHS->isLoopInvariant(L)) return getCouldNotCompute(); + if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS); if (!AddRec || AddRec->getLoop() != L) @@ -5836,6 +5956,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) ScalarEvolution::ScalarEvolution() : FunctionPass(ID), FirstUnknown(0) { + initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } bool ScalarEvolution::runOnFunction(Function &F) { @@ -5857,6 +5978,10 @@ void ScalarEvolution::releaseMemory() { BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); ValuesAtScopes.clear(); + LoopDispositions.clear(); + BlockDispositions.clear(); + UnsignedRanges.clear(); + SignedRanges.clear(); UniqueSCEVs.clear(); SCEVAllocator.Reset(); } @@ -5936,7 +6061,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { if (L) { OS << "\t\t" "Exits: "; const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); - if (!ExitValue->isLoopInvariant(L)) { + if (!SE.isLoopInvariant(ExitValue, L)) { OS << "<<Unknown>>"; } else { OS << *ExitValue; @@ -5953,3 +6078,240 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { PrintLoopInfo(OS, &SE, *I); } +ScalarEvolution::LoopDisposition +ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { + std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S]; + std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair = + Values.insert(std::make_pair(L, LoopVariant)); + if (!Pair.second) + return Pair.first->second; + + LoopDisposition D = computeLoopDisposition(S, L); + return LoopDispositions[S][L] = D; +} + +ScalarEvolution::LoopDisposition +ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { + switch (S->getSCEVType()) { + case scConstant: + return LoopInvariant; + case scTruncate: + case scZeroExtend: + case scSignExtend: + return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L); + case scAddRecExpr: { + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); + + // If L is the addrec's loop, it's computable. + if (AR->getLoop() == L) + return LoopComputable; + + // Add recurrences are never invariant in the function-body (null loop). + if (!L) + return LoopVariant; + + // This recurrence is variant w.r.t. L if L contains AR's loop. + if (L->contains(AR->getLoop())) + return LoopVariant; + + // This recurrence is invariant w.r.t. L if AR's loop contains L. 
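// e.g. (hypothetical nest): given loops Outer { Inner }, the addrec
// {0,+,1}<Outer> is fixed throughout any single execution of Inner, hence
// LoopInvariant w.r.t. Inner, but LoopVariant w.r.t. any loop that
// encloses Outer, since it changes from one Outer iteration to the next.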
+ if (AR->getLoop()->contains(L)) + return LoopInvariant; + + // This recurrence is variant w.r.t. L if any of its operands + // are variant. + for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) + if (!isLoopInvariant(*I, L)) + return LoopVariant; + + // Otherwise it's loop-invariant. + return LoopInvariant; + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); + bool HasVarying = false; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + LoopDisposition D = getLoopDisposition(*I, L); + if (D == LoopVariant) + return LoopVariant; + if (D == LoopComputable) + HasVarying = true; + } + return HasVarying ? LoopComputable : LoopInvariant; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L); + if (LD == LoopVariant) + return LoopVariant; + LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L); + if (RD == LoopVariant) + return LoopVariant; + return (LD == LoopInvariant && RD == LoopInvariant) ? + LoopInvariant : LoopComputable; + } + case scUnknown: + // All non-instruction values are loop invariant. All instructions are loop + // invariant if they are not contained in the specified loop. + // Instructions are never considered invariant in the function body + // (null loop) because they are defined within the "loop". + if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) + return (L && !L->contains(I)) ? LoopInvariant : LoopVariant; + return LoopInvariant; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return LoopVariant; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return LoopVariant; +} + +bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { + return getLoopDisposition(S, L) == LoopInvariant; +} + +bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { + return getLoopDisposition(S, L) == LoopComputable; +} + +ScalarEvolution::BlockDisposition +ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { + std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S]; + std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool> + Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock)); + if (!Pair.second) + return Pair.first->second; + + BlockDisposition D = computeBlockDisposition(S, BB); + return BlockDispositions[S][BB] = D; +} + +ScalarEvolution::BlockDisposition +ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { + switch (S->getSCEVType()) { + case scConstant: + return ProperlyDominatesBlock; + case scTruncate: + case scZeroExtend: + case scSignExtend: + return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB); + case scAddRecExpr: { + // This uses a "dominates" query instead of "properly dominates" query + // to test for proper dominance too, because the instruction which + // produces the addrec's value is a PHI, and a PHI effectively properly + // dominates its entire containing block. + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); + if (!DT->dominates(AR->getLoop()->getHeader(), BB)) + return DoesNotDominateBlock; + } + // FALL THROUGH into SCEVNAryExpr handling. 
+ case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); + bool Proper = true; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + BlockDisposition D = getBlockDisposition(*I, BB); + if (D == DoesNotDominateBlock) + return DoesNotDominateBlock; + if (D == DominatesBlock) + Proper = false; + } + return Proper ? ProperlyDominatesBlock : DominatesBlock; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); + BlockDisposition LD = getBlockDisposition(LHS, BB); + if (LD == DoesNotDominateBlock) + return DoesNotDominateBlock; + BlockDisposition RD = getBlockDisposition(RHS, BB); + if (RD == DoesNotDominateBlock) + return DoesNotDominateBlock; + return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ? + ProperlyDominatesBlock : DominatesBlock; + } + case scUnknown: + if (Instruction *I = + dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) { + if (I->getParent() == BB) + return DominatesBlock; + if (DT->properlyDominates(I->getParent(), BB)) + return ProperlyDominatesBlock; + return DoesNotDominateBlock; + } + return ProperlyDominatesBlock; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return DoesNotDominateBlock; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return DoesNotDominateBlock; +} + +bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { + return getBlockDisposition(S, BB) >= DominatesBlock; +} + +bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { + return getBlockDisposition(S, BB) == ProperlyDominatesBlock; +} + +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { + switch (S->getSCEVType()) { + case scConstant: + return false; + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S); + const SCEV *CastOp = Cast->getOperand(); + return Op == CastOp || hasOperand(CastOp, Op); + } + case scAddRecExpr: + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + const SCEV *NAryOp = *I; + if (NAryOp == Op || hasOperand(NAryOp, Op)) + return true; + } + return false; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); + return LHS == Op || hasOperand(LHS, Op) || + RHS == Op || hasOperand(RHS, Op); + } + case scUnknown: + return false; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return false; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return false; +} + +void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { + ValuesAtScopes.erase(S); + LoopDispositions.erase(S); + BlockDispositions.erase(S); + UnsignedRanges.erase(S); + SignedRanges.erase(S); +} diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 93b2a8b06fbe..e9edb3e083de 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -34,7 +34,10 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {} + 
ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) { + initializeScalarEvolutionAliasAnalysisPass( + *PassRegistry::getPassRegistry()); + } /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it @@ -49,8 +52,7 @@ namespace { private: virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual bool runOnFunction(Function &F); - virtual AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size); + virtual AliasResult alias(const Location &LocA, const Location &LocB); Value *GetBaseValue(const SCEV *S); }; @@ -58,8 +60,11 @@ namespace { // Register this pass... char ScalarEvolutionAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", - "ScalarEvolution-based Alias Analysis", false, true, false); +INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", + "ScalarEvolution-based Alias Analysis", false, true, false) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", + "ScalarEvolution-based Alias Analysis", false, true, false) FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() { return new ScalarEvolutionAliasAnalysis(); @@ -101,17 +106,17 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { } AliasAnalysis::AliasResult -ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize, - const Value *B, unsigned BSize) { +ScalarEvolutionAliasAnalysis::alias(const Location &LocA, + const Location &LocB) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. This allows the code below to ignore this special // case. - if (ASize == 0 || BSize == 0) + if (LocA.Size == 0 || LocB.Size == 0) return NoAlias; // This is ScalarEvolutionAliasAnalysis. Get the SCEVs! - const SCEV *AS = SE->getSCEV(const_cast<Value *>(A)); - const SCEV *BS = SE->getSCEV(const_cast<Value *>(B)); + const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr)); + const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr)); // If they evaluate to the same expression, it's a MustAlias. if (AS == BS) return MustAlias; @@ -121,8 +126,8 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize, if (SE->getEffectiveSCEVType(AS->getType()) == SE->getEffectiveSCEVType(BS->getType())) { unsigned BitWidth = SE->getTypeSizeInBits(AS->getType()); - APInt ASizeInt(BitWidth, ASize); - APInt BSizeInt(BitWidth, BSize); + APInt ASizeInt(BitWidth, LocA.Size); + APInt BSizeInt(BitWidth, LocB.Size); // Compute the difference between the two pointers. const SCEV *BA = SE->getMinusSCEV(BS, AS); @@ -154,11 +159,15 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize, // inttoptr and ptrtoint operators. Value *AO = GetBaseValue(AS); Value *BO = GetBaseValue(BS); - if ((AO && AO != A) || (BO && BO != B)) - if (alias(AO ? AO : A, AO ? UnknownSize : ASize, - BO ? BO : B, BO ? UnknownSize : BSize) == NoAlias) + if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr)) + if (alias(Location(AO ? AO : LocA.Ptr, + AO ? +UnknownSize : LocA.Size, + AO ? 0 : LocA.TBAATag), + Location(BO ? BO : LocB.Ptr, + BO ? +UnknownSize : LocB.Size, + BO ? 0 : LocB.TBAATag)) == NoAlias) return NoAlias; // Forward the query to the next analysis. 
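// (For example, hypothetically: if the SCEV difference of %p and %q is not
// a provably small constant, scev-aa proves nothing here, and the query
// below falls through to BasicAA and any other analyses registered in the
// chain rather than answering MayAlias itself.)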
- return AliasAnalysis::alias(A, ASize, B, BSize); + return AliasAnalysis::alias(LocA, LocB); } diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 66a06aeac43c..b7c110f28cf9 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -608,15 +608,22 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, return A; // Arbitrarily break the tie. } -/// GetRelevantLoop - Get the most relevant loop associated with the given +/// getRelevantLoop - Get the most relevant loop associated with the given /// expression, according to PickMostRelevantLoop. -static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI, - DominatorTree &DT) { +const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { + // Test whether we've already computed the most relevant loop for this SCEV. + std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair = + RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0))); + if (!Pair.second) + return Pair.first->second; + if (isa<SCEVConstant>(S)) + // A constant has no relevant loops. return 0; if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) - return LI.getLoopFor(I->getParent()); + return Pair.first->second = SE.LI->getLoopFor(I->getParent()); + // A non-instruction has no relevant loops. return 0; } if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) { @@ -625,16 +632,22 @@ static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI, L = AR->getLoop(); for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) - L = PickMostRelevantLoop(L, GetRelevantLoop(*I, LI, DT), DT); - return L; + L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT); + return RelevantLoops[N] = L; + } + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) { + const Loop *Result = getRelevantLoop(C->getOperand()); + return RelevantLoops[C] = Result; + } + if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { + const Loop *Result = + PickMostRelevantLoop(getRelevantLoop(D->getLHS()), + getRelevantLoop(D->getRHS()), + *SE.DT); + return RelevantLoops[D] = Result; } - if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) - return GetRelevantLoop(C->getOperand(), LI, DT); - if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) - return PickMostRelevantLoop(GetRelevantLoop(D->getLHS(), LI, DT), - GetRelevantLoop(D->getRHS(), LI, DT), - DT); llvm_unreachable("Unexpected SCEV type!"); + return 0; } namespace { @@ -682,8 +695,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()), E(S->op_begin()); I != E; ++I) - OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT), - *I)); + OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); // Sort by loop. Use a stable sort so that constants follow non-constants and // pointer operands precede non-pointer operands. @@ -752,8 +764,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()), E(S->op_begin()); I != E; ++I) - OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT), - *I)); + OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); // Sort by loop. 
Use a stable sort so that constants follow non-constants. std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); @@ -990,7 +1001,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Strip off any non-loop-dominating component from the addrec start. const SCEV *Start = Normalized->getStart(); const SCEV *PostLoopOffset = 0; - if (!Start->properlyDominates(L->getHeader(), SE.DT)) { + if (!SE.properlyDominates(Start, L->getHeader())) { PostLoopOffset = Start; Start = SE.getConstant(Normalized->getType(), 0); Normalized = @@ -1002,7 +1013,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Strip off any non-loop-dominating component from the addrec step. const SCEV *Step = Normalized->getStepRecurrence(SE); const SCEV *PostLoopScale = 0; - if (!Step->dominates(L->getHeader(), SE.DT)) { + if (!SE.dominates(Step, L->getHeader())) { PostLoopScale = Step; Step = SE.getConstant(Normalized->getType(), 1); Normalized = @@ -1278,7 +1289,7 @@ Value *SCEVExpander::expand(const SCEV *S) { Instruction *InsertPt = Builder.GetInsertPoint(); for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; L = L->getParentLoop()) - if (S->isLoopInvariant(L)) { + if (SE.isLoopInvariant(S, L)) { if (!L) break; if (BasicBlock *Preheader = L->getLoopPreheader()) InsertPt = Preheader->getTerminator(); @@ -1286,7 +1297,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. - if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L)) + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstNonPHI(); while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index bbfdcec3f9b4..40e18ab2fbfa 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -12,29 +12,65 @@ // // In LLVM IR, memory does not have types, so LLVM's own type system is not // suitable for doing TBAA. Instead, metadata is added to the IR to describe -// a type system of a higher level language. +// a type system of a higher level language. This can be used to implement +// typical C/C++ TBAA, but it can also be used to implement custom alias +// analysis behavior for other languages. // -// This pass is language-independent. The type system is encoded in -// metadata. This allows this pass to support typical C and C++ TBAA, but -// it can also support custom aliasing behavior for other languages. +// The current metadata format is very simple. TBAA MDNodes have up to +// three fields, e.g.: +// !0 = metadata !{ metadata !"an example type tree" } +// !1 = metadata !{ metadata !"int", metadata !0 } +// !2 = metadata !{ metadata !"float", metadata !0 } +// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 } // -// This is a work-in-progress. It doesn't work yet, and the metadata -// format isn't stable. +// The first field is an identity field. It can be any value, usually +// an MDString, which uniquely identifies the type. The most important +// name in the tree is the name of the root node. 
Two trees with +// different root node names are entirely disjoint, even if they +// have leaves with common names. // -// This pass is language-independent. The type system is encoded in -// metadata. This allows this pass to support typical C and C++ TBAA, but -// it can also support custom aliasing behavior for other languages. +// The second field identifies the type's parent node in the tree, or +// is null or omitted for a root node. A type is considered to alias +// all of its descendants and all of its ancestors in the tree. Also, +// a type is considered to alias all types in other trees, so that +// bitcode produced from multiple front-ends is handled conservatively. // -// This is a work-in-progress. It doesn't work yet, and the metadata -// format isn't stable. +// If the third field is present, it's an integer which, if equal to 1, +// indicates that the type is "constant" (meaning pointsToConstantMemory +// should return true; see +// http://llvm.org/docs/AliasAnalysis.html#OtherItfs). // -// TODO: getModRefBehavior. The AliasAnalysis infrastructure will need to -// be extended. -// TODO: AA chaining -// TODO: struct fields +// TODO: The current metadata format doesn't support struct +// fields. For example: +// struct X { +// double d; +// int i; +// }; +// void foo(struct X *x, struct X *y, double *p) { +// *x = *y; +// *p = 0.0; +// } +// Struct X has a double member, so the store to *x can alias the store to *p. +// Currently it's not possible to precisely describe all the things struct X +// aliases, so struct assignments must use conservative TBAA nodes. There's +// no scheme for attaching metadata to @llvm.memcpy yet either. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Metadata.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +// A handy option for disabling TBAA functionality. The same effect can also be +// achieved by stripping the !tbaa tags from IR, but this option is sometimes +// more convenient. +static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); + namespace { /// TBAANode - This is a simple wrapper around an MDNode which provides a /// higher-level interface by hiding the details of how alias analysis @@ -44,16 +80,16 @@ namespace { public: TBAANode() : Node(0) {} - explicit TBAANode(MDNode *N) : Node(N) {} + explicit TBAANode(const MDNode *N) : Node(N) {} /// getNode - Get the MDNode for this TBAANode. const MDNode *getNode() const { return Node; } - /// getParent - Get this TBAANode's Alias DAG parent. + /// getParent - Get this TBAANode's Alias tree parent. TBAANode getParent() const { if (Node->getNumOperands() < 2) return TBAANode(); - MDNode *P = dyn_cast<MDNode>(Node->getOperand(1)); + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); if (!P) return TBAANode(); // Ok, this node has a valid parent. Return it. @@ -69,8 +105,7 @@ namespace { ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2)); if (!CI) return false; - // TODO: Think about the encoding. - return CI->isOne(); + return CI->getValue()[0]; } }; } @@ -82,7 +117,13 @@ namespace { public AliasAnalysis { public: static char ID; // Class identification, replacement for typeinfo - TypeBasedAliasAnalysis() : ImmutablePass(ID) {} + TypeBasedAliasAnalysis() : ImmutablePass(ID) { + initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + InitializeAliasAnalysis(this); + } /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance.
If needed, it @@ -94,18 +135,25 @@ namespace { return this; } + bool Aliases(const MDNode *A, const MDNode *B) const; + private: virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size); - virtual bool pointsToConstantMemory(const Value *P); + virtual AliasResult alias(const Location &LocA, const Location &LocB); + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + virtual ModRefBehavior getModRefBehavior(const Function *F); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2); }; } // End of anonymous namespace // Register this pass... char TypeBasedAliasAnalysis::ID = 0; INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", - "Type-Based Alias Analysis", false, true, false); + "Type-Based Alias Analysis", false, true, false) ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { return new TypeBasedAliasAnalysis(); @@ -117,34 +165,19 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AliasAnalysis::getAnalysisUsage(AU); } -AliasAnalysis::AliasResult -TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize, - const Value *B, unsigned BSize) { - // Currently, metadata can only be attached to Instructions. - const Instruction *AI = dyn_cast<Instruction>(A); - if (!AI) return MayAlias; - const Instruction *BI = dyn_cast<Instruction>(B); - if (!BI) return MayAlias; - - // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must - // be conservative. - MDNode *AM = - AI->getMetadata(AI->getParent()->getParent()->getParent() - ->getMDKindID("tbaa")); - if (!AM) return MayAlias; - MDNode *BM = - BI->getMetadata(BI->getParent()->getParent()->getParent() - ->getMDKindID("tbaa")); - if (!BM) return MayAlias; - +/// Aliases - Test whether the type represented by A may alias the +/// type represented by B. +bool +TypeBasedAliasAnalysis::Aliases(const MDNode *A, + const MDNode *B) const { // Keep track of the root node for A and B. TBAANode RootA, RootB; - // Climb the DAG from A to see if we reach B. - for (TBAANode T(AM); ; ) { - if (T.getNode() == BM) + // Climb the tree from A to see if we reach B. + for (TBAANode T(A); ; ) { + if (T.getNode() == B) // B is an ancestor of A. - return MayAlias; + return true; RootA = T; T = T.getParent(); @@ -152,11 +185,11 @@ TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize, break; } - // Climb the DAG from B to see if we reach A. - for (TBAANode T(BM); ; ) { - if (T.getNode() == AM) + // Climb the tree from B to see if we reach A. + for (TBAANode T(B); ; ) { + if (T.getNode() == A) // A is an ancestor of B. - return MayAlias; + return true; RootB = T; T = T.getParent(); @@ -166,26 +199,101 @@ TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize, // Neither node is an ancestor of the other. - // If they have the same root, then we've proved there's no alias. - if (RootA.getNode() == RootB.getNode()) - return NoAlias; - // If they have different roots, they're part of different potentially // unrelated type systems, so we must be conservative. - return MayAlias; + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. 
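// e.g., using the metadata from the file header comment (hypothetical IR):
// !1 ("int") and !2 ("float") share the root !0, but neither climb reaches
// the other node, so an int access and a float access are disjoint and
// this returns false, which alias() below turns into NoAlias.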
+ return false; +} + +AliasAnalysis::AliasResult +TypeBasedAliasAnalysis::alias(const Location &LocA, + const Location &LocB) { + if (!EnableTBAA) + return AliasAnalysis::alias(LocA, LocB); + + // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must + // be conservative. + const MDNode *AM = LocA.TBAATag; + if (!AM) return AliasAnalysis::alias(LocA, LocB); + const MDNode *BM = LocB.TBAATag; + if (!BM) return AliasAnalysis::alias(LocA, LocB); + + // If they may alias, chain to the next AliasAnalysis. + if (Aliases(AM, BM)) + return AliasAnalysis::alias(LocA, LocB); + + // Otherwise return a definitive result. + return NoAlias; } -bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Value *P) { - // Currently, metadata can only be attached to Instructions. - const Instruction *I = dyn_cast<Instruction>(P); - if (!I) return false; +bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableTBAA) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - MDNode *M = - I->getMetadata(I->getParent()->getParent()->getParent() - ->getMDKindID("tbaa")); - if (!M) return false; + const MDNode *M = Loc.TBAATag; + if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. - return TBAANode(M).TypeIsImmutable(); + if (TBAANode(M).TypeIsImmutable()) + return true; + + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +} + +AliasAnalysis::ModRefBehavior +TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + if (!EnableTBAA) + return AliasAnalysis::getModRefBehavior(CS); + + ModRefBehavior Min = UnknownModRefBehavior; + + // If this is an "immutable" type, we can assume the call doesn't write + // to memory. + if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (TBAANode(M).TypeIsImmutable()) + Min = OnlyReadsMemory; + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); +} + +AliasAnalysis::ModRefBehavior +TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { + // Functions don't have metadata. Just chain to the next implementation. 
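// A standalone sketch (enum values are invented for illustration) of why the
// getModRefBehavior merge above uses '&': treating each bit as a may-read or
// may-write capability means intersection can only remove possibilities, so
// capping at OnlyReadsMemory for "constant" TBAA types is always sound.
enum ToyBehavior { ToyNone = 0, ToyRead = 1, ToyWrite = 2,
                   ToyAnything = ToyRead | ToyWrite };
static ToyBehavior toyMerge(ToyBehavior Chained, ToyBehavior Cap) {
  return ToyBehavior(Chained & Cap);  // mirrors ModRefBehavior(... & Min)
}
// toyMerge(ToyAnything, ToyRead) == ToyRead: even when the chained analysis
// knows nothing better, the call is treated as read-only.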
+ return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + if (!EnableTBAA) + return AliasAnalysis::getModRefInfo(CS, Loc); + + if (const MDNode *L = Loc.TBAATag) + if (const MDNode *M = + CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (!Aliases(L, M)) + return NoModRef; + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + if (!EnableTBAA) + return AliasAnalysis::getModRefInfo(CS1, CS2); + + if (const MDNode *M1 = + CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (const MDNode *M2 = + CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (!Aliases(M1, M2)) + return NoModRef; + + return AliasAnalysis::getModRefInfo(CS1, CS2); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 181c9b01980c..1060bc5349e4 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/GlobalVariable.h" @@ -23,9 +24,22 @@ #include "llvm/Target/TargetData.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/ADT/SmallPtrSet.h" #include <cstring> using namespace llvm; +using namespace llvm::PatternMatch; + +const unsigned MaxDepth = 6; + +/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if +/// unknown returns 0). For vector types, returns the element type's bitwidth. +static unsigned getBitWidth(const Type *Ty, const TargetData *TD) { + if (unsigned BitWidth = Ty->getScalarSizeInBits()) + return BitWidth; + assert(isa<PointerType>(Ty) && "Expected a pointer type!"); + return TD ? TD->getPointerSizeInBits() : 0; +} /// ComputeMaskedBits - Determine which of the bits specified in Mask are /// known to be either zero or one and return them in the KnownZero/KnownOne @@ -46,7 +60,6 @@ using namespace llvm; void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const TargetData *TD, unsigned Depth) { - const unsigned MaxDepth = 6; assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Mask.getBitWidth(); @@ -69,14 +82,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Null and aggregate-zero are all-zeros. if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { - KnownOne.clear(); + KnownOne.clearAllBits(); KnownZero = Mask; return; } // Handle a constant vector by taking the intersection of the known bits of // each element. 
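// A standalone sketch (helper name and example values are invented) of the
// per-element intersection used below: a bit is known-one only if it is one
// in every element, and known-zero only if it is zero in every element.
#include <cstdint>
static void intersectKnownBits(const uint32_t *Elems, unsigned N,
                               uint32_t &KnownZero, uint32_t &KnownOne) {
  KnownZero = ~0u; KnownOne = ~0u;   // start all-known, like setAllBits()
  for (unsigned i = 0; i != N; ++i) {
    KnownOne  &= Elems[i];           // keep ones common to all elements
    KnownZero &= ~Elems[i];          // keep zeros common to all elements
  }
}
// For two elements {12, 4} (0b1100 and 0b0100): KnownOne == 0b0100, while
// bit 3 lands in neither set because the elements disagree on it.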
if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { - KnownZero.set(); KnownOne.set(); + KnownZero.setAllBits(); KnownOne.setAllBits(); for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2, @@ -103,15 +116,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero = Mask & APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); else - KnownZero.clear(); - KnownOne.clear(); + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); return; } // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has // the bits of its aliasee. if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (GA->mayBeOverridden()) { - KnownZero.clear(); KnownOne.clear(); + KnownZero.clearAllBits(); KnownOne.clearAllBits(); } else { ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne, TD, Depth+1); @@ -119,7 +132,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } - KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything. + KnownZero.clearAllBits(); KnownOne.clearAllBits(); // Start out not knowing anything. if (Depth == MaxDepth || Mask == 0) return; // Limit search depth. @@ -185,7 +198,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - KnownOne.clear(); + KnownOne.clearAllBits(); unsigned TrailZ = KnownZero.countTrailingOnes() + KnownZero2.countTrailingOnes(); unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + @@ -208,8 +221,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, AllOnes, KnownZero2, KnownOne2, TD, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); - KnownOne2.clear(); - KnownZero2.clear(); + KnownOne2.clearAllBits(); + KnownZero2.clearAllBits(); ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); @@ -255,14 +268,13 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, else SrcBitWidth = SrcTy->getScalarSizeInBits(); - APInt MaskIn(Mask); - MaskIn.zextOrTrunc(SrcBitWidth); - KnownZero.zextOrTrunc(SrcBitWidth); - KnownOne.zextOrTrunc(SrcBitWidth); + APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth); + KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); + KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, Depth+1); - KnownZero.zextOrTrunc(BitWidth); - KnownOne.zextOrTrunc(BitWidth); + KnownZero = KnownZero.zextOrTrunc(BitWidth); + KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. if (BitWidth > SrcBitWidth) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); @@ -284,15 +296,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Compute the bits in the result that are not present in the input. 
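// A standalone sketch (fixed i8-to-i32 widths, invented helper name) of what
// the sign-extension case computes: whatever is known about the source sign
// bit becomes known about every newly created high bit.
#include <cstdint>
static void toySExtKnownBits(uint8_t KnownZero8, uint8_t KnownOne8,
                             uint32_t &KnownZero32, uint32_t &KnownOne32) {
  KnownZero32 = KnownZero8;
  KnownOne32  = KnownOne8;
  if (KnownZero8 & 0x80) KnownZero32 |= 0xFFFFFF00u; // sign bit known clear
  if (KnownOne8  & 0x80) KnownOne32  |= 0xFFFFFF00u; // sign bit known set
}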
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - APInt MaskIn(Mask); - MaskIn.trunc(SrcBitWidth); - KnownZero.trunc(SrcBitWidth); - KnownOne.trunc(SrcBitWidth); + APInt MaskIn = Mask.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. @@ -338,7 +349,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. APInt Mask2(Mask.shl(ShiftAmt)); @@ -474,7 +485,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); - KnownOne.clear(); + KnownOne.clearAllBits(); KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; break; } @@ -579,6 +590,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } } + // Unreachable blocks may have zero-operand PHI nodes. + if (P->getNumIncomingValues() == 0) + return; + // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { @@ -621,6 +636,156 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } } +/// ComputeSignBit - Determine whether the sign bit is known to be zero or +/// one. Convenience wrapper around ComputeMaskedBits. +void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, + const TargetData *TD, unsigned Depth) { + unsigned BitWidth = getBitWidth(V->getType(), TD); + if (!BitWidth) { + KnownZero = false; + KnownOne = false; + return; + } + APInt ZeroBits(BitWidth, 0); + APInt OneBits(BitWidth, 0); + ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD, + Depth); + KnownOne = OneBits[BitWidth - 1]; + KnownZero = ZeroBits[BitWidth - 1]; +} + +/// isPowerOfTwo - Return true if the given value is known to have exactly one +/// bit set when defined. For vectors return true if every element is known to +/// be a power of two when defined. Supports values with integer or pointer +/// types and vectors of integers. +bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return CI->getValue().isPowerOf2(); + // TODO: Handle vector constants. + + // 1 << X is clearly a power of two if the one is not shifted off the end. If + // it is shifted off the end then the result is undefined. + if (match(V, m_Shl(m_One(), m_Value()))) + return true; + + // (signbit) >>l X is clearly a power of two if the one is not shifted off the + // bottom. If it is shifted off the bottom then the result is undefined. + if (match(V, m_LShr(m_SignBit(), m_Value()))) + return true; + + // The remaining tests are all recursive, so bail out if we hit the limit. 
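// A standalone sketch (the toy expression type is invented) of the recursive
// cases handled below: zext preserves the single-set-bit property outright,
// and a select is a power of two only when both arms are.
#include <cstdint>
struct ToyExpr {
  enum KindTy { Const, ZExt, Select } Kind;
  uint64_t Val;                       // payload for Const
  const ToyExpr *A, *B, *C;           // operands; unused ones are null
};
static bool toyIsPowerOfTwo(const ToyExpr *E, unsigned Depth = 0) {
  if (Depth == 6) return false;       // mirrors the MaxDepth bail-out
  switch (E->Kind) {
  case ToyExpr::Const:  return E->Val != 0 && (E->Val & (E->Val - 1)) == 0;
  case ToyExpr::ZExt:   return toyIsPowerOfTwo(E->A, Depth + 1);
  case ToyExpr::Select: return toyIsPowerOfTwo(E->B, Depth + 1) &&
                               toyIsPowerOfTwo(E->C, Depth + 1);
  }
  return false;
}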
+ if (Depth++ == MaxDepth) + return false; + + if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) + return isPowerOfTwo(ZI->getOperand(0), TD, Depth); + + if (SelectInst *SI = dyn_cast<SelectInst>(V)) + return isPowerOfTwo(SI->getTrueValue(), TD, Depth) && + isPowerOfTwo(SI->getFalseValue(), TD, Depth); + + return false; +} + +/// isKnownNonZero - Return true if the given value is known to be non-zero +/// when defined. For vectors return true if every element is known to be +/// non-zero when defined. Supports values with integer or pointer type and +/// vectors of integers. +bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { + if (Constant *C = dyn_cast<Constant>(V)) { + if (C->isNullValue()) + return false; + if (isa<ConstantInt>(C)) + // Must be non-zero due to null test above. + return true; + // TODO: Handle vectors + return false; + } + + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth++ == MaxDepth) + return false; + + unsigned BitWidth = getBitWidth(V->getType(), TD); + + // X | Y != 0 if X != 0 or Y != 0. + Value *X = 0, *Y = 0; + if (match(V, m_Or(m_Value(X), m_Value(Y)))) + return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth); + + // ext X != 0 if X != 0. + if (isa<SExtInst>(V) || isa<ZExtInst>(V)) + return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth); + + // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined + // if the lowest bit is shifted off the end. + if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth); + if (KnownOne[0]) + return true; + } + // shr X, Y != 0 if X is negative. Note that the value of the shift is not + // defined if the sign bit is shifted off the end. + else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { + bool XKnownNonNegative, XKnownNegative; + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); + if (XKnownNegative) + return true; + } + // X + Y. + else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { + bool XKnownNonNegative, XKnownNegative; + bool YKnownNonNegative, YKnownNegative; + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); + ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth); + + // If X and Y are both non-negative (as signed values) then their sum is not + // zero unless both X and Y are zero. + if (XKnownNonNegative && YKnownNonNegative) + if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth)) + return true; + + // If X and Y are both negative (as signed values) then their sum is not + // zero unless both X and Y equal INT_MIN. + if (BitWidth && XKnownNegative && YKnownNegative) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + APInt Mask = APInt::getSignedMaxValue(BitWidth); + // The sign bit of X is set. If some other bit is set then X is not equal + // to INT_MIN. + ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth); + if ((KnownOne & Mask) != 0) + return true; + // The sign bit of Y is set. If some other bit is set then Y is not equal + // to INT_MIN. + ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth); + if ((KnownOne & Mask) != 0) + return true; + } + + // The sum of a non-negative number and a power of two is not zero. + if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth)) + return true; + if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth)) + return true; + } + // (C ? 
X : Y) != 0 if X != 0 and Y != 0. + else if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + if (isKnownNonZero(SI->getTrueValue(), TD, Depth) && + isKnownNonZero(SI->getFalseValue(), TD, Depth)) + return true; + } + + if (!BitWidth) return false; + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne, + TD, Depth); + return KnownOne != 0; +} + /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. @@ -679,6 +844,13 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, Tmp += C->getZExtValue(); if (Tmp > TyBits) Tmp = TyBits; } + // vector ashr X, <C, C, C, C> -> adds C sign bits + if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) { + if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) { + Tmp += CI->getZExtValue(); + if (Tmp > TyBits) Tmp = TyBits; + } + } return Tmp; case Instruction::Shl: if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) { @@ -875,8 +1047,9 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, // Turn Op0 << Op1 into Op0 * 2^Op1 APInt Op1Int = Op1CI->getValue(); uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1); - Op1 = ConstantInt::get(V->getContext(), - APInt(Op1Int.getBitWidth(), 0).set(BitToSet)); + APInt API(Op1Int.getBitWidth(), 0); + API.setBit(BitToSet); + Op1 = ConstantInt::get(V->getContext(), API); } Value *Mul0 = NULL; @@ -982,6 +1155,80 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return false; } +/// isBytewiseValue - If the specified value can be set by repeating the same +/// byte in memory, return the i8 value that it is represented with. This is +/// true for all i8 values obviously, but is also true for i32 0, i32 -1, +/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated +/// byte store (e.g. i16 0x1234), return null. +Value *llvm::isBytewiseValue(Value *V) { + // All byte-wide stores are splatable, even of arbitrary variables. + if (V->getType()->isIntegerTy(8)) return V; + + // Handle 'null' ConstantArrayZero etc. + if (Constant *C = dyn_cast<Constant>(V)) + if (C->isNullValue()) + return Constant::getNullValue(Type::getInt8Ty(V->getContext())); + + // Constant float and double values can be handled as integer values if the + // corresponding integer value is "byteable". An important case is 0.0. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { + if (CFP->getType()->isFloatTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext())); + if (CFP->getType()->isDoubleTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext())); + // Don't handle long double formats, which have strange constraints. + } + + // We can handle constant integers that are power of two in size and a + // multiple of 8 bits. + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + unsigned Width = CI->getBitWidth(); + if (isPowerOf2_32(Width) && Width > 8) { + // We can handle this value if the recursive binary decomposition is the + // same at all levels. + APInt Val = CI->getValue(); + APInt Val2; + while (Val.getBitWidth() != 8) { + unsigned NextWidth = Val.getBitWidth()/2; + Val2 = Val.lshr(NextWidth); + Val2 = Val2.trunc(Val.getBitWidth()/2); + Val = Val.trunc(Val.getBitWidth()/2); + + // If the top/bottom halves aren't the same, reject it. 
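// Worked example (values invented for illustration): i32 0xF0F0F0F0 splits
// into equal halves 0xF0F0 / 0xF0F0, then 0xF0 / 0xF0, so the splat byte is
// 0xF0. In contrast, i16 0x1234 splits into 0x12 / 0x34, which differ, so
// the decomposition below rejects it on the first iteration.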
+ if (Val != Val2) + return 0; + } + return ConstantInt::get(V->getContext(), Val); + } + } + + // A ConstantArray is splatable if all its members are equal and also + // splatable. + if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) { + if (CA->getNumOperands() == 0) + return 0; + + Value *Val = isBytewiseValue(CA->getOperand(0)); + if (!Val) + return 0; + + for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I) + if (CA->getOperand(I-1) != CA->getOperand(I)) + return 0; + + return Val; + } + + // Conceptually, we could handle things like: + // %a = zext i8 %X to i16 + // %b = shl i16 %a, 8 + // %c = or i16 %a, %b + // but until there is an example that actually needs this, it doesn't seem + // worth worrying about. + return 0; +} + + // This is the recursive version of BuildSubAggregate. It takes a few different // arguments. Idxs is the index within the nested struct From that we are // looking at now (which is of type IndexedType). IdxSkip is the number of @@ -1159,6 +1406,47 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, return 0; } +/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if +/// it can be expressed as a base pointer plus a constant offset. Return the +/// base and offset to the caller. +Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const TargetData &TD) { + Operator *PtrOp = dyn_cast<Operator>(Ptr); + if (PtrOp == 0) return Ptr; + + // Just look through bitcasts. + if (PtrOp->getOpcode() == Instruction::BitCast) + return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); + + // If this is a GEP with constant indices, we can look through it. + GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp); + if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; + ++I, ++GTI) { + ConstantInt *OpC = cast<ConstantInt>(*I); + if (OpC->isZero()) continue; + + // Handle a struct and array indices which add their offset to the pointer. + if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += OpC->getSExtValue()*Size; + } + } + + // Re-sign extend from the pointer size if needed to get overflow edge cases + // right. + unsigned PtrSize = TD.getPointerSizeInBits(); + if (PtrSize < 64) + Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); + + return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); +} + + /// GetConstantStringInfo - This function computes the length of a /// null-terminated C string pointed to by V. If successful, it returns true /// and returns the string in Str. If unsuccessful, it returns false. @@ -1386,3 +1674,32 @@ uint64_t llvm::GetStringLength(Value *V) { // an empty string as a length. return Len == ~0ULL ? 
1 : Len; } + +Value * +llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) { + if (!V->getType()->isPointerTy()) + return V; + for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + return V; + V = GA->getAliasee(); + } else { + // See if InstructionSimplify knows any relevant tricks. + if (Instruction *I = dyn_cast<Instruction>(V)) + // TODO: Acquire a DominatorTree and use it. + if (Value *Simplified = SimplifyInstruction(I, TD, 0)) { + V = Simplified; + continue; + } + + return V; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } + return V; +}
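For reference, a minimal standalone sketch of the stripping loop in GetUnderlyingObject, using an invented ToyPtr type in place of LLVM's Value hierarchy; it keeps the same MaxLookup convention visible in the loop condition, where 0 means no limit.

// Toy stand-in for a pointer value: a chain of GEP/bitcast wrappers ending
// at a root object (Base is null for the underlying object itself).
struct ToyPtr {
  const ToyPtr *Base;
};

static const ToyPtr *toyUnderlyingObject(const ToyPtr *P,
                                         unsigned MaxLookup = 6) {
  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
    if (!P->Base)
      return P;                       // nothing left to strip
    P = P->Base;                      // peel one GEP/bitcast/alias layer
  }
  return P;                           // lookup budget exhausted: best effort
}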