diff options
Diffstat (limited to 'include/llvm/Analysis')
65 files changed, 4472 insertions, 2147 deletions
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 5cc840a64a62..d6308b7073a0 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -38,11 +38,11 @@ #ifndef LLVM_ANALYSIS_ALIASANALYSIS_H #define LLVM_ANALYSIS_ALIASANALYSIS_H -#include "llvm/ADT/DenseMap.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/TargetLibraryInfo.h" namespace llvm { class BasicAAResult; @@ -50,7 +50,6 @@ class LoadInst; class StoreInst; class VAArgInst; class DataLayout; -class TargetLibraryInfo; class Pass; class AnalysisUsage; class MemTransferInst; @@ -141,7 +140,7 @@ enum FunctionModRefBehavior { /// non-volatile loads and stores from objects pointed to by its /// pointer-typed arguments, with arbitrary offsets. /// - /// This property corresponds to the IntrReadWriteArgMem LLVM intrinsic flag. + /// This property corresponds to the IntrArgMemOnly LLVM intrinsic flag. FMRB_OnlyAccessesArgumentPointees = FMRL_ArgumentPointees | MRI_ModRef, /// This function does not perform any non-local stores or volatile loads, @@ -152,6 +151,13 @@ enum FunctionModRefBehavior { /// This property corresponds to the IntrReadMem LLVM intrinsic flag. FMRB_OnlyReadsMemory = FMRL_Anywhere | MRI_Ref, + // This function does not read from memory anywhere, but may write to any + // memory location. + // + // This property corresponds to the LLVM IR 'writeonly' attribute. + // This property corresponds to the IntrWriteMem LLVM intrinsic flag. + FMRB_DoesNotReadMemory = FMRL_Anywhere | MRI_Mod, + /// This indicates that the function could not be classified into one of the /// behaviors above. FMRB_UnknownModRefBehavior = FMRL_Anywhere | MRI_ModRef @@ -161,9 +167,8 @@ class AAResults { public: // Make these results default constructable and movable. We have to spell // these out because MSVC won't synthesize them. 
- AAResults() {} + AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {} AAResults(AAResults &&Arg); - AAResults &operator=(AAResults &&Arg); ~AAResults(); /// Register a specific AA result. @@ -314,6 +319,12 @@ public: return !(MRB & MRI_Mod); } + /// Checks if functions with the specified behavior are known to only write + /// memory (or not access memory at all). + static bool doesNotReadMemory(FunctionModRefBehavior MRB) { + return !(MRB & MRI_Ref); + } + /// Checks if functions with the specified behavior are known to read and /// write at most from objects pointed to by their pointer-typed arguments /// (with arbitrary offsets). @@ -450,11 +461,11 @@ public: ModRefInfo getModRefInfo(const Instruction *I) { if (auto CS = ImmutableCallSite(I)) { auto MRB = getModRefBehavior(CS); - if (MRB & MRI_ModRef) + if ((MRB & MRI_ModRef) == MRI_ModRef) return MRI_ModRef; - else if (MRB & MRI_Ref) + if (MRB & MRI_Ref) return MRI_Ref; - else if (MRB & MRI_Mod) + if (MRB & MRI_Mod) return MRI_Mod; return MRI_NoModRef; } @@ -557,6 +568,8 @@ private: template <typename T> friend class AAResultBase; + const TargetLibraryInfo &TLI; + std::vector<std::unique_ptr<Concept>> AAs; }; @@ -753,20 +766,23 @@ protected: } }; - const TargetLibraryInfo &TLI; - - explicit AAResultBase(const TargetLibraryInfo &TLI) : TLI(TLI) {} + explicit AAResultBase() {} // Provide all the copy and move constructors so that derived types aren't // constrained. - AAResultBase(const AAResultBase &Arg) : TLI(Arg.TLI) {} - AAResultBase(AAResultBase &&Arg) : TLI(Arg.TLI) {} + AAResultBase(const AAResultBase &Arg) {} + AAResultBase(AAResultBase &&Arg) {} /// Get a proxy for the best AA result set to query at this time. /// /// When this result is part of a larger aggregation, this will proxy to that /// aggregation. When this result is used in isolation, it will just delegate /// back to the derived class's implementation. 
+ /// + /// Note that callers of this need to take considerable care to not cause + /// performance problems when they use this routine, in the case of a large + /// number of alias analyses being aggregated, it can be expensive to walk + /// back across the chain. AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); } public: @@ -783,13 +799,6 @@ public: } FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) { - if (!CS.hasOperandBundles()) - // If CS has operand bundles then aliasing attributes from the function it - // calls do not directly apply to the CallSite. This can be made more - // precise in the future. - if (const Function *F = CS.getCalledFunction()) - return getBestAAResults().getModRefBehavior(F); - return FMRB_UnknownModRefBehavior; } @@ -797,170 +806,24 @@ public: return FMRB_UnknownModRefBehavior; } - ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); - - ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); -}; - -/// Synthesize \c ModRefInfo for a call site and memory location by examining -/// the general behavior of the call site and any specific information for its -/// arguments. -/// -/// This essentially, delegates across the alias analysis interface to collect -/// information which may be enough to (conservatively) fulfill the query. 
-template <typename DerivedT> -ModRefInfo AAResultBase<DerivedT>::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { - auto MRB = getBestAAResults().getModRefBehavior(CS); - if (MRB == FMRB_DoesNotAccessMemory) - return MRI_NoModRef; - - ModRefInfo Mask = MRI_ModRef; - if (AAResults::onlyReadsMemory(MRB)) - Mask = MRI_Ref; - - if (AAResults::onlyAccessesArgPointees(MRB)) { - bool DoesAlias = false; - ModRefInfo AllArgsMask = MRI_NoModRef; - if (AAResults::doesAccessArgPointees(MRB)) { - for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), - AE = CS.arg_end(); - AI != AE; ++AI) { - const Value *Arg = *AI; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned ArgIdx = std::distance(CS.arg_begin(), AI); - MemoryLocation ArgLoc = MemoryLocation::getForArgument(CS, ArgIdx, TLI); - AliasResult ArgAlias = getBestAAResults().alias(ArgLoc, Loc); - if (ArgAlias != NoAlias) { - ModRefInfo ArgMask = getBestAAResults().getArgModRefInfo(CS, ArgIdx); - DoesAlias = true; - AllArgsMask = ModRefInfo(AllArgsMask | ArgMask); - } - } - } - if (!DoesAlias) - return MRI_NoModRef; - Mask = ModRefInfo(Mask & AllArgsMask); - } - - // If Loc is a constant memory location, the call definitely could not - // modify the memory location. - if ((Mask & MRI_Mod) && - getBestAAResults().pointsToConstantMemory(Loc, /*OrLocal*/ false)) - Mask = ModRefInfo(Mask & ~MRI_Mod); - - return Mask; -} - -/// Synthesize \c ModRefInfo for two call sites by examining the general -/// behavior of the call site and any specific information for its arguments. -/// -/// This essentially, delegates across the alias analysis interface to collect -/// information which may be enough to (conservatively) fulfill the query. -template <typename DerivedT> -ModRefInfo AAResultBase<DerivedT>::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { - // If CS1 or CS2 are readnone, they don't interact. 
- auto CS1B = getBestAAResults().getModRefBehavior(CS1); - if (CS1B == FMRB_DoesNotAccessMemory) - return MRI_NoModRef; - - auto CS2B = getBestAAResults().getModRefBehavior(CS2); - if (CS2B == FMRB_DoesNotAccessMemory) - return MRI_NoModRef; - - // If they both only read from memory, there is no dependence. - if (AAResults::onlyReadsMemory(CS1B) && AAResults::onlyReadsMemory(CS2B)) - return MRI_NoModRef; - - ModRefInfo Mask = MRI_ModRef; - - // If CS1 only reads memory, the only dependence on CS2 can be - // from CS1 reading memory written by CS2. - if (AAResults::onlyReadsMemory(CS1B)) - Mask = ModRefInfo(Mask & MRI_Ref); - - // If CS2 only access memory through arguments, accumulate the mod/ref - // information from CS1's references to the memory referenced by - // CS2's arguments. - if (AAResults::onlyAccessesArgPointees(CS2B)) { - ModRefInfo R = MRI_NoModRef; - if (AAResults::doesAccessArgPointees(CS2B)) { - for (ImmutableCallSite::arg_iterator I = CS2.arg_begin(), - E = CS2.arg_end(); - I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); - auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI); - - // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence - // of CS1 on that location is the inverse. - ModRefInfo ArgMask = - getBestAAResults().getArgModRefInfo(CS2, CS2ArgIdx); - if (ArgMask == MRI_Mod) - ArgMask = MRI_ModRef; - else if (ArgMask == MRI_Ref) - ArgMask = MRI_Mod; - - ArgMask = ModRefInfo(ArgMask & - getBestAAResults().getModRefInfo(CS1, CS2ArgLoc)); - - R = ModRefInfo((R | ArgMask) & Mask); - if (R == Mask) - break; - } - } - return R; + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { + return MRI_ModRef; } - // If CS1 only accesses memory through arguments, check if CS2 references - // any of the memory referenced by CS1's arguments. If not, return NoModRef. 
- if (AAResults::onlyAccessesArgPointees(CS1B)) { - ModRefInfo R = MRI_NoModRef; - if (AAResults::doesAccessArgPointees(CS1B)) { - for (ImmutableCallSite::arg_iterator I = CS1.arg_begin(), - E = CS1.arg_end(); - I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); - auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI); - - // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod - // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1 - // might Ref, then we care only about a Mod by CS2. - ModRefInfo ArgMask = getBestAAResults().getArgModRefInfo(CS1, CS1ArgIdx); - ModRefInfo ArgR = getBestAAResults().getModRefInfo(CS2, CS1ArgLoc); - if (((ArgMask & MRI_Mod) != MRI_NoModRef && - (ArgR & MRI_ModRef) != MRI_NoModRef) || - ((ArgMask & MRI_Ref) != MRI_NoModRef && - (ArgR & MRI_Mod) != MRI_NoModRef)) - R = ModRefInfo((R | ArgMask) & Mask); - - if (R == Mask) - break; - } - } - return R; + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { + return MRI_ModRef; } +}; - return Mask; -} -/// isNoAliasCall - Return true if this pointer is returned by a noalias -/// function. +/// Return true if this pointer is returned by a noalias function. bool isNoAliasCall(const Value *V); -/// isNoAliasArgument - Return true if this is an argument with the noalias -/// attribute. +/// Return true if this is an argument with the noalias attribute. bool isNoAliasArgument(const Value *V); -/// isIdentifiedObject - Return true if this pointer refers to a distinct and -/// identifiable object. This returns true for: +/// Return true if this pointer refers to a distinct and identifiable object. 
+/// This returns true for: /// Global Variables and Functions (but not Global Aliases) /// Allocas /// ByVal and NoAlias Arguments @@ -968,8 +831,8 @@ bool isNoAliasArgument(const Value *V); /// bool isIdentifiedObject(const Value *V); -/// isIdentifiedFunctionLocal - Return true if V is umabigously identified -/// at the function-level. Different IdentifiedFunctionLocals can't alias. +/// Return true if V is unambiguously identified at the function-level. +/// Different IdentifiedFunctionLocals can't alias. /// Further, an IdentifiedFunctionLocal can not alias with any function /// arguments other than itself, which is not necessarily true for /// IdentifiedObjects. @@ -987,42 +850,48 @@ bool isIdentifiedFunctionLocal(const Value *V); /// This manager effectively wraps the AnalysisManager for registering alias /// analyses. When you register your alias analysis with this manager, it will /// ensure the analysis itself is registered with its AnalysisManager. -class AAManager { +class AAManager : public AnalysisInfoMixin<AAManager> { public: typedef AAResults Result; // This type hase value semantics. We have to spell these out because MSVC // won't synthesize them. AAManager() {} - AAManager(AAManager &&Arg) - : FunctionResultGetters(std::move(Arg.FunctionResultGetters)) {} - AAManager(const AAManager &Arg) - : FunctionResultGetters(Arg.FunctionResultGetters) {} + AAManager(AAManager &&Arg) : ResultGetters(std::move(Arg.ResultGetters)) {} + AAManager(const AAManager &Arg) : ResultGetters(Arg.ResultGetters) {} AAManager &operator=(AAManager &&RHS) { - FunctionResultGetters = std::move(RHS.FunctionResultGetters); + ResultGetters = std::move(RHS.ResultGetters); return *this; } AAManager &operator=(const AAManager &RHS) { - FunctionResultGetters = RHS.FunctionResultGetters; + ResultGetters = RHS.ResultGetters; return *this; } /// Register a specific AA result. 
template <typename AnalysisT> void registerFunctionAnalysis() { - FunctionResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>); + ResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>); + } + + /// Register a specific AA result. + template <typename AnalysisT> void registerModuleAnalysis() { + ResultGetters.push_back(&getModuleAAResultImpl<AnalysisT>); } Result run(Function &F, AnalysisManager<Function> &AM) { - Result R; - for (auto &Getter : FunctionResultGetters) + Result R(AM.getResult<TargetLibraryAnalysis>(F)); + for (auto &Getter : ResultGetters) (*Getter)(F, AM, R); return R; } private: + friend AnalysisInfoMixin<AAManager>; + static char PassID; + SmallVector<void (*)(Function &F, AnalysisManager<Function> &AM, AAResults &AAResults), - 4> FunctionResultGetters; + 4> ResultGetters; template <typename AnalysisT> static void getFunctionAAResultImpl(Function &F, @@ -1030,6 +899,15 @@ private: AAResults &AAResults) { AAResults.addAAResult(AM.template getResult<AnalysisT>(F)); } + + template <typename AnalysisT> + static void getModuleAAResultImpl(Function &F, AnalysisManager<Function> &AM, + AAResults &AAResults) { + auto &MAM = + AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); + if (auto *R = MAM.template getCachedResult<AnalysisT>(*F.getParent())) + AAResults.addAAResult(*R); + } }; /// A wrapper pass to provide the legacy pass manager access to a suitably @@ -1065,8 +943,16 @@ ImmutablePass *createExternalAAWrapperPass( /// A helper for the legacy pass manager to create a \c AAResults /// object populated to the best of our ability for a particular function when /// inside of a \c ModulePass or a \c CallGraphSCCPass. +/// +/// If a \c ModulePass or a \c CallGraphSCCPass calls \p +/// createLegacyPMAAResults, it also needs to call \p getAAResultsAnalysisUsage +/// in \p getAnalysisUsage. 
AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR); +/// A helper for the legacy pass manager to populate \p AU to add uses to make +/// sure the analyses required by \p createLegacyPMAAResults are available. +void getAAResultsAnalysisUsage(AnalysisUsage &AU); + } // End llvm namespace #endif diff --git a/include/llvm/Analysis/AliasAnalysisEvaluator.h b/include/llvm/Analysis/AliasAnalysisEvaluator.h new file mode 100644 index 000000000000..505ed0d9723a --- /dev/null +++ b/include/llvm/Analysis/AliasAnalysisEvaluator.h @@ -0,0 +1,70 @@ +//===- AliasAnalysisEvaluator.h - Alias Analysis Accuracy Evaluator -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a simple N^2 alias analysis accuracy evaluator. The +/// analysis result is a set of statistics of how many times the AA +/// infrastructure provides each kind of alias result and mod/ref result when +/// queried with all pairs of pointers in the function. +/// +/// It can be used to evaluate a change in an alias analysis implementation, +/// algorithm, or the AA pipeline infrastructure itself. It acts like a stable +/// and easily tested consumer of all AA information exposed. +/// +/// This is inspired and adapted from code by: Naveen Neelakantam, Francesco +/// Spadini, and Wojciech Stryjewski. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_ALIASANALYSISEVALUATOR_H +#define LLVM_ANALYSIS_ALIASANALYSISEVALUATOR_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class AAResults; + +class AAEvaluator : public PassInfoMixin<AAEvaluator> { + int64_t FunctionCount; + int64_t NoAliasCount, MayAliasCount, PartialAliasCount, MustAliasCount; + int64_t NoModRefCount, ModCount, RefCount, ModRefCount; + +public: + AAEvaluator() + : FunctionCount(), NoAliasCount(), MayAliasCount(), PartialAliasCount(), + MustAliasCount(), NoModRefCount(), ModCount(), RefCount(), + ModRefCount() {} + AAEvaluator(AAEvaluator &&Arg) + : FunctionCount(Arg.FunctionCount), NoAliasCount(Arg.NoAliasCount), + MayAliasCount(Arg.MayAliasCount), + PartialAliasCount(Arg.PartialAliasCount), + MustAliasCount(Arg.MustAliasCount), NoModRefCount(Arg.NoModRefCount), + ModCount(Arg.ModCount), RefCount(Arg.RefCount), + ModRefCount(Arg.ModRefCount) { + Arg.FunctionCount = 0; + } + ~AAEvaluator(); + + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); + +private: + // Allow the legacy pass to run this using an internal API. + friend class AAEvalLegacyPass; + + void runInternal(Function &F, AAResults &AA); +}; + +/// Create a wrapper of the above for the legacy pass manager. +FunctionPass *createAAEvalPass(); + +} + +#endif diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 37fd69b081cc..cec56889c0ae 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// // -// This file defines two classes: AliasSetTracker and AliasSet. These interface +// This file defines two classes: AliasSetTracker and AliasSet. 
These interfaces // are used to classify a collection of pointer references into a maximal number -// of disjoint sets. Each AliasSet object constructed by the AliasSetTracker +// of disjoint sets. Each AliasSet object constructed by the AliasSetTracker // object refers to memory disjoint from the other sets. // //===----------------------------------------------------------------------===// @@ -30,6 +30,7 @@ namespace llvm { class LoadInst; class StoreInst; class VAArgInst; +class MemSetInst; class AliasSetTracker; class AliasSet; @@ -58,8 +59,12 @@ class AliasSet : public ilist_node<AliasSet> { return &NextInList; } - void updateSizeAndAAInfo(uint64_t NewSize, const AAMDNodes &NewAAInfo) { - if (NewSize > Size) Size = NewSize; + bool updateSizeAndAAInfo(uint64_t NewSize, const AAMDNodes &NewAAInfo) { + bool SizeChanged = false; + if (NewSize > Size) { + Size = NewSize; + SizeChanged = true; + } if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey()) // We don't have a AAInfo yet. Set it to NewAAInfo. @@ -67,12 +72,14 @@ class AliasSet : public ilist_node<AliasSet> { else if (AAInfo != NewAAInfo) // NewAAInfo conflicts with AAInfo. AAInfo = DenseMapInfo<AAMDNodes>::getTombstoneKey(); + + return SizeChanged; } uint64_t getSize() const { return Size; } - /// getAAInfo - Return the AAInfo, or null if there is no - /// information or conflicting information. + /// Return the AAInfo, or null if there is no information or conflicting + /// information. AAMDNodes getAAInfo() const { // If we have missing or conflicting AAInfo, return null. if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey() || @@ -111,11 +118,11 @@ class AliasSet : public ilist_node<AliasSet> { PointerRec *PtrList, **PtrListEnd; // Doubly linked list of nodes. AliasSet *Forward; // Forwarding pointer. - // All instructions without a specific address in this alias set. + /// All instructions without a specific address in this alias set. 
std::vector<AssertingVH<Instruction> > UnknownInsts; - // RefCount - Number of nodes pointing to this AliasSet plus the number of - // AliasSets forwarding to it. + /// Number of nodes pointing to this AliasSet plus the number of AliasSets + /// forwarding to it. unsigned RefCount : 28; /// The kinds of access this alias set models. @@ -143,8 +150,8 @@ class AliasSet : public ilist_node<AliasSet> { }; unsigned Alias : 1; - // Volatile - True if this alias set contains volatile loads or stores. - bool Volatile : 1; + /// True if this alias set contains volatile loads or stores. + unsigned Volatile : 1; void addRef() { ++RefCount; } void dropRef(AliasSetTracker &AST) { @@ -165,20 +172,18 @@ public: bool isMustAlias() const { return Alias == SetMustAlias; } bool isMayAlias() const { return Alias == SetMayAlias; } - // isVolatile - Return true if this alias set contains volatile loads or - // stores. + /// Return true if this alias set contains volatile loads or stores. bool isVolatile() const { return Volatile; } - /// isForwardingAliasSet - Return true if this alias set should be ignored as - /// part of the AliasSetTracker object. + /// Return true if this alias set should be ignored as part of the + /// AliasSetTracker object. bool isForwardingAliasSet() const { return Forward; } - /// mergeSetIn - Merge the specified alias set into this alias set... - /// + /// Merge the specified alias set into this alias set. void mergeSetIn(AliasSet &AS, AliasSetTracker &AST); - // Alias Set iteration - Allow access to all of the pointer which are part of - // this alias set... + // Alias Set iteration - Allow access to all of the pointers which are part of + // this alias set. class iterator; iterator begin() const { return iterator(PtrList); } iterator end() const { return iterator(); } @@ -236,9 +241,9 @@ private: return PtrList; } - /// getForwardedTarget - Return the real alias set this represents. 
If this - /// has been merged with another set and is forwarding, return the ultimate - /// destination set. This also implements the union-find collapsing as well. + /// Return the real alias set this represents. If this has been merged with + /// another set and is forwarding, return the ultimate destination set. This + /// also implements the union-find collapsing. AliasSet *getForwardedTarget(AliasSetTracker &AST) { if (!Forward) return this; @@ -271,9 +276,8 @@ private: void setVolatile() { Volatile = true; } public: - /// aliasesPointer - Return true if the specified pointer "may" (or must) - /// alias one of the members in the set. - /// + /// Return true if the specified pointer "may" (or must) alias one of the + /// members in the set. bool aliasesPointer(const Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo, AliasAnalysis &AA) const; bool aliasesUnknownInst(const Instruction *Inst, AliasAnalysis &AA) const; @@ -285,8 +289,8 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) { } class AliasSetTracker { - /// CallbackVH - A CallbackVH to arrange for AliasSetTracker to be - /// notified whenever a Value is deleted. + /// A CallbackVH to arrange for AliasSetTracker to be notified whenever a + /// Value is deleted. class ASTCallbackVH final : public CallbackVH { AliasSetTracker *AST; void deleted() override; @@ -296,8 +300,8 @@ class AliasSetTracker { ASTCallbackVH(Value *V, AliasSetTracker *AST = nullptr); ASTCallbackVH &operator=(Value *V); }; - /// ASTCallbackVHDenseMapInfo - Traits to tell DenseMap that tell us how to - /// compare and hash the value handle. + /// Traits that tell DenseMap how to compare and hash the value + /// handle. 
struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {}; AliasAnalysis &AA; @@ -311,15 +315,14 @@ class AliasSetTracker { PointerMapType PointerMap; public: - /// AliasSetTracker ctor - Create an empty collection of AliasSets, and use - /// the specified alias analysis object to disambiguate load and store - /// addresses. + /// Create an empty collection of AliasSets, and use the specified alias + /// analysis object to disambiguate load and store addresses. explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {} ~AliasSetTracker() { clear(); } - /// add methods - These methods are used to add different types of - /// instructions to the alias sets. Adding a new instruction can result in - /// one of three actions happening: + /// These methods are used to add different types of instructions to the alias + /// sets. Adding a new instruction can result in one of three actions + /// happening: /// /// 1. If the instruction doesn't alias any other sets, create a new set. /// 2. If the instruction aliases exactly one set, add it to the set @@ -333,47 +336,46 @@ public: bool add(LoadInst *LI); bool add(StoreInst *SI); bool add(VAArgInst *VAAI); + bool add(MemSetInst *MSI); bool add(Instruction *I); // Dispatch to one of the other add methods... void add(BasicBlock &BB); // Add all instructions in basic block void add(const AliasSetTracker &AST); // Add alias relations from another AST bool addUnknown(Instruction *I); - /// remove methods - These methods are used to remove all entries that might - /// be aliased by the specified instruction. These methods return true if any - /// alias sets were eliminated. - // Remove a location + /// These methods are used to remove all entries that might be aliased by the + /// specified instruction. These methods return true if any alias sets were + /// eliminated. 
bool remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo); bool remove(LoadInst *LI); bool remove(StoreInst *SI); bool remove(VAArgInst *VAAI); + bool remove(MemSetInst *MSI); bool remove(Instruction *I); void remove(AliasSet &AS); bool removeUnknown(Instruction *I); void clear(); - /// getAliasSets - Return the alias sets that are active. - /// + /// Return the alias sets that are active. const ilist<AliasSet> &getAliasSets() const { return AliasSets; } - /// getAliasSetForPointer - Return the alias set that the specified pointer - /// lives in. If the New argument is non-null, this method sets the value to - /// true if a new alias set is created to contain the pointer (because the - /// pointer didn't alias anything). + /// Return the alias set that the specified pointer lives in. If the New + /// argument is non-null, this method sets the value to true if a new alias + /// set is created to contain the pointer (because the pointer didn't alias + /// anything). AliasSet &getAliasSetForPointer(Value *P, uint64_t Size, const AAMDNodes &AAInfo, bool *New = nullptr); - /// getAliasSetForPointerIfExists - Return the alias set containing the - /// location specified if one exists, otherwise return null. + /// Return the alias set containing the location specified if one exists, + /// otherwise return null. AliasSet *getAliasSetForPointerIfExists(const Value *P, uint64_t Size, const AAMDNodes &AAInfo) { - return findAliasSetForPointer(P, Size, AAInfo); + return mergeAliasSetsForPointer(P, Size, AAInfo); } - /// containsPointer - Return true if the specified location is represented by - /// this alias set, false otherwise. This does not modify the AST object or - /// alias sets. + /// Return true if the specified location is represented by this alias set, + /// false otherwise. This does not modify the AST object or alias sets. 
bool containsPointer(const Value *P, uint64_t Size, const AAMDNodes &AAInfo) const; @@ -381,23 +383,19 @@ public: /// members in any of the sets. bool containsUnknown(const Instruction *I) const; - /// getAliasAnalysis - Return the underlying alias analysis object used by - /// this tracker. + /// Return the underlying alias analysis object used by this tracker. AliasAnalysis &getAliasAnalysis() const { return AA; } - /// deleteValue method - This method is used to remove a pointer value from - /// the AliasSetTracker entirely. It should be used when an instruction is - /// deleted from the program to update the AST. If you don't use this, you - /// would have dangling pointers to deleted instructions. - /// + /// This method is used to remove a pointer value from the AliasSetTracker + /// entirely. It should be used when an instruction is deleted from the + /// program to update the AST. If you don't use this, you would have dangling + /// pointers to deleted instructions. void deleteValue(Value *PtrVal); - /// copyValue - This method should be used whenever a preexisting value in the - /// program is copied or cloned, introducing a new value. Note that it is ok - /// for clients that use this method to introduce the same value multiple - /// times: if the tracker already knows about a value, it will ignore the - /// request. - /// + /// This method should be used whenever a preexisting value in the program is + /// copied or cloned, introducing a new value. Note that it is ok for clients + /// that use this method to introduce the same value multiple times: if the + /// tracker already knows about a value, it will ignore the request. void copyValue(Value *From, Value *To); typedef ilist<AliasSet>::iterator iterator; @@ -416,8 +414,8 @@ private: friend class AliasSet; void removeAliasSet(AliasSet *AS); - // getEntryFor - Just like operator[] on the map, except that it creates an - // entry for the pointer if it doesn't already exist. 
+ /// Just like operator[] on the map, except that it creates an entry for the + /// pointer if it doesn't already exist. AliasSet::PointerRec &getEntryFor(Value *V) { AliasSet::PointerRec *&Entry = PointerMap[ASTCallbackVH(V, this)]; if (!Entry) @@ -433,8 +431,8 @@ private: AS.Access |= E; return AS; } - AliasSet *findAliasSetForPointer(const Value *Ptr, uint64_t Size, - const AAMDNodes &AAInfo); + AliasSet *mergeAliasSetsForPointer(const Value *Ptr, uint64_t Size, + const AAMDNodes &AAInfo); AliasSet *findAliasSetForUnknownInst(Instruction *Inst); }; diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index b903f96d55b2..06f2a117ac21 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -22,16 +22,12 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include <memory> namespace llvm { -// FIXME: Replace this brittle forward declaration with the include of the new -// PassManager.h when doing so doesn't break the PassManagerBuilder. -template <typename IRUnitT> class AnalysisManager; -class PreservedAnalyses; - /// \brief A cache of @llvm.assume calls within a function. /// /// This cache provides fast lookup of assumptions within a function by caching @@ -97,36 +93,31 @@ public: /// /// This analysis is intended for use with the new pass manager and will vend /// assumption caches for a given function. -class AssumptionAnalysis { +class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> { + friend AnalysisInfoMixin<AssumptionAnalysis>; static char PassID; public: typedef AssumptionCache Result; - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - /// \brief Provide a name for the analysis for debugging and logging. 
- static StringRef name() { return "AssumptionAnalysis"; } - AssumptionAnalysis() {} AssumptionAnalysis(const AssumptionAnalysis &Arg) {} AssumptionAnalysis(AssumptionAnalysis &&Arg) {} AssumptionAnalysis &operator=(const AssumptionAnalysis &RHS) { return *this; } AssumptionAnalysis &operator=(AssumptionAnalysis &&RHS) { return *this; } - AssumptionCache run(Function &F) { return AssumptionCache(F); } + AssumptionCache run(Function &F, FunctionAnalysisManager &) { + return AssumptionCache(F); + } }; /// \brief Printer pass for the \c AssumptionAnalysis results. -class AssumptionPrinterPass { +class AssumptionPrinterPass : public PassInfoMixin<AssumptionPrinterPass> { raw_ostream &OS; public: explicit AssumptionPrinterPass(raw_ostream &OS) : OS(OS) {} - PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM); - - static StringRef name() { return "AssumptionPrinterPass"; } + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); }; /// \brief An immutable pass that tracks lazily created \c AssumptionCache diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h index 181a9327024c..a3195d17b029 100644 --- a/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/include/llvm/Analysis/BasicAliasAnalysis.h @@ -40,6 +40,7 @@ class BasicAAResult : public AAResultBase<BasicAAResult> { friend AAResultBase<BasicAAResult>; const DataLayout &DL; + const TargetLibraryInfo &TLI; AssumptionCache &AC; DominatorTree *DT; LoopInfo *LI; @@ -48,13 +49,14 @@ public: BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr) - : AAResultBase(TLI), DL(DL), AC(AC), DT(DT), LI(LI) {} + : AAResultBase(), DL(DL), TLI(TLI), AC(AC), DT(DT), LI(LI) {} BasicAAResult(const BasicAAResult &Arg) - : AAResultBase(Arg), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {} - BasicAAResult(BasicAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL), AC(Arg.AC), 
DT(Arg.DT), + : AAResultBase(Arg), DL(Arg.DL), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {} + BasicAAResult(BasicAAResult &&Arg) + : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI), AC(Arg.AC), + DT(Arg.DT), LI(Arg.LI) {} /// Handle invalidation events from the new pass manager. /// @@ -107,6 +109,20 @@ private: } }; + // Represents the internal structure of a GEP, decomposed into a base pointer, + // constant offsets, and variable scaled indices. + struct DecomposedGEP { + // Base pointer of the GEP + const Value *Base; + // Total constant offset w.r.t the base from indexing into structs + int64_t StructOffset; + // Total constant offset w.r.t the base from indexing through + // pointers/arrays/vectors + int64_t OtherOffset; + // Scaled variable (non-constant) indices. + SmallVector<VariableGEPIndex, 4> VarIndices; + }; + /// Track alias queries to guard against recursion. typedef std::pair<MemoryLocation, MemoryLocation> LocPair; typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy; @@ -137,11 +153,13 @@ private: const DataLayout &DL, unsigned Depth, AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW); - static const Value * - DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, - SmallVectorImpl<VariableGEPIndex> &VarIndices, - bool &MaxLookupReached, const DataLayout &DL, - AssumptionCache *AC, DominatorTree *DT); + static bool DecomposeGEPExpression(const Value *V, DecomposedGEP &Decomposed, + const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT); + + static bool isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, + const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, + uint64_t ObjectAccessSize); + /// \brief A Heuristic for aliasGEP that searches for a constant offset /// between the variables. /// @@ -178,20 +196,14 @@ private: }; /// Analysis pass providing a never-invalidated alias analysis result. 
-class BasicAA { +class BasicAA : public AnalysisInfoMixin<BasicAA> { + friend AnalysisInfoMixin<BasicAA>; + static char PassID; + public: typedef BasicAAResult Result; - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - BasicAAResult run(Function &F, AnalysisManager<Function> *AM); - - /// \brief Provide access to a name for this pass for debugging purposes. - static StringRef name() { return "BasicAliasAnalysis"; } - -private: - static char PassID; + BasicAAResult run(Function &F, AnalysisManager<Function> &AM); }; /// Legacy wrapper pass to provide the BasicAAResult object. diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h index 6f2a2b522769..7d48dfc9121e 100644 --- a/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/include/llvm/Analysis/BlockFrequencyInfo.h @@ -14,6 +14,8 @@ #ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFO_H #define LLVM_ANALYSIS_BLOCKFREQUENCYINFO_H +#include "llvm/ADT/Optional.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include <climits> @@ -30,12 +32,21 @@ class BlockFrequencyInfo { typedef BlockFrequencyInfoImpl<BasicBlock> ImplType; std::unique_ptr<ImplType> BFI; + void operator=(const BlockFrequencyInfo &) = delete; + BlockFrequencyInfo(const BlockFrequencyInfo &) = delete; + public: BlockFrequencyInfo(); BlockFrequencyInfo(const Function &F, const BranchProbabilityInfo &BPI, const LoopInfo &LI); + BlockFrequencyInfo(BlockFrequencyInfo &&Arg); + + BlockFrequencyInfo &operator=(BlockFrequencyInfo &&RHS); + + ~BlockFrequencyInfo(); const Function *getFunction() const; + const BranchProbabilityInfo *getBPI() const; void view() const; /// getblockFreq - Return block frequency. Return 0 if we don't have the @@ -45,6 +56,11 @@ public: /// floating points. BlockFrequency getBlockFreq(const BasicBlock *BB) const; + /// \brief Returns the estimated profile count of \p BB. 
+ /// This computes the relative block frequency of \p BB and multiplies it by + /// the enclosing function's count (if available) and returns the value. + Optional<uint64_t> getBlockProfileCount(const BasicBlock *BB) const; + // Set the frequency of the given basic block. void setBlockFreq(const BasicBlock *BB, uint64_t Freq); @@ -65,6 +81,30 @@ public: void print(raw_ostream &OS) const; }; +/// \brief Analysis pass which computes \c BlockFrequencyInfo. +class BlockFrequencyAnalysis + : public AnalysisInfoMixin<BlockFrequencyAnalysis> { + friend AnalysisInfoMixin<BlockFrequencyAnalysis>; + static char PassID; + +public: + /// \brief Provide the result typedef for this analysis pass. + typedef BlockFrequencyInfo Result; + + /// \brief Run the analysis pass over a function and produce BFI. + Result run(Function &F, AnalysisManager<Function> &AM); +}; + +/// \brief Printer pass for the \c BlockFrequencyInfo results. +class BlockFrequencyPrinterPass + : public PassInfoMixin<BlockFrequencyPrinterPass> { + raw_ostream &OS; + +public: + explicit BlockFrequencyPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); +}; + /// \brief Legacy analysis pass which computes \c BlockFrequencyInfo. 
class BlockFrequencyInfoWrapperPass : public FunctionPass { BlockFrequencyInfo BFI; diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 387e9a887d93..7ed06b1bb68f 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -16,12 +16,16 @@ #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/BasicBlock.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/ScaledNumber.h" #include "llvm/Support/raw_ostream.h" #include <deque> @@ -476,6 +480,8 @@ public: Scaled64 getFloatingBlockFreq(const BlockNode &Node) const; BlockFrequency getBlockFreq(const BlockNode &Node) const; + Optional<uint64_t> getBlockProfileCount(const Function &F, + const BlockNode &Node) const; void setBlockFreq(const BlockNode &Node, uint64_t Freq); @@ -915,11 +921,17 @@ public: BlockFrequency getBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); } + Optional<uint64_t> getBlockProfileCount(const Function &F, + const BlockT *BB) const { + return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB)); + } void setBlockFreq(const BlockT *BB, uint64_t Freq); Scaled64 getFloatingBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB)); } + const BranchProbabilityInfoT &getBPI() const { return *BPI; } + /// \brief Print the frequencies for the current function. /// /// Prints the frequencies for the blocks in the current function. 
@@ -1173,12 +1185,10 @@ void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass( updateLoopWithIrreducible(*OuterLoop); } -namespace { // A helper function that converts a branch probability into weight. inline uint32_t getWeightFromBranchProb(const BranchProbability Prob) { return Prob.getNumerator(); } -} // namespace template <class BT> bool @@ -1224,6 +1234,115 @@ raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const { return OS; } +// Graph trait base class for block frequency information graph +// viewer. + +enum GVDAGType { GVDT_None, GVDT_Fraction, GVDT_Integer, GVDT_Count }; + +template <class BlockFrequencyInfoT, class BranchProbabilityInfoT> +struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits { + explicit BFIDOTGraphTraitsBase(bool isSimple = false) + : DefaultDOTGraphTraits(isSimple) {} + + typedef GraphTraits<BlockFrequencyInfoT *> GTraits; + typedef typename GTraits::NodeType NodeType; + typedef typename GTraits::ChildIteratorType EdgeIter; + typedef typename GTraits::nodes_iterator NodeIter; + + uint64_t MaxFrequency = 0; + static std::string getGraphName(const BlockFrequencyInfoT *G) { + return G->getFunction()->getName(); + } + + std::string getNodeAttributes(const NodeType *Node, + const BlockFrequencyInfoT *Graph, + unsigned HotPercentThreshold = 0) { + std::string Result; + if (!HotPercentThreshold) + return Result; + + // Compute MaxFrequency on the fly: + if (!MaxFrequency) { + for (NodeIter I = GTraits::nodes_begin(Graph), + E = GTraits::nodes_end(Graph); + I != E; ++I) { + NodeType &N = *I; + MaxFrequency = + std::max(MaxFrequency, Graph->getBlockFreq(&N).getFrequency()); + } + } + BlockFrequency Freq = Graph->getBlockFreq(Node); + BlockFrequency HotFreq = + (BlockFrequency(MaxFrequency) * + BranchProbability::getBranchProbability(HotPercentThreshold, 100)); + + if (Freq < HotFreq) + return Result; + + raw_string_ostream OS(Result); + OS << "color=\"red\""; + OS.flush(); + return Result; + } + + std::string 
getNodeLabel(const NodeType *Node, + const BlockFrequencyInfoT *Graph, GVDAGType GType) { + std::string Result; + raw_string_ostream OS(Result); + + OS << Node->getName().str() << " : "; + switch (GType) { + case GVDT_Fraction: + Graph->printBlockFreq(OS, Node); + break; + case GVDT_Integer: + OS << Graph->getBlockFreq(Node).getFrequency(); + break; + case GVDT_Count: { + auto Count = Graph->getBlockProfileCount(Node); + if (Count) + OS << Count.getValue(); + else + OS << "Unknown"; + break; + } + case GVDT_None: + llvm_unreachable("If we are not supposed to render a graph we should " + "never reach this point."); + } + return Result; + } + + std::string getEdgeAttributes(const NodeType *Node, EdgeIter EI, + const BlockFrequencyInfoT *BFI, + const BranchProbabilityInfoT *BPI, + unsigned HotPercentThreshold = 0) { + std::string Str; + if (!BPI) + return Str; + + BranchProbability BP = BPI->getEdgeProbability(Node, EI); + uint32_t N = BP.getNumerator(); + uint32_t D = BP.getDenominator(); + double Percent = 100.0 * N / D; + raw_string_ostream OS(Str); + OS << format("label=\"%.1f%%\"", Percent); + + if (HotPercentThreshold) { + BlockFrequency EFreq = BFI->getBlockFreq(Node) * BP; + BlockFrequency HotFreq = BlockFrequency(MaxFrequency) * + BranchProbability(HotPercentThreshold, 100); + + if (EFreq >= HotFreq) { + OS << ",color=\"red\""; + } + } + + OS.flush(); + return Str; + } +}; + } // end namespace llvm #undef DEBUG_TYPE diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index cfdf218491bd..6434ba962ebc 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -15,8 +15,11 @@ #define LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include 
"llvm/Pass.h" #include "llvm/Support/BranchProbability.h" @@ -40,7 +43,22 @@ class raw_ostream; class BranchProbabilityInfo { public: BranchProbabilityInfo() {} - BranchProbabilityInfo(Function &F, const LoopInfo &LI) { calculate(F, LI); } + BranchProbabilityInfo(const Function &F, const LoopInfo &LI) { + calculate(F, LI); + } + + BranchProbabilityInfo(BranchProbabilityInfo &&Arg) + : Probs(std::move(Arg.Probs)), LastF(Arg.LastF), + PostDominatedByUnreachable(std::move(Arg.PostDominatedByUnreachable)), + PostDominatedByColdCall(std::move(Arg.PostDominatedByColdCall)) {} + + BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) { + releaseMemory(); + Probs = std::move(RHS.Probs); + PostDominatedByColdCall = std::move(RHS.PostDominatedByColdCall); + PostDominatedByUnreachable = std::move(RHS.PostDominatedByUnreachable); + return *this; + } void releaseMemory(); @@ -74,7 +92,7 @@ public: /// /// Given a basic block, look through its successors and if one exists for /// which \see isEdgeHot would return true, return that successor block. - BasicBlock *getHotSucc(BasicBlock *BB) const; + const BasicBlock *getHotSucc(const BasicBlock *BB) const; /// \brief Print an edge's probability. /// @@ -98,9 +116,31 @@ public: return IsLikely ? LikelyProb : LikelyProb.getCompl(); } - void calculate(Function &F, const LoopInfo& LI); + void calculate(const Function &F, const LoopInfo &LI); + + /// Forget analysis results for the given basic block. + void eraseBlock(const BasicBlock *BB); private: + void operator=(const BranchProbabilityInfo &) = delete; + BranchProbabilityInfo(const BranchProbabilityInfo &) = delete; + + // We need to store CallbackVH's in order to correctly handle basic block + // removal. 
+ class BasicBlockCallbackVH final : public CallbackVH { + BranchProbabilityInfo *BPI; + void deleted() override { + assert(BPI != nullptr); + BPI->eraseBlock(cast<BasicBlock>(getValPtr())); + BPI->Handles.erase(*this); + } + + public: + BasicBlockCallbackVH(const Value *V, BranchProbabilityInfo *BPI=nullptr) + : CallbackVH(const_cast<Value *>(V)), BPI(BPI) {} + }; + DenseSet<BasicBlockCallbackVH, DenseMapInfo<Value*>> Handles; + // Since we allow duplicate edges from one basic block to another, we use // a pair (PredBlock and an index in the successors) to specify an edge. typedef std::pair<const BasicBlock *, unsigned> Edge; @@ -116,22 +156,46 @@ private: DenseMap<Edge, BranchProbability> Probs; /// \brief Track the last function we run over for printing. - Function *LastF; + const Function *LastF; /// \brief Track the set of blocks directly succeeded by a returning block. - SmallPtrSet<BasicBlock *, 16> PostDominatedByUnreachable; + SmallPtrSet<const BasicBlock *, 16> PostDominatedByUnreachable; /// \brief Track the set of blocks that always lead to a cold call. 
- SmallPtrSet<BasicBlock *, 16> PostDominatedByColdCall; - - bool calcUnreachableHeuristics(BasicBlock *BB); - bool calcMetadataWeights(BasicBlock *BB); - bool calcColdCallHeuristics(BasicBlock *BB); - bool calcPointerHeuristics(BasicBlock *BB); - bool calcLoopBranchHeuristics(BasicBlock *BB, const LoopInfo &LI); - bool calcZeroHeuristics(BasicBlock *BB); - bool calcFloatingPointHeuristics(BasicBlock *BB); - bool calcInvokeHeuristics(BasicBlock *BB); + SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall; + + bool calcUnreachableHeuristics(const BasicBlock *BB); + bool calcMetadataWeights(const BasicBlock *BB); + bool calcColdCallHeuristics(const BasicBlock *BB); + bool calcPointerHeuristics(const BasicBlock *BB); + bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI); + bool calcZeroHeuristics(const BasicBlock *BB); + bool calcFloatingPointHeuristics(const BasicBlock *BB); + bool calcInvokeHeuristics(const BasicBlock *BB); +}; + +/// \brief Analysis pass which computes \c BranchProbabilityInfo. +class BranchProbabilityAnalysis + : public AnalysisInfoMixin<BranchProbabilityAnalysis> { + friend AnalysisInfoMixin<BranchProbabilityAnalysis>; + static char PassID; + +public: + /// \brief Provide the result typedef for this analysis pass. + typedef BranchProbabilityInfo Result; + + /// \brief Run the analysis pass over a function and produce BPI. + BranchProbabilityInfo run(Function &F, AnalysisManager<Function> &AM); +}; + +/// \brief Printer pass for the \c BranchProbabilityAnalysis results. +class BranchProbabilityPrinterPass + : public PassInfoMixin<BranchProbabilityPrinterPass> { + raw_ostream &OS; + +public: + explicit BranchProbabilityPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); }; /// \brief Legacy analysis pass which computes \c BranchProbabilityInfo. 
diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h new file mode 100644 index 000000000000..48eca888419a --- /dev/null +++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h @@ -0,0 +1,138 @@ +//=- CFLAndersAliasAnalysis.h - Unification-based Alias Analysis ---*- C++-*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for LLVM's inclusion-based alias analysis +/// implemented with CFL graph reachability. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CFLANDERSALIASANALYSIS_H +#define LLVM_ANALYSIS_CFLANDERSALIASANALYSIS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include <forward_list> + +namespace llvm { + +class TargetLibraryInfo; + +namespace cflaa { +struct AliasSummary; +} + +class CFLAndersAAResult : public AAResultBase<CFLAndersAAResult> { + friend AAResultBase<CFLAndersAAResult>; + class FunctionInfo; + +public: + explicit CFLAndersAAResult(const TargetLibraryInfo &); + CFLAndersAAResult(CFLAndersAAResult &&); + ~CFLAndersAAResult(); + + /// Handle invalidation events from the new pass manager. + /// By definition, this result is stateless and so remains valid. 
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; } + /// Evict the given function from cache + void evict(const Function &Fn); + + /// \brief Get the alias summary for the given function + /// Return nullptr if the summary is not found or not available + const cflaa::AliasSummary *getAliasSummary(const Function &); + + AliasResult query(const MemoryLocation &, const MemoryLocation &); + AliasResult alias(const MemoryLocation &, const MemoryLocation &); + +private: + struct FunctionHandle final : public CallbackVH { + FunctionHandle(Function *Fn, CFLAndersAAResult *Result) + : CallbackVH(Fn), Result(Result) { + assert(Fn != nullptr); + assert(Result != nullptr); + } + + void deleted() override { removeSelfFromCache(); } + void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } + + private: + CFLAndersAAResult *Result; + + void removeSelfFromCache() { + assert(Result != nullptr); + auto *Val = getValPtr(); + Result->evict(*cast<Function>(Val)); + setValPtr(nullptr); + } + }; + + /// \brief Ensures that the given function is available in the cache. + /// Returns the appropriate entry from the cache. + const Optional<FunctionInfo> &ensureCached(const Function &); + + /// \brief Inserts the given Function into the cache. + void scan(const Function &); + + /// \brief Build summary for a given function + FunctionInfo buildInfoFrom(const Function &); + + const TargetLibraryInfo &TLI; + + /// \brief Cached mapping of Functions to their StratifiedSets. + /// If a function's sets are currently being built, it is marked + /// in the cache as an Optional without a value. This way, if we + /// have any kind of recursion, it is discernable from a function + /// that simply has empty sets. + DenseMap<const Function *, Optional<FunctionInfo>> Cache; + + std::forward_list<FunctionHandle> Handles; +}; + +/// Analysis pass providing a never-invalidated alias analysis result. 
+/// +/// FIXME: We really should refactor CFL to use the analysis more heavily, and +/// in particular to leverage invalidation to trigger re-computation. +class CFLAndersAA : public AnalysisInfoMixin<CFLAndersAA> { + friend AnalysisInfoMixin<CFLAndersAA>; + static char PassID; + +public: + typedef CFLAndersAAResult Result; + + CFLAndersAAResult run(Function &F, AnalysisManager<Function> &AM); +}; + +/// Legacy wrapper pass to provide the CFLAndersAAResult object. +class CFLAndersAAWrapperPass : public ImmutablePass { + std::unique_ptr<CFLAndersAAResult> Result; + +public: + static char ID; + + CFLAndersAAWrapperPass(); + + CFLAndersAAResult &getResult() { return *Result; } + const CFLAndersAAResult &getResult() const { return *Result; } + + void initializePass() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createCFLAndersAAWrapperPass - This pass implements a set-based approach to +// alias analysis. +// +ImmutablePass *createCFLAndersAAWrapperPass(); +} + +#endif diff --git a/include/llvm/Analysis/CFLAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h index 7473a454ab30..80a00d02b811 100644 --- a/include/llvm/Analysis/CFLAliasAnalysis.h +++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -1,4 +1,4 @@ -//===- CFLAliasAnalysis.h - CFL-Based Alias Analysis Interface ---*- C++ -*-==// +//=- CFLSteensAliasAnalysis.h - Unification-based Alias Analysis ---*- C++-*-=// // // The LLVM Compiler Infrastructure // @@ -7,17 +7,18 @@ // //===----------------------------------------------------------------------===// /// \file -/// This is the interface for LLVM's primary stateless and local alias analysis. +/// This is the interface for LLVM's unification-based alias analysis +/// implemented with CFL graph reachability. 
/// //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_CFLALIASANALYSIS_H -#define LLVM_ANALYSIS_CFLALIASANALYSIS_H +#ifndef LLVM_ANALYSIS_CFLSTEENSALIASANALYSIS_H +#define LLVM_ANALYSIS_CFLSTEENSALIASANALYSIS_H -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" @@ -26,14 +27,20 @@ namespace llvm { -class CFLAAResult : public AAResultBase<CFLAAResult> { - friend AAResultBase<CFLAAResult>; +class TargetLibraryInfo; - struct FunctionInfo; +namespace cflaa { +struct AliasSummary; +} + +class CFLSteensAAResult : public AAResultBase<CFLSteensAAResult> { + friend AAResultBase<CFLSteensAAResult>; + class FunctionInfo; public: - explicit CFLAAResult(const TargetLibraryInfo &TLI); - CFLAAResult(CFLAAResult &&Arg); + explicit CFLSteensAAResult(const TargetLibraryInfo &); + CFLSteensAAResult(CFLSteensAAResult &&Arg); + ~CFLSteensAAResult(); /// Handle invalidation events from the new pass manager. /// @@ -49,26 +56,23 @@ public: /// Returns the appropriate entry from the cache. const Optional<FunctionInfo> &ensureCached(Function *Fn); + /// \brief Get the alias summary for the given function + /// Return nullptr if the summary is not found or not available + const cflaa::AliasSummary *getAliasSummary(Function &Fn); + AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB); AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - if (LocA.Ptr == LocB.Ptr) { - if (LocA.Size == LocB.Size) { - return MustAlias; - } else { - return PartialAlias; - } - } + if (LocA.Ptr == LocB.Ptr) + return LocA.Size == LocB.Size ? MustAlias : PartialAlias; // Comparisons between global variables and other constants should be // handled by BasicAA. 
- // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing - // a GlobalValue and ConstantExpr, but every query needs to have at least - // one Value tied to a Function, and neither GlobalValues nor ConstantExprs - // are. - if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) { + // CFLSteensAA may report NoAlias when comparing a GlobalValue and + // ConstantExpr, but every query needs to have at least one Value tied to a + // Function, and neither GlobalValues nor ConstantExprs are. + if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) return AAResultBase::alias(LocA, LocB); - } AliasResult QueryResult = query(LocA, LocB); if (QueryResult == MayAlias) @@ -77,9 +81,19 @@ public: return QueryResult; } + /// Get the location associated with a pointer argument of a callsite. + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); + + /// Returns the behavior when calling the given call site. + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); + + /// Returns the behavior when calling the given function. For use when the + /// call site is not known. + FunctionModRefBehavior getModRefBehavior(const Function *F); + private: struct FunctionHandle final : public CallbackVH { - FunctionHandle(Function *Fn, CFLAAResult *Result) + FunctionHandle(Function *Fn, CFLSteensAAResult *Result) : CallbackVH(Fn), Result(Result) { assert(Fn != nullptr); assert(Result != nullptr); @@ -89,7 +103,7 @@ private: void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } private: - CFLAAResult *Result; + CFLSteensAAResult *Result; void removeSelfFromCache() { assert(Result != nullptr); @@ -99,6 +113,8 @@ private: } }; + const TargetLibraryInfo &TLI; + /// \brief Cached mapping of Functions to their StratifiedSets. /// If a function's sets are currently being built, it is marked /// in the cache as an Optional without a value. 
This way, if we @@ -114,45 +130,38 @@ private: /// /// FIXME: We really should refactor CFL to use the analysis more heavily, and /// in particular to leverage invalidation to trigger re-computation of sets. -class CFLAA { -public: - typedef CFLAAResult Result; - - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - CFLAAResult run(Function &F, AnalysisManager<Function> *AM); +class CFLSteensAA : public AnalysisInfoMixin<CFLSteensAA> { + friend AnalysisInfoMixin<CFLSteensAA>; + static char PassID; - /// \brief Provide access to a name for this pass for debugging purposes. - static StringRef name() { return "CFLAA"; } +public: + typedef CFLSteensAAResult Result; -private: - static char PassID; + CFLSteensAAResult run(Function &F, AnalysisManager<Function> &AM); }; -/// Legacy wrapper pass to provide the CFLAAResult object. -class CFLAAWrapperPass : public ImmutablePass { - std::unique_ptr<CFLAAResult> Result; +/// Legacy wrapper pass to provide the CFLSteensAAResult object. +class CFLSteensAAWrapperPass : public ImmutablePass { + std::unique_ptr<CFLSteensAAResult> Result; public: static char ID; - CFLAAWrapperPass(); + CFLSteensAAWrapperPass(); - CFLAAResult &getResult() { return *Result; } - const CFLAAResult &getResult() const { return *Result; } + CFLSteensAAResult &getResult() { return *Result; } + const CFLSteensAAResult &getResult() const { return *Result; } - bool doInitialization(Module &M) override; - bool doFinalization(Module &M) override; + void initializePass() override; void getAnalysisUsage(AnalysisUsage &AU) const override; }; //===--------------------------------------------------------------------===// // -// createCFLAAWrapperPass - This pass implements a set-based approach to +// createCFLSteensAAWrapperPass - This pass implements a set-based approach to // alias analysis. 
// -ImmutablePass *createCFLAAWrapperPass(); +ImmutablePass *createCFLSteensAAWrapperPass(); } #endif diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index e7635eb1ab67..3263ecec4e26 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -11,7 +11,7 @@ /// This header provides classes for managing passes over SCCs of the call /// graph. These passes form an important component of LLVM's interprocedural /// optimizations. Because they operate on the SCCs of the call graph, and they -/// wtraverse the graph in post order, they can effectively do pair-wise +/// traverse the graph in post order, they can effectively do pair-wise /// interprocedural optimizations for all call edges in the program. At each /// call site edge, the callee has already been optimized as much as is /// possible. This in turn allows very accurate analysis of it for IPO. @@ -26,6 +26,7 @@ namespace llvm { +extern template class PassManager<LazyCallGraph::SCC>; /// \brief The CGSCC pass manager. /// /// See the documentation for the PassManager template for details. It runs @@ -33,6 +34,7 @@ namespace llvm { /// typedef serves as a convenient way to refer to this construct. typedef PassManager<LazyCallGraph::SCC> CGSCCPassManager; +extern template class AnalysisManager<LazyCallGraph::SCC>; /// \brief The CGSCC analysis manager. /// /// See the documentation for the AnalysisManager template for detail @@ -41,147 +43,16 @@ typedef PassManager<LazyCallGraph::SCC> CGSCCPassManager; /// pass manager infrastructure. typedef AnalysisManager<LazyCallGraph::SCC> CGSCCAnalysisManager; -/// \brief A module analysis which acts as a proxy for a CGSCC analysis -/// manager. -/// -/// This primarily proxies invalidation information from the module analysis -/// manager and module pass manager to a CGSCC analysis manager. 
You should -/// never use a CGSCC analysis manager from within (transitively) a module -/// pass manager unless your parent module pass has received a proxy result -/// object for it. -class CGSCCAnalysisManagerModuleProxy { -public: - class Result { - public: - explicit Result(CGSCCAnalysisManager &CGAM) : CGAM(&CGAM) {} - // We have to explicitly define all the special member functions because - // MSVC refuses to generate them. - Result(const Result &Arg) : CGAM(Arg.CGAM) {} - Result(Result &&Arg) : CGAM(std::move(Arg.CGAM)) {} - Result &operator=(Result RHS) { - std::swap(CGAM, RHS.CGAM); - return *this; - } - ~Result(); - - /// \brief Accessor for the \c CGSCCAnalysisManager. - CGSCCAnalysisManager &getManager() { return *CGAM; } - - /// \brief Handler for invalidation of the module. - /// - /// If this analysis itself is preserved, then we assume that the call - /// graph of the module hasn't changed and thus we don't need to invalidate - /// *all* cached data associated with a \c SCC* in the \c - /// CGSCCAnalysisManager. - /// - /// Regardless of whether this analysis is marked as preserved, all of the - /// analyses in the \c CGSCCAnalysisManager are potentially invalidated - /// based on the set of preserved analyses. - bool invalidate(Module &M, const PreservedAnalyses &PA); - - private: - CGSCCAnalysisManager *CGAM; - }; - - static void *ID() { return (void *)&PassID; } - - static StringRef name() { return "CGSCCAnalysisManagerModuleProxy"; } - - explicit CGSCCAnalysisManagerModuleProxy(CGSCCAnalysisManager &CGAM) - : CGAM(&CGAM) {} - // We have to explicitly define all the special member functions because MSVC - // refuses to generate them. 
- CGSCCAnalysisManagerModuleProxy(const CGSCCAnalysisManagerModuleProxy &Arg) - : CGAM(Arg.CGAM) {} - CGSCCAnalysisManagerModuleProxy(CGSCCAnalysisManagerModuleProxy &&Arg) - : CGAM(std::move(Arg.CGAM)) {} - CGSCCAnalysisManagerModuleProxy & - operator=(CGSCCAnalysisManagerModuleProxy RHS) { - std::swap(CGAM, RHS.CGAM); - return *this; - } - - /// \brief Run the analysis pass and create our proxy result object. - /// - /// This doesn't do any interesting work, it is primarily used to insert our - /// proxy result object into the module analysis cache so that we can proxy - /// invalidation to the CGSCC analysis manager. - /// - /// In debug builds, it will also assert that the analysis manager is empty - /// as no queries should arrive at the CGSCC analysis manager prior to - /// this analysis being requested. - Result run(Module &M); +extern template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; +/// A proxy from a \c CGSCCAnalysisManager to a \c Module. +typedef InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module> + CGSCCAnalysisManagerModuleProxy; -private: - static char PassID; - - CGSCCAnalysisManager *CGAM; -}; - -/// \brief A CGSCC analysis which acts as a proxy for a module analysis -/// manager. -/// -/// This primarily provides an accessor to a parent module analysis manager to -/// CGSCC passes. Only the const interface of the module analysis manager is -/// provided to indicate that once inside of a CGSCC analysis pass you -/// cannot request a module analysis to actually run. Instead, the user must -/// rely on the \c getCachedResult API. -/// -/// This proxy *doesn't* manage the invalidation in any way. That is handled by -/// the recursive return path of each layer of the pass manager and the -/// returned PreservedAnalysis set. -class ModuleAnalysisManagerCGSCCProxy { -public: - /// \brief Result proxy object for \c ModuleAnalysisManagerCGSCCProxy. 
- class Result { - public: - explicit Result(const ModuleAnalysisManager &MAM) : MAM(&MAM) {} - // We have to explicitly define all the special member functions because - // MSVC refuses to generate them. - Result(const Result &Arg) : MAM(Arg.MAM) {} - Result(Result &&Arg) : MAM(std::move(Arg.MAM)) {} - Result &operator=(Result RHS) { - std::swap(MAM, RHS.MAM); - return *this; - } - - const ModuleAnalysisManager &getManager() const { return *MAM; } - - /// \brief Handle invalidation by ignoring it, this pass is immutable. - bool invalidate(LazyCallGraph::SCC &) { return false; } - - private: - const ModuleAnalysisManager *MAM; - }; - - static void *ID() { return (void *)&PassID; } - - static StringRef name() { return "ModuleAnalysisManagerCGSCCProxy"; } - - ModuleAnalysisManagerCGSCCProxy(const ModuleAnalysisManager &MAM) - : MAM(&MAM) {} - // We have to explicitly define all the special member functions because MSVC - // refuses to generate them. - ModuleAnalysisManagerCGSCCProxy(const ModuleAnalysisManagerCGSCCProxy &Arg) - : MAM(Arg.MAM) {} - ModuleAnalysisManagerCGSCCProxy(ModuleAnalysisManagerCGSCCProxy &&Arg) - : MAM(std::move(Arg.MAM)) {} - ModuleAnalysisManagerCGSCCProxy & - operator=(ModuleAnalysisManagerCGSCCProxy RHS) { - std::swap(MAM, RHS.MAM); - return *this; - } - - /// \brief Run the analysis pass and create our proxy result object. - /// Nothing to see here, it just forwards the \c MAM reference into the - /// result. - Result run(LazyCallGraph::SCC &) { return Result(*MAM); } - -private: - static char PassID; - - const ModuleAnalysisManager *MAM; -}; +extern template class OuterAnalysisManagerProxy<ModuleAnalysisManager, + LazyCallGraph::SCC>; +/// A proxy from a \c ModuleAnalysisManager to an \c SCC. +typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC> + ModuleAnalysisManagerCGSCCProxy; /// \brief The core module pass which does a post-order walk of the SCCs and /// runs a CGSCC pass over each one. 
@@ -192,21 +63,24 @@ private: /// \c CGSCCAnalysisManagerModuleProxy analysis prior to running the CGSCC /// pass over the module to enable a \c FunctionAnalysisManager to be used /// within this run safely. -template <typename CGSCCPassT> class ModuleToPostOrderCGSCCPassAdaptor { +template <typename CGSCCPassT> +class ModuleToPostOrderCGSCCPassAdaptor + : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>> { public: - explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) - : Pass(std::move(Pass)) {} + explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass, bool DebugLogging = false) + : Pass(std::move(Pass)), DebugLogging(DebugLogging) {} // We have to explicitly define all the special member functions because MSVC // refuses to generate them. ModuleToPostOrderCGSCCPassAdaptor( const ModuleToPostOrderCGSCCPassAdaptor &Arg) - : Pass(Arg.Pass) {} + : Pass(Arg.Pass), DebugLogging(Arg.DebugLogging) {} ModuleToPostOrderCGSCCPassAdaptor(ModuleToPostOrderCGSCCPassAdaptor &&Arg) - : Pass(std::move(Arg.Pass)) {} + : Pass(std::move(Arg.Pass)), DebugLogging(Arg.DebugLogging) {} friend void swap(ModuleToPostOrderCGSCCPassAdaptor &LHS, ModuleToPostOrderCGSCCPassAdaptor &RHS) { using std::swap; swap(LHS.Pass, RHS.Pass); + swap(LHS.DebugLogging, RHS.DebugLogging); } ModuleToPostOrderCGSCCPassAdaptor & operator=(ModuleToPostOrderCGSCCPassAdaptor RHS) { @@ -215,33 +89,36 @@ public: } /// \brief Runs the CGSCC pass across every SCC in the module. - PreservedAnalyses run(Module &M, ModuleAnalysisManager *AM) { - assert(AM && "We need analyses to compute the call graph!"); - + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { // Setup the CGSCC analysis manager from its proxy. CGSCCAnalysisManager &CGAM = - AM->getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager(); + AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager(); // Get the call graph for this module. 
- LazyCallGraph &CG = AM->getResult<LazyCallGraphAnalysis>(M); + LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M); PreservedAnalyses PA = PreservedAnalyses::all(); - for (LazyCallGraph::SCC &C : CG.postorder_sccs()) { - PreservedAnalyses PassPA = Pass.run(C, &CGAM); - - // We know that the CGSCC pass couldn't have invalidated any other - // SCC's analyses (that's the contract of a CGSCC pass), so - // directly handle the CGSCC analysis manager's invalidation here. We - // also update the preserved set of analyses to reflect that invalidated - // analyses are now safe to preserve. - // FIXME: This isn't quite correct. We need to handle the case where the - // pass updated the CG, particularly some child of the current SCC, and - // invalidate its analyses. - PassPA = CGAM.invalidate(C, std::move(PassPA)); - - // Then intersect the preserved set so that invalidation of module - // analyses will eventually occur when the module pass completes. - PA.intersect(std::move(PassPA)); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { + if (DebugLogging) + dbgs() << "Running an SCC pass across the RefSCC: " << RC << "\n"; + + for (LazyCallGraph::SCC &C : RC) { + PreservedAnalyses PassPA = Pass.run(C, CGAM); + + // We know that the CGSCC pass couldn't have invalidated any other + // SCC's analyses (that's the contract of a CGSCC pass), so + // directly handle the CGSCC analysis manager's invalidation here. We + // also update the preserved set of analyses to reflect that invalidated + // analyses are now safe to preserve. + // FIXME: This isn't quite correct. We need to handle the case where the + // pass updated the CG, particularly some child of the current SCC, and + // invalidate its analyses. + PassPA = CGAM.invalidate(C, std::move(PassPA)); + + // Then intersect the preserved set so that invalidation of module + // analyses will eventually occur when the module pass completes. 
+ PA.intersect(std::move(PassPA)); + } } // By definition we preserve the proxy. This precludes *any* invalidation @@ -252,163 +129,29 @@ public: return PA; } - static StringRef name() { return "ModuleToPostOrderCGSCCPassAdaptor"; } - private: CGSCCPassT Pass; + bool DebugLogging; }; /// \brief A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename CGSCCPassT> ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT> -createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) { - return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass)); +createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass, bool DebugLogging = false) { + return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass), DebugLogging); } -/// \brief A CGSCC analysis which acts as a proxy for a function analysis -/// manager. -/// -/// This primarily proxies invalidation information from the CGSCC analysis -/// manager and CGSCC pass manager to a function analysis manager. You should -/// never use a function analysis manager from within (transitively) a CGSCC -/// pass manager unless your parent CGSCC pass has received a proxy result -/// object for it. -class FunctionAnalysisManagerCGSCCProxy { -public: - class Result { - public: - explicit Result(FunctionAnalysisManager &FAM) : FAM(&FAM) {} - // We have to explicitly define all the special member functions because - // MSVC refuses to generate them. - Result(const Result &Arg) : FAM(Arg.FAM) {} - Result(Result &&Arg) : FAM(std::move(Arg.FAM)) {} - Result &operator=(Result RHS) { - std::swap(FAM, RHS.FAM); - return *this; - } - ~Result(); - - /// \brief Accessor for the \c FunctionAnalysisManager. - FunctionAnalysisManager &getManager() { return *FAM; } - - /// \brief Handler for invalidation of the SCC. 
- /// - /// If this analysis itself is preserved, then we assume that the set of \c - /// Function objects in the \c SCC hasn't changed and thus we don't need - /// to invalidate *all* cached data associated with a \c Function* in the \c - /// FunctionAnalysisManager. - /// - /// Regardless of whether this analysis is marked as preserved, all of the - /// analyses in the \c FunctionAnalysisManager are potentially invalidated - /// based on the set of preserved analyses. - bool invalidate(LazyCallGraph::SCC &C, const PreservedAnalyses &PA); - - private: - FunctionAnalysisManager *FAM; - }; - - static void *ID() { return (void *)&PassID; } - - static StringRef name() { return "FunctionAnalysisManagerCGSCCProxy"; } - - explicit FunctionAnalysisManagerCGSCCProxy(FunctionAnalysisManager &FAM) - : FAM(&FAM) {} - // We have to explicitly define all the special member functions because MSVC - // refuses to generate them. - FunctionAnalysisManagerCGSCCProxy( - const FunctionAnalysisManagerCGSCCProxy &Arg) - : FAM(Arg.FAM) {} - FunctionAnalysisManagerCGSCCProxy(FunctionAnalysisManagerCGSCCProxy &&Arg) - : FAM(std::move(Arg.FAM)) {} - FunctionAnalysisManagerCGSCCProxy & - operator=(FunctionAnalysisManagerCGSCCProxy RHS) { - std::swap(FAM, RHS.FAM); - return *this; - } - - /// \brief Run the analysis pass and create our proxy result object. - /// - /// This doesn't do any interesting work, it is primarily used to insert our - /// proxy result object into the module analysis cache so that we can proxy - /// invalidation to the function analysis manager. - /// - /// In debug builds, it will also assert that the analysis manager is empty - /// as no queries should arrive at the function analysis manager prior to - /// this analysis being requested. - Result run(LazyCallGraph::SCC &C); - -private: - static char PassID; - - FunctionAnalysisManager *FAM; -}; - -/// \brief A function analysis which acts as a proxy for a CGSCC analysis -/// manager. 
-/// -/// This primarily provides an accessor to a parent CGSCC analysis manager to -/// function passes. Only the const interface of the CGSCC analysis manager is -/// provided to indicate that once inside of a function analysis pass you -/// cannot request a CGSCC analysis to actually run. Instead, the user must -/// rely on the \c getCachedResult API. -/// -/// This proxy *doesn't* manage the invalidation in any way. That is handled by -/// the recursive return path of each layer of the pass manager and the -/// returned PreservedAnalysis set. -class CGSCCAnalysisManagerFunctionProxy { -public: - /// \brief Result proxy object for \c CGSCCAnalysisManagerFunctionProxy. - class Result { - public: - explicit Result(const CGSCCAnalysisManager &CGAM) : CGAM(&CGAM) {} - // We have to explicitly define all the special member functions because - // MSVC refuses to generate them. - Result(const Result &Arg) : CGAM(Arg.CGAM) {} - Result(Result &&Arg) : CGAM(std::move(Arg.CGAM)) {} - Result &operator=(Result RHS) { - std::swap(CGAM, RHS.CGAM); - return *this; - } - - const CGSCCAnalysisManager &getManager() const { return *CGAM; } - - /// \brief Handle invalidation by ignoring it, this pass is immutable. - bool invalidate(Function &) { return false; } +extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager, + LazyCallGraph::SCC>; +/// A proxy from a \c FunctionAnalysisManager to an \c SCC. +typedef InnerAnalysisManagerProxy<FunctionAnalysisManager, LazyCallGraph::SCC> + FunctionAnalysisManagerCGSCCProxy; - private: - const CGSCCAnalysisManager *CGAM; - }; - - static void *ID() { return (void *)&PassID; } - - static StringRef name() { return "CGSCCAnalysisManagerFunctionProxy"; } - - CGSCCAnalysisManagerFunctionProxy(const CGSCCAnalysisManager &CGAM) - : CGAM(&CGAM) {} - // We have to explicitly define all the special member functions because MSVC - // refuses to generate them. 
- CGSCCAnalysisManagerFunctionProxy( - const CGSCCAnalysisManagerFunctionProxy &Arg) - : CGAM(Arg.CGAM) {} - CGSCCAnalysisManagerFunctionProxy(CGSCCAnalysisManagerFunctionProxy &&Arg) - : CGAM(std::move(Arg.CGAM)) {} - CGSCCAnalysisManagerFunctionProxy & - operator=(CGSCCAnalysisManagerFunctionProxy RHS) { - std::swap(CGAM, RHS.CGAM); - return *this; - } - - /// \brief Run the analysis pass and create our proxy result object. - /// Nothing to see here, it just forwards the \c CGAM reference into the - /// result. - Result run(Function &) { return Result(*CGAM); } - -private: - static char PassID; - - const CGSCCAnalysisManager *CGAM; -}; +extern template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; +/// A proxy from a \c CGSCCAnalysisManager to a \c Function. +typedef OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function> + CGSCCAnalysisManagerFunctionProxy; /// \brief Adaptor that maps from a SCC to its functions. /// @@ -418,20 +161,23 @@ private: /// \c FunctionAnalysisManagerCGSCCProxy analysis prior to running the function /// pass over the SCC to enable a \c FunctionAnalysisManager to be used /// within this run safely. -template <typename FunctionPassT> class CGSCCToFunctionPassAdaptor { +template <typename FunctionPassT> +class CGSCCToFunctionPassAdaptor + : public PassInfoMixin<CGSCCToFunctionPassAdaptor<FunctionPassT>> { public: - explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass) - : Pass(std::move(Pass)) {} + explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass, bool DebugLogging = false) + : Pass(std::move(Pass)), DebugLogging(DebugLogging) {} // We have to explicitly define all the special member functions because MSVC // refuses to generate them. 
CGSCCToFunctionPassAdaptor(const CGSCCToFunctionPassAdaptor &Arg) - : Pass(Arg.Pass) {} + : Pass(Arg.Pass), DebugLogging(Arg.DebugLogging) {} CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg) - : Pass(std::move(Arg.Pass)) {} + : Pass(std::move(Arg.Pass)), DebugLogging(Arg.DebugLogging) {} friend void swap(CGSCCToFunctionPassAdaptor &LHS, CGSCCToFunctionPassAdaptor &RHS) { using std::swap; swap(LHS.Pass, RHS.Pass); + swap(LHS.DebugLogging, RHS.DebugLogging); } CGSCCToFunctionPassAdaptor &operator=(CGSCCToFunctionPassAdaptor RHS) { swap(*this, RHS); @@ -439,23 +185,24 @@ public: } /// \brief Runs the function pass across every function in the module. - PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager *AM) { - FunctionAnalysisManager *FAM = nullptr; - if (AM) - // Setup the function analysis manager from its proxy. - FAM = &AM->getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager(); + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM) { + // Setup the function analysis manager from its proxy. + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager(); + + if (DebugLogging) + dbgs() << "Running function passes across an SCC: " << C << "\n"; PreservedAnalyses PA = PreservedAnalyses::all(); - for (LazyCallGraph::Node *N : C) { - PreservedAnalyses PassPA = Pass.run(N->getFunction(), FAM); + for (LazyCallGraph::Node &N : C) { + PreservedAnalyses PassPA = Pass.run(N.getFunction(), FAM); // We know that the function pass couldn't have invalidated any other // function's analyses (that's the contract of a function pass), so // directly handle the function analysis manager's invalidation here. // Also, update the preserved analyses to reflect that once invalidated // these can again be preserved. 
- if (FAM) - PassPA = FAM->invalidate(N->getFunction(), std::move(PassPA)); + PassPA = FAM.invalidate(N.getFunction(), std::move(PassPA)); // Then intersect the preserved set so that invalidation of module // analyses will eventually occur when the module pass completes. @@ -472,18 +219,18 @@ public: return PA; } - static StringRef name() { return "CGSCCToFunctionPassAdaptor"; } - private: FunctionPassT Pass; + bool DebugLogging; }; /// \brief A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename FunctionPassT> CGSCCToFunctionPassAdaptor<FunctionPassT> -createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) { - return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass)); +createCGSCCToFunctionPassAdaptor(FunctionPassT Pass, bool DebugLogging = false) { + return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass), + DebugLogging); } } diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h index 5562e9b9465f..4ecacb0f0be2 100644 --- a/include/llvm/Analysis/CallGraph.h +++ b/include/llvm/Analysis/CallGraph.h @@ -57,6 +57,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include <map> @@ -294,20 +295,27 @@ private: /// This class implements the concept of an analysis pass used by the \c /// ModuleAnalysisManager to run an analysis over a module and cache the /// resulting data. -class CallGraphAnalysis { +class CallGraphAnalysis : public AnalysisInfoMixin<CallGraphAnalysis> { + friend AnalysisInfoMixin<CallGraphAnalysis>; + static char PassID; + public: /// \brief A formulaic typedef to inform clients of the result type. typedef CallGraph Result; - static void *ID() { return (void *)&PassID; } - /// \brief Compute the \c CallGraph for the module \c M. /// /// The real work here is done in the \c CallGraph constructor. 
- CallGraph run(Module *M) { return CallGraph(*M); } + CallGraph run(Module &M, ModuleAnalysisManager &) { return CallGraph(M); } +}; -private: - static char PassID; +/// \brief Printer pass for the \c CallGraphAnalysis results. +class CallGraphPrinterPass : public PassInfoMixin<CallGraphPrinterPass> { + raw_ostream &OS; + +public: + explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM); }; /// \brief The \c ModulePass which wraps up a \c CallGraph and the logic to diff --git a/include/llvm/Analysis/CallGraphSCCPass.h b/include/llvm/Analysis/CallGraphSCCPass.h index 9c7f7bd34cce..cb35b3292be7 100644 --- a/include/llvm/Analysis/CallGraphSCCPass.h +++ b/include/llvm/Analysis/CallGraphSCCPass.h @@ -23,6 +23,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Pass.h" +#include "llvm/PassSupport.h" namespace llvm { @@ -77,15 +78,21 @@ public: /// the call graph. If the derived class implements this method, it should /// always explicitly call the implementation here. void getAnalysisUsage(AnalysisUsage &Info) const override; + +protected: + /// Optional passes call this function to check whether the pass should be + /// skipped. This is the case when optimization bisect is over the limit. + bool skipSCC(CallGraphSCC &SCC) const; }; /// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on. class CallGraphSCC { + const CallGraph &CG; // The call graph for this SCC. void *Context; // The CGPassManager object that is vending this. 
std::vector<CallGraphNode*> Nodes; public: - CallGraphSCC(void *context) : Context(context) {} + CallGraphSCC(CallGraph &cg, void *context) : CG(cg), Context(context) {} void initialize(CallGraphNode *const *I, CallGraphNode *const *E) { Nodes.assign(I, E); @@ -101,6 +108,25 @@ public: typedef std::vector<CallGraphNode *>::const_iterator iterator; iterator begin() const { return Nodes.begin(); } iterator end() const { return Nodes.end(); } + + const CallGraph &getCallGraph() { return CG; } +}; + +void initializeDummyCGSCCPassPass(PassRegistry &); + +/// This pass is required by interprocedural register allocation. It forces +/// codegen to follow bottom up order on call graph. +class DummyCGSCCPass : public CallGraphSCCPass { +public: + static char ID; + DummyCGSCCPass() : CallGraphSCCPass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeDummyCGSCCPassPass(Registry); + }; + bool runOnSCC(CallGraphSCC &SCC) override { return false; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } }; } // End llvm namespace diff --git a/include/llvm/Analysis/CallPrinter.h b/include/llvm/Analysis/CallPrinter.h index 5f5d160c3ca0..8b697d5aa149 100644 --- a/include/llvm/Analysis/CallPrinter.h +++ b/include/llvm/Analysis/CallPrinter.h @@ -17,10 +17,10 @@ namespace llvm { - class ModulePass; +class ModulePass; - ModulePass *createCallGraphViewerPass(); - ModulePass *createCallGraphPrinterPass(); +ModulePass *createCallGraphViewerPass(); +ModulePass *createCallGraphDOTPrinterPass(); } // end namespace llvm diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h index 2f5969129e02..f512aca57865 100644 --- a/include/llvm/Analysis/CodeMetrics.h +++ b/include/llvm/Analysis/CodeMetrics.h @@ -42,50 +42,48 @@ bool callIsSmall(ImmutableCallSite CS); struct CodeMetrics { /// \brief True if this function contains a call to setjmp or other functions /// with attribute "returns twice" without having 
the attribute itself. - bool exposesReturnsTwice; + bool exposesReturnsTwice = false; /// \brief True if this function calls itself. - bool isRecursive; + bool isRecursive = false; /// \brief True if this function cannot be duplicated. /// /// True if this function contains one or more indirect branches, or it contains /// one or more 'noduplicate' instructions. - bool notDuplicatable; + bool notDuplicatable = false; + + /// \brief True if this function contains a call to a convergent function. + bool convergent = false; /// \brief True if this function calls alloca (in the C sense). - bool usesDynamicAlloca; + bool usesDynamicAlloca = false; /// \brief Number of instructions in the analyzed blocks. - unsigned NumInsts; + unsigned NumInsts = 0; /// \brief Number of analyzed blocks. - unsigned NumBlocks; + unsigned NumBlocks = 0; /// \brief Keeps track of basic block code size estimates. DenseMap<const BasicBlock *, unsigned> NumBBInsts; /// \brief Keep track of the number of calls to 'big' functions. - unsigned NumCalls; + unsigned NumCalls = 0; /// \brief The number of calls to internal functions with a single caller. /// /// These are likely targets for future inlining, likely exposed by /// interleaved devirtualization. - unsigned NumInlineCandidates; + unsigned NumInlineCandidates = 0; /// \brief How many instructions produce vector values. /// /// The inliner is more aggressive with inlining vector kernels. - unsigned NumVectorInsts; + unsigned NumVectorInsts = 0; /// \brief How many 'ret' instructions the blocks contain. - unsigned NumRets; - - CodeMetrics() - : exposesReturnsTwice(false), isRecursive(false), notDuplicatable(false), - usesDynamicAlloca(false), NumInsts(0), NumBlocks(0), NumCalls(0), - NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {} + unsigned NumRets = 0; /// \brief Add information about a block to the current state.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h index e8185b3b6307..b1504004d83c 100644 --- a/include/llvm/Analysis/ConstantFolding.h +++ b/include/llvm/Analysis/ConstantFolding.h @@ -21,30 +21,46 @@ #define LLVM_ANALYSIS_CONSTANTFOLDING_H namespace llvm { - class Constant; - class ConstantExpr; - class Instruction; - class DataLayout; - class TargetLibraryInfo; - class Function; - class Type; - template<typename T> - class ArrayRef; +class APInt; +template <typename T> class ArrayRef; +class Constant; +class ConstantExpr; +class DataLayout; +class Function; +class GlobalValue; +class Instruction; +class TargetLibraryInfo; +class Type; + +/// If this constant is a constant offset from a global, return the global and +/// the constant. Because of constantexprs, this function is recursive. +bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, + const DataLayout &DL); /// ConstantFoldInstruction - Try to constant fold the specified instruction. /// If successful, the constant result is returned, if not, null is returned. /// Note that this fails if not all of the operands are constant. Otherwise, /// this function can only fail when attempting to fold instructions like loads /// and stores, which have no constant expression form. - Constant *ConstantFoldInstruction(Instruction *I, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr); +Constant *ConstantFoldInstruction(Instruction *I, const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr); /// ConstantFoldConstantExpression - Attempt to fold the constant expression /// using the specified DataLayout. If successful, the constant /// result is returned, if not, null is returned.
- Constant * - ConstantFoldConstantExpression(const ConstantExpr *CE, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr); +Constant * +ConstantFoldConstantExpression(const ConstantExpr *CE, const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr); + +/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the +/// specified operands. If successful, the constant result is returned, if not, +/// null is returned. Note that this function can fail when attempting to +/// fold instructions like loads and stores, which have no constant expression +/// form. +/// +Constant *ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops, + const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr); /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the /// specified operands. If successful, the constant result is returned, if not, @@ -52,19 +68,32 @@ namespace llvm { /// fold instructions like loads and stores, which have no constant expression /// form. /// - Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, - ArrayRef<Constant *> Ops, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr); +/// This function doesn't work for compares (use ConstantFoldCompareInstOperands +/// for this) and GEPs. +Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, + ArrayRef<Constant *> Ops, + const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr); /// ConstantFoldCompareInstOperands - Attempt to constant fold a compare /// instruction (icmp/fcmp) with the specified operands. If it fails, it /// returns a constant expression of the specified operands. 
/// - Constant * - ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, - Constant *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr); +Constant * +ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, + Constant *RHS, const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr); + +/// \brief Attempt to constant fold a binary operation with the specified +/// operands. If it fails, it returns a constant expression of the specified +/// operands. +Constant *ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, + Constant *RHS, const DataLayout &DL); + +/// \brief Attempt to constant fold a cast with the specified operand. If it +/// fails, it returns a constant expression of the specified operand. +Constant *ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, + const DataLayout &DL); /// ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue /// instruction with the specified operands and indices. The constant result is @@ -86,7 +115,7 @@ Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx); /// ConstantFoldLoadFromConstPtr - Return the value that a load from C would /// produce if it is constant and determinable. If this is not determinable, /// return null. -Constant *ConstantFoldLoadFromConstPtr(Constant *C, const DataLayout &DL); +Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &DL); /// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a /// getelementptr constantexpr, return the constant value being addressed by the @@ -98,7 +127,7 @@ Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE); /// return the constant value being addressed by a virtual load, or null if /// something is funny and we can't decide. 
Constant *ConstantFoldLoadThroughGEPIndices(Constant *C, - ArrayRef<Constant*> Indices); + ArrayRef<Constant *> Indices); /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h index 42932bfd3491..fafd5d00b481 100644 --- a/include/llvm/Analysis/DemandedBits.h +++ b/include/llvm/Analysis/DemandedBits.h @@ -26,6 +26,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/PassManager.h" namespace llvm { @@ -35,40 +36,81 @@ class Instruction; class DominatorTree; class AssumptionCache; -struct DemandedBits : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - DemandedBits(); +class DemandedBits { +public: + DemandedBits(Function &F, AssumptionCache &AC, DominatorTree &DT) : + F(F), AC(AC), DT(DT), Analyzed(false) {} - bool runOnFunction(Function& F) override; - void getAnalysisUsage(AnalysisUsage& AU) const override; - void print(raw_ostream &OS, const Module *M) const override; - /// Return the bits demanded from instruction I. APInt getDemandedBits(Instruction *I); /// Return true if, during analysis, I could not be reached. bool isInstructionDead(Instruction *I); + + void print(raw_ostream &OS); private: + Function &F; + AssumptionCache &AC; + DominatorTree &DT; + void performAnalysis(); void determineLiveOperandBits(const Instruction *UserI, - const Instruction *I, unsigned OperandNo, - const APInt &AOut, APInt &AB, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2); - - AssumptionCache *AC; - DominatorTree *DT; - Function *F; + const Instruction *I, unsigned OperandNo, + const APInt &AOut, APInt &AB, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2); + bool Analyzed; // The set of visited instructions (non-integer-typed only). 
- SmallPtrSet<Instruction*, 128> Visited; + SmallPtrSet<Instruction*, 32> Visited; DenseMap<Instruction *, APInt> AliveBits; }; +class DemandedBitsWrapperPass : public FunctionPass { +private: + mutable Optional<DemandedBits> DB; +public: + static char ID; // Pass identification, replacement for typeid + DemandedBitsWrapperPass(); + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Clean up memory in between runs + void releaseMemory() override; + + DemandedBits &getDemandedBits() { return *DB; } + + void print(raw_ostream &OS, const Module *M) const override; +}; + +/// An analysis that produces \c DemandedBits for a function. +class DemandedBitsAnalysis : public AnalysisInfoMixin<DemandedBitsAnalysis> { + friend AnalysisInfoMixin<DemandedBitsAnalysis>; + static char PassID; + +public: + /// \brief Provide the result typedef for this analysis pass. + typedef DemandedBits Result; + + /// \brief Run the analysis pass over a function and produce demanded bits + /// information. + DemandedBits run(Function &F, AnalysisManager<Function> &AM); +}; + +/// \brief Printer pass for DemandedBits +class DemandedBitsPrinterPass : public PassInfoMixin<DemandedBitsPrinterPass> { + raw_ostream &OS; + +public: + explicit DemandedBitsPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); +}; + /// Create a demanded bits analysis pass. 
-FunctionPass *createDemandedBitsPass(); +FunctionPass *createDemandedBitsWrapperPass(); } // End llvm namespace diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h index 5290552b41dc..32dd367a9c0a 100644 --- a/include/llvm/Analysis/DependenceAnalysis.h +++ b/include/llvm/Analysis/DependenceAnalysis.h @@ -41,12 +41,12 @@ #define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H #include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" namespace llvm { +template <typename T> class ArrayRef; class Loop; class LoopInfo; class ScalarEvolution; @@ -206,7 +206,7 @@ namespace llvm { private: Instruction *Src, *Dst; const Dependence *NextPredecessor, *NextSuccessor; - friend class DependenceAnalysis; + friend class DependenceInfo; }; /// FullDependence - This class represents a dependence between two memory @@ -274,16 +274,17 @@ namespace llvm { bool LoopIndependent; bool Consistent; // Init to true, then refine. std::unique_ptr<DVEntry[]> DV; - friend class DependenceAnalysis; + friend class DependenceInfo; }; - /// DependenceAnalysis - This class is the main dependence-analysis driver. + /// DependenceInfo - This class is the main dependence-analysis driver. /// - class DependenceAnalysis : public FunctionPass { - void operator=(const DependenceAnalysis &) = delete; - DependenceAnalysis(const DependenceAnalysis &) = delete; - + class DependenceInfo { public: + DependenceInfo(Function *F, AliasAnalysis *AA, ScalarEvolution *SE, + LoopInfo *LI) + : AA(AA), SE(SE), LI(LI), F(F) {} + /// depends - Tests for a dependence between the Src and Dst instructions. /// Returns NULL if no dependence; otherwise, returns a Dependence (or a /// FullDependence) with as much information as can be gleaned. @@ -336,6 +337,8 @@ namespace llvm { /// both loops. 
const SCEV *getSplitIteration(const Dependence &Dep, unsigned Level); + Function *getFunction() const { return F; } + private: AliasAnalysis *AA; ScalarEvolution *SE; @@ -919,22 +922,41 @@ namespace llvm { bool tryDelinearize(Instruction *Src, Instruction *Dst, SmallVectorImpl<Subscript> &Pair); + }; // class DependenceInfo + /// \brief AnalysisPass to compute dependence information in a function + class DependenceAnalysis : public AnalysisInfoMixin<DependenceAnalysis> { + public: + typedef DependenceInfo Result; + Result run(Function &F, FunctionAnalysisManager &FAM); + + private: + static char PassID; + friend struct AnalysisInfoMixin<DependenceAnalysis>; + }; // class DependenceAnalysis + + /// \brief Legacy pass manager pass to access dependence information + class DependenceAnalysisWrapperPass : public FunctionPass { public: static char ID; // Class identification, replacement for typeinfo - DependenceAnalysis() : FunctionPass(ID) { - initializeDependenceAnalysisPass(*PassRegistry::getPassRegistry()); + DependenceAnalysisWrapperPass() : FunctionPass(ID) { + initializeDependenceAnalysisWrapperPassPass( + *PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const override; void print(raw_ostream &, const Module * = nullptr) const override; - }; // class DependenceAnalysis + DependenceInfo &getDI() const; + + private: + std::unique_ptr<DependenceInfo> info; + }; // class DependenceAnalysisWrapperPass /// createDependenceAnalysisPass - This creates an instance of the - /// DependenceAnalysis pass. - FunctionPass *createDependenceAnalysisPass(); + /// DependenceAnalysis wrapper pass. 
+ FunctionPass *createDependenceAnalysisWrapperPass(); } // namespace llvm diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h index fb730054a8e5..79672e4e4225 100644 --- a/include/llvm/Analysis/DominanceFrontier.h +++ b/include/llvm/Analysis/DominanceFrontier.h @@ -19,6 +19,7 @@ #define LLVM_ANALYSIS_DOMINANCEFRONTIER_H #include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManager.h" #include <map> #include <set> @@ -133,63 +134,24 @@ public: const DomSetType &calculate(const DomTreeT &DT, const DomTreeNodeT *Node); }; -class DominanceFrontier : public FunctionPass { - ForwardDominanceFrontierBase<BasicBlock> Base; - +class DominanceFrontier : public ForwardDominanceFrontierBase<BasicBlock> { public: typedef DominatorTreeBase<BasicBlock> DomTreeT; typedef DomTreeNodeBase<BasicBlock> DomTreeNodeT; typedef DominanceFrontierBase<BasicBlock>::DomSetType DomSetType; typedef DominanceFrontierBase<BasicBlock>::iterator iterator; typedef DominanceFrontierBase<BasicBlock>::const_iterator const_iterator; +}; +class DominanceFrontierWrapperPass : public FunctionPass { + DominanceFrontier DF; +public: static char ID; // Pass ID, replacement for typeid - DominanceFrontier(); - - ForwardDominanceFrontierBase<BasicBlock> &getBase() { return Base; } - - inline const std::vector<BasicBlock *> &getRoots() const { - return Base.getRoots(); - } - - BasicBlock *getRoot() const { return Base.getRoot(); } - - bool isPostDominator() const { return Base.isPostDominator(); } - - iterator begin() { return Base.begin(); } + DominanceFrontierWrapperPass(); - const_iterator begin() const { return Base.begin(); } - - iterator end() { return Base.end(); } - - const_iterator end() const { return Base.end(); } - - iterator find(BasicBlock *B) { return Base.find(B); } - - const_iterator find(BasicBlock *B) const { return Base.find(B); } - - iterator addBasicBlock(BasicBlock *BB, const DomSetType &frontier) { - return Base.addBasicBlock(BB, frontier); 
- } - - void removeBlock(BasicBlock *BB) { return Base.removeBlock(BB); } - - void addToFrontier(iterator I, BasicBlock *Node) { - return Base.addToFrontier(I, Node); - } - - void removeFromFrontier(iterator I, BasicBlock *Node) { - return Base.removeFromFrontier(I, Node); - } - - bool compareDomSet(DomSetType &DS1, const DomSetType &DS2) const { - return Base.compareDomSet(DS1, DS2); - } - - bool compare(DominanceFrontierBase<BasicBlock> &Other) const { - return Base.compare(Other); - } + DominanceFrontier &getDominanceFrontier() { return DF; } + const DominanceFrontier &getDominanceFrontier() const { return DF; } void releaseMemory() override; @@ -205,6 +167,30 @@ public: extern template class DominanceFrontierBase<BasicBlock>; extern template class ForwardDominanceFrontierBase<BasicBlock>; +/// \brief Analysis pass which computes a \c DominanceFrontier. +class DominanceFrontierAnalysis + : public AnalysisInfoMixin<DominanceFrontierAnalysis> { + friend AnalysisInfoMixin<DominanceFrontierAnalysis>; + static char PassID; + +public: + /// \brief Provide the result typedef for this analysis pass. + typedef DominanceFrontier Result; + + /// \brief Run the analysis pass over a function and produce a dominance frontier. + DominanceFrontier run(Function &F, AnalysisManager<Function> &AM); +}; + +/// \brief Printer pass for the \c DominanceFrontier.
+class DominanceFrontierPrinterPass + : public PassInfoMixin<DominanceFrontierPrinterPass> { + raw_ostream &OS; + +public: + explicit DominanceFrontierPrinterPass(raw_ostream &OS); + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); +}; + } // End llvm namespace #endif diff --git a/include/llvm/Analysis/EHPersonalities.h b/include/llvm/Analysis/EHPersonalities.h index 59e9672b88e5..a26c575cfe10 100644 --- a/include/llvm/Analysis/EHPersonalities.h +++ b/include/llvm/Analysis/EHPersonalities.h @@ -23,12 +23,15 @@ enum class EHPersonality { Unknown, GNU_Ada, GNU_C, + GNU_C_SjLj, GNU_CXX, + GNU_CXX_SjLj, GNU_ObjC, MSVC_X86SEH, MSVC_Win64SEH, MSVC_CXX, - CoreCLR + CoreCLR, + Rust }; /// \brief See if the given exception handling personality function is one diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h index bcd102e7ded2..4c0a98949778 100644 --- a/include/llvm/Analysis/GlobalsModRef.h +++ b/include/llvm/Analysis/GlobalsModRef.h @@ -35,6 +35,7 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> { class FunctionInfo; const DataLayout &DL; + const TargetLibraryInfo &TLI; /// The globals that do not have their addresses taken. SmallPtrSet<const GlobalValue *, 8> NonAddressTakenGlobals; @@ -76,6 +77,7 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> { public: GlobalsAAResult(GlobalsAAResult &&Arg); + ~GlobalsAAResult(); static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI, CallGraph &CG); @@ -116,20 +118,14 @@ private: }; /// Analysis pass providing a never-invalidated alias analysis result. -class GlobalsAA { +class GlobalsAA : public AnalysisInfoMixin<GlobalsAA> { + friend AnalysisInfoMixin<GlobalsAA>; + static char PassID; + public: typedef GlobalsAAResult Result; - /// \brief Opaque, unique identifier for this analysis pass. 
- static void *ID() { return (void *)&PassID; } - - GlobalsAAResult run(Module &M, AnalysisManager<Module> *AM); - - /// \brief Provide access to a name for this pass for debugging purposes. - static StringRef name() { return "GlobalsAA"; } - -private: - static char PassID; + GlobalsAAResult run(Module &M, AnalysisManager<Module> &AM); }; /// Legacy wrapper pass to provide the GlobalsAAResult object. diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index 37d01490dac6..e68a77526b96 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -117,7 +117,7 @@ private: mutable ilist_node<IVStrideUse> Sentinel; }; -class IVUsers : public LoopPass { +class IVUsers { friend class IVStrideUse; Loop *L; AssumptionCache *AC; @@ -133,15 +133,9 @@ class IVUsers : public LoopPass { // Ephemeral values used by @llvm.assume in this function. SmallPtrSet<const Value *, 32> EphValues; - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool runOnLoop(Loop *L, LPPassManager &LPM) override; - - void releaseMemory() override; - public: - static char ID; // Pass ID, replacement for typeid - IVUsers(); + IVUsers(Loop *L, AssumptionCache *AC, LoopInfo *LI, DominatorTree *DT, + ScalarEvolution *SE); Loop *getLoop() const { return L; } @@ -173,16 +167,58 @@ public: return Processed.count(Inst); } - void print(raw_ostream &OS, const Module* = nullptr) const override; + void releaseMemory(); + + void print(raw_ostream &OS, const Module * = nullptr) const; /// dump - This method is used for debugging. 
void dump() const; + protected: bool AddUsersImpl(Instruction *I, SmallPtrSetImpl<Loop*> &SimpleLoopNests); }; Pass *createIVUsersPass(); +class IVUsersWrapperPass : public LoopPass { + std::unique_ptr<IVUsers> IU; + +public: + static char ID; + + IVUsersWrapperPass(); + + IVUsers &getIU() { return *IU; } + const IVUsers &getIU() const { return *IU; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void releaseMemory() override; + + void print(raw_ostream &OS, const Module * = nullptr) const override; +}; + +/// Analysis pass that exposes the \c IVUsers for a loop. +class IVUsersAnalysis : public AnalysisInfoMixin<IVUsersAnalysis> { + friend AnalysisInfoMixin<IVUsersAnalysis>; + static char PassID; + +public: + typedef IVUsers Result; + + IVUsers run(Loop &L, AnalysisManager<Loop> &AM); +}; + +/// Printer pass for the \c IVUsers for a loop. +class IVUsersPrinterPass : public PassInfoMixin<IVUsersPrinterPass> { + raw_ostream &OS; + +public: + explicit IVUsersPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM); +}; } #endif diff --git a/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h new file mode 100644 index 000000000000..007e4d8602fa --- /dev/null +++ b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h @@ -0,0 +1,67 @@ +//===- IndirectCallPromotionAnalysis.h - Indirect call analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// Interface to identify indirect call promotion candidates. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INDIRECTCALLPROMOTIONANALYSIS_H +#define LLVM_ANALYSIS_INDIRECTCALLPROMOTIONANALYSIS_H + +#include "llvm/ProfileData/InstrProf.h" + +namespace llvm { + +class Instruction; + +// Class for identifying profitable indirect call promotion candidates when +// the indirect-call value profile metadata is available. +class ICallPromotionAnalysis { +private: + // Allocate space to read the profile annotation. + std::unique_ptr<InstrProfValueData[]> ValueDataArray; + + // Count is the call count for the direct-call target and + // TotalCount is the call count for the indirect-call callsite. + // Return true if we should promote this indirect-call target. + bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount); + + // Returns the number of profitable candidates to promote for the + // current ValueDataArray and the given \p Inst. + uint32_t getProfitablePromotionCandidates(const Instruction *Inst, + uint32_t NumVals, + uint64_t TotalCount); + + // Noncopyable + ICallPromotionAnalysis(const ICallPromotionAnalysis &other) = delete; + ICallPromotionAnalysis & + operator=(const ICallPromotionAnalysis &other) = delete; + +public: + ICallPromotionAnalysis(); + + /// \brief Returns reference to array of InstrProfValueData for the given + /// instruction \p I. + /// + /// The \p NumVals, \p TotalCount and \p NumCandidates + /// are set to the number of values in the array, the total profile count + /// of the indirect call \p I, and the number of profitable candidates + /// in the given array (which is sorted in reverse order of profitability). + /// + /// The returned array space is owned by this class, and overwritten on + /// subsequent calls.
+ ArrayRef<InstrProfValueData> + getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals, + uint64_t &TotalCount, + uint32_t &NumCandidates); +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/Analysis/IndirectCallSiteVisitor.h b/include/llvm/Analysis/IndirectCallSiteVisitor.h new file mode 100644 index 000000000000..71a8cb886321 --- /dev/null +++ b/include/llvm/Analysis/IndirectCallSiteVisitor.h @@ -0,0 +1,43 @@ +//===-- IndirectCallSiteVisitor.h - indirect call-sites visitor -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a visitor class and a helper function that find +// all indirect call-sites in a function. + +#include "llvm/IR/InstVisitor.h" +#include <vector> + +namespace llvm { +// Visitor class that finds all indirect call sites. +struct PGOIndirectCallSiteVisitor + : public InstVisitor<PGOIndirectCallSiteVisitor> { + std::vector<Instruction *> IndirectCallInsts; + PGOIndirectCallSiteVisitor() {} + + void visitCallSite(CallSite CS) { + if (CS.getCalledFunction() || !CS.getCalledValue()) + return; + Instruction *I = CS.getInstruction(); + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (CI->isInlineAsm()) + return; + } + if (isa<Constant>(CS.getCalledValue())) + return; + IndirectCallInsts.push_back(I); + } +}; + +// Helper function that finds all indirect call sites.
+static inline std::vector<Instruction *> findIndirectCallSites(Function &F) { + PGOIndirectCallSiteVisitor ICV; + ICV.visit(F); + return ICV.IndirectCallInsts; +} +} diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index 35f991cb3f67..2928d2be30e5 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -23,6 +23,7 @@ class AssumptionCacheTracker; class CallSite; class DataLayout; class Function; +class ProfileSummaryInfo; class TargetTransformInfo; namespace InlineConstants { @@ -101,25 +102,31 @@ public: /// \brief Get an InlineCost object representing the cost of inlining this /// callsite. /// -/// Note that threshold is passed into this function. Only costs below the -/// threshold are computed with any accuracy. The threshold can be used to -/// bound the computation necessary to determine whether the cost is +/// Note that a default threshold is passed into this function. This threshold +/// could be modified based on callsite's properties and only costs below this +/// new threshold are computed with any accuracy. The new threshold can be +/// used to bound the computation necessary to determine whether the cost is /// sufficiently low to warrant inlining. /// /// Also note that calling this function *dynamically* computes the cost of /// inlining the callsite. It is an expensive, heavyweight call. -InlineCost getInlineCost(CallSite CS, int Threshold, +InlineCost getInlineCost(CallSite CS, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT); + AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI); /// \brief Get an InlineCost with the callee explicitly specified. /// This allows you to calculate the cost of inlining a function via a /// pointer. This behaves exactly as the version with no explicit callee /// parameter in all other respects. 
// -InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold, +InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT); + AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI); + +int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel); + +/// \brief Return the default value of -inline-threshold. +int getDefaultInlineThreshold(); /// \brief Minimal filter to detect invalid constructs for inlining. bool isInlineViable(Function &Callee); diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index ed313dae9ab1..410fa4165a91 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -46,8 +46,7 @@ namespace llvm { class Type; class Value; - /// SimplifyAddInst - Given operands for an Add, see if we can - /// fold the result. If not, this returns null. + /// Given operands for an Add, fold the result or return null. Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -55,8 +54,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifySubInst - Given operands for a Sub, see if we can - /// fold the result. If not, this returns null. + /// Given operands for a Sub, fold the result or return null. Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -64,8 +62,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// Given operands for an FAdd, see if we can fold the result. If not, this - /// returns null. + /// Given operands for an FAdd, fold the result or return null. 
Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -73,8 +70,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// Given operands for an FSub, see if we can fold the result. If not, this - /// returns null. + /// Given operands for an FSub, fold the result or return null. Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -82,8 +78,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// Given operands for an FMul, see if we can fold the result. If not, this - /// returns null. + /// Given operands for an FMul, fold the result or return null. Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -91,32 +86,28 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyMulInst - Given operands for a Mul, see if we can - /// fold the result. If not, this returns null. + /// Given operands for a Mul, fold the result or return null. Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifySDivInst - Given operands for an SDiv, see if we can - /// fold the result. If not, this returns null. + /// Given operands for an SDiv, fold the result or return null. Value *SimplifySDivInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyUDivInst - Given operands for a UDiv, see if we can - /// fold the result. If not, this returns null. + /// Given operands for a UDiv, fold the result or return null. 
Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyFDivInst - Given operands for an FDiv, see if we can - /// fold the result. If not, this returns null. + /// Given operands for an FDiv, fold the result or return null. Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -124,24 +115,21 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifySRemInst - Given operands for an SRem, see if we can - /// fold the result. If not, this returns null. + /// Given operands for an SRem, fold the result or return null. Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyURemInst - Given operands for a URem, see if we can - /// fold the result. If not, this returns null. + /// Given operands for a URem, fold the result or return null. Value *SimplifyURemInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyFRemInst - Given operands for an FRem, see if we can - /// fold the result. If not, this returns null. + /// Given operands for an FRem, fold the result or return null. Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -149,8 +137,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyShlInst - Given operands for a Shl, see if we can - /// fold the result. If not, this returns null. 
+ /// Given operands for a Shl, fold the result or return null. Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -158,8 +145,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyLShrInst - Given operands for a LShr, see if we can - /// fold the result. If not, this returns null. + /// Given operands for a LShr, fold the result or return null. Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -167,8 +153,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyAShrInst - Given operands for a AShr, see if we can - /// fold the result. If not, this returns null. + /// Given operands for a AShr, fold the result or return null. Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -176,32 +161,28 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyAndInst - Given operands for an And, see if we can - /// fold the result. If not, this returns null. + /// Given operands for an And, fold the result or return null. Value *SimplifyAndInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyOrInst - Given operands for an Or, see if we can - /// fold the result. If not, this returns null. + /// Given operands for an Or, fold the result or return null. Value *SimplifyOrInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyXorInst - Given operands for a Xor, see if we can - /// fold the result.
If not, this returns null. + /// Given operands for an Xor, fold the result or return null. Value *SimplifyXorInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can - /// fold the result. If not, this returns null. + /// Given operands for an ICmpInst, fold the result or return null. Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -209,8 +190,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can - /// fold the result. If not, this returns null. + /// Given operands for an FCmpInst, fold the result or return null. Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -218,8 +198,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold - /// the result. If not, this returns null. + /// Given operands for a SelectInst, fold the result or return null. Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -227,16 +206,15 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can - /// fold the result. If not, this returns null. - Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL, + /// Given operands for a GetElementPtrInst, fold the result or return null. 
+ Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, + const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we - /// can fold the result. If not, this returns null. + /// Given operands for an InsertValueInst, fold the result or return null. Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -244,8 +222,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// \brief Given operands for an ExtractValueInst, see if we can fold the - /// result. If not, this returns null. + /// Given operands for an ExtractValueInst, fold the result or return null. Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -253,8 +230,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// \brief Given operands for an ExtractElementInst, see if we can fold the - /// result. If not, this returns null. + /// Given operands for an ExtractElementInst, fold the result or return null. Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -262,8 +238,7 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyTruncInst - Given operands for an TruncInst, see if we can fold - /// the result. If not, this returns null. + /// Given operands for an TruncInst, fold the result or return null. Value *SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -273,8 +248,7 @@ namespace llvm { //=== Helper functions for higher up the class hierarchy. 
- /// SimplifyCmpInst - Given operands for a CmpInst, see if we can - /// fold the result. If not, this returns null. + /// Given operands for a CmpInst, fold the result or return null. Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -282,16 +256,15 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can - /// fold the result. If not, this returns null. + /// Given operands for a BinaryOperator, fold the result or return null. Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can - /// fold the result. If not, this returns null. + + /// Given operands for an FP BinaryOperator, fold the result or return null. /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, @@ -301,10 +274,8 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// \brief Given a function and iterators over arguments, see if we can fold - /// the result. - /// - /// If this call could not be simplified returns null. + /// Given a function and iterators over arguments, fold the result or return + /// null. Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -312,25 +283,21 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// \brief Given a function and set of arguments, see if we can fold the - /// result. 
- /// - /// If this call could not be simplified returns null. + /// Given a function and set of arguments, fold the result or return null. Value *SimplifyCall(Value *V, ArrayRef<Value *> Args, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// SimplifyInstruction - See if we can compute a simplified version of this - /// instruction. If not, this returns null. + /// See if we can compute a simplified version of this instruction. If not, + /// return null. Value *SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr); - /// \brief Replace all uses of 'I' with 'SimpleV' and simplify the uses - /// recursively. + /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. /// /// This first performs a normal RAUW of I with SimpleV. It then recursively /// attempts to simplify those users updated by the operation. The 'I' @@ -342,7 +309,7 @@ namespace llvm { const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr); - /// \brief Recursively attempt to simplify an instruction. + /// Recursively attempt to simplify an instruction. /// /// This routine uses SimplifyInstruction to simplify 'I', and if successful /// replaces uses of 'I' with the simplified value. 
It then recurses on each diff --git a/include/llvm/Analysis/Interval.h b/include/llvm/Analysis/Interval.h index 01eba3f16c01..a904753adaab 100644 --- a/include/llvm/Analysis/Interval.h +++ b/include/llvm/Analysis/Interval.h @@ -67,8 +67,9 @@ public: /// contains - Find out if a basic block is in this interval inline bool contains(BasicBlock *BB) const { - for (unsigned i = 0; i < Nodes.size(); ++i) - if (Nodes[i] == BB) return true; + for (BasicBlock *Node : Nodes) + if (Node == BB) + return true; return false; // I don't want the dependency on <algorithm> //return find(Nodes.begin(), Nodes.end(), BB) != Nodes.end(); @@ -76,8 +77,9 @@ public: /// isSuccessor - find out if a basic block is a successor of this Interval inline bool isSuccessor(BasicBlock *BB) const { - for (unsigned i = 0; i < Successors.size(); ++i) - if (Successors[i] == BB) return true; + for (BasicBlock *Successor : Successors) + if (Successor == BB) + return true; return false; // I don't want the dependency on <algorithm> //return find(Successors.begin(), Successors.end(), BB) != Successors.end(); diff --git a/include/llvm/Analysis/IteratedDominanceFrontier.h b/include/llvm/Analysis/IteratedDominanceFrontier.h index a1ded2554d44..37da5617b913 100644 --- a/include/llvm/Analysis/IteratedDominanceFrontier.h +++ b/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -24,18 +24,14 @@ #ifndef LLVM_ANALYSIS_IDF_H #define LLVM_ANALYSIS_IDF_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" namespace llvm { -class BasicBlock; -template <class T> class DomTreeNodeBase; -typedef DomTreeNodeBase<BasicBlock> DomTreeNode; -template <class T> class DominatorTreeBase; - /// \brief Determine the iterated dominance frontier, given a set of defining /// blocks, and optionally, a set of live-in blocks. 
/// @@ -44,6 +40,9 @@ template <class T> class DominatorTreeBase; /// This algorithm is a linear time computation of Iterated Dominance Frontiers, /// pruned using the live-in set. /// By default, liveness is not used to prune the IDF computation. +/// The template parameters should be either BasicBlock* or Inverse<BasicBlock +/// *>, depending on if you want the forward or reverse IDF. +template <class NodeTy> class IDFCalculator { public: @@ -92,5 +91,7 @@ private: const SmallPtrSetImpl<BasicBlock *> *DefBlocks; SmallVector<BasicBlock *, 32> PHIBlocks; }; +typedef IDFCalculator<BasicBlock *> ForwardIDFCalculator; +typedef IDFCalculator<Inverse<BasicBlock *>> ReverseIDFCalculator; } #endif diff --git a/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/include/llvm/Analysis/LazyBlockFrequencyInfo.h new file mode 100644 index 000000000000..a2d24bb9eb88 --- /dev/null +++ b/include/llvm/Analysis/LazyBlockFrequencyInfo.h @@ -0,0 +1,125 @@ +//===- LazyBlockFrequencyInfo.h - Lazy Block Frequency Analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an alternative analysis pass to BlockFrequencyInfoWrapperPass. The +// difference is that with this pass the block frequencies are not computed when +// the analysis pass is executed but rather when the BFI results is explicitly +// requested by the analysis client. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LAZYBLOCKFREQUENCYINFO_H +#define LLVM_ANALYSIS_LAZYBLOCKFREQUENCYINFO_H + +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Pass.h" + +namespace llvm { +class AnalysisUsage; +class BranchProbabilityInfo; +class Function; +class LoopInfo; + +/// \brief This is an alternative analysis pass to +/// BlockFrequencyInfoWrapperPass. The difference is that with this pass the +/// block frequencies are not computed when the analysis pass is executed but +/// rather when the BFI result is explicitly requested by the analysis client. +/// +/// There are some additional requirements for any client pass that wants to use +/// the analysis: +/// +/// 1. The pass needs to initialize dependent passes with: +/// +/// INITIALIZE_PASS_DEPENDENCY(LazyBFIPass) +/// +/// 2. Similarly, getAnalysisUsage should call: +/// +/// LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU) +/// +/// 3. The computed BFI should be requested with +/// getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() before either LoopInfo +/// or BPI could be invalidated for example by changing the CFG. +/// +/// Note that it is expected that we wouldn't need this functionality for the +/// new PM since with the new PM, analyses are executed on demand. +class LazyBlockFrequencyInfoPass : public FunctionPass { + + /// Wraps a BFI to allow lazy computation of the block frequencies. + /// + /// A pass that only conditionally uses BFI can unconditionally require the + /// analysis without paying for the overhead if BFI doesn't end up being used. + class LazyBlockFrequencyInfo { + public: + LazyBlockFrequencyInfo() + : Calculated(false), F(nullptr), BPI(nullptr), LI(nullptr) {} + + /// Set up the per-function input. 
+ void setAnalysis(const Function *F, const BranchProbabilityInfo *BPI, + const LoopInfo *LI) { + this->F = F; + this->BPI = BPI; + this->LI = LI; + } + + /// Retrieve the BFI with the block frequencies computed. + BlockFrequencyInfo &getCalculated() { + if (!Calculated) { + assert(F && BPI && LI && "call setAnalysis"); + BFI.calculate(*F, *BPI, *LI); + Calculated = true; + } + return BFI; + } + + const BlockFrequencyInfo &getCalculated() const { + return const_cast<LazyBlockFrequencyInfo *>(this)->getCalculated(); + } + + void releaseMemory() { + BFI.releaseMemory(); + Calculated = false; + setAnalysis(nullptr, nullptr, nullptr); + } + + private: + BlockFrequencyInfo BFI; + bool Calculated; + const Function *F; + const BranchProbabilityInfo *BPI; + const LoopInfo *LI; + }; + + LazyBlockFrequencyInfo LBFI; + +public: + static char ID; + + LazyBlockFrequencyInfoPass(); + + /// \brief Compute and return the block frequencies. + BlockFrequencyInfo &getBFI() { return LBFI.getCalculated(); } + + /// \brief Compute and return the block frequencies. + const BlockFrequencyInfo &getBFI() const { return LBFI.getCalculated(); } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Helper for client passes to set up the analysis usage on behalf of this + /// pass. + static void getLazyBFIAnalysisUsage(AnalysisUsage &AU); + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M) const override; +}; + +/// \brief Helper for client passes to initialize dependent passes for LBFI. 
+void initializeLazyBFIPassPass(PassRegistry &Registry); +} +#endif diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index e02f3ab2de1f..9f62eaa2e9f8 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -48,7 +48,9 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/raw_ostream.h" #include <iterator> +#include <utility> namespace llvm { class PreservedAnalyses; @@ -104,9 +106,85 @@ class LazyCallGraph { public: class Node; class SCC; - class iterator; - typedef SmallVector<PointerUnion<Function *, Node *>, 4> NodeVectorT; - typedef SmallVectorImpl<PointerUnion<Function *, Node *>> NodeVectorImplT; + class RefSCC; + class edge_iterator; + class call_edge_iterator; + + /// A class used to represent edges in the call graph. + /// + /// The lazy call graph models both *call* edges and *reference* edges. Call + /// edges are much what you would expect, and exist when there is a 'call' or + /// 'invoke' instruction of some function. Reference edges are also tracked + /// along side these, and exist whenever any instruction (transitively + /// through its operands) references a function. All call edges are + /// inherently reference edges, and so the reference graph forms a superset + /// of the formal call graph. + /// + /// Furthermore, edges also may point to raw \c Function objects when those + /// functions have not been scanned and incorporated into the graph (yet). + /// This is one of the primary ways in which the graph can be lazy. When + /// functions are scanned and fully incorporated into the graph, all of the + /// edges referencing them are updated to point to the graph \c Node objects + /// instead of to the raw \c Function objects. 
This class even provides + /// methods to trigger this scan on-demand by attempting to get the target + /// node of the graph and providing a reference back to the graph in order to + /// lazily build it if necessary. + /// + /// All of these forms of edges are fundamentally represented as outgoing + /// edges. The edges are stored in the source node and point at the target + /// node. This allows the edge structure itself to be a very compact data + /// structure: essentially a tagged pointer. + class Edge { + public: + /// The kind of edge in the graph. + enum Kind : bool { Ref = false, Call = true }; + + Edge(); + explicit Edge(Function &F, Kind K); + explicit Edge(Node &N, Kind K); + + /// Test whether the edge is null. + /// + /// This happens when an edge has been deleted. We leave the edge objects + /// around but clear them. + operator bool() const; + + /// Test whether the edge represents a direct call to a function. + /// + /// This requires that the edge is not null. + bool isCall() const; + + /// Get the function referenced by this edge. + /// + /// This requires that the edge is not null, but will succeed whether we + /// have built a graph node for the function yet or not. + Function &getFunction() const; + + /// Get the call graph node referenced by this edge if one exists. + /// + /// This requires that the edge is not null. If we have built a graph node + /// for the function this edge points to, this will return that node, + /// otherwise it will return null. + Node *getNode() const; + + /// Get the call graph node for this edge, building it if necessary. + /// + /// This requires that the edge is not null. If we have not yet built + /// a graph node for the function this edge points to, this will first ask + /// the graph to build that node, inserting it into all the relevant + /// structures. 
+ Node &getNode(LazyCallGraph &G); + + private: + friend class LazyCallGraph::Node; + + PointerIntPair<PointerUnion<Function *, Node *>, 1, Kind> Value; + + void setKind(Kind K) { Value.setInt(K); } + }; + + typedef SmallVector<Edge, 4> EdgeVectorT; + typedef SmallVectorImpl<Edge> EdgeVectorImplT; /// A node in the call graph. /// @@ -121,35 +199,65 @@ public: Function &F; // We provide for the DFS numbering and Tarjan walk lowlink numbers to be - // stored directly within the node. + // stored directly within the node. These are both '-1' when nodes are part + // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk. int DFSNumber; int LowLink; - mutable NodeVectorT Callees; - DenseMap<Function *, size_t> CalleeIndexMap; + mutable EdgeVectorT Edges; + DenseMap<Function *, int> EdgeIndexMap; - /// Basic constructor implements the scanning of F into Callees and - /// CalleeIndexMap. + /// Basic constructor implements the scanning of F into Edges and + /// EdgeIndexMap. Node(LazyCallGraph &G, Function &F); - /// Internal helper to insert a callee. - void insertEdgeInternal(Function &Callee); + /// Internal helper to insert an edge to a function. + void insertEdgeInternal(Function &ChildF, Edge::Kind EK); - /// Internal helper to insert a callee. - void insertEdgeInternal(Node &CalleeN); + /// Internal helper to insert an edge to a node. + void insertEdgeInternal(Node &ChildN, Edge::Kind EK); - /// Internal helper to remove a callee from this node. - void removeEdgeInternal(Function &Callee); + /// Internal helper to change an edge kind. + void setEdgeKind(Function &ChildF, Edge::Kind EK); + + /// Internal helper to remove the edge to the given function. + void removeEdgeInternal(Function &ChildF); + + /// Print the name of this node's function. + friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) { + return OS << N.F.getName(); + } + + /// Dump the name of this node's function to stderr. 
+ void dump() const; public: - typedef LazyCallGraph::iterator iterator; + LazyCallGraph &getGraph() const { return *G; } Function &getFunction() const { return F; } - iterator begin() const { - return iterator(*G, Callees.begin(), Callees.end()); + edge_iterator begin() const { + return edge_iterator(Edges.begin(), Edges.end()); + } + edge_iterator end() const { return edge_iterator(Edges.end(), Edges.end()); } + + const Edge &operator[](int i) const { return Edges[i]; } + const Edge &operator[](Function &F) const { + assert(EdgeIndexMap.find(&F) != EdgeIndexMap.end() && "No such edge!"); + return Edges[EdgeIndexMap.find(&F)->second]; + } + const Edge &operator[](Node &N) const { return (*this)[N.getFunction()]; } + + call_edge_iterator call_begin() const { + return call_edge_iterator(Edges.begin(), Edges.end()); + } + call_edge_iterator call_end() const { + return call_edge_iterator(Edges.end(), Edges.end()); + } + + iterator_range<call_edge_iterator> calls() const { + return make_range(call_begin(), call_end()); } - iterator end() const { return iterator(*G, Callees.end(), Callees.end()); } /// Equality is defined as address equality. bool operator==(const Node &N) const { return this == &N; } @@ -162,101 +270,279 @@ public: /// be scanned for "calls" or uses of functions and its child information /// will be constructed. All of these results are accumulated and cached in /// the graph. - class iterator - : public iterator_adaptor_base<iterator, NodeVectorImplT::iterator, - std::forward_iterator_tag, Node> { + class edge_iterator + : public iterator_adaptor_base<edge_iterator, EdgeVectorImplT::iterator, + std::forward_iterator_tag> { friend class LazyCallGraph; friend class LazyCallGraph::Node; - LazyCallGraph *G; - NodeVectorImplT::iterator E; + EdgeVectorImplT::iterator E; - // Build the iterator for a specific position in a node list. 
- iterator(LazyCallGraph &G, NodeVectorImplT::iterator NI, - NodeVectorImplT::iterator E) - : iterator_adaptor_base(NI), G(&G), E(E) { - while (I != E && I->isNull()) + // Build the iterator for a specific position in the edge list. + edge_iterator(EdgeVectorImplT::iterator BaseI, + EdgeVectorImplT::iterator E) + : iterator_adaptor_base(BaseI), E(E) { + while (I != E && !*I) ++I; } public: - iterator() {} + edge_iterator() {} using iterator_adaptor_base::operator++; - iterator &operator++() { + edge_iterator &operator++() { do { ++I; - } while (I != E && I->isNull()); + } while (I != E && !*I); return *this; } + }; - reference operator*() const { - if (I->is<Node *>()) - return *I->get<Node *>(); + /// A lazy iterator over specifically call edges. + /// + /// This has the same iteration properties as the \c edge_iterator, but + /// restricts itself to edges which represent actual calls. + class call_edge_iterator + : public iterator_adaptor_base<call_edge_iterator, + EdgeVectorImplT::iterator, + std::forward_iterator_tag> { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + EdgeVectorImplT::iterator E; + + /// Advance the iterator to the next valid, call edge. + void advanceToNextEdge() { + while (I != E && (!*I || !I->isCall())) + ++I; + } + + // Build the iterator for a specific position in the edge list. + call_edge_iterator(EdgeVectorImplT::iterator BaseI, + EdgeVectorImplT::iterator E) + : iterator_adaptor_base(BaseI), E(E) { + advanceToNextEdge(); + } + + public: + call_edge_iterator() {} - Function *F = I->get<Function *>(); - Node &ChildN = G->get(*F); - *I = &ChildN; - return ChildN; + using iterator_adaptor_base::operator++; + call_edge_iterator &operator++() { + ++I; + advanceToNextEdge(); + return *this; } }; /// An SCC of the call graph. 
/// - /// This represents a Strongly Connected Component of the call graph as + /// This represents a Strongly Connected Component of the direct call graph + /// -- ignoring indirect calls and function references. It stores this as /// a collection of call graph nodes. While the order of nodes in the SCC is /// stable, it is not any particular order. + /// + /// The SCCs are nested within a \c RefSCC, see below for details about that + /// outer structure. SCCs do not support mutation of the call graph, that + /// must be done through the containing \c RefSCC in order to fully reason + /// about the ordering and connections of the graph. class SCC { friend class LazyCallGraph; friend class LazyCallGraph::Node; - LazyCallGraph *G; - SmallPtrSet<SCC *, 1> ParentSCCs; + RefSCC *OuterRefSCC; SmallVector<Node *, 1> Nodes; - SCC(LazyCallGraph &G) : G(&G) {} + template <typename NodeRangeT> + SCC(RefSCC &OuterRefSCC, NodeRangeT &&Nodes) + : OuterRefSCC(&OuterRefSCC), Nodes(std::forward<NodeRangeT>(Nodes)) {} + + void clear() { + OuterRefSCC = nullptr; + Nodes.clear(); + } + + /// Print a short description useful for debugging or logging. + /// + /// We print the function names in the SCC wrapped in '()'s and skipping + /// the middle functions if there are a large number. + // + // Note: this is defined inline to dodge issues with GCC's interpretation + // of enclosing namespaces for friend function declarations. + friend raw_ostream &operator<<(raw_ostream &OS, const SCC &C) { + OS << '('; + int i = 0; + for (LazyCallGraph::Node &N : C) { + if (i > 0) + OS << ", "; + // Elide the inner elements if there are too many. + if (i > 8) { + OS << "..., " << *C.Nodes.back(); + break; + } + OS << N; + ++i; + } + OS << ')'; + return OS; + } - void insert(Node &N); + /// Dump a short description of this SCC to stderr. 
+ void dump() const; - void - internalDFS(SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack, - SmallVectorImpl<Node *> &PendingSCCStack, Node *N, - SmallVectorImpl<SCC *> &ResultSCCs); +#ifndef NDEBUG + /// Verify invariants about the SCC. + /// + /// This will attempt to validate all of the basic invariants within an + /// SCC, but not that it is a strongly connected component per-se. Primarily + /// useful while building and updating the graph to check that basic + /// properties are in place rather than having inexplicable crashes later. + void verify(); +#endif public: - typedef SmallVectorImpl<Node *>::const_iterator iterator; - typedef pointee_iterator<SmallPtrSet<SCC *, 1>::const_iterator> - parent_iterator; + typedef pointee_iterator<SmallVectorImpl<Node *>::const_iterator> iterator; iterator begin() const { return Nodes.begin(); } iterator end() const { return Nodes.end(); } - parent_iterator parent_begin() const { return ParentSCCs.begin(); } - parent_iterator parent_end() const { return ParentSCCs.end(); } + int size() const { return Nodes.size(); } + + RefSCC &getOuterRefSCC() const { return *OuterRefSCC; } + + /// Provide a short name by printing this SCC to a std::string. + /// + /// This copes with the fact that we don't have a name per-se for an SCC + /// while still making the use of this in debugging and logging useful. + std::string getName() const { + std::string Name; + raw_string_ostream OS(Name); + OS << *this; + OS.flush(); + return Name; + } + }; + + /// A RefSCC of the call graph. + /// + /// This models a Strongly Connected Component of function reference edges in + /// the call graph. As opposed to actual SCCs, these can be used to scope + /// subgraphs of the module which are independent from other subgraphs of the + /// module because they do not reference it in any way. This is also the unit + /// where we do mutation of the graph in order to restrict mutations to those + /// which don't violate this independence. 
+ /// + /// A RefSCC contains a DAG of actual SCCs. All the nodes within the RefSCC + /// are necessarily within some actual SCC that nests within it. Since + /// a direct call *is* a reference, there will always be at least one RefSCC + /// around any SCC. + class RefSCC { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + LazyCallGraph *G; + SmallPtrSet<RefSCC *, 1> Parents; + + /// A postorder list of the inner SCCs. + SmallVector<SCC *, 4> SCCs; + + /// A map from SCC to index in the postorder list. + SmallDenseMap<SCC *, int, 4> SCCIndices; + + /// Fast-path constructor. RefSCCs should instead be constructed by calling + /// formRefSCCFast on the graph itself. + RefSCC(LazyCallGraph &G); + + /// Print a short description useful for debugging or logging. + /// + /// We print the SCCs wrapped in '[]'s and skipping the middle SCCs if + /// there are a large number. + // + // Note: this is defined inline to dodge issues with GCC's interpretation + // of enclosing namespaces for friend function declarations. + friend raw_ostream &operator<<(raw_ostream &OS, const RefSCC &RC) { + OS << '['; + int i = 0; + for (LazyCallGraph::SCC &C : RC) { + if (i > 0) + OS << ", "; + // Elide the inner elements if there are too many. + if (i > 4) { + OS << "..., " << *RC.SCCs.back(); + break; + } + OS << C; + ++i; + } + OS << ']'; + return OS; + } + + /// Dump a short description of this RefSCC to stderr. + void dump() const; + +#ifndef NDEBUG + /// Verify invariants about the RefSCC and all its SCCs. + /// + /// This will attempt to validate all of the invariants *within* the + /// RefSCC, but not that it is a strongly connected component of the larger + /// graph. This makes it useful even when partially through an update. + /// + /// Invariants checked: + /// - SCCs and their indices match. + /// - The SCCs list is in fact in post-order. 
+ void verify(); +#endif + + public: + typedef pointee_iterator<SmallVectorImpl<SCC *>::const_iterator> iterator; + typedef iterator_range<iterator> range; + typedef pointee_iterator<SmallPtrSetImpl<RefSCC *>::const_iterator> + parent_iterator; + + iterator begin() const { return SCCs.begin(); } + iterator end() const { return SCCs.end(); } + + ssize_t size() const { return SCCs.size(); } + + SCC &operator[](int Idx) { return *SCCs[Idx]; } + + iterator find(SCC &C) const { + return SCCs.begin() + SCCIndices.find(&C)->second; + } + + parent_iterator parent_begin() const { return Parents.begin(); } + parent_iterator parent_end() const { return Parents.end(); } iterator_range<parent_iterator> parents() const { return make_range(parent_begin(), parent_end()); } /// Test if this SCC is a parent of \a C. - bool isParentOf(const SCC &C) const { return C.isChildOf(*this); } + bool isParentOf(const RefSCC &C) const { return C.isChildOf(*this); } - /// Test if this SCC is an ancestor of \a C. - bool isAncestorOf(const SCC &C) const { return C.isDescendantOf(*this); } + /// Test if this RefSCC is an ancestor of \a C. + bool isAncestorOf(const RefSCC &C) const { return C.isDescendantOf(*this); } - /// Test if this SCC is a child of \a C. - bool isChildOf(const SCC &C) const { - return ParentSCCs.count(const_cast<SCC *>(&C)); + /// Test if this RefSCC is a child of \a C. + bool isChildOf(const RefSCC &C) const { + return Parents.count(const_cast<RefSCC *>(&C)); } - /// Test if this SCC is a descendant of \a C. - bool isDescendantOf(const SCC &C) const; + /// Test if this RefSCC is a descendant of \a C. + bool isDescendantOf(const RefSCC &C) const; - /// Short name useful for debugging or logging. + /// Provide a short name by printing this SCC to a std::string. /// - /// We use the name of the first function in the SCC to name the SCC for - /// the purposes of debugging and logging. 
- StringRef getName() const { return (*begin())->getFunction().getName(); } + /// This copes with the fact that we don't have a name per-se for an SCC + /// while still making the use of this in debugging and logging useful. + std::string getName() const { + std::string Name; + raw_string_ostream OS(Name); + OS << *this; + OS.flush(); + return Name; + } ///@{ /// \name Mutation API @@ -267,80 +553,151 @@ public: /// Note that these methods sometimes have complex runtimes, so be careful /// how you call them. - /// Insert an edge from one node in this SCC to another in this SCC. + /// Make an existing internal ref edge into a call edge. + /// + /// This may form a larger cycle and thus collapse SCCs into TargetN's SCC. + /// If that happens, the deleted SCC pointers are returned. These SCCs are + /// not in a valid state any longer but the pointers will remain valid + /// until destruction of the parent graph instance for the purpose of + /// clearing cached information. /// - /// By the definition of an SCC, this does not change the nature or make-up - /// of any SCCs. - void insertIntraSCCEdge(Node &CallerN, Node &CalleeN); + /// After this operation, both SourceN's SCC and TargetN's SCC may move + /// position within this RefSCC's postorder list. Any SCCs merged are + /// merged into the TargetN's SCC in order to preserve reachability analyses + /// which took place on that SCC. + SmallVector<SCC *, 1> switchInternalEdgeToCall(Node &SourceN, + Node &TargetN); + + /// Make an existing internal call edge into a ref edge. + /// + /// If SourceN and TargetN are part of a single SCC, it may be split up due + /// to breaking a cycle in the call edges that formed it. If that happens, + /// then this routine will insert new SCCs into the postorder list *before* + /// the SCC of TargetN (previously the SCC of both). 
This preserves + /// postorder as the TargetN can reach all of the other nodes by definition + /// of previously being in a single SCC formed by the cycle from SourceN to + /// TargetN. The newly added nodes are added *immediately* and contiguously + /// prior to the TargetN SCC and so they may be iterated starting from + /// there. + void switchInternalEdgeToRef(Node &SourceN, Node &TargetN); + + /// Make an existing outgoing ref edge into a call edge. + /// + /// Note that this is trivial as there are no cyclic impacts and there + /// remains a reference edge. + void switchOutgoingEdgeToCall(Node &SourceN, Node &TargetN); - /// Insert an edge whose tail is in this SCC and head is in some child SCC. + /// Make an existing outgoing call edge into a ref edge. /// - /// There must be an existing path from the caller to the callee. This - /// operation is inexpensive and does not change the set of SCCs in the - /// graph. - void insertOutgoingEdge(Node &CallerN, Node &CalleeN); + /// This is trivial as there are no cyclic impacts and there remains + /// a reference edge. + void switchOutgoingEdgeToRef(Node &SourceN, Node &TargetN); - /// Insert an edge whose tail is in a descendant SCC and head is in this - /// SCC. + /// Insert a ref edge from one node in this RefSCC to another in this + /// RefSCC. + /// + /// This is always a trivial operation as it doesn't change any part of the + /// graph structure besides connecting the two nodes. + /// + /// Note that we don't support directly inserting internal *call* edges + /// because that could change the graph structure and requires returning + /// information about what became invalid. As a consequence, the pattern + /// should be to first insert the necessary ref edge, and then to switch it + /// to a call edge if needed and handle any invalidation that results. See + /// the \c switchInternalEdgeToCall routine for details. 
+ void insertInternalRefEdge(Node &SourceN, Node &TargetN); + + /// Insert an edge whose parent is in this RefSCC and child is in some + /// child RefSCC. + /// + /// There must be an existing path from the \p SourceN to the \p TargetN. + /// This operation is inexpensive and does not change the set of SCCs and + /// RefSCCs in the graph. + void insertOutgoingEdge(Node &SourceN, Node &TargetN, Edge::Kind EK); + + /// Insert an edge whose source is in a descendant RefSCC and target is in + /// this RefSCC. + /// + /// There must be an existing path from the target to the source in this + /// case. + /// + /// NB! This is has the potential to be a very expensive function. It + /// inherently forms a cycle in the prior RefSCC DAG and we have to merge + /// RefSCCs to resolve that cycle. But finding all of the RefSCCs which + /// participate in the cycle can in the worst case require traversing every + /// RefSCC in the graph. Every attempt is made to avoid that, but passes + /// must still exercise caution calling this routine repeatedly. + /// + /// Also note that this can only insert ref edges. In order to insert + /// a call edge, first insert a ref edge and then switch it to a call edge. + /// These are intentionally kept as separate interfaces because each step + /// of the operation invalidates a different set of data structures. /// - /// There must be an existing path from the callee to the caller in this - /// case. NB! This is has the potential to be a very expensive function. It - /// inherently forms a cycle in the prior SCC DAG and we have to merge SCCs - /// to resolve that cycle. But finding all of the SCCs which participate in - /// the cycle can in the worst case require traversing every SCC in the - /// graph. Every attempt is made to avoid that, but passes must still - /// exercise caution calling this routine repeatedly. + /// This returns all the RefSCCs which were merged into the this RefSCC + /// (the target's). 
This allows callers to invalidate any cached + /// information. /// /// FIXME: We could possibly optimize this quite a bit for cases where the /// caller and callee are very nearby in the graph. See comments in the /// implementation for details, but that use case might impact users. - SmallVector<SCC *, 1> insertIncomingEdge(Node &CallerN, Node &CalleeN); + SmallVector<RefSCC *, 1> insertIncomingRefEdge(Node &SourceN, + Node &TargetN); - /// Remove an edge whose source is in this SCC and target is *not*. + /// Remove an edge whose source is in this RefSCC and target is *not*. /// - /// This removes an inter-SCC edge. All inter-SCC edges originating from - /// this SCC have been fully explored by any in-flight DFS SCC formation, - /// so this is always safe to call once you have the source SCC. + /// This removes an inter-RefSCC edge. All inter-RefSCC edges originating + /// from this SCC have been fully explored by any in-flight DFS graph + /// formation, so this is always safe to call once you have the source + /// RefSCC. /// - /// This operation does not change the set of SCCs or the members of the - /// SCCs and so is very inexpensive. It may change the connectivity graph - /// of the SCCs though, so be careful calling this while iterating over - /// them. - void removeInterSCCEdge(Node &CallerN, Node &CalleeN); + /// This operation does not change the cyclic structure of the graph and so + /// is very inexpensive. It may change the connectivity graph of the SCCs + /// though, so be careful calling this while iterating over them. + void removeOutgoingEdge(Node &SourceN, Node &TargetN); - /// Remove an edge which is entirely within this SCC. + /// Remove a ref edge which is entirely within this RefSCC. /// - /// Both the \a Caller and the \a Callee must be within this SCC. Removing - /// such an edge make break cycles that form this SCC and thus this - /// operation may change the SCC graph significantly. 
In particular, this - /// operation will re-form new SCCs based on the remaining connectivity of - /// the graph. The following invariants are guaranteed to hold after - /// calling this method: + /// Both the \a SourceN and the \a TargetN must be within this RefSCC. + /// Removing such an edge may break cycles that form this RefSCC and thus + /// this operation may change the RefSCC graph significantly. In + /// particular, this operation will re-form new RefSCCs based on the + /// remaining connectivity of the graph. The following invariants are + /// guaranteed to hold after calling this method: /// - /// 1) This SCC is still an SCC in the graph. - /// 2) This SCC will be the parent of any new SCCs. Thus, this SCC is - /// preserved as the root of any new SCC directed graph formed. - /// 3) No SCC other than this SCC has its member set changed (this is + /// 1) This RefSCC is still a RefSCC in the graph. + /// 2) This RefSCC will be the parent of any new RefSCCs. Thus, this RefSCC + /// is preserved as the root of any new RefSCC DAG formed. + /// 3) No RefSCC other than this RefSCC has its member set changed (this is /// inherent in the definition of removing such an edge). - /// 4) All of the parent links of the SCC graph will be updated to reflect - /// the new SCC structure. - /// 5) All SCCs formed out of this SCC, excluding this SCC, will be - /// returned in a vector. - /// 6) The order of the SCCs in the vector will be a valid postorder - /// traversal of the new SCCs. + /// 4) All of the parent links of the RefSCC graph will be updated to + /// reflect the new RefSCC structure. + /// 5) All RefSCCs formed out of this RefSCC, excluding this RefSCC, will + /// be returned in post-order. + /// 6) The order of the RefSCCs in the vector will be a valid postorder + /// traversal of the new RefSCCs. 
/// /// These invariants are very important to ensure that we can build - /// optimization pipeliens on top of the CGSCC pass manager which - /// intelligently update the SCC graph without invalidating other parts of - /// the SCC graph. + /// optimization pipelines on top of the CGSCC pass manager which + /// intelligently update the RefSCC graph without invalidating other parts + /// of the RefSCC graph. + /// + /// Note that we provide no routine to remove a *call* edge. Instead, you + /// must first switch it to a ref edge using \c switchInternalEdgeToRef. + /// This split API is intentional as each of these two steps can invalidate + /// a different aspect of the graph structure and needs to have the + /// invalidation handled independently. /// /// The runtime complexity of this method is, in the worst case, O(V+E) - /// where V is the number of nodes in this SCC and E is the number of edges - /// leaving the nodes in this SCC. Note that E includes both edges within - /// this SCC and edges from this SCC to child SCCs. Some effort has been - /// made to minimize the overhead of common cases such as self-edges and - /// edge removals which result in a spanning tree with no more cycles. - SmallVector<SCC *, 1> removeIntraSCCEdge(Node &CallerN, Node &CalleeN); + /// where V is the number of nodes in this RefSCC and E is the number of + /// edges leaving the nodes in this RefSCC. Note that E includes both edges + /// within this RefSCC and edges from this RefSCC to child RefSCCs. Some + /// effort has been made to minimize the overhead of common cases such as + /// self-edges and edge removals which result in a spanning tree with no + /// more cycles. There are also detailed comments within the implementation + /// on techniques which could substantially improve this routine's + /// efficiency. 
+ SmallVector<RefSCC *, 1> removeInternalRefEdge(Node &SourceN, + Node &TargetN); ///@} }; @@ -351,9 +708,9 @@ public: /// the call graph, walking it lazily in depth-first post-order. That is, it /// always visits SCCs for a callee prior to visiting the SCC for a caller /// (when they are in different SCCs). - class postorder_scc_iterator - : public iterator_facade_base<postorder_scc_iterator, - std::forward_iterator_tag, SCC> { + class postorder_ref_scc_iterator + : public iterator_facade_base<postorder_ref_scc_iterator, + std::forward_iterator_tag, RefSCC> { friend class LazyCallGraph; friend class LazyCallGraph::Node; @@ -361,27 +718,27 @@ public: struct IsAtEndT {}; LazyCallGraph *G; - SCC *C; + RefSCC *C; // Build the begin iterator for a node. - postorder_scc_iterator(LazyCallGraph &G) : G(&G) { - C = G.getNextSCCInPostOrder(); + postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G) { + C = G.getNextRefSCCInPostOrder(); } // Build the end iterator for a node. This is selected purely by overload. 
- postorder_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/) + postorder_ref_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/) : G(&G), C(nullptr) {} public: - bool operator==(const postorder_scc_iterator &Arg) const { + bool operator==(const postorder_ref_scc_iterator &Arg) const { return G == Arg.G && C == Arg.C; } reference operator*() const { return *C; } using iterator_facade_base::operator++; - postorder_scc_iterator &operator++() { - C = G->getNextSCCInPostOrder(); + postorder_ref_scc_iterator &operator++() { + C = G->getNextRefSCCInPostOrder(); return *this; } }; @@ -396,20 +753,23 @@ public: LazyCallGraph(LazyCallGraph &&G); LazyCallGraph &operator=(LazyCallGraph &&RHS); - iterator begin() { - return iterator(*this, EntryNodes.begin(), EntryNodes.end()); + edge_iterator begin() { + return edge_iterator(EntryEdges.begin(), EntryEdges.end()); + } + edge_iterator end() { + return edge_iterator(EntryEdges.end(), EntryEdges.end()); } - iterator end() { return iterator(*this, EntryNodes.end(), EntryNodes.end()); } - postorder_scc_iterator postorder_scc_begin() { - return postorder_scc_iterator(*this); + postorder_ref_scc_iterator postorder_ref_scc_begin() { + return postorder_ref_scc_iterator(*this); } - postorder_scc_iterator postorder_scc_end() { - return postorder_scc_iterator(*this, postorder_scc_iterator::IsAtEndT()); + postorder_ref_scc_iterator postorder_ref_scc_end() { + return postorder_ref_scc_iterator(*this, + postorder_ref_scc_iterator::IsAtEndT()); } - iterator_range<postorder_scc_iterator> postorder_sccs() { - return make_range(postorder_scc_begin(), postorder_scc_end()); + iterator_range<postorder_ref_scc_iterator> postorder_ref_sccs() { + return make_range(postorder_ref_scc_begin(), postorder_ref_scc_end()); } /// Lookup a function in the graph which has already been scanned and added. @@ -421,6 +781,17 @@ public: /// iterator walk. SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); } + /// Lookup a function's RefSCC in the graph. 
+ /// + /// \returns null if the function hasn't been assigned a RefSCC via the + /// RefSCC iterator walk. + RefSCC *lookupRefSCC(Node &N) const { + if (SCC *C = lookupSCC(N)) + return &C->getOuterRefSCC(); + + return nullptr; + } + /// Get a graph node for a given function, scanning it to populate the graph /// data as necessary. Node &get(Function &F) { @@ -442,11 +813,11 @@ public: /// mutation of the graph via the SCC methods. /// Update the call graph after inserting a new edge. - void insertEdge(Node &Caller, Function &Callee); + void insertEdge(Node &Caller, Function &Callee, Edge::Kind EK); /// Update the call graph after inserting a new edge. - void insertEdge(Function &Caller, Function &Callee) { - return insertEdge(get(Caller), Callee); + void insertEdge(Function &Caller, Function &Callee, Edge::Kind EK) { + return insertEdge(get(Caller), Callee, EK); } /// Update the call graph after deleting an edge. @@ -460,6 +831,9 @@ public: ///@} private: + typedef SmallVectorImpl<Node *>::reverse_iterator node_stack_iterator; + typedef iterator_range<node_stack_iterator> node_stack_range; + /// Allocator that holds all the call graph nodes. SpecificBumpPtrAllocator<Node> BPA; @@ -470,10 +844,10 @@ private: /// /// These nodes are reachable through "external" means. Put another way, they /// escape at the module scope. - NodeVectorT EntryNodes; + EdgeVectorT EntryEdges; - /// Map of the entry nodes in the graph to their indices in \c EntryNodes. - DenseMap<Function *, size_t> EntryIndexMap; + /// Map of the entry nodes in the graph to their indices in \c EntryEdges. + DenseMap<Function *, int> EntryIndexMap; /// Allocator that holds all the call graph SCCs. SpecificBumpPtrAllocator<SCC> SCCBPA; @@ -481,19 +855,22 @@ private: /// Maps Function -> SCC for fast lookup. DenseMap<Node *, SCC *> SCCMap; - /// The leaf SCCs of the graph. + /// Allocator that holds all the call graph RefSCCs. 
+ SpecificBumpPtrAllocator<RefSCC> RefSCCBPA; + + /// The leaf RefSCCs of the graph. /// - /// These are all of the SCCs which have no children. - SmallVector<SCC *, 4> LeafSCCs; + /// These are all of the RefSCCs which have no children. + SmallVector<RefSCC *, 4> LeafRefSCCs; /// Stack of nodes in the DFS walk. - SmallVector<std::pair<Node *, iterator>, 4> DFSStack; + SmallVector<std::pair<Node *, edge_iterator>, 4> DFSStack; - /// Set of entry nodes not-yet-processed into SCCs. - SmallVector<Function *, 4> SCCEntryNodes; + /// Set of entry nodes not-yet-processed into RefSCCs. + SmallVector<Function *, 4> RefSCCEntryNodes; /// Stack of nodes the DFS has walked but not yet put into a SCC. - SmallVector<Node *, 4> PendingSCCStack; + SmallVector<Node *, 4> PendingRefSCCStack; /// Counter for the next DFS number to assign. int NextDFSNumber; @@ -505,18 +882,79 @@ private: /// Helper to update pointers back to the graph object during moves. void updateGraphPtrs(); - /// Helper to form a new SCC out of the top of a DFSStack-like - /// structure. - SCC *formSCC(Node *RootN, SmallVectorImpl<Node *> &NodeStack); + /// Allocates an SCC and constructs it using the graph allocator. + /// + /// The arguments are forwarded to the constructor. + template <typename... Ts> SCC *createSCC(Ts &&... Args) { + return new (SCCBPA.Allocate()) SCC(std::forward<Ts>(Args)...); + } + + /// Allocates a RefSCC and constructs it using the graph allocator. + /// + /// The arguments are forwarded to the constructor. + template <typename... Ts> RefSCC *createRefSCC(Ts &&... Args) { + return new (RefSCCBPA.Allocate()) RefSCC(std::forward<Ts>(Args)...); + } + + /// Build the SCCs for a RefSCC out of a list of nodes. + void buildSCCs(RefSCC &RC, node_stack_range Nodes); - /// Retrieve the next node in the post-order SCC walk of the call graph. - SCC *getNextSCCInPostOrder(); + /// Connect a RefSCC into the larger graph. 
+ /// + /// This walks the edges to connect the RefSCC to its children's parent set, + /// and updates the root leaf list. + void connectRefSCC(RefSCC &RC); + + /// Retrieve the next node in the post-order RefSCC walk of the call graph. + RefSCC *getNextRefSCCInPostOrder(); }; +inline LazyCallGraph::Edge::Edge() : Value() {} +inline LazyCallGraph::Edge::Edge(Function &F, Kind K) : Value(&F, K) {} +inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {} + +inline LazyCallGraph::Edge::operator bool() const { + return !Value.getPointer().isNull(); +} + +inline bool LazyCallGraph::Edge::isCall() const { + assert(*this && "Queried a null edge!"); + return Value.getInt() == Call; +} + +inline Function &LazyCallGraph::Edge::getFunction() const { + assert(*this && "Queried a null edge!"); + auto P = Value.getPointer(); + if (auto *F = P.dyn_cast<Function *>()) + return *F; + + return P.get<Node *>()->getFunction(); +} + +inline LazyCallGraph::Node *LazyCallGraph::Edge::getNode() const { + assert(*this && "Queried a null edge!"); + auto P = Value.getPointer(); + if (auto *N = P.dyn_cast<Node *>()) + return N; + + return nullptr; +} + +inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode(LazyCallGraph &G) { + assert(*this && "Queried a null edge!"); + auto P = Value.getPointer(); + if (auto *N = P.dyn_cast<Node *>()) + return *N; + + Node &N = G.get(*P.get<Function *>()); + Value.setPointer(&N); + return N; +} + // Provide GraphTraits specializations for call graphs. 
template <> struct GraphTraits<LazyCallGraph::Node *> { typedef LazyCallGraph::Node NodeType; - typedef LazyCallGraph::iterator ChildIteratorType; + typedef LazyCallGraph::edge_iterator ChildIteratorType; static NodeType *getEntryNode(NodeType *N) { return N; } static ChildIteratorType child_begin(NodeType *N) { return N->begin(); } @@ -524,7 +962,7 @@ template <> struct GraphTraits<LazyCallGraph::Node *> { }; template <> struct GraphTraits<LazyCallGraph *> { typedef LazyCallGraph::Node NodeType; - typedef LazyCallGraph::iterator ChildIteratorType; + typedef LazyCallGraph::edge_iterator ChildIteratorType; static NodeType *getEntryNode(NodeType *N) { return N; } static ChildIteratorType child_begin(NodeType *N) { return N->begin(); } @@ -532,39 +970,48 @@ template <> struct GraphTraits<LazyCallGraph *> { }; /// An analysis pass which computes the call graph for a module. -class LazyCallGraphAnalysis { +class LazyCallGraphAnalysis : public AnalysisInfoMixin<LazyCallGraphAnalysis> { + friend AnalysisInfoMixin<LazyCallGraphAnalysis>; + static char PassID; + public: /// Inform generic clients of the result type. typedef LazyCallGraph Result; - static void *ID() { return (void *)&PassID; } - - static StringRef name() { return "Lazy CallGraph Analysis"; } - /// Compute the \c LazyCallGraph for the module \c M. /// /// This just builds the set of entry points to the call graph. The rest is /// built lazily as it is walked. - LazyCallGraph run(Module &M) { return LazyCallGraph(M); } - -private: - static char PassID; + LazyCallGraph run(Module &M, ModuleAnalysisManager &) { + return LazyCallGraph(M); + } }; /// A pass which prints the call graph to a \c raw_ostream. /// /// This is primarily useful for testing the analysis. 
-class LazyCallGraphPrinterPass { +class LazyCallGraphPrinterPass + : public PassInfoMixin<LazyCallGraphPrinterPass> { raw_ostream &OS; public: explicit LazyCallGraphPrinterPass(raw_ostream &OS); - PreservedAnalyses run(Module &M, ModuleAnalysisManager *AM); - - static StringRef name() { return "LazyCallGraphPrinterPass"; } + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +/// A pass which prints the call graph as a DOT file to a \c raw_ostream. +/// +/// This is primarily useful for visualization purposes. +class LazyCallGraphDOTPrinterPass + : public PassInfoMixin<LazyCallGraphDOTPrinterPass> { + raw_ostream &OS; + +public: + explicit LazyCallGraphDOTPrinterPass(raw_ostream &OS); + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; } #endif diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h index 42002062dca2..c85cf2c5da56 100644 --- a/include/llvm/Analysis/LazyValueInfo.h +++ b/include/llvm/Analysis/LazyValueInfo.h @@ -15,11 +15,13 @@ #ifndef LLVM_ANALYSIS_LAZYVALUEINFO_H #define LLVM_ANALYSIS_LAZYVALUEINFO_H +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" namespace llvm { class AssumptionCache; class Constant; + class ConstantRange; class DataLayout; class DominatorTree; class Instruction; @@ -27,19 +29,33 @@ namespace llvm { class Value; /// This pass computes, caches, and vends lazy value constraint information. 
-class LazyValueInfo : public FunctionPass { - AssumptionCache *AC; - class TargetLibraryInfo *TLI; - DominatorTree *DT; - void *PImpl; +class LazyValueInfo { + friend class LazyValueInfoWrapperPass; + AssumptionCache *AC = nullptr; + class TargetLibraryInfo *TLI = nullptr; + DominatorTree *DT = nullptr; + void *PImpl = nullptr; LazyValueInfo(const LazyValueInfo&) = delete; void operator=(const LazyValueInfo&) = delete; public: - static char ID; - LazyValueInfo() : FunctionPass(ID), PImpl(nullptr) { - initializeLazyValueInfoPass(*PassRegistry::getPassRegistry()); + ~LazyValueInfo(); + LazyValueInfo() {} + LazyValueInfo(AssumptionCache *AC_, TargetLibraryInfo *TLI_, + DominatorTree *DT_) + : AC(AC_), TLI(TLI_), DT(DT_) {} + LazyValueInfo(LazyValueInfo &&Arg) + : AC(Arg.AC), TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) { + Arg.PImpl = nullptr; + } + LazyValueInfo &operator=(LazyValueInfo &&Arg) { + releaseMemory(); + AC = Arg.AC; + TLI = Arg.TLI; + DT = Arg.DT; + PImpl = Arg.PImpl; + Arg.PImpl = nullptr; + return *this; } - ~LazyValueInfo() override { assert(!PImpl && "releaseMemory not called"); } /// This is used to return true/false/dunno results. enum Tristate { @@ -65,6 +81,11 @@ public: /// constant at the end of the specified block. Return null if not. Constant *getConstant(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); + /// Return the ConstantRange constraint that is known to hold for the + /// specified value at the end of the specified block. This may only be called + /// on integer-typed Values. + ConstantRange getConstantRange(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); + /// Determine whether the specified value is known to be a /// constant on the specified edge. Return null if not. Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, @@ -77,11 +98,41 @@ public: /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); - // Implementation boilerplate. + // For old PM pass. 
Delete once LazyValueInfoWrapperPass is gone. + void releaseMemory(); +}; + +/// \brief Analysis to compute lazy value information. +class LazyValueAnalysis : public AnalysisInfoMixin<LazyValueAnalysis> { +public: + typedef LazyValueInfo Result; + Result run(Function &F, FunctionAnalysisManager &FAM); + +private: + static char PassID; + friend struct AnalysisInfoMixin<LazyValueAnalysis>; +}; + +/// Wrapper around LazyValueInfo. +class LazyValueInfoWrapperPass : public FunctionPass { + LazyValueInfoWrapperPass(const LazyValueInfoWrapperPass&) = delete; + void operator=(const LazyValueInfoWrapperPass&) = delete; +public: + static char ID; + LazyValueInfoWrapperPass() : FunctionPass(ID) { + initializeLazyValueInfoWrapperPassPass(*PassRegistry::getPassRegistry()); + } + ~LazyValueInfoWrapperPass() override { + assert(!Info.PImpl && "releaseMemory not called"); + } + + LazyValueInfo &getLVI(); void getAnalysisUsage(AnalysisUsage &AU) const override; void releaseMemory() override; bool runOnFunction(Function &F) override; +private: + LazyValueInfo Info; }; } // end namespace llvm diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index 939663b0def1..39f80f489e12 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -23,39 +23,74 @@ namespace llvm { class DataLayout; class MDNode; +/// isDereferenceablePointer - Return true if this is always a dereferenceable +/// pointer. If the context instruction is specified perform context-sensitive +/// analysis and return true if the pointer is dereferenceable at the +/// specified instruction. +bool isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr); + +/// Returns true if V is always a dereferenceable pointer with alignment +/// greater or equal than requested. 
If the context instruction is specified +/// performs context-sensitive analysis and returns true if the pointer is +/// dereferenceable at the specified instruction. +bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const DataLayout &DL, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr); + /// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. If it is not obviously safe to load from the -/// specified pointer, we do a quick local scan of the basic block containing -/// ScanFrom, to determine if the address is already accessed. -bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - unsigned Align); +/// from this value cannot trap. +/// +/// If DT and ScanFrom are specified this method performs context-sensitive +/// analysis and returns true if it is safe to load immediately before ScanFrom. +/// +/// If it is not obviously safe to load from the specified pointer, we do a +/// quick local scan of the basic block containing ScanFrom, to determine if +/// the address is already accessed. +bool isSafeToLoadUnconditionally(Value *V, unsigned Align, + const DataLayout &DL, + Instruction *ScanFrom = nullptr, + const DominatorTree *DT = nullptr); /// DefMaxInstsToScan - the default number of maximum instructions /// to scan in the block, used by FindAvailableLoadedValue(). extern cl::opt<unsigned> DefMaxInstsToScan; -/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at -/// the instruction before ScanFrom) checking to see if we have the value at -/// the memory address *Ptr locally available within a small number of -/// instructions. If the value is available, return it. +/// \brief Scan backwards to see if we have the value of the given load +/// available locally within a small number of instructions. 
+/// +/// You can use this function to scan across multiple blocks: after you call +/// this function, if ScanFrom points at the beginning of the block, it's safe +/// to continue scanning the predecessors. /// -/// If not, return the iterator for the last validated instruction that the -/// value would be live through. If we scanned the entire block and didn't -/// find something that invalidates *Ptr or provides it, ScanFrom would be -/// left at begin() and this returns null. ScanFrom could also be left +/// Note that performing load CSE requires special care to make sure the +/// metadata is set appropriately. In particular, aliasing metadata needs +/// to be merged. (This doesn't matter for store-to-load forwarding because +/// the only relevant load gets deleted.) /// -/// MaxInstsToScan specifies the maximum instructions to scan in the block. -/// If it is set to 0, it will scan the whole block. You can also optionally -/// specify an alias analysis implementation, which makes this more precise. +/// \param Load The load we want to replace. +/// \param ScanBB The basic block to scan. FIXME: This is redundant. +/// \param [in,out] ScanFrom The location to start scanning from. When this +/// function returns, it points at the last instruction scanned. +/// \param MaxInstsToScan The maximum number of instructions to scan. If this +/// is zero, the whole block will be scanned. +/// \param AA Optional pointer to alias analysis, to make the scan more +/// precise. +/// \param [out] AATags The aliasing metadata for the operation which produced +/// the value. FIXME: This is basically useless. +/// \param [out] IsLoadCSE Whether the returned value is a load from the same +/// location in memory, as opposed to the value operand of a store. /// -/// If AATags is non-null and a load or store is found, the AA tags from the -/// load or store are recorded there. If there are no AA tags or if no access -/// is found, it is left unmodified. 
-Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, +/// \returns The found value, or nullptr if no value is found. +Value *FindAvailableLoadedValue(LoadInst *Load, + BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan = DefMaxInstsToScan, AliasAnalysis *AA = nullptr, - AAMDNodes *AATags = nullptr); + AAMDNodes *AATags = nullptr, + bool *IsLoadCSE = nullptr); } diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h index 871d35e99b74..ceee1be5e1e7 100644 --- a/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/include/llvm/Analysis/LoopAccessAnalysis.h @@ -228,7 +228,7 @@ public: /// \brief The maximum number of bytes of a vector register we can vectorize /// the accesses safely with. - unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; } + uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; } /// \brief In same cases when the dependency check fails we can still /// vectorize the loop with a dynamic array access check. @@ -284,7 +284,7 @@ private: unsigned AccessIdx; // We can access this many bytes in parallel safely. - unsigned MaxSafeDepDistBytes; + uint64_t MaxSafeDepDistBytes; /// \brief If we see a non-constant dependence distance we can still try to /// vectorize this loop with runtime checks. @@ -321,7 +321,10 @@ private: /// \brief Check whether the data dependence could prevent store-load /// forwarding. - bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize); + /// + /// \return false if we shouldn't vectorize at all or avoid larger + /// vectorization factors by limiting MaxSafeDepDistBytes. + bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize); }; /// \brief Holds information about the memory runtime legality checks to verify @@ -363,10 +366,10 @@ public: } /// Insert a pointer and calculate the start and end SCEVs. 
- /// \p We need Preds in order to compute the SCEV expression of the pointer + /// We need \p PSE in order to compute the SCEV expression of the pointer /// according to the assumptions that we've made during the analysis. /// The method might also version the pointer stride according to \p Strides, - /// and change \p Preds. + /// and add new predicates to \p PSE. void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE); @@ -508,23 +511,53 @@ private: /// PSE must be emitted in order for the results of this analysis to be valid. class LoopAccessInfo { public: - LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, - const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, - const ValueToValueMap &Strides); + LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, + AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI); + + // FIXME: + // Hack for MSVC 2013 which sems like it can't synthesize this even + // with default keyword: + // LoopAccessInfo(LoopAccessInfo &&LAI) = default; + LoopAccessInfo(LoopAccessInfo &&LAI) + : PSE(std::move(LAI.PSE)), PtrRtChecking(std::move(LAI.PtrRtChecking)), + DepChecker(std::move(LAI.DepChecker)), TheLoop(LAI.TheLoop), + NumLoads(LAI.NumLoads), NumStores(LAI.NumStores), + MaxSafeDepDistBytes(LAI.MaxSafeDepDistBytes), CanVecMem(LAI.CanVecMem), + StoreToLoopInvariantAddress(LAI.StoreToLoopInvariantAddress), + Report(std::move(LAI.Report)), + SymbolicStrides(std::move(LAI.SymbolicStrides)), + StrideSet(std::move(LAI.StrideSet)) {} + // LoopAccessInfo &operator=(LoopAccessInfo &&LAI) = default; + LoopAccessInfo &operator=(LoopAccessInfo &&LAI) { + assert(this != &LAI); + + PSE = std::move(LAI.PSE); + PtrRtChecking = std::move(LAI.PtrRtChecking); + DepChecker = std::move(LAI.DepChecker); + TheLoop = LAI.TheLoop; + NumLoads = LAI.NumLoads; + NumStores = LAI.NumStores; + MaxSafeDepDistBytes = 
LAI.MaxSafeDepDistBytes; + CanVecMem = LAI.CanVecMem; + StoreToLoopInvariantAddress = LAI.StoreToLoopInvariantAddress; + Report = std::move(LAI.Report); + SymbolicStrides = std::move(LAI.SymbolicStrides); + StrideSet = std::move(LAI.StrideSet); + return *this; + } /// Return true we can analyze the memory accesses in the loop and there are /// no memory dependence cycles. bool canVectorizeMemory() const { return CanVecMem; } const RuntimePointerChecking *getRuntimePointerChecking() const { - return &PtrRtChecking; + return PtrRtChecking.get(); } /// \brief Number of memchecks required to prove independence of otherwise /// may-alias pointers. unsigned getNumRuntimePointerChecks() const { - return PtrRtChecking.getNumberOfChecks(); + return PtrRtChecking->getNumberOfChecks(); } /// Return true if the block BB needs to be predicated in order for the loop @@ -535,7 +568,7 @@ public: /// Returns true if the value V is uniform within the loop. bool isUniform(Value *V) const; - unsigned getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; } + uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; } unsigned getNumStores() const { return NumStores; } unsigned getNumLoads() const { return NumLoads;} @@ -563,23 +596,25 @@ public: /// \brief the Memory Dependence Checker which can determine the /// loop-independent and loop-carried dependences between memory accesses. - const MemoryDepChecker &getDepChecker() const { return DepChecker; } + const MemoryDepChecker &getDepChecker() const { return *DepChecker; } /// \brief Return the list of instructions that use \p Ptr to read or write /// memory. SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr, bool isWrite) const { - return DepChecker.getInstructionsForAccess(Ptr, isWrite); + return DepChecker->getInstructionsForAccess(Ptr, isWrite); } + /// \brief If an access has a symbolic strides, this maps the pointer value to + /// the stride symbol. 
+ const ValueToValueMap &getSymbolicStrides() const { return SymbolicStrides; } + + /// \brief Pointer has a symbolic stride. + bool hasStride(Value *V) const { return StrideSet.count(V); } + /// \brief Print the information about the memory accesses in the loop. void print(raw_ostream &OS, unsigned Depth = 0) const; - /// \brief Used to ensure that if the analysis was run with speculating the - /// value of symbolic strides, the client queries it with the same assumption. - /// Only used in DEBUG build but we don't want NDEBUG-dependent ABI. - unsigned NumSymbolicStrides; - /// \brief Checks existence of store to invariant address inside loop. /// If the loop has any store to invariant address, then it returns true, /// else returns false. @@ -592,11 +627,12 @@ public: /// should be re-written (and therefore simplified) according to PSE. /// A user of LoopAccessAnalysis will need to emit the runtime checks /// associated with this predicate. - PredicatedScalarEvolution PSE; + const PredicatedScalarEvolution &getPSE() const { return *PSE; } private: - /// \brief Analyze the loop. Substitute symbolic strides using Strides. - void analyzeLoop(const ValueToValueMap &Strides); + /// \brief Analyze the loop. + void analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, + const TargetLibraryInfo *TLI, DominatorTree *DT); /// \brief Check if the structure of the loop allows it to be analyzed by this /// pass. @@ -604,25 +640,28 @@ private: void emitAnalysis(LoopAccessReport &Message); + /// \brief Collect memory access with loop invariant strides. + /// + /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop + /// invariant. + void collectStridedAccess(Value *LoadOrStoreInst); + + std::unique_ptr<PredicatedScalarEvolution> PSE; + /// We need to check that all of the pointers in this list are disjoint - /// at runtime. - RuntimePointerChecking PtrRtChecking; + /// at runtime. Using std::unique_ptr to make using move ctor simpler. 
+ std::unique_ptr<RuntimePointerChecking> PtrRtChecking; /// \brief the Memory Dependence Checker which can determine the /// loop-independent and loop-carried dependences between memory accesses. - MemoryDepChecker DepChecker; + std::unique_ptr<MemoryDepChecker> DepChecker; Loop *TheLoop; - const DataLayout &DL; - const TargetLibraryInfo *TLI; - AliasAnalysis *AA; - DominatorTree *DT; - LoopInfo *LI; unsigned NumLoads; unsigned NumStores; - unsigned MaxSafeDepDistBytes; + uint64_t MaxSafeDepDistBytes; /// \brief Cache the result of analyzeLoop. bool CanVecMem; @@ -634,15 +673,23 @@ private: /// \brief The diagnostics report generated for the analysis. E.g. why we /// couldn't analyze the loop. Optional<LoopAccessReport> Report; + + /// \brief If an access has a symbolic stride, this maps the pointer value to + /// the stride symbol. + ValueToValueMap SymbolicStrides; + + /// \brief Set of symbolic stride values. + SmallPtrSet<Value *, 8> StrideSet; }; Value *stripIntegerCast(Value *V); -///\brief Return the SCEV corresponding to a pointer with the symbolic stride -/// replaced with constant one, assuming \p Preds is true. +/// \brief Return the SCEV corresponding to a pointer with the symbolic stride +/// replaced with constant one, assuming the SCEV predicate associated with +/// \p PSE is true. /// /// If necessary this method will version the stride of the pointer according -/// to \p PtrToStride and therefore add a new predicate to \p Preds. +/// to \p PtrToStride and therefore add further predicates to \p PSE. /// /// If \p OrigPtr is not null, use it to look up the stride value instead of \p /// Ptr. 
\p PtrToStride provides the mapping between the pointer value and its @@ -651,13 +698,24 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr = nullptr); -/// \brief Check the stride of the pointer and ensure that it does not wrap in -/// the address space, assuming \p Preds is true. +/// \brief If the pointer has a constant stride return it in units of its +/// element size. Otherwise return zero. +/// +/// Ensure that it does not wrap in the address space, assuming the predicate +/// associated with \p PSE is true. /// /// If necessary this method will version the stride of the pointer according -/// to \p PtrToStride and therefore add a new predicate to \p Preds. -int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap); +/// to \p PtrToStride and therefore add further predicates to \p PSE. +/// The \p Assume parameter indicates if we are allowed to make additional +/// run-time assumptions. +int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap = ValueToValueMap(), + bool Assume = false); + +/// \brief Returns true if the memory operations \p A and \p B are consecutive. +/// This is a simple API that does not depend on the analysis pass. +bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, + ScalarEvolution &SE, bool CheckType = true); /// \brief This analysis provides dependence information for the memory accesses /// of a loop. @@ -666,12 +724,12 @@ int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, /// querying the loop access info via LAA::getInfo. getInfo return a /// LoopAccessInfo object. See this class for the specifics of what information /// is provided. 
-class LoopAccessAnalysis : public FunctionPass { +class LoopAccessLegacyAnalysis : public FunctionPass { public: static char ID; - LoopAccessAnalysis() : FunctionPass(ID) { - initializeLoopAccessAnalysisPass(*PassRegistry::getPassRegistry()); + LoopAccessLegacyAnalysis() : FunctionPass(ID) { + initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -680,11 +738,8 @@ public: /// \brief Query the result of the loop access information for the loop \p L. /// - /// If the client speculates (and then issues run-time checks) for the values - /// of symbolic strides, \p Strides provides the mapping (see - /// replaceSymbolicStrideSCEV). If there is no cached result available run - /// the analysis. - const LoopAccessInfo &getInfo(Loop *L, const ValueToValueMap &Strides); + /// If there is no cached result available run the analysis. + const LoopAccessInfo &getInfo(Loop *L); void releaseMemory() override { // Invalidate the cache when the pass is freed. @@ -706,6 +761,34 @@ private: LoopInfo *LI; }; +/// \brief This analysis provides dependence information for the memory +/// accesses of a loop. +/// +/// It runs the analysis for a loop on demand. This can be initiated by +/// querying the loop access info via AM.getResult<LoopAccessAnalysis>. +/// getResult returns a LoopAccessInfo object. See this class for the +/// specifics of what information is provided. +class LoopAccessAnalysis + : public AnalysisInfoMixin<LoopAccessAnalysis> { + friend AnalysisInfoMixin<LoopAccessAnalysis>; + static char PassID; + +public: + typedef LoopAccessInfo Result; + Result run(Loop &, AnalysisManager<Loop> &); + static StringRef name() { return "LoopAccessAnalysis"; } +}; + +/// \brief Printer pass for the \c LoopAccessInfo results. 
+class LoopAccessInfoPrinterPass + : public PassInfoMixin<LoopAccessInfoPrinterPass> { + raw_ostream &OS; + +public: + explicit LoopAccessInfoPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM); +}; + inline Instruction *MemoryDepChecker::Dependence::getSource( const LoopAccessInfo &LAI) const { return LAI.getDepChecker().getMemoryInstructions()[Source]; diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 70e636ce1f3d..35dc6bcb6864 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -25,6 +25,12 @@ // * the loop depth // * etc... // +// Note that this analysis specifically identifies *Loops* not cycles or SCCs +// in the CFG. There can be strongly connected components in the CFG which +// this analysis will not recognize and that will not be represented by a Loop +// instance. In particular, a Loop might be inside such a non-loop SCC, or a +// non-loop SCC might contain a sub-SCC which is a Loop. +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_LOOPINFO_H @@ -38,16 +44,12 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include <algorithm> namespace llvm { -// FIXME: Replace this brittle forward declaration with the include of the new -// PassManager.h when doing so doesn't break the PassManagerBuilder. -template <typename IRUnitT> class AnalysisManager; -class PreservedAnalyses; - class DominatorTree; class LoopInfo; class Loop; @@ -346,6 +348,9 @@ raw_ostream& operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) { // Implementation in LoopInfoImpl.h extern template class LoopBase<BasicBlock, Loop>; + +/// Represents a single loop in the control flow graph. Note that not all SCCs +/// in the CFG are necessarily loops. 
class Loop : public LoopBase<BasicBlock, Loop> { public: Loop() {} @@ -452,21 +457,13 @@ public: /// location by looking at the preheader and header blocks. If it /// cannot find a terminating instruction with location information, /// it returns an unknown location. - DebugLoc getStartLoc() const { - BasicBlock *HeadBB; - - // Try the pre-header first. - if ((HeadBB = getLoopPreheader()) != nullptr) - if (DebugLoc DL = HeadBB->getTerminator()->getDebugLoc()) - return DL; + DebugLoc getStartLoc() const; - // If we have no pre-header or there are no instructions with debug - // info in it, try the header. - HeadBB = getHeader(); - if (HeadBB) - return HeadBB->getTerminator()->getDebugLoc(); - - return DebugLoc(); + StringRef getName() const { + if (BasicBlock *Header = getHeader()) + if (Header->hasName()) + return Header->getName(); + return "<unnamed loop>"; } private: @@ -775,30 +772,23 @@ template <> struct GraphTraits<Loop*> { }; /// \brief Analysis pass that exposes the \c LoopInfo for a function. -class LoopAnalysis { +class LoopAnalysis : public AnalysisInfoMixin<LoopAnalysis> { + friend AnalysisInfoMixin<LoopAnalysis>; static char PassID; public: typedef LoopInfo Result; - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - /// \brief Provide a name for the analysis for debugging and logging. - static StringRef name() { return "LoopAnalysis"; } - - LoopInfo run(Function &F, AnalysisManager<Function> *AM); + LoopInfo run(Function &F, AnalysisManager<Function> &AM); }; /// \brief Printer pass for the \c LoopAnalysis results. 
-class LoopPrinterPass { +class LoopPrinterPass : public PassInfoMixin<LoopPrinterPass> { raw_ostream &OS; public: explicit LoopPrinterPass(raw_ostream &OS) : OS(OS) {} - PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM); - - static StringRef name() { return "LoopPrinterPass"; } + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); }; /// \brief The legacy pass manager's analysis pass to compute loop information. @@ -828,7 +818,7 @@ public: }; /// \brief Pass for printing a loop's contents as LLVM's text IR assembly. -class PrintLoopPass { +class PrintLoopPass : public PassInfoMixin<PrintLoopPass> { raw_ostream &OS; std::string Banner; @@ -836,8 +826,7 @@ public: PrintLoopPass(); PrintLoopPass(raw_ostream &OS, const std::string &Banner = ""); - PreservedAnalyses run(Loop &L); - static StringRef name() { return "PrintLoopPass"; } + PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &); }; } // End llvm namespace diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index 824fc7e8f155..816a15452dae 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -277,7 +277,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const { } assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!"); assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!"); - assert(BB != getHeader()->getParent()->begin() && + assert(BB != &getHeader()->getParent()->front() && "Loop contains function entry block!"); NumVisited++; diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h index 2cf734e53bb4..89debec04e94 100644 --- a/include/llvm/Analysis/LoopPass.h +++ b/include/llvm/Analysis/LoopPass.h @@ -88,9 +88,10 @@ public: virtual void deleteAnalysisLoop(Loop *L) {} protected: - /// skipOptnoneFunction - Containing function has Attribute::OptimizeNone - /// and most transformation passes should skip it. 
- bool skipOptnoneFunction(const Loop *L) const; + /// Optional passes call this function to check whether the pass should be + /// skipped. This is the case when Attribute::OptimizeNone is set or when + /// optimization bisect is over the limit. + bool skipLoop(const Loop *L) const; }; class LPPassManager : public FunctionPass, public PMDataManager { diff --git a/include/llvm/Analysis/LoopPassManager.h b/include/llvm/Analysis/LoopPassManager.h new file mode 100644 index 000000000000..a89551851259 --- /dev/null +++ b/include/llvm/Analysis/LoopPassManager.h @@ -0,0 +1,142 @@ +//===- LoopPassManager.h - Loop pass management -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This header provides classes for managing passes over loops in LLVM IR. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPPASSMANAGER_H +#define LLVM_ANALYSIS_LOOPPASSMANAGER_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +extern template class PassManager<Loop>; +/// \brief The loop pass manager. +/// +/// See the documentation for the PassManager template for details. It runs a +/// sequence of loop passes over each loop that the manager is run over. This +/// typedef serves as a convenient way to refer to this construct. +typedef PassManager<Loop> LoopPassManager; + +extern template class AnalysisManager<Loop>; +/// \brief The loop analysis manager. +/// +/// See the documentation for the AnalysisManager template for detailed +/// documentation. 
This typedef serves as a convenient way to refer to this +/// construct in the adaptors and proxies used to integrate this into the larger +/// pass manager infrastructure. +typedef AnalysisManager<Loop> LoopAnalysisManager; + +extern template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>; +/// A proxy from a \c LoopAnalysisManager to a \c Function. +typedef InnerAnalysisManagerProxy<LoopAnalysisManager, Function> + LoopAnalysisManagerFunctionProxy; + +extern template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop>; +/// A proxy from a \c FunctionAnalysisManager to a \c Loop. +typedef OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop> + FunctionAnalysisManagerLoopProxy; + +/// Returns the minimum set of Analyses that all loop passes must preserve. +PreservedAnalyses getLoopPassPreservedAnalyses(); + +/// \brief Adaptor that maps from a function to its loops. +/// +/// Designed to allow composition of a LoopPass(Manager) and a +/// FunctionPassManager. Note that if this pass is constructed with a \c +/// FunctionAnalysisManager it will run the \c LoopAnalysisManagerFunctionProxy +/// analysis prior to running the loop passes over the function to enable a \c +/// LoopAnalysisManager to be used within this run safely. +template <typename LoopPassT> +class FunctionToLoopPassAdaptor + : public PassInfoMixin<FunctionToLoopPassAdaptor<LoopPassT>> { +public: + explicit FunctionToLoopPassAdaptor(LoopPassT Pass) + : Pass(std::move(Pass)) {} + // We have to explicitly define all the special member functions because MSVC + // refuses to generate them. 
+ FunctionToLoopPassAdaptor(const FunctionToLoopPassAdaptor &Arg) + : Pass(Arg.Pass) {} + FunctionToLoopPassAdaptor(FunctionToLoopPassAdaptor &&Arg) + : Pass(std::move(Arg.Pass)) {} + friend void swap(FunctionToLoopPassAdaptor &LHS, + FunctionToLoopPassAdaptor &RHS) { + using std::swap; + swap(LHS.Pass, RHS.Pass); + } + FunctionToLoopPassAdaptor &operator=(FunctionToLoopPassAdaptor RHS) { + swap(*this, RHS); + return *this; + } + + /// \brief Runs the loop passes across every loop in the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) { + // Setup the loop analysis manager from its proxy. + LoopAnalysisManager &LAM = + AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); + // Get the loop structure for this function + LoopInfo &LI = AM.getResult<LoopAnalysis>(F); + + PreservedAnalyses PA = PreservedAnalyses::all(); + + // We want to visit the loops in reverse post-order. We'll build the stack + // of loops to visit in Loops by first walking the loops in pre-order. + SmallVector<Loop *, 2> Loops; + SmallVector<Loop *, 2> WorkList(LI.begin(), LI.end()); + while (!WorkList.empty()) { + Loop *L = WorkList.pop_back_val(); + WorkList.insert(WorkList.end(), L->begin(), L->end()); + Loops.push_back(L); + } + + // Now pop each element off of the stack to visit the loops in reverse + // post-order. + for (auto *L : reverse(Loops)) { + PreservedAnalyses PassPA = Pass.run(*L, LAM); + assert(PassPA.preserved(getLoopPassPreservedAnalyses()) && + "Loop passes must preserve all relevant analyses"); + + // We know that the loop pass couldn't have invalidated any other loop's + // analyses (that's the contract of a loop pass), so directly handle the + // loop analysis manager's invalidation here. Also, update the + // preserved analyses to reflect that once invalidated these can again + // be preserved. 
+ PassPA = LAM.invalidate(*L, std::move(PassPA)); + + // Then intersect the preserved set so that invalidation of module + // analyses will eventually occur when the module pass completes. + PA.intersect(std::move(PassPA)); + } + + // By definition we preserve the proxy. This precludes *any* invalidation of + // loop analyses by the proxy, but that's OK because we've taken care to + // invalidate analyses in the loop analysis manager incrementally above. + PA.preserve<LoopAnalysisManagerFunctionProxy>(); + return PA; + } + +private: + LoopPassT Pass; +}; + +/// \brief A function to deduce a loop pass type and wrap it in the templated +/// adaptor. +template <typename LoopPassT> +FunctionToLoopPassAdaptor<LoopPassT> +createFunctionToLoopPassAdaptor(LoopPassT Pass) { + return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass)); +} +} + +#endif // LLVM_ANALYSIS_LOOPPASSMANAGER_H diff --git a/include/llvm/Analysis/LoopUnrollAnalyzer.h b/include/llvm/Analysis/LoopUnrollAnalyzer.h new file mode 100644 index 000000000000..80f3e5fdcd43 --- /dev/null +++ b/include/llvm/Analysis/LoopUnrollAnalyzer.h @@ -0,0 +1,95 @@ +//===- llvm/Analysis/LoopUnrollAnalyzer.h - Loop Unroll Analyzer-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements UnrolledInstAnalyzer class. It's used for predicting +// potential effects that loop unrolling might have, such as enabling constant +// propagation and other optimizations. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPUNROLLANALYZER_H +#define LLVM_ANALYSIS_LOOPUNROLLANALYZER_H + +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/InstVisitor.h" + +// This class is used to get an estimate of the optimization effects that we +// could get from complete loop unrolling. It comes from the fact that some +// loads might be replaced with concrete constant values and that could trigger +// a chain of instruction simplifications. +// +// E.g. we might have: +// int a[] = {0, 1, 0}; +// v = 0; +// for (i = 0; i < 3; i ++) +// v += b[i]*a[i]; +// If we completely unroll the loop, we would get: +// v = b[0]*a[0] + b[1]*a[1] + b[2]*a[2] +// Which then will be simplified to: +// v = b[0]* 0 + b[1]* 1 + b[2]* 0 +// And finally: +// v = b[1] +namespace llvm { +class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> { + typedef InstVisitor<UnrolledInstAnalyzer, bool> Base; + friend class InstVisitor<UnrolledInstAnalyzer, bool>; + struct SimplifiedAddress { + Value *Base = nullptr; + ConstantInt *Offset = nullptr; + }; + +public: + UnrolledInstAnalyzer(unsigned Iteration, + DenseMap<Value *, Constant *> &SimplifiedValues, + ScalarEvolution &SE, const Loop *L) + : SimplifiedValues(SimplifiedValues), SE(SE), L(L) { + IterationNumber = SE.getConstant(APInt(64, Iteration)); + } + + // Allow access to the initial visit method. + using Base::visit; + +private: + /// \brief A cache of pointer bases and constant-folded offsets corresponding + /// to GEP (or derived from GEP) instructions. + /// + /// In order to find the base pointer one needs to perform non-trivial + /// traversal of the corresponding SCEV expression, so it's good to have the + /// results saved. 
+ DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses; + + /// \brief SCEV expression corresponding to number of currently simulated + /// iteration. + const SCEV *IterationNumber; + + /// \brief A Value->Constant map for keeping values that we managed to + /// constant-fold on the given iteration. + /// + /// While we walk the loop instructions, we build up and maintain a mapping + /// of simplified values specific to this iteration. The idea is to propagate + /// any special information we have about loads that can be replaced with + /// constants after complete unrolling, and account for likely simplifications + /// post-unrolling. + DenseMap<Value *, Constant *> &SimplifiedValues; + + ScalarEvolution &SE; + const Loop *L; + + bool simplifyInstWithSCEV(Instruction *I); + + bool visitInstruction(Instruction &I) { return simplifyInstWithSCEV(&I); } + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitCastInst(CastInst &I); + bool visitCmpInst(CmpInst &I); + bool visitPHINode(PHINode &PN); +}; +} +#endif diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 493a99a4b11e..140b731c59de 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -32,6 +32,11 @@ class TargetLibraryInfo; class Type; class Value; +enum class ObjSizeMode { + Exact = 0, + Min = 1, + Max = 2 +}; /// \brief Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup @@ -130,8 +135,11 @@ static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { /// underlying object pointed to by Ptr. /// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, /// byval arguments, and global variables. +/// If Mode is Min or Max the size will be evaluated even if it depends on +/// a condition and corresponding value will be returned (min or max). 
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, - const TargetLibraryInfo *TLI, bool RoundToAlign = false); + const TargetLibraryInfo *TLI, bool RoundToAlign = false, + ObjSizeMode Mode = ObjSizeMode::Exact); typedef std::pair<APInt, APInt> SizeOffsetType; @@ -143,6 +151,7 @@ class ObjectSizeOffsetVisitor const DataLayout &DL; const TargetLibraryInfo *TLI; bool RoundToAlign; + ObjSizeMode Mode; unsigned IntTyBits; APInt Zero; SmallPtrSet<Instruction *, 8> SeenInsts; @@ -155,19 +164,20 @@ class ObjectSizeOffsetVisitor public: ObjectSizeOffsetVisitor(const DataLayout &DL, const TargetLibraryInfo *TLI, - LLVMContext &Context, bool RoundToAlign = false); + LLVMContext &Context, bool RoundToAlign = false, + ObjSizeMode Mode = ObjSizeMode::Exact); SizeOffsetType compute(Value *V); - bool knownSize(SizeOffsetType &SizeOffset) { + static bool knownSize(const SizeOffsetType &SizeOffset) { return SizeOffset.first.getBitWidth() > 1; } - bool knownOffset(SizeOffsetType &SizeOffset) { + static bool knownOffset(const SizeOffsetType &SizeOffset) { return SizeOffset.second.getBitWidth() > 1; } - bool bothKnown(SizeOffsetType &SizeOffset) { + static bool bothKnown(const SizeOffsetType &SizeOffset) { return knownSize(SizeOffset) && knownOffset(SizeOffset); } @@ -198,7 +208,7 @@ typedef std::pair<Value*, Value*> SizeOffsetEvalType; class ObjectSizeOffsetEvaluator : public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> { - typedef IRBuilder<true, TargetFolder> BuilderTy; + typedef IRBuilder<TargetFolder> BuilderTy; typedef std::pair<WeakVH, WeakVH> WeakEvalType; typedef DenseMap<const Value*, WeakEvalType> CacheMapTy; typedef SmallPtrSet<const Value*, 8> PtrSetTy; diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index daa1ba91c071..b19dabbfc32e 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -15,452 +15,493 @@ 
#define LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PointerSumType.h" +#include "llvm/ADT/PointerEmbeddedInt.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" namespace llvm { - class Function; - class FunctionPass; - class Instruction; - class CallSite; - class AssumptionCache; - class MemoryDependenceAnalysis; - class PredIteratorCache; - class DominatorTree; - class PHITransAddr; - - /// MemDepResult - A memory dependence query can return one of three different - /// answers, described below. - class MemDepResult { - enum DepType { - /// Invalid - Clients of MemDep never see this. - Invalid = 0, - - /// Clobber - This is a dependence on the specified instruction which - /// clobbers the desired value. The pointer member of the MemDepResult - /// pair holds the instruction that clobbers the memory. For example, - /// this occurs when we see a may-aliased store to the memory location we - /// care about. - /// - /// There are several cases that may be interesting here: - /// 1. Loads are clobbered by may-alias stores. - /// 2. Loads are considered clobbered by partially-aliased loads. The - /// client may choose to analyze deeper into these cases. - Clobber, - - /// Def - This is a dependence on the specified instruction which - /// defines/produces the desired memory location. The pointer member of - /// the MemDepResult pair holds the instruction that defines the memory. - /// Cases of interest: - /// 1. This could be a load or store for dependence queries on - /// load/store. The value loaded or stored is the produced value. - /// Note that the pointer operand may be different than that of the - /// queried pointer due to must aliases and phi translation. 
Note - /// that the def may not be the same type as the query, the pointers - /// may just be must aliases. - /// 2. For loads and stores, this could be an allocation instruction. In - /// this case, the load is loading an undef value or a store is the - /// first store to (that part of) the allocation. - /// 3. Dependence queries on calls return Def only when they are - /// readonly calls or memory use intrinsics with identical callees - /// and no intervening clobbers. No validation is done that the - /// operands to the calls are the same. - Def, - - /// Other - This marker indicates that the query has no known dependency - /// in the specified block. More detailed state info is encoded in the - /// upper part of the pair (i.e. the Instruction*) - Other - }; - /// If DepType is "Other", the upper part of the pair - /// (i.e. the Instruction* part) is instead used to encode more detailed - /// type information as follows - enum OtherType { - /// NonLocal - This marker indicates that the query has no dependency in - /// the specified block. To find out more, the client should query other - /// predecessor blocks. - NonLocal = 0x4, - /// NonFuncLocal - This marker indicates that the query has no - /// dependency in the specified function. - NonFuncLocal = 0x8, - /// Unknown - This marker indicates that the query dependency - /// is unknown. - Unknown = 0xc - }; - - typedef PointerIntPair<Instruction*, 2, DepType> PairTy; - PairTy Value; - explicit MemDepResult(PairTy V) : Value(V) {} - - public: - MemDepResult() : Value(nullptr, Invalid) {} - - /// get methods: These are static ctor methods for creating various - /// MemDepResult kinds. 
- static MemDepResult getDef(Instruction *Inst) { - assert(Inst && "Def requires inst"); - return MemDepResult(PairTy(Inst, Def)); - } - static MemDepResult getClobber(Instruction *Inst) { - assert(Inst && "Clobber requires inst"); - return MemDepResult(PairTy(Inst, Clobber)); - } - static MemDepResult getNonLocal() { - return MemDepResult( - PairTy(reinterpret_cast<Instruction*>(NonLocal), Other)); - } - static MemDepResult getNonFuncLocal() { - return MemDepResult( - PairTy(reinterpret_cast<Instruction*>(NonFuncLocal), Other)); - } - static MemDepResult getUnknown() { - return MemDepResult( - PairTy(reinterpret_cast<Instruction*>(Unknown), Other)); - } - - /// isClobber - Return true if this MemDepResult represents a query that is - /// an instruction clobber dependency. - bool isClobber() const { return Value.getInt() == Clobber; } - - /// isDef - Return true if this MemDepResult represents a query that is - /// an instruction definition dependency. - bool isDef() const { return Value.getInt() == Def; } - - /// isNonLocal - Return true if this MemDepResult represents a query that - /// is transparent to the start of the block, but where a non-local hasn't - /// been done. - bool isNonLocal() const { - return Value.getInt() == Other - && Value.getPointer() == reinterpret_cast<Instruction*>(NonLocal); - } - - /// isNonFuncLocal - Return true if this MemDepResult represents a query - /// that is transparent to the start of the function. - bool isNonFuncLocal() const { - return Value.getInt() == Other - && Value.getPointer() == reinterpret_cast<Instruction*>(NonFuncLocal); - } - - /// isUnknown - Return true if this MemDepResult represents a query which - /// cannot and/or will not be computed. - bool isUnknown() const { - return Value.getInt() == Other - && Value.getPointer() == reinterpret_cast<Instruction*>(Unknown); - } - - /// getInst() - If this is a normal dependency, return the instruction that - /// is depended on. Otherwise, return null. 
- Instruction *getInst() const { - if (Value.getInt() == Other) return nullptr; - return Value.getPointer(); - } - - bool operator==(const MemDepResult &M) const { return Value == M.Value; } - bool operator!=(const MemDepResult &M) const { return Value != M.Value; } - bool operator<(const MemDepResult &M) const { return Value < M.Value; } - bool operator>(const MemDepResult &M) const { return Value > M.Value; } - - private: - friend class MemoryDependenceAnalysis; - /// Dirty - Entries with this marker occur in a LocalDeps map or - /// NonLocalDeps map when the instruction they previously referenced was - /// removed from MemDep. In either case, the entry may include an - /// instruction pointer. If so, the pointer is an instruction in the - /// block where scanning can start from, saving some work. +class Function; +class FunctionPass; +class Instruction; +class CallSite; +class AssumptionCache; +class MemoryDependenceResults; +class PredIteratorCache; +class DominatorTree; +class PHITransAddr; + +/// A memory dependence query can return one of three different answers. +class MemDepResult { + enum DepType { + /// Clients of MemDep never see this. /// - /// In a default-constructed MemDepResult object, the type will be Dirty - /// and the instruction pointer will be null. + /// Entries with this marker occur in a LocalDeps map or NonLocalDeps map + /// when the instruction they previously referenced was removed from + /// MemDep. In either case, the entry may include an instruction pointer. + /// If so, the pointer is an instruction in the block where scanning can + /// start from, saving some work. /// + /// In a default-constructed MemDepResult object, the type will be Invalid + /// and the instruction pointer will be null. + Invalid = 0, - /// isDirty - Return true if this is a MemDepResult in its dirty/invalid. - /// state. 
- bool isDirty() const { return Value.getInt() == Invalid; } - - static MemDepResult getDirty(Instruction *Inst) { - return MemDepResult(PairTy(Inst, Invalid)); - } + /// This is a dependence on the specified instruction which clobbers the + /// desired value. The pointer member of the MemDepResult pair holds the + /// instruction that clobbers the memory. For example, this occurs when we + /// see a may-aliased store to the memory location we care about. + /// + /// There are several cases that may be interesting here: + /// 1. Loads are clobbered by may-alias stores. + /// 2. Loads are considered clobbered by partially-aliased loads. The + /// client may choose to analyze deeper into these cases. + Clobber, + + /// This is a dependence on the specified instruction which defines or + /// produces the desired memory location. The pointer member of the + /// MemDepResult pair holds the instruction that defines the memory. + /// + /// Cases of interest: + /// 1. This could be a load or store for dependence queries on + /// load/store. The value loaded or stored is the produced value. + /// Note that the pointer operand may be different than that of the + /// queried pointer due to must aliases and phi translation. Note + /// that the def may not be the same type as the query, the pointers + /// may just be must aliases. + /// 2. For loads and stores, this could be an allocation instruction. In + /// this case, the load is loading an undef value or a store is the + /// first store to (that part of) the allocation. + /// 3. Dependence queries on calls return Def only when they are readonly + /// calls or memory use intrinsics with identical callees and no + /// intervening clobbers. No validation is done that the operands to + /// the calls are the same. + Def, + + /// This marker indicates that the query has no known dependency in the + /// specified block. + /// + /// More detailed state info is encoded in the upper part of the pair (i.e. 
+ /// the Instruction*) + Other }; - /// NonLocalDepEntry - This is an entry in the NonLocalDepInfo cache. For - /// each BasicBlock (the BB entry) it keeps a MemDepResult. - class NonLocalDepEntry { - BasicBlock *BB; - MemDepResult Result; + /// If DepType is "Other", the upper part of the sum type is an encoding of + /// the following more detailed type information. + enum OtherType { + /// This marker indicates that the query has no dependency in the specified + /// block. + /// + /// To find out more, the client should query other predecessor blocks. + NonLocal = 1, + /// This marker indicates that the query has no dependency in the specified + /// function. + NonFuncLocal, + /// This marker indicates that the query dependency is unknown. + Unknown + }; - public: - NonLocalDepEntry(BasicBlock *bb, MemDepResult result) + typedef PointerSumType< + DepType, PointerSumTypeMember<Invalid, Instruction *>, + PointerSumTypeMember<Clobber, Instruction *>, + PointerSumTypeMember<Def, Instruction *>, + PointerSumTypeMember<Other, PointerEmbeddedInt<OtherType, 3>>> + ValueTy; + ValueTy Value; + explicit MemDepResult(ValueTy V) : Value(V) {} + +public: + MemDepResult() : Value() {} + + /// get methods: These are static ctor methods for creating various + /// MemDepResult kinds. + static MemDepResult getDef(Instruction *Inst) { + assert(Inst && "Def requires inst"); + return MemDepResult(ValueTy::create<Def>(Inst)); + } + static MemDepResult getClobber(Instruction *Inst) { + assert(Inst && "Clobber requires inst"); + return MemDepResult(ValueTy::create<Clobber>(Inst)); + } + static MemDepResult getNonLocal() { + return MemDepResult(ValueTy::create<Other>(NonLocal)); + } + static MemDepResult getNonFuncLocal() { + return MemDepResult(ValueTy::create<Other>(NonFuncLocal)); + } + static MemDepResult getUnknown() { + return MemDepResult(ValueTy::create<Other>(Unknown)); + } + + /// Tests if this MemDepResult represents a query that is an instruction + /// clobber dependency. 
+ bool isClobber() const { return Value.is<Clobber>(); } + + /// Tests if this MemDepResult represents a query that is an instruction + /// definition dependency. + bool isDef() const { return Value.is<Def>(); } + + /// Tests if this MemDepResult represents a query that is transparent to the + /// start of the block, but where a non-local hasn't been done. + bool isNonLocal() const { + return Value.is<Other>() && Value.cast<Other>() == NonLocal; + } + + /// Tests if this MemDepResult represents a query that is transparent to the + /// start of the function. + bool isNonFuncLocal() const { + return Value.is<Other>() && Value.cast<Other>() == NonFuncLocal; + } + + /// Tests if this MemDepResult represents a query which cannot and/or will + /// not be computed. + bool isUnknown() const { + return Value.is<Other>() && Value.cast<Other>() == Unknown; + } + + /// If this is a normal dependency, returns the instruction that is depended + /// on. Otherwise, returns null. + Instruction *getInst() const { + switch (Value.getTag()) { + case Invalid: + return Value.cast<Invalid>(); + case Clobber: + return Value.cast<Clobber>(); + case Def: + return Value.cast<Def>(); + case Other: + return nullptr; + } + llvm_unreachable("Unknown discriminant!"); + } + + bool operator==(const MemDepResult &M) const { return Value == M.Value; } + bool operator!=(const MemDepResult &M) const { return Value != M.Value; } + bool operator<(const MemDepResult &M) const { return Value < M.Value; } + bool operator>(const MemDepResult &M) const { return Value > M.Value; } + +private: + friend class MemoryDependenceResults; + + /// Tests if this is a MemDepResult in its dirty/invalid. state. + bool isDirty() const { return Value.is<Invalid>(); } + + static MemDepResult getDirty(Instruction *Inst) { + return MemDepResult(ValueTy::create<Invalid>(Inst)); + } +}; + +/// This is an entry in the NonLocalDepInfo cache. +/// +/// For each BasicBlock (the BB entry) it keeps a MemDepResult. 
+class NonLocalDepEntry { + BasicBlock *BB; + MemDepResult Result; + +public: + NonLocalDepEntry(BasicBlock *bb, MemDepResult result) : BB(bb), Result(result) {} - // This is used for searches. - NonLocalDepEntry(BasicBlock *bb) : BB(bb) {} + // This is used for searches. + NonLocalDepEntry(BasicBlock *bb) : BB(bb) {} - // BB is the sort key, it can't be changed. - BasicBlock *getBB() const { return BB; } + // BB is the sort key, it can't be changed. + BasicBlock *getBB() const { return BB; } - void setResult(const MemDepResult &R) { Result = R; } + void setResult(const MemDepResult &R) { Result = R; } - const MemDepResult &getResult() const { return Result; } + const MemDepResult &getResult() const { return Result; } - bool operator<(const NonLocalDepEntry &RHS) const { - return BB < RHS.BB; - } - }; + bool operator<(const NonLocalDepEntry &RHS) const { return BB < RHS.BB; } +}; - /// NonLocalDepResult - This is a result from a NonLocal dependence query. - /// For each BasicBlock (the BB entry) it keeps a MemDepResult and the - /// (potentially phi translated) address that was live in the block. - class NonLocalDepResult { - NonLocalDepEntry Entry; - Value *Address; +/// This is a result from a NonLocal dependence query. +/// +/// For each BasicBlock (the BB entry) it keeps a MemDepResult and the +/// (potentially phi translated) address that was live in the block. +class NonLocalDepResult { + NonLocalDepEntry Entry; + Value *Address; - public: - NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address) +public: + NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address) : Entry(bb, result), Address(address) {} - // BB is the sort key, it can't be changed. - BasicBlock *getBB() const { return Entry.getBB(); } + // BB is the sort key, it can't be changed. 
+ BasicBlock *getBB() const { return Entry.getBB(); } - void setResult(const MemDepResult &R, Value *Addr) { - Entry.setResult(R); - Address = Addr; - } + void setResult(const MemDepResult &R, Value *Addr) { + Entry.setResult(R); + Address = Addr; + } - const MemDepResult &getResult() const { return Entry.getResult(); } + const MemDepResult &getResult() const { return Entry.getResult(); } - /// getAddress - Return the address of this pointer in this block. This can - /// be different than the address queried for the non-local result because - /// of phi translation. This returns null if the address was not available - /// in a block (i.e. because phi translation failed) or if this is a cached - /// result and that address was deleted. + /// Returns the address of this pointer in this block. + /// + /// This can be different than the address queried for the non-local result + /// because of phi translation. This returns null if the address was not + /// available in a block (i.e. because phi translation failed) or if this is + /// a cached result and that address was deleted. + /// + /// The address is always null for a non-local 'call' dependence. + Value *getAddress() const { return Address; } +}; + +/// Provides a lazy, caching interface for making common memory aliasing +/// information queries, backed by LLVM's alias analysis passes. +/// +/// The dependency information returned is somewhat unusual, but is pragmatic. +/// If queried about a store or call that might modify memory, the analysis +/// will return the instruction[s] that may either load from that memory or +/// store to it. If queried with a load or call that can never modify memory, +/// the analysis will return calls and stores that might modify the pointer, +/// but generally does not return loads unless a) they are volatile, or +/// b) they load from *must-aliased* pointers. 
Returning a dependence on +/// must-alias'd pointers instead of all pointers interacts well with the +/// internal caching mechanism. +class MemoryDependenceResults { + // A map from instructions to their dependency. + typedef DenseMap<Instruction *, MemDepResult> LocalDepMapType; + LocalDepMapType LocalDeps; + +public: + typedef std::vector<NonLocalDepEntry> NonLocalDepInfo; + +private: + /// A pair<Value*, bool> where the bool is true if the dependence is a read + /// only dependence, false if read/write. + typedef PointerIntPair<const Value *, 1, bool> ValueIsLoadPair; + + /// This pair is used when caching information for a block. + /// + /// If the pointer is null, the cache value is not a full query that starts + /// at the specified block. If non-null, the bool indicates whether or not + /// the contents of the block was skipped. + typedef PointerIntPair<BasicBlock *, 1, bool> BBSkipFirstBlockPair; + + /// This record is the information kept for each (value, is load) pair. + struct NonLocalPointerInfo { + /// The pair of the block and the skip-first-block flag. + BBSkipFirstBlockPair Pair; + /// The results of the query for each relevant block. + NonLocalDepInfo NonLocalDeps; + /// The maximum size of the dereferences of the pointer. + /// + /// May be UnknownSize if the sizes are unknown. + uint64_t Size; + /// The AA tags associated with dereferences of the pointer. /// - /// The address is always null for a non-local 'call' dependence. - Value *getAddress() const { return Address; } + /// The members may be null if there are no tags or conflicting tags. + AAMDNodes AATags; + + NonLocalPointerInfo() : Size(MemoryLocation::UnknownSize) {} }; - /// MemoryDependenceAnalysis - This is an analysis that determines, for a - /// given memory operation, what preceding memory operations it depends on. - /// It builds on alias analysis information, and tries to provide a lazy, - /// caching interface to a common kind of alias information query. 
+ /// This map stores the cached results of doing a pointer lookup at the + /// bottom of a block. /// - /// The dependency information returned is somewhat unusual, but is pragmatic. - /// If queried about a store or call that might modify memory, the analysis - /// will return the instruction[s] that may either load from that memory or - /// store to it. If queried with a load or call that can never modify memory, - /// the analysis will return calls and stores that might modify the pointer, - /// but generally does not return loads unless a) they are volatile, or - /// b) they load from *must-aliased* pointers. Returning a dependence on - /// must-alias'd pointers instead of all pointers interacts well with the - /// internal caching mechanism. + /// The key of this map is the pointer+isload bit, the value is a list of + /// <bb->result> mappings. + typedef DenseMap<ValueIsLoadPair, NonLocalPointerInfo> + CachedNonLocalPointerInfo; + CachedNonLocalPointerInfo NonLocalPointerDeps; + + // A map from instructions to their non-local pointer dependencies. + typedef DenseMap<Instruction *, SmallPtrSet<ValueIsLoadPair, 4>> + ReverseNonLocalPtrDepTy; + ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps; + + /// This is the instruction we keep for each cached access that we have for + /// an instruction. /// - class MemoryDependenceAnalysis : public FunctionPass { - // A map from instructions to their dependency. - typedef DenseMap<Instruction*, MemDepResult> LocalDepMapType; - LocalDepMapType LocalDeps; - - public: - typedef std::vector<NonLocalDepEntry> NonLocalDepInfo; - - private: - /// ValueIsLoadPair - This is a pair<Value*, bool> where the bool is true if - /// the dependence is a read only dependence, false if read/write. - typedef PointerIntPair<const Value*, 1, bool> ValueIsLoadPair; - - /// BBSkipFirstBlockPair - This pair is used when caching information for a - /// block. 
If the pointer is null, the cache value is not a full query that - /// starts at the specified block. If non-null, the bool indicates whether - /// or not the contents of the block was skipped. - typedef PointerIntPair<BasicBlock*, 1, bool> BBSkipFirstBlockPair; - - /// NonLocalPointerInfo - This record is the information kept for each - /// (value, is load) pair. - struct NonLocalPointerInfo { - /// Pair - The pair of the block and the skip-first-block flag. - BBSkipFirstBlockPair Pair; - /// NonLocalDeps - The results of the query for each relevant block. - NonLocalDepInfo NonLocalDeps; - /// Size - The maximum size of the dereferences of the - /// pointer. May be UnknownSize if the sizes are unknown. - uint64_t Size; - /// AATags - The AA tags associated with dereferences of the - /// pointer. The members may be null if there are no tags or - /// conflicting tags. - AAMDNodes AATags; - - NonLocalPointerInfo() : Size(MemoryLocation::UnknownSize) {} - }; - - /// CachedNonLocalPointerInfo - This map stores the cached results of doing - /// a pointer lookup at the bottom of a block. The key of this map is the - /// pointer+isload bit, the value is a list of <bb->result> mappings. - typedef DenseMap<ValueIsLoadPair, - NonLocalPointerInfo> CachedNonLocalPointerInfo; - CachedNonLocalPointerInfo NonLocalPointerDeps; - - // A map from instructions to their non-local pointer dependencies. - typedef DenseMap<Instruction*, - SmallPtrSet<ValueIsLoadPair, 4> > ReverseNonLocalPtrDepTy; - ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps; - - /// PerInstNLInfo - This is the instruction we keep for each cached access - /// that we have for an instruction. The pointer is an owning pointer and - /// the bool indicates whether we have any dirty bits in the set. - typedef std::pair<NonLocalDepInfo, bool> PerInstNLInfo; - - // A map from instructions to their non-local dependencies. 
- typedef DenseMap<Instruction*, PerInstNLInfo> NonLocalDepMapType; - - NonLocalDepMapType NonLocalDeps; - - // A reverse mapping from dependencies to the dependees. This is - // used when removing instructions to keep the cache coherent. - typedef DenseMap<Instruction*, - SmallPtrSet<Instruction*, 4> > ReverseDepMapType; - ReverseDepMapType ReverseLocalDeps; - - // A reverse mapping from dependencies to the non-local dependees. - ReverseDepMapType ReverseNonLocalDeps; - - /// Current AA implementation, just a cache. - AliasAnalysis *AA; - DominatorTree *DT; - AssumptionCache *AC; - const TargetLibraryInfo *TLI; - PredIteratorCache PredCache; - - public: - MemoryDependenceAnalysis(); - ~MemoryDependenceAnalysis() override; - static char ID; - - /// Pass Implementation stuff. This doesn't do any analysis eagerly. - bool runOnFunction(Function &) override; - - /// Clean up memory in between runs - void releaseMemory() override; - - /// getAnalysisUsage - Does not modify anything. It uses Value Numbering - /// and Alias Analysis. - /// - void getAnalysisUsage(AnalysisUsage &AU) const override; - - /// getDependency - Return the instruction on which a memory operation - /// depends. See the class comment for more details. It is illegal to call - /// this on non-memory instructions. - MemDepResult getDependency(Instruction *QueryInst); + /// The pointer is an owning pointer and the bool indicates whether we have + /// any dirty bits in the set. + typedef std::pair<NonLocalDepInfo, bool> PerInstNLInfo; + + // A map from instructions to their non-local dependencies. + typedef DenseMap<Instruction *, PerInstNLInfo> NonLocalDepMapType; + + NonLocalDepMapType NonLocalDeps; + + // A reverse mapping from dependencies to the dependees. This is + // used when removing instructions to keep the cache coherent. 
+ typedef DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> + ReverseDepMapType; + ReverseDepMapType ReverseLocalDeps; + + // A reverse mapping from dependencies to the non-local dependees. + ReverseDepMapType ReverseNonLocalDeps; + + /// Current AA implementation, just a cache. + AliasAnalysis &AA; + AssumptionCache &AC; + const TargetLibraryInfo &TLI; + DominatorTree &DT; + PredIteratorCache PredCache; + +public: + MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC, + const TargetLibraryInfo &TLI, + DominatorTree &DT) + : AA(AA), AC(AC), TLI(TLI), DT(DT) {} + + /// Returns the instruction on which a memory operation depends. + /// + /// See the class comment for more details. It is illegal to call this on + /// non-memory instructions. + MemDepResult getDependency(Instruction *QueryInst); - /// getNonLocalCallDependency - Perform a full dependency query for the - /// specified call, returning the set of blocks that the value is - /// potentially live across. The returned set of results will include a - /// "NonLocal" result for all blocks where the value is live across. - /// - /// This method assumes the instruction returns a "NonLocal" dependency - /// within its own block. - /// - /// This returns a reference to an internal data structure that may be - /// invalidated on the next non-local query or when an instruction is - /// removed. Clients must copy this data if they want it around longer than - /// that. - const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS); - - /// getNonLocalPointerDependency - Perform a full dependency query for an - /// access to the QueryInst's specified memory location, returning the set - /// of instructions that either define or clobber the value. - /// - /// Warning: For a volatile query instruction, the dependencies will be - /// accurate, and thus usable for reordering, but it is never legal to - /// remove the query instruction. 
- /// - /// This method assumes the pointer has a "NonLocal" dependency within - /// QueryInst's parent basic block. - void getNonLocalPointerDependency(Instruction *QueryInst, + /// Perform a full dependency query for the specified call, returning the set + /// of blocks that the value is potentially live across. + /// + /// The returned set of results will include a "NonLocal" result for all + /// blocks where the value is live across. + /// + /// This method assumes the instruction returns a "NonLocal" dependency + /// within its own block. + /// + /// This returns a reference to an internal data structure that may be + /// invalidated on the next non-local query or when an instruction is + /// removed. Clients must copy this data if they want it around longer than + /// that. + const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS); + + /// Perform a full dependency query for an access to the QueryInst's + /// specified memory location, returning the set of instructions that either + /// define or clobber the value. + /// + /// Warning: For a volatile query instruction, the dependencies will be + /// accurate, and thus usable for reordering, but it is never legal to + /// remove the query instruction. + /// + /// This method assumes the pointer has a "NonLocal" dependency within + /// QueryInst's parent basic block. + void getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result); - /// removeInstruction - Remove an instruction from the dependence analysis, - /// updating the dependence of instructions that previously depended on it. - void removeInstruction(Instruction *InstToRemove); - - /// invalidateCachedPointerInfo - This method is used to invalidate cached - /// information about the specified pointer, because it may be too - /// conservative in memdep. 
This is an optional call that can be used when - /// the client detects an equivalence between the pointer and some other - /// value and replaces the other value with ptr. This can make Ptr available - /// in more places that cached info does not necessarily keep. - void invalidateCachedPointerInfo(Value *Ptr); - - /// invalidateCachedPredecessors - Clear the PredIteratorCache info. - /// This needs to be done when the CFG changes, e.g., due to splitting - /// critical edges. - void invalidateCachedPredecessors(); - - /// \brief Return the instruction on which a memory location depends. - /// If isLoad is true, this routine ignores may-aliases with read-only - /// operations. If isLoad is false, this routine ignores may-aliases - /// with reads from read-only locations. If possible, pass the query - /// instruction as well; this function may take advantage of the metadata - /// annotated to the query instruction to refine the result. - /// - /// Note that this is an uncached query, and thus may be inefficient. - /// - MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, - bool isLoad, - BasicBlock::iterator ScanIt, - BasicBlock *BB, - Instruction *QueryInst = nullptr); - - MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, - bool isLoad, - BasicBlock::iterator ScanIt, - BasicBlock *BB, - Instruction *QueryInst); - - /// This analysis looks for other loads and stores with invariant.group - /// metadata and the same pointer operand. Returns Unknown if it does not - /// find anything, and Def if it can be assumed that 2 instructions load or - /// store the same value. - /// FIXME: This analysis works only on single block because of restrictions - /// at the call site. 
- MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, - BasicBlock *BB); - - /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that - /// looks at a memory location for a load (specified by MemLocBase, Offs, - /// and Size) and compares it against a load. If the specified load could - /// be safely widened to a larger integer load that is 1) still efficient, - /// 2) safe for the target, and 3) would provide the specified memory - /// location value, then this function returns the size in bytes of the - /// load width to use. If not, this returns zero. - static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase, - int64_t MemLocOffs, - unsigned MemLocSize, - const LoadInst *LI); - - private: - MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall, - BasicBlock::iterator ScanIt, - BasicBlock *BB); - bool getNonLocalPointerDepFromBB(Instruction *QueryInst, - const PHITransAddr &Pointer, - const MemoryLocation &Loc, bool isLoad, - BasicBlock *BB, - SmallVectorImpl<NonLocalDepResult> &Result, - DenseMap<BasicBlock *, Value *> &Visited, - bool SkipFirstBlock = false); - MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst, - const MemoryLocation &Loc, bool isLoad, - BasicBlock *BB, NonLocalDepInfo *Cache, - unsigned NumSortedEntries); - - void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P); - - /// verifyRemoved - Verify that the specified instruction does not occur - /// in our internal data structures. - void verifyRemoved(Instruction *Inst) const; - }; + /// Removes an instruction from the dependence analysis, updating the + /// dependence of instructions that previously depended on it. + void removeInstruction(Instruction *InstToRemove); + + /// Invalidates cached information about the specified pointer, because it + /// may be too conservative in memdep. 
+ /// + /// This is an optional call that can be used when the client detects an + /// equivalence between the pointer and some other value and replaces the + /// other value with ptr. This can make Ptr available in more places that + /// cached info does not necessarily keep. + void invalidateCachedPointerInfo(Value *Ptr); + + /// Clears the PredIteratorCache info. + /// + /// This needs to be done when the CFG changes, e.g., due to splitting + /// critical edges. + void invalidateCachedPredecessors(); + + /// Returns the instruction on which a memory location depends. + /// + /// If isLoad is true, this routine ignores may-aliases with read-only + /// operations. If isLoad is false, this routine ignores may-aliases + /// with reads from read-only locations. If possible, pass the query + /// instruction as well; this function may take advantage of the metadata + /// annotated to the query instruction to refine the result. + /// + /// Note that this is an uncached query, and thus may be inefficient. + MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB, + Instruction *QueryInst = nullptr); + + MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, + bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB, + Instruction *QueryInst); + + /// This analysis looks for other loads and stores with invariant.group + /// metadata and the same pointer operand. Returns Unknown if it does not + /// find anything, and Def if it can be assumed that 2 instructions load or + /// store the same value. + /// FIXME: This analysis works only on single block because of restrictions + /// at the call site. + MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB); + + /// Looks at a memory location for a load (specified by MemLocBase, Offs, and + /// Size) and compares it against a load. 
+ /// + /// If the specified load could be safely widened to a larger integer load + /// that is 1) still efficient, 2) safe for the target, and 3) would provide + /// the specified memory location value, then this function returns the size + /// in bytes of the load width to use. If not, this returns zero. + static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase, + int64_t MemLocOffs, + unsigned MemLocSize, + const LoadInst *LI); + + /// Release memory in caches. + void releaseMemory(); + +private: + MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall, + BasicBlock::iterator ScanIt, + BasicBlock *BB); + bool getNonLocalPointerDepFromBB(Instruction *QueryInst, + const PHITransAddr &Pointer, + const MemoryLocation &Loc, bool isLoad, + BasicBlock *BB, + SmallVectorImpl<NonLocalDepResult> &Result, + DenseMap<BasicBlock *, Value *> &Visited, + bool SkipFirstBlock = false); + MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst, + const MemoryLocation &Loc, bool isLoad, + BasicBlock *BB, NonLocalDepInfo *Cache, + unsigned NumSortedEntries); + + void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P); + + void verifyRemoved(Instruction *Inst) const; +}; + +/// An analysis that produces \c MemoryDependenceResults for a function. +/// +/// This is essentially a no-op because the results are computed entirely +/// lazily. +class MemoryDependenceAnalysis + : public AnalysisInfoMixin<MemoryDependenceAnalysis> { + friend AnalysisInfoMixin<MemoryDependenceAnalysis>; + static char PassID; + +public: + typedef MemoryDependenceResults Result; + + MemoryDependenceResults run(Function &F, AnalysisManager<Function> &AM); +}; + +/// A wrapper analysis pass for the legacy pass manager that exposes a \c +/// MemoryDepnedenceResults instance. 
+class MemoryDependenceWrapperPass : public FunctionPass { + Optional<MemoryDependenceResults> MemDep; +public: + MemoryDependenceWrapperPass(); + ~MemoryDependenceWrapperPass() override; + static char ID; + + /// Pass Implementation stuff. This doesn't do any analysis eagerly. + bool runOnFunction(Function &) override; + + /// Clean up memory in between runs + void releaseMemory() override; + + /// Does not modify anything. It uses Value Numbering and Alias Analysis. + void getAnalysisUsage(AnalysisUsage &AU) const override; + + MemoryDependenceResults &getMemDep() { return *MemDep; } +}; } // End llvm namespace diff --git a/include/llvm/Analysis/MemoryLocation.h b/include/llvm/Analysis/MemoryLocation.h index 426b49a3ecd7..f2cb2a123f2e 100644 --- a/include/llvm/Analysis/MemoryLocation.h +++ b/include/llvm/Analysis/MemoryLocation.h @@ -16,7 +16,7 @@ #ifndef LLVM_ANALYSIS_MEMORYLOCATION_H #define LLVM_ANALYSIS_MEMORYLOCATION_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Metadata.h" diff --git a/include/llvm/Analysis/ModuleSummaryAnalysis.h b/include/llvm/Analysis/ModuleSummaryAnalysis.h new file mode 100644 index 000000000000..9f03610ba5b1 --- /dev/null +++ b/include/llvm/Analysis/ModuleSummaryAnalysis.h @@ -0,0 +1,91 @@ +//===- ModuleSummaryAnalysis.h - Module summary index builder ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface to build a ModuleSummaryIndex for a module. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MODULESUMMARYANALYSIS_H +#define LLVM_ANALYSIS_MODULESUMMARYANALYSIS_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/Pass.h" + +namespace llvm { + +class BlockFrequencyInfo; + +/// Class to build a module summary index for the given Module, possibly from +/// a Pass. +class ModuleSummaryIndexBuilder { + /// The index being built + std::unique_ptr<ModuleSummaryIndex> Index; + /// The module for which we are building an index + const Module *M; + +public: + /// Default constructor + ModuleSummaryIndexBuilder() = default; + + /// Constructor that builds an index for the given Module. An optional + /// callback can be supplied to obtain the frequency info for a function. + ModuleSummaryIndexBuilder( + const Module *M, + std::function<BlockFrequencyInfo *(const Function &F)> Ftor = nullptr); + + /// Get a reference to the index owned by builder + ModuleSummaryIndex &getIndex() const { return *Index; } + + /// Take ownership of the built index + std::unique_ptr<ModuleSummaryIndex> takeIndex() { return std::move(Index); } + +private: + /// Compute summary for given function with optional frequency information + void computeFunctionSummary(const Function &F, + BlockFrequencyInfo *BFI = nullptr); + + /// Compute summary for given variable with optional frequency information + void computeVariableSummary(const GlobalVariable &V); +}; + +/// Legacy wrapper pass to provide the ModuleSummaryIndex object. 
+class ModuleSummaryIndexWrapperPass : public ModulePass { + std::unique_ptr<ModuleSummaryIndexBuilder> IndexBuilder; + +public: + static char ID; + + ModuleSummaryIndexWrapperPass(); + + /// Get the index built by pass + ModuleSummaryIndex &getIndex() { return IndexBuilder->getIndex(); } + const ModuleSummaryIndex &getIndex() const { + return IndexBuilder->getIndex(); + } + + bool runOnModule(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createModuleSummaryIndexWrapperPass - This pass builds a ModuleSummaryIndex +// object for the module, to be written to bitcode or LLVM assembly. +// +ModulePass *createModuleSummaryIndexWrapperPass(); + +/// Returns true if \p M is eligible for ThinLTO promotion. +/// +/// Currently we check if it has any any InlineASM that uses an internal symbol. +bool moduleCanBeRenamedForThinLTO(const Module &M); +} + +#endif diff --git a/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/include/llvm/Analysis/ObjCARCAliasAnalysis.h index ac01154bac6c..067a964bcce1 100644 --- a/include/llvm/Analysis/ObjCARCAliasAnalysis.h +++ b/include/llvm/Analysis/ObjCARCAliasAnalysis.h @@ -24,7 +24,6 @@ #define LLVM_ANALYSIS_OBJCARCALIASANALYSIS_H #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Pass.h" namespace llvm { @@ -42,8 +41,7 @@ class ObjCARCAAResult : public AAResultBase<ObjCARCAAResult> { const DataLayout &DL; public: - explicit ObjCARCAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI) - : AAResultBase(TLI), DL(DL) {} + explicit ObjCARCAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {} ObjCARCAAResult(ObjCARCAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL) {} @@ -63,20 +61,14 @@ public: }; /// Analysis pass providing a never-invalidated alias analysis result. 
-class ObjCARCAA { +class ObjCARCAA : public AnalysisInfoMixin<ObjCARCAA> { + friend AnalysisInfoMixin<ObjCARCAA>; + static char PassID; + public: typedef ObjCARCAAResult Result; - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - ObjCARCAAResult run(Function &F, AnalysisManager<Function> *AM); - - /// \brief Provide access to a name for this pass for debugging purposes. - static StringRef name() { return "ObjCARCAA"; } - -private: - static char PassID; + ObjCARCAAResult run(Function &F, AnalysisManager<Function> &AM); }; /// Legacy wrapper pass to provide the ObjCARCAAResult object. diff --git a/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/include/llvm/Analysis/ObjCARCAnalysisUtils.h index 29d99c9d316d..5f4d8ecbbfbb 100644 --- a/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -54,6 +54,7 @@ inline bool ModuleHasARC(const Module &M) { M.getNamedValue("objc_release") || M.getNamedValue("objc_autorelease") || M.getNamedValue("objc_retainAutoreleasedReturnValue") || + M.getNamedValue("objc_unsafeClaimAutoreleasedReturnValue") || M.getNamedValue("objc_retainBlock") || M.getNamedValue("objc_autoreleaseReturnValue") || M.getNamedValue("objc_autoreleasePoolPush") || diff --git a/include/llvm/Analysis/ObjCARCInstKind.h b/include/llvm/Analysis/ObjCARCInstKind.h index 13efb4b160be..3b37ddf78f58 100644 --- a/include/llvm/Analysis/ObjCARCInstKind.h +++ b/include/llvm/Analysis/ObjCARCInstKind.h @@ -30,6 +30,7 @@ namespace objcarc { enum class ARCInstKind { Retain, ///< objc_retain RetainRV, ///< objc_retainAutoreleasedReturnValue + ClaimRV, ///< objc_unsafeClaimAutoreleasedReturnValue RetainBlock, ///< objc_retainBlock Release, ///< objc_release Autorelease, ///< objc_autorelease diff --git a/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/include/llvm/Analysis/OptimizationDiagnosticInfo.h new file mode 100644 index 000000000000..b455a6527bf6 --- /dev/null 
+++ b/include/llvm/Analysis/OptimizationDiagnosticInfo.h @@ -0,0 +1,104 @@ +//===- OptimizationDiagnosticInfo.h - Optimization Diagnostic ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Optimization diagnostic interfaces. It's packaged as an analysis pass so +// that by using this service passes become dependent on BFI as well. BFI is +// used to compute the "hotness" of the diagnostic message. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H +#define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H + +#include "llvm/ADT/Optional.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { +class BlockFrequencyInfo; +class DebugLoc; +class Function; +class LLVMContext; +class Loop; +class Pass; +class Twine; +class Value; + +class OptimizationRemarkEmitter { +public: + OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI) + : F(F), BFI(BFI) {} + + OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg) + : F(Arg.F), BFI(Arg.BFI) {} + + OptimizationRemarkEmitter &operator=(OptimizationRemarkEmitter &&RHS) { + F = RHS.F; + BFI = RHS.BFI; + return *this; + } + + /// Emit an optimization-missed message. + /// + /// \p PassName is the name of the pass emitting the message. If + /// -Rpass-missed= is given and the name matches the regular expression in + /// -Rpass, then the remark will be emitted. \p Fn is the function triggering + /// the remark, \p DLoc is the debug location where the diagnostic is + /// generated. \p V is the IR Value that identifies the code region. \p Msg is + /// the message string to use. 
+ void emitOptimizationRemarkMissed(const char *PassName, const DebugLoc &DLoc, + Value *V, const Twine &Msg); + + /// \brief Same as above but derives the IR Value for the code region and the + /// debug location from the Loop parameter \p L. + void emitOptimizationRemarkMissed(const char *PassName, Loop *L, + const Twine &Msg); + +private: + Function *F; + + BlockFrequencyInfo *BFI; + + Optional<uint64_t> computeHotness(Value *V); + + OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete; + void operator=(const OptimizationRemarkEmitter &) = delete; +}; + +class OptimizationRemarkEmitterWrapperPass : public FunctionPass { + std::unique_ptr<OptimizationRemarkEmitter> ORE; + +public: + OptimizationRemarkEmitterWrapperPass(); + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + OptimizationRemarkEmitter &getORE() { + assert(ORE && "pass not run yet"); + return *ORE; + } + + static char ID; +}; + +class OptimizationRemarkEmitterAnalysis + : public AnalysisInfoMixin<OptimizationRemarkEmitterAnalysis> { + friend AnalysisInfoMixin<OptimizationRemarkEmitterAnalysis>; + static char PassID; + +public: + /// \brief Provide the result typedef for this analysis pass. + typedef OptimizationRemarkEmitter Result; + + /// \brief Run the analysis pass over a function and produce an + /// OptimizationRemarkEmitter. + Result run(Function &F, AnalysisManager<Function> &AM); +}; +} +#endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index da17457d3446..6d8f14fa32f9 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -25,13 +25,6 @@ namespace llvm { //===--------------------------------------------------------------------===// // - // createAAEvalPass - This pass implements a simple N^2 alias analysis - // accuracy evaluator.
- // - FunctionPass *createAAEvalPass(); - - //===--------------------------------------------------------------------===// - // // createObjCARCAAWrapperPass - This pass implements ObjC-ARC-based // alias analysis. // @@ -47,10 +40,10 @@ namespace llvm { //===--------------------------------------------------------------------===// // - // createDependenceAnalysisPass - This creates an instance of the - // DependenceAnalysis pass. + // createDependenceAnalysisWrapperPass - This creates an instance of the + // DependenceAnalysisWrapper pass. // - FunctionPass *createDependenceAnalysisPass(); + FunctionPass *createDependenceAnalysisWrapperPass(); //===--------------------------------------------------------------------===// // diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h index 0f7e2b88d2d7..99240a40408e 100644 --- a/include/llvm/Analysis/PostDominators.h +++ b/include/llvm/Analysis/PostDominators.h @@ -15,78 +15,71 @@ #define LLVM_ANALYSIS_POSTDOMINATORS_H #include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManager.h" namespace llvm { /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used to /// compute the post-dominator tree. 
/// -struct PostDominatorTree : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - DominatorTreeBase<BasicBlock>* DT; - - PostDominatorTree() : FunctionPass(ID) { - initializePostDominatorTreePass(*PassRegistry::getPassRegistry()); - DT = new DominatorTreeBase<BasicBlock>(true); - } +struct PostDominatorTree : public DominatorTreeBase<BasicBlock> { + typedef DominatorTreeBase<BasicBlock> Base; - ~PostDominatorTree() override; + PostDominatorTree() : DominatorTreeBase<BasicBlock>(true) {} - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } + PostDominatorTree(PostDominatorTree &&Arg) + : Base(std::move(static_cast<Base &>(Arg))) {} - inline const std::vector<BasicBlock*> &getRoots() const { - return DT->getRoots(); + PostDominatorTree &operator=(PostDominatorTree &&RHS) { + Base::operator=(std::move(static_cast<Base &>(RHS))); + return *this; } +}; - inline DomTreeNode *getRootNode() const { - return DT->getRootNode(); - } +/// \brief Analysis pass which computes a \c PostDominatorTree. +class PostDominatorTreeAnalysis + : public AnalysisInfoMixin<PostDominatorTreeAnalysis> { + friend AnalysisInfoMixin<PostDominatorTreeAnalysis>; + static char PassID; - inline DomTreeNode *operator[](BasicBlock *BB) const { - return DT->getNode(BB); - } +public: + /// \brief Provide the result typedef for this analysis pass. + typedef PostDominatorTree Result; - inline DomTreeNode *getNode(BasicBlock *BB) const { - return DT->getNode(BB); - } + /// \brief Run the analysis pass over a function and produce a post dominator + /// tree. + PostDominatorTree run(Function &F, FunctionAnalysisManager &); +}; - inline bool dominates(DomTreeNode* A, DomTreeNode* B) const { - return DT->dominates(A, B); - } +/// \brief Printer pass for the \c PostDominatorTree. 
+class PostDominatorTreePrinterPass + : public PassInfoMixin<PostDominatorTreePrinterPass> { + raw_ostream &OS; - inline bool dominates(const BasicBlock* A, const BasicBlock* B) const { - return DT->dominates(A, B); - } +public: + explicit PostDominatorTreePrinterPass(raw_ostream &OS); + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); +}; - inline bool properlyDominates(const DomTreeNode* A, DomTreeNode* B) const { - return DT->properlyDominates(A, B); - } +struct PostDominatorTreeWrapperPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + PostDominatorTree DT; - inline bool properlyDominates(BasicBlock* A, BasicBlock* B) const { - return DT->properlyDominates(A, B); + PostDominatorTreeWrapperPass() : FunctionPass(ID) { + initializePostDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); } - inline BasicBlock *findNearestCommonDominator(BasicBlock *A, BasicBlock *B) { - return DT->findNearestCommonDominator(A, B); - } + PostDominatorTree &getPostDomTree() { return DT; } + const PostDominatorTree &getPostDomTree() const { return DT; } - inline const BasicBlock *findNearestCommonDominator(const BasicBlock *A, - const BasicBlock *B) { - return DT->findNearestCommonDominator(A, B); - } + bool runOnFunction(Function &F) override; - /// Get all nodes post-dominated by R, including R itself. 
- void getDescendants(BasicBlock *R, - SmallVectorImpl<BasicBlock *> &Result) const { - DT->getDescendants(R, Result); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); } void releaseMemory() override { - DT->releaseMemory(); + DT.releaseMemory(); } void print(raw_ostream &OS, const Module*) const override; diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h new file mode 100644 index 000000000000..cd624c8404da --- /dev/null +++ b/include/llvm/Analysis/ProfileSummaryInfo.h @@ -0,0 +1,113 @@ +//===- llvm/Analysis/ProfileSummaryInfo.h - profile summary ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that provides access to profile summary +// information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H +#define LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include <memory> + +namespace llvm { +class ProfileSummary; +/// \brief Analysis providing profile information. +/// +/// This is an immutable analysis pass that provides ability to query global +/// (program-level) profile information. The main APIs are isHotCount and +/// isColdCount that tells whether a given profile count is considered hot/cold +/// based on the profile summary. This also provides convenience methods to +/// check whether a function is hot or cold. 
+ +// FIXME: Provide convenience methods to determine hotness/coldness of other IR +// units. This would require making this depend on BFI. +class ProfileSummaryInfo { +private: + Module &M; + std::unique_ptr<ProfileSummary> Summary; + void computeSummary(); + void computeThresholds(); + // Count thresholds to answer isHotCount and isColdCount queries. + Optional<uint64_t> HotCountThreshold, ColdCountThreshold; + +public: + ProfileSummaryInfo(Module &M) : M(M) {} + ProfileSummaryInfo(ProfileSummaryInfo &&Arg) + : M(Arg.M), Summary(std::move(Arg.Summary)) {} + /// \brief Returns true if \p F is a hot function. + bool isHotFunction(const Function *F); + /// \brief Returns true if \p F is a cold function. + bool isColdFunction(const Function *F); + /// \brief Returns true if count \p C is considered hot. + bool isHotCount(uint64_t C); + /// \brief Returns true if count \p C is considered cold. + bool isColdCount(uint64_t C); +}; + +/// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo. +class ProfileSummaryInfoWrapperPass : public ImmutablePass { + std::unique_ptr<ProfileSummaryInfo> PSI; + +public: + static char ID; + ProfileSummaryInfoWrapperPass(); + + ProfileSummaryInfo *getPSI(Module &M); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; + +/// An analysis pass based on the new PM to deliver ProfileSummaryInfo. 
+class ProfileSummaryAnalysis + : public AnalysisInfoMixin<ProfileSummaryAnalysis> { +public: + typedef ProfileSummaryInfo Result; + + ProfileSummaryAnalysis() {} + ProfileSummaryAnalysis(const ProfileSummaryAnalysis &Arg) {} + ProfileSummaryAnalysis(ProfileSummaryAnalysis &&Arg) {} + ProfileSummaryAnalysis &operator=(const ProfileSummaryAnalysis &RHS) { + return *this; + } + ProfileSummaryAnalysis &operator=(ProfileSummaryAnalysis &&RHS) { + return *this; + } + + Result run(Module &M, ModuleAnalysisManager &); + +private: + friend AnalysisInfoMixin<ProfileSummaryAnalysis>; + static char PassID; +}; + +/// \brief Printer pass that uses \c ProfileSummaryAnalysis. +class ProfileSummaryPrinterPass + : public PassInfoMixin<ProfileSummaryPrinterPass> { + raw_ostream &OS; + +public: + explicit ProfileSummaryPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM); +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index 4988386fdc82..91bfd435f08c 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -41,6 +41,7 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManager.h" #include <map> #include <memory> #include <set> @@ -676,6 +677,22 @@ class RegionInfoBase { RegionInfoBase(const RegionInfoBase &) = delete; const RegionInfoBase &operator=(const RegionInfoBase &) = delete; + RegionInfoBase(RegionInfoBase &&Arg) + : DT(std::move(Arg.DT)), PDT(std::move(Arg.PDT)), DF(std::move(Arg.DF)), + TopLevelRegion(std::move(Arg.TopLevelRegion)), + BBtoRegion(std::move(Arg.BBtoRegion)) { + Arg.wipe(); + } + RegionInfoBase &operator=(RegionInfoBase &&RHS) { + DT = std::move(RHS.DT); + PDT = std::move(RHS.PDT); + DF = std::move(RHS.DF); + TopLevelRegion = std::move(RHS.TopLevelRegion); + BBtoRegion = std::move(RHS.BBtoRegion); + RHS.wipe(); + return *this; + 
} + DomTreeT *DT; PostDomTreeT *PDT; DomFrontierT *DF; @@ -687,6 +704,18 @@ private: /// Map every BB to the smallest region, that contains BB. BBtoRegionMap BBtoRegion; + /// \brief Wipe this region tree's state without releasing any resources. + /// + /// This is essentially a post-move helper only. It leaves the object in an + /// assignable and destroyable state, but otherwise invalid. + void wipe() { + DT = nullptr; + PDT = nullptr; + DF = nullptr; + TopLevelRegion = nullptr; + BBtoRegion.clear(); + } + // Check whether the entries of BBtoRegion for the BBs of region // SR are correct. Triggers an assertion if not. Calls itself recursively for // subregions. @@ -836,10 +865,19 @@ public: class RegionInfo : public RegionInfoBase<RegionTraits<Function>> { public: + typedef RegionInfoBase<RegionTraits<Function>> Base; + explicit RegionInfo(); ~RegionInfo() override; + RegionInfo(RegionInfo &&Arg) + : Base(std::move(static_cast<Base &>(Arg))) {} + RegionInfo &operator=(RegionInfo &&RHS) { + Base::operator=(std::move(static_cast<Base &>(RHS))); + return *this; + } + // updateStatistics - Update statistic about created regions. void updateStatistics(Region *R) final; @@ -884,6 +922,31 @@ public: //@} }; +/// \brief Analysis pass that exposes the \c RegionInfo for a function. +class RegionInfoAnalysis : public AnalysisInfoMixin<RegionInfoAnalysis> { + friend AnalysisInfoMixin<RegionInfoAnalysis>; + static char PassID; + +public: + typedef RegionInfo Result; + + RegionInfo run(Function &F, AnalysisManager<Function> &AM); +}; + +/// \brief Printer pass for the \c RegionInfo. +class RegionInfoPrinterPass : public PassInfoMixin<RegionInfoPrinterPass> { + raw_ostream &OS; + +public: + explicit RegionInfoPrinterPass(raw_ostream &OS); + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); +}; + +/// \brief Verifier pass for the \c RegionInfo. 
+struct RegionInfoVerifierPass : PassInfoMixin<RegionInfoVerifierPass> { + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); +}; + template <> template <> inline BasicBlock * diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h index 134cd8f96fbe..15dd1a2000e6 100644 --- a/include/llvm/Analysis/RegionInfoImpl.h +++ b/include/llvm/Analysis/RegionInfoImpl.h @@ -18,7 +18,6 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include <algorithm> @@ -666,7 +665,7 @@ typename Tr::RegionT *RegionInfoBase<Tr>::createRegion(BlockT *entry, new RegionT(entry, exit, static_cast<RegionInfoT *>(this), DT); BBtoRegion.insert(std::make_pair(entry, region)); -#ifdef XDEBUG +#ifdef EXPENSIVE_CHECKS region->verifyRegion(); #else DEBUG(region->verifyRegion()); @@ -765,7 +764,7 @@ void RegionInfoBase<Tr>::buildRegionsTree(DomTreeNodeT *N, RegionT *region) { } } -#ifdef XDEBUG +#ifdef EXPENSIVE_CHECKS template <class Tr> bool RegionInfoBase<Tr>::VerifyRegionInfo = true; #else @@ -799,7 +798,7 @@ void RegionInfoBase<Tr>::releaseMemory() { template <class Tr> void RegionInfoBase<Tr>::verifyAnalysis() const { - // Do only verify regions if explicitely activated using XDEBUG or + // Do only verify regions if explicitly activated using EXPENSIVE_CHECKS or // -verify-region-info if (!RegionInfoBase<Tr>::VerifyRegionInfo) return; diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index ef9305788849..535b623d31ac 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -23,17 +23,17 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/ConstantRange.h" -#include
"llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DataTypes.h" -#include <map> namespace llvm { class APInt; @@ -53,6 +53,7 @@ namespace llvm { class SCEVExpander; class SCEVPredicate; class SCEVUnknown; + class Function; template <> struct FoldingSetTrait<SCEV>; template <> struct FoldingSetTrait<SCEVPredicate>; @@ -168,8 +169,8 @@ namespace llvm { static bool classof(const SCEV *S); }; - /// SCEVPredicate - This class represents an assumption made using SCEV - /// expressions which can be checked at run-time. + /// This class represents an assumption made using SCEV expressions which can + /// be checked at run-time. class SCEVPredicate : public FoldingSetNode { friend struct FoldingSetTrait<SCEVPredicate>; @@ -178,7 +179,7 @@ namespace llvm { FoldingSetNodeIDRef FastID; public: - enum SCEVPredicateKind { P_Union, P_Equal }; + enum SCEVPredicateKind { P_Union, P_Equal, P_Wrap }; protected: SCEVPredicateKind Kind; @@ -191,23 +192,23 @@ namespace llvm { SCEVPredicateKind getKind() const { return Kind; } - /// \brief Returns the estimated complexity of this predicate. - /// This is roughly measured in the number of run-time checks required. + /// Returns the estimated complexity of this predicate. This is roughly + /// measured in the number of run-time checks required. virtual unsigned getComplexity() const { return 1; } - /// \brief Returns true if the predicate is always true. This means that no + /// Returns true if the predicate is always true. This means that no /// assumptions were made and nothing needs to be checked at run-time. virtual bool isAlwaysTrue() const = 0; - /// \brief Returns true if this predicate implies \p N. + /// Returns true if this predicate implies \p N. 
virtual bool implies(const SCEVPredicate *N) const = 0; - /// \brief Prints a textual representation of this predicate with an - /// indentation of \p Depth. + /// Prints a textual representation of this predicate with an indentation of + /// \p Depth. virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0; - /// \brief Returns the SCEV to which this predicate applies, or nullptr - /// if this is a SCEVUnionPredicate. + /// Returns the SCEV to which this predicate applies, or nullptr if this is + /// a SCEVUnionPredicate. virtual const SCEV *getExpr() const = 0; }; @@ -236,10 +237,9 @@ namespace llvm { } }; - /// SCEVEqualPredicate - This class represents an assumption that two SCEV - /// expressions are equal, and this can be checked at run-time. We assume - /// that the left hand side is a SCEVUnknown and the right hand side a - /// constant. + /// This class represents an assumption that two SCEV expressions are equal, + /// and this can be checked at run-time. We assume that the left hand side is + /// a SCEVUnknown and the right hand side a constant. class SCEVEqualPredicate final : public SCEVPredicate { /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a /// constant. @@ -256,10 +256,10 @@ namespace llvm { bool isAlwaysTrue() const override; const SCEV *getExpr() const override; - /// \brief Returns the left hand side of the equality. + /// Returns the left hand side of the equality. const SCEVUnknown *getLHS() const { return LHS; } - /// \brief Returns the right hand side of the equality. + /// Returns the right hand side of the equality. const SCEVConstant *getRHS() const { return RHS; } /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -268,9 +268,107 @@ namespace llvm { } }; - /// SCEVUnionPredicate - This class represents a composition of other - /// SCEV predicates, and is the class that most clients will interact with. - /// This is equivalent to a logical "AND" of all the predicates in the union. 
+ /// This class represents an assumption made on an AddRec expression. Given an + /// affine AddRec expression {a,+,b}, we assume that it has the nssw or nusw + /// flags (defined below) in the first X iterations of the loop, where X is a + /// SCEV expression returned by getPredicatedBackedgeTakenCount. + /// + /// Note that this does not imply that X is equal to the backedge taken + /// count. This means that if we have a nusw predicate for i32 {0,+,1} with a + /// predicated backedge taken count of X, we only guarantee that {0,+,1} has + /// nusw in the first X iterations. {0,+,1} may still wrap in the loop if we + /// have more than X iterations. + class SCEVWrapPredicate final : public SCEVPredicate { + public: + /// Similar to SCEV::NoWrapFlags, but with slightly different semantics + /// for FlagNUSW. The increment is considered to be signed, and a + b + /// (where b is the increment) is considered to wrap if: + /// zext(a + b) != zext(a) + sext(b) + /// + /// If Signed is a function that takes an n-bit tuple and maps to the + /// integer domain as the tuple's value interpreted as twos complement, + /// and Unsigned a function that takes an n-bit tuple and maps to the + /// integer domain as the base two value of input tuple, then a + b + /// has IncrementNUSW iff: + /// + /// 0 <= Unsigned(a) + Signed(b) < 2^n + /// + /// The IncrementNSSW flag has identical semantics with SCEV::FlagNSW. + /// + /// Note that the IncrementNUSW flag is not commutative: if base + inc + /// has IncrementNUSW, then inc + base doesn't necessarily have this + /// property. The reason for this is that this is used for sign/zero + /// extending affine AddRec SCEV expressions when a SCEVWrapPredicate is + /// assumed. A {base,+,inc} expression is already non-commutative with + /// regards to base and inc, since it is interpreted as: + /// (((base + inc) + inc) + inc) ... + enum IncrementWrapFlags { + IncrementAnyWrap = 0, // No guarantee.
+ IncrementNUSW = (1 << 0), // No unsigned with signed increment wrap. + IncrementNSSW = (1 << 1), // No signed with signed increment wrap + // (equivalent with SCEV::NSW) + IncrementNoWrapMask = (1 << 2) - 1 + }; + + /// Convenient IncrementWrapFlags manipulation methods. + static SCEVWrapPredicate::IncrementWrapFlags LLVM_ATTRIBUTE_UNUSED_RESULT + clearFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, + SCEVWrapPredicate::IncrementWrapFlags OffFlags) { + assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!"); + assert((OffFlags & IncrementNoWrapMask) == OffFlags && + "Invalid flags value!"); + return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & ~OffFlags); + } + + static SCEVWrapPredicate::IncrementWrapFlags LLVM_ATTRIBUTE_UNUSED_RESULT + maskFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, int Mask) { + assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!"); + assert((Mask & IncrementNoWrapMask) == Mask && "Invalid mask value!"); + + return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & Mask); + } + + static SCEVWrapPredicate::IncrementWrapFlags LLVM_ATTRIBUTE_UNUSED_RESULT + setFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, + SCEVWrapPredicate::IncrementWrapFlags OnFlags) { + assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!"); + assert((OnFlags & IncrementNoWrapMask) == OnFlags && + "Invalid flags value!"); + + return (SCEVWrapPredicate::IncrementWrapFlags)(Flags | OnFlags); + } + + /// Returns the set of SCEVWrapPredicate no wrap flags implied by a + /// SCEVAddRecExpr. + static SCEVWrapPredicate::IncrementWrapFlags + getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE); + + private: + const SCEVAddRecExpr *AR; + IncrementWrapFlags Flags; + + public: + explicit SCEVWrapPredicate(const FoldingSetNodeIDRef ID, + const SCEVAddRecExpr *AR, + IncrementWrapFlags Flags); + + /// Returns the set assumed no overflow flags. 
+ IncrementWrapFlags getFlags() const { return Flags; } + /// Implementation of the SCEVPredicate interface + const SCEV *getExpr() const override; + bool implies(const SCEVPredicate *N) const override; + void print(raw_ostream &OS, unsigned Depth = 0) const override; + bool isAlwaysTrue() const override; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVPredicate *P) { + return P->getKind() == P_Wrap; + } + }; + + /// This class represents a composition of other SCEV predicates, and is the + /// class that most clients will interact with. This is equivalent to a + /// logical "AND" of all the predicates in the union. class SCEVUnionPredicate final : public SCEVPredicate { private: typedef DenseMap<const SCEV *, SmallVector<const SCEVPredicate *, 4>> @@ -288,11 +386,11 @@ namespace llvm { return Preds; } - /// \brief Adds a predicate to this union. + /// Adds a predicate to this union. void add(const SCEVPredicate *N); - /// \brief Returns a reference to a vector containing all predicates - /// which apply to \p Expr. + /// Returns a reference to a vector containing all predicates which apply to + /// \p Expr. ArrayRef<const SCEVPredicate *> getPredicatesForExpr(const SCEV *Expr); /// Implementation of the SCEVPredicate interface @@ -301,8 +399,8 @@ namespace llvm { void print(raw_ostream &OS, unsigned Depth) const override; const SCEV *getExpr() const override; - /// \brief We estimate the complexity of a union predicate as the size - /// number of predicates in the union. + /// We estimate the complexity of a union predicate as the size number of + /// predicates in the union. unsigned getComplexity() const override { return Preds.size(); } /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -364,6 +462,12 @@ namespace llvm { /// Function &F; + /// Does the module have any calls to the llvm.experimental.guard intrinsic + /// at all? 
If this is false, we avoid doing work that will only help if + /// there are guards present in the IR. + /// + bool HasGuards; + /// The target library information for the target we are targeting. /// TargetLibraryInfo &TLI; @@ -382,6 +486,21 @@ namespace llvm { /// This SCEV is used to represent unknown trip counts and things. std::unique_ptr<SCEVCouldNotCompute> CouldNotCompute; + /// The typedef for HasRecMap. + /// + typedef DenseMap<const SCEV *, bool> HasRecMapType; + + /// This is a cache to record whether a SCEV contains any scAddRecExpr. + HasRecMapType HasRecMap; + + /// The typedef for ExprValueMap. + /// + typedef DenseMap<const SCEV *, SetVector<Value *>> ExprValueMapType; + + /// ExprValueMap -- This map records the original values from which + /// the SCEV expr is generated from. + ExprValueMapType ExprValueMap; + /// The typedef for ValueExprMap. /// typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *> > @@ -410,9 +529,14 @@ namespace llvm { const SCEV *Exact; const SCEV *Max; + /// A predicate union guard for this ExitLimit. The result is only + /// valid if this predicate evaluates to 'true' at run-time. + SCEVUnionPredicate Pred; + /*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {} - ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) { + ExitLimit(const SCEV *E, const SCEV *M, SCEVUnionPredicate &P) + : Exact(E), Max(M), Pred(P) { assert((isa<SCEVCouldNotCompute>(Exact) || !isa<SCEVCouldNotCompute>(Max)) && "Exact is not allowed to be less precise than Max"); @@ -424,30 +548,147 @@ namespace llvm { return !isa<SCEVCouldNotCompute>(Exact) || !isa<SCEVCouldNotCompute>(Max); } + + /// Test whether this ExitLimit contains all information. + bool hasFullInfo() const { return !isa<SCEVCouldNotCompute>(Exact); } }; + /// Forward declaration of ExitNotTakenExtras + struct ExitNotTakenExtras; + /// Information about the number of times a particular loop exit may be /// reached before exiting the loop.
struct ExitNotTakenInfo { AssertingVH<BasicBlock> ExitingBlock; const SCEV *ExactNotTaken; - PointerIntPair<ExitNotTakenInfo*, 1> NextExit; - ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {} + ExitNotTakenExtras *ExtraInfo; + bool Complete; + + ExitNotTakenInfo() + : ExitingBlock(nullptr), ExactNotTaken(nullptr), ExtraInfo(nullptr), + Complete(true) {} + + ExitNotTakenInfo(BasicBlock *ExitBlock, const SCEV *Expr, + ExitNotTakenExtras *Ptr) + : ExitingBlock(ExitBlock), ExactNotTaken(Expr), ExtraInfo(Ptr), + Complete(true) {} /// Return true if all loop exits are computable. - bool isCompleteList() const { - return NextExit.getInt() == 0; + bool isCompleteList() const { return Complete; } + + /// Sets the incomplete property, indicating that one of the loop exits + /// doesn't have a corresponding ExitNotTakenInfo entry. + void setIncomplete() { Complete = false; } + + /// Returns a pointer to the predicate associated with this information, + /// or nullptr if this doesn't exist (meaning always true). + SCEVUnionPredicate *getPred() const { + if (ExtraInfo) + return &ExtraInfo->Pred; + + return nullptr; } - void setIncomplete() { NextExit.setInt(1); } + /// Return true if the SCEV predicate associated with this information + /// is always true. + bool hasAlwaysTruePred() const { + return !getPred() || getPred()->isAlwaysTrue(); + } - /// Return a pointer to the next exit's not-taken info. - ExitNotTakenInfo *getNextExit() const { - return NextExit.getPointer(); + /// Defines a simple forward iterator for ExitNotTakenInfo. 
+ class ExitNotTakenInfoIterator + : public std::iterator<std::forward_iterator_tag, ExitNotTakenInfo> { + const ExitNotTakenInfo *Start; + unsigned Position; + + public: + ExitNotTakenInfoIterator(const ExitNotTakenInfo *Start, + unsigned Position) + : Start(Start), Position(Position) {} + + const ExitNotTakenInfo &operator*() const { + if (Position == 0) + return *Start; + + return Start->ExtraInfo->Exits[Position - 1]; + } + + const ExitNotTakenInfo *operator->() const { + if (Position == 0) + return Start; + + return &Start->ExtraInfo->Exits[Position - 1]; + } + + bool operator==(const ExitNotTakenInfoIterator &RHS) const { + return Start == RHS.Start && Position == RHS.Position; + } + + bool operator!=(const ExitNotTakenInfoIterator &RHS) const { + return Start != RHS.Start || Position != RHS.Position; + } + + ExitNotTakenInfoIterator &operator++() { // Preincrement + if (!Start) + return *this; + + unsigned Elements = + Start->ExtraInfo ? Start->ExtraInfo->Exits.size() + 1 : 1; + + ++Position; + + // We've run out of elements. + if (Position == Elements) { + Start = nullptr; + Position = 0; + } + + return *this; + } + ExitNotTakenInfoIterator operator++(int) { // Postincrement + ExitNotTakenInfoIterator Tmp = *this; + ++*this; + return Tmp; + } + }; + + /// Iterators + ExitNotTakenInfoIterator begin() const { + return ExitNotTakenInfoIterator(this, 0); + } + ExitNotTakenInfoIterator end() const { + return ExitNotTakenInfoIterator(nullptr, 0); } + }; - void setNextExit(ExitNotTakenInfo *ENT) { NextExit.setPointer(ENT); } + /// Describes the extra information that a ExitNotTakenInfo can have. + struct ExitNotTakenExtras { + /// The predicate associated with the ExitNotTakenInfo struct. + SCEVUnionPredicate Pred; + + /// The extra exits in the loop. Only the ExitNotTakenExtras structure + /// pointed to by the first ExitNotTakenInfo struct (associated with the + /// first loop exit) will populate this vector to prevent having + /// redundant information. 
+ SmallVector<ExitNotTakenInfo, 4> Exits; + }; + + /// A struct containing the information attached to a backedge. + struct EdgeInfo { + EdgeInfo(BasicBlock *Block, const SCEV *Taken, SCEVUnionPredicate &P) : + ExitBlock(Block), Taken(Taken), Pred(std::move(P)) {} + + /// The exit basic block. + BasicBlock *ExitBlock; + + /// The (exact) number of times we take the edge back. + const SCEV *Taken; + + /// The SCEV predicate associated with Taken. If Pred doesn't evaluate + /// to true, the information in Taken is not valid (or equivalent to + /// a CouldNotCompute). + SCEVUnionPredicate Pred; }; /// Information about the backedge-taken count of a loop. This currently @@ -459,16 +700,16 @@ namespace llvm { ExitNotTakenInfo ExitNotTaken; /// An expression indicating the least maximum backedge-taken count of the - /// loop that is known, or a SCEVCouldNotCompute. + /// loop that is known, or a SCEVCouldNotCompute. This expression is only + /// valid if the predicates associated with all loop exits are true. const SCEV *Max; public: BackedgeTakenInfo() : Max(nullptr) {} /// Initialize BackedgeTakenInfo from a list of exact exit counts. - BackedgeTakenInfo( - SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts, - bool Complete, const SCEV *MaxCount); + BackedgeTakenInfo(SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete, + const SCEV *MaxCount); /// Test whether this BackedgeTakenInfo contains any computed information, /// or whether it's all SCEVCouldNotCompute values. @@ -476,11 +717,27 @@ namespace llvm { return ExitNotTaken.ExitingBlock || !isa<SCEVCouldNotCompute>(Max); } + /// Test whether this BackedgeTakenInfo contains complete information. + bool hasFullInfo() const { return ExitNotTaken.isCompleteList(); } + /// Return an expression indicating the exact backedge-taken count of the - /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the + /// loop if it is known or SCEVCouldNotCompute otherwise. 
This is the /// number of times the loop header can be guaranteed to execute, minus /// one. - const SCEV *getExact(ScalarEvolution *SE) const; + /// + /// If the SCEV predicate associated with the answer can be different + /// from AlwaysTrue, we must add a (non null) Predicates argument. + /// The SCEV predicate associated with the answer will be added to + /// Predicates. A run-time check needs to be emitted for the SCEV + /// predicate in order for the answer to be valid. + /// + /// Note that we should always know if we need to pass a predicate + /// argument or not from the way the ExitCounts vector was computed. + /// If we allowed SCEV predicates to be generated when populating this + /// vector, this information can contain them and therefore a + /// SCEVPredicate argument should be added to getExact. + const SCEV *getExact(ScalarEvolution *SE, + SCEVUnionPredicate *Predicates = nullptr) const; /// Return the number of times this loop exit may fall through to the back /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via @@ -501,7 +758,11 @@ namespace llvm { /// Cache the backedge-taken count of the loops for this function as they /// are computed. - DenseMap<const Loop*, BackedgeTakenInfo> BackedgeTakenCounts; + DenseMap<const Loop *, BackedgeTakenInfo> BackedgeTakenCounts; + + /// Cache the predicated backedge-taken count of the loops for this + /// function as they are computed. + DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts; /// This map contains entries for all of the PHI instructions that we /// attempt to compute constant evolutions for. This allows us to avoid @@ -520,6 +781,16 @@ namespace llvm { SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>> LoopDispositions; + /// Cache for \c loopHasNoAbnormalExits. + DenseMap<const Loop *, bool> LoopHasNoAbnormalExits; + + /// Returns true if \p L contains no instruction that can abnormally exit + /// the loop (i.e. 
via throwing an exception, by terminating the thread + /// cleanly or by infinite looping in a called function). Strictly + /// speaking, the last one is not leaving the loop, but is identical to + /// leaving the loop for reasoning about undefined behavior. + bool loopHasNoAbnormalExits(const Loop *L); + /// Compute a LoopDisposition value. LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L); @@ -547,8 +818,7 @@ namespace llvm { DenseMap<const SCEV *, ConstantRange> &Cache = Hint == HINT_RANGE_UNSIGNED ? UnsignedRanges : SignedRanges; - std::pair<DenseMap<const SCEV *, ConstantRange>::iterator, bool> Pair = - Cache.insert(std::make_pair(S, CR)); + auto Pair = Cache.insert({S, CR}); if (!Pair.second) Pair.first->second = CR; return Pair.first->second; @@ -557,6 +827,19 @@ namespace llvm { /// Determine the range for a particular SCEV. ConstantRange getRange(const SCEV *S, RangeSignHint Hint); + /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Stop}. + /// Helper for \c getRange. + ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop, + const SCEV *MaxBECount, + unsigned BitWidth); + + /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p + /// Stop} by "factoring out" a ternary expression from the add recurrence. + /// Helper called by \c getRange. + ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop, + const SCEV *MaxBECount, + unsigned BitWidth); + /// We know that there is no SCEV for the specified value. Analyze the /// expression. const SCEV *createSCEV(Value *V); @@ -588,36 +871,59 @@ namespace llvm { /// This looks up computed SCEV values for all instructions that depend on /// the given instruction and removes them from the ValueExprMap map if they /// reference SymName. This is used during PHI resolution. 
- void ForgetSymbolicName(Instruction *I, const SCEV *SymName); + void forgetSymbolicName(Instruction *I, const SCEV *SymName); /// Return the BackedgeTakenInfo for the given loop, lazily computing new - /// values if the loop hasn't been analyzed yet. + /// values if the loop hasn't been analyzed yet. The returned result is + /// guaranteed not to be predicated. const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L); + /// Similar to getBackedgeTakenInfo, but will add predicates as required + /// with the purpose of returning complete information. + const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L); + /// Compute the number of times the specified loop will iterate. - BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L); + /// If AllowPredicates is set, we will create new SCEV predicates as + /// necessary in order to return an exact answer. + BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will - /// execute if it exits via the specified block. - ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock); + /// execute if it exits via the specified block. If AllowPredicates is set, + /// this call will try to use a minimal set of SCEV predicates in order to + /// return an exact answer. + ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of ExitCond, /// TBB, and FBB. + /// + /// \p ControlsExit is true if ExitCond directly controls the exit + /// branch. In this case, we can assume that the loop exits only if the + /// condition is true and can infer that failing to meet the condition prior + /// to integer wraparound results in undefined behavior. 
+ /// + /// If \p AllowPredicates is set, this call will try to use a minimal set of + /// SCEV predicates in order to return an exact answer. ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr); + bool ControlsExit, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of the ICmpInst - /// ExitCond, TBB, and FBB. + /// ExitCond, TBB, and FBB. If AllowPredicates is set, this call will try + /// to use a minimal set of SCEV predicates in order to return an exact + /// answer. ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr); + bool IsSubExpr, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a switch with a single exiting case @@ -655,20 +961,35 @@ namespace llvm { /// Return the number of times an exit condition comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. - ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr); + /// If AllowPredicates is set, this call will try to use a minimal set of + /// SCEV predicates in order to return an exact answer. + ExitLimit howFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr, + bool AllowPredicates = false); /// Return the number of times an exit condition checking the specified /// value for nonzero will execute. If not computable, return /// CouldNotCompute. - ExitLimit HowFarToNonZero(const SCEV *V, const Loop *L); + ExitLimit howFarToNonZero(const SCEV *V, const Loop *L); /// Return the number of times an exit condition containing the specified /// less-than comparison will execute. If not computable, return - /// CouldNotCompute. isSigned specifies whether the less-than is signed. 
- ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); - ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); + /// CouldNotCompute. + /// + /// \p isSigned specifies whether the less-than is signed. + /// + /// \p ControlsExit is true when the LHS < RHS condition directly controls + /// the branch (loops exits only if condition is true). In this case, we can + /// use NoWrapFlags to skip overflow checks. + /// + /// If \p AllowPredicates is set, this call will try to use a minimal set of + /// SCEV predicates in order to return an exact answer. + ExitLimit howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, + bool isSigned, bool ControlsExit, + bool AllowPredicates = false); + + ExitLimit howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, + const Loop *L, bool isSigned, bool IsSubExpr, + bool AllowPredicates = false); /// Return a predecessor of BB (which may not be an immediate predecessor) /// which has exactly one successor from which BB is reachable, or null if @@ -707,12 +1028,18 @@ namespace llvm { /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is - /// true. Utility function used by isImpliedCondOperands. + /// true. Utility function used by isImpliedCondOperands. Tries to get + /// cases like "X `sgt` 0 => X - 1 `sgt` -1". bool isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); + /// Return true if the condition denoted by \p LHS \p Pred \p RHS is implied + /// by a call to \c @llvm.experimental.guard in \p BB. 
+ bool isImpliedViaGuard(BasicBlock *BB, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. @@ -733,8 +1060,8 @@ namespace llvm { /// Test if the given expression is known to satisfy the condition described /// by Pred and the known constant ranges of LHS and RHS. /// - bool isKnownPredicateWithRanges(ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS); + bool isKnownPredicateViaConstantRanges(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); /// Try to prove the condition described by "LHS Pred RHS" by ruling out /// integer overflow. @@ -778,6 +1105,9 @@ namespace llvm { bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, const Loop *L); + /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation. + SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR); + bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, bool &Increasing); @@ -793,11 +1123,35 @@ namespace llvm { bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, bool &Increasing); - // Return SCEV no-wrap flags that can be proven based on reasoning - // about how poison produced from no-wrap flags on this value - // (e.g. a nuw add) would trigger undefined behavior on overflow. + /// Return SCEV no-wrap flags that can be proven based on reasoning about + /// how poison produced from no-wrap flags on this value (e.g. a nuw add) + /// would trigger undefined behavior on overflow. SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V); + /// Return true if the SCEV corresponding to \p I is never poison. 
Proving + /// this is more complex than proving that just \p I is never poison, since + /// SCEV commons expressions across control flow, and you can have cases + /// like: + /// + /// idx0 = a + b; + /// ptr[idx0] = 100; + /// if (<condition>) { + /// idx1 = a +nsw b; + /// ptr[idx1] = 200; + /// } + /// + /// where the SCEV expression (+ a b) is guaranteed to not be poison (and + /// hence not sign-overflow) only if "<condition>" is true. Since both + /// `idx0` and `idx1` will be mapped to the same SCEV expression, (+ a b), + /// it is not okay to annotate (+ a b) with <nsw> in the above example. + bool isSCEVExprNeverPoison(const Instruction *I); + + /// This is like \c isSCEVExprNeverPoison but it specifically works for + /// instructions that will get mapped to SCEV add recurrences. Return true + /// if \p I will never generate poison under the assumption that \p I is an + /// add recurrence on the loop \p L. + bool isAddRecNeverPoison(const Instruction *I, const Loop *L); + public: ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI); @@ -821,6 +1175,17 @@ namespace llvm { /// return true. For pointer types, this is the pointer-sized integer type. Type *getEffectiveSCEVType(Type *Ty) const; + /// Return true if the SCEV is a scAddRecExpr or it contains + /// scAddRecExpr. The result will be cached in HasRecMap. + /// + bool containsAddRecurrence(const SCEV *S); + + /// Return the Value set from which the SCEV expr is generated. + SetVector<Value *> *getSCEVValues(const SCEV *S); + + /// Erase Value from ValueExprMap and ExprValueMap. + void eraseValueFromMap(Value *V); + /// Return a SCEV expression for the full generality of the specified /// expression. 
const SCEV *getSCEV(Value *V); @@ -867,7 +1232,7 @@ namespace llvm { SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end()); return getAddRecExpr(NewOp, L, Flags); } - /// \brief Returns an expression for a GEP + /// Returns an expression for a GEP /// /// \p PointeeType The type used as the basis for the pointer arithmetics /// \p BaseExpr The expression for the pointer operand. @@ -885,10 +1250,10 @@ namespace llvm { const SCEV *getUnknown(Value *V); const SCEV *getCouldNotCompute(); - /// \brief Return a SCEV for the constant 0 of a specific type. + /// Return a SCEV for the constant 0 of a specific type. const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); } - /// \brief Return a SCEV for the constant 1 of a specific type. + /// Return a SCEV for the constant 1 of a specific type. const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); } /// Return an expression for sizeof AllocTy that is type IntTy @@ -981,7 +1346,7 @@ namespace llvm { bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); - /// \brief Returns the maximum trip count of the loop if it is a single-exit + /// Returns the maximum trip count of the loop if it is a single-exit /// loop and we can compute a small maximum for that loop. /// /// Implemented in terms of the \c getSmallConstantTripCount overload with @@ -997,7 +1362,7 @@ namespace llvm { /// prematurely via another branch. unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock); - /// \brief Returns the largest constant divisor of the trip count of the + /// Returns the largest constant divisor of the trip count of the /// loop if it is a single-exit loop and we can compute a small maximum for /// that loop. 
/// @@ -1031,6 +1396,13 @@ namespace llvm { /// const SCEV *getBackedgeTakenCount(const Loop *L); + /// Similar to getBackedgeTakenCount, except it will add a set of + /// SCEV predicates to Predicates that are required to be true in order for + /// the answer to be correct. Predicates can be checked with run-time + /// checks and can be used to perform loop versioning. + const SCEV *getPredicatedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Predicates); + /// Similar to getBackedgeTakenCount, except return the least SCEV value /// that is known never to be less than the actual backedge taken count. const SCEV *getMaxBackedgeTakenCount(const Loop *L); @@ -1050,7 +1422,7 @@ namespace llvm { /// def-use chain linking it to a loop. void forgetValue(Value *V); - /// \brief Called when the client has changed the disposition of values in + /// Called when the client has changed the disposition of values in /// this loop. /// /// We don't have a way to invalidate per-loop dispositions. Clear and @@ -1154,7 +1526,8 @@ namespace llvm { const SCEV *getElementSize(Instruction *Inst); /// Compute the array dimensions Sizes from the set of Terms extracted from - /// the memory access function of this SCEVAddRecExpr. + /// the memory access function of this SCEVAddRecExpr (second step of + /// delinearization). void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize) const; @@ -1162,13 +1535,15 @@ namespace llvm { void print(raw_ostream &OS) const; void verify() const; - /// Collect parametric terms occurring in step expressions. + /// Collect parametric terms occurring in step expressions (first step of + /// delinearization). void collectParametricTerms(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Terms); - /// Return in Subscripts the access functions for each dimension in Sizes. + /// Return in Subscripts the access functions for each dimension in Sizes + /// (third step of delinearization). 
void computeAccessFunctions(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, SmallVectorImpl<const SCEV *> &Sizes); @@ -1251,8 +1626,18 @@ namespace llvm { const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS, const SCEVConstant *RHS); - /// Re-writes the SCEV according to the Predicates in \p Preds. - const SCEV *rewriteUsingPredicate(const SCEV *Scev, SCEVUnionPredicate &A); + const SCEVPredicate * + getWrapPredicate(const SCEVAddRecExpr *AR, + SCEVWrapPredicate::IncrementWrapFlags AddedFlags); + + /// Re-writes the SCEV according to the Predicates in \p A. + const SCEV *rewriteUsingPredicate(const SCEV *S, const Loop *L, + SCEVUnionPredicate &A); + /// Tries to convert the \p S expression to an AddRec expression, + /// adding additional predicates to \p Preds as required. + const SCEVAddRecExpr * + convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L, + SCEVUnionPredicate &Preds); private: /// Compute the backedge taken count knowing the interval difference, the @@ -1283,31 +1668,26 @@ namespace llvm { SCEVUnknown *FirstUnknown; }; - /// \brief Analysis pass that exposes the \c ScalarEvolution for a function. - class ScalarEvolutionAnalysis { + /// Analysis pass that exposes the \c ScalarEvolution for a function. + class ScalarEvolutionAnalysis + : public AnalysisInfoMixin<ScalarEvolutionAnalysis> { + friend AnalysisInfoMixin<ScalarEvolutionAnalysis>; static char PassID; public: typedef ScalarEvolution Result; - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - /// \brief Provide a name for the analysis for debugging and logging. - static StringRef name() { return "ScalarEvolutionAnalysis"; } - - ScalarEvolution run(Function &F, AnalysisManager<Function> *AM); + ScalarEvolution run(Function &F, AnalysisManager<Function> &AM); }; - /// \brief Printer pass for the \c ScalarEvolutionAnalysis results. 
- class ScalarEvolutionPrinterPass { + /// Printer pass for the \c ScalarEvolutionAnalysis results. + class ScalarEvolutionPrinterPass + : public PassInfoMixin<ScalarEvolutionPrinterPass> { raw_ostream &OS; public: explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {} - PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM); - - static StringRef name() { return "ScalarEvolutionPrinterPass"; } + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); }; class ScalarEvolutionWrapperPass : public FunctionPass { @@ -1343,42 +1723,81 @@ namespace llvm { /// - lowers the number of expression rewrites. class PredicatedScalarEvolution { public: - PredicatedScalarEvolution(ScalarEvolution &SE); + PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L); const SCEVUnionPredicate &getUnionPredicate() const; - /// \brief Returns the SCEV expression of V, in the context of the current - /// SCEV predicate. - /// The order of transformations applied on the expression of V returned - /// by ScalarEvolution is guaranteed to be preserved, even when adding new - /// predicates. + + /// Returns the SCEV expression of V, in the context of the current SCEV + /// predicate. The order of transformations applied on the expression of V + /// returned by ScalarEvolution is guaranteed to be preserved, even when + /// adding new predicates. const SCEV *getSCEV(Value *V); - /// \brief Adds a new predicate. + + /// Get the (predicated) backedge count for the analyzed loop. + const SCEV *getBackedgeTakenCount(); + + /// Adds a new predicate. void addPredicate(const SCEVPredicate &Pred); - /// \brief Returns the ScalarEvolution analysis used. + + /// Attempts to produce an AddRecExpr for V by adding additional SCEV + /// predicates. If we can't transform the expression into an AddRecExpr we + /// return nullptr and not add additional SCEV predicates to the current + /// context. 
+ const SCEVAddRecExpr *getAsAddRec(Value *V); + + /// Proves that V doesn't overflow by adding SCEV predicate. + void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags); + + /// Returns true if we've proved that V doesn't wrap by means of a SCEV + /// predicate. + bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags); + + /// Returns the ScalarEvolution analysis used. ScalarEvolution *getSE() const { return &SE; } + /// We need to explicitly define the copy constructor because of FlagsMap. + PredicatedScalarEvolution(const PredicatedScalarEvolution&); + + /// Print the SCEV mappings done by the Predicated Scalar Evolution. + /// The printed text is indented by \p Depth. + void print(raw_ostream &OS, unsigned Depth) const; + private: - /// \brief Increments the version number of the predicate. - /// This needs to be called every time the SCEV predicate changes. + /// Increments the version number of the predicate. This needs to be called + /// every time the SCEV predicate changes. void updateGeneration(); + /// Holds a SCEV and the version number of the SCEV predicate used to /// perform the rewrite of the expression. typedef std::pair<unsigned, const SCEV *> RewriteEntry; + /// Maps a SCEV to the rewrite result of that SCEV at a certain version /// number. If this number doesn't match the current Generation, we will /// need to do a rewrite. To preserve the transformation order of previous /// rewrites, we will rewrite the previous result instead of the original /// SCEV. DenseMap<const SCEV *, RewriteEntry> RewriteMap; + + /// Records what NoWrap flags we've added to a Value *. + ValueMap<Value *, SCEVWrapPredicate::IncrementWrapFlags> FlagsMap; + /// The ScalarEvolution analysis. ScalarEvolution &SE; + + /// The analyzed Loop. + const Loop &L; + /// The SCEVPredicate that forms our context. We will rewrite all /// expressions assuming that this predicate true. 
SCEVUnionPredicate Preds; + /// Marks the version of the SCEV predicate used. When rewriting a SCEV /// expression we mark it with the version of the predicate. We use this to /// figure out if the predicate has changed from the last rewrite of the /// SCEV. If so, we need to perform a new rewrite. unsigned Generation; + + /// The backedge taken count. + const SCEV *BackedgeCount; }; } diff --git a/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h index 7bbbf5562047..ac10370b4131 100644 --- a/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h +++ b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h @@ -28,8 +28,7 @@ class SCEVAAResult : public AAResultBase<SCEVAAResult> { ScalarEvolution &SE; public: - explicit SCEVAAResult(const TargetLibraryInfo &TLI, ScalarEvolution &SE) - : AAResultBase(TLI), SE(SE) {} + explicit SCEVAAResult(ScalarEvolution &SE) : AAResultBase(), SE(SE) {} SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {} AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); @@ -39,20 +38,14 @@ private: }; /// Analysis pass providing a never-invalidated alias analysis result. -class SCEVAA { +class SCEVAA : public AnalysisInfoMixin<SCEVAA> { + friend AnalysisInfoMixin<SCEVAA>; + static char PassID; + public: typedef SCEVAAResult Result; - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - SCEVAAResult run(Function &F, AnalysisManager<Function> *AM); - - /// \brief Provide access to a name for this pass for debugging purposes. - static StringRef name() { return "SCEVAA"; } - -private: - static char PassID; + SCEVAAResult run(Function &F, AnalysisManager<Function> &AM); }; /// Legacy wrapper pass to provide the SCEVAAResult object. 
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index b9939168a99d..2fa856a32f7d 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -80,9 +80,49 @@ namespace llvm { /// already in "expanded" form. bool LSRMode; - typedef IRBuilder<true, TargetFolder> BuilderType; + typedef IRBuilder<TargetFolder> BuilderType; BuilderType Builder; + // RAII object that stores the current insertion point and restores it when + // the object is destroyed. This includes the debug location. Duplicated + // from InsertPointGuard to add SetInsertPoint() which is used to update the + // InsertPointGuards stack when insert points are moved during SCEV + // expansion. + class SCEVInsertPointGuard { + IRBuilderBase &Builder; + AssertingVH<BasicBlock> Block; + BasicBlock::iterator Point; + DebugLoc DbgLoc; + SCEVExpander *SE; + + SCEVInsertPointGuard(const SCEVInsertPointGuard &) = delete; + SCEVInsertPointGuard &operator=(const SCEVInsertPointGuard &) = delete; + + public: + SCEVInsertPointGuard(IRBuilderBase &B, SCEVExpander *SE) + : Builder(B), Block(B.GetInsertBlock()), Point(B.GetInsertPoint()), + DbgLoc(B.getCurrentDebugLocation()), SE(SE) { + SE->InsertPointGuards.push_back(this); + } + + ~SCEVInsertPointGuard() { + // These guards should always be created/destroyed in LIFO order since they + // are used to guard lexically scoped blocks of code in + // ScalarEvolutionExpander. + assert(SE->InsertPointGuards.back() == this); + SE->InsertPointGuards.pop_back(); + Builder.restoreIP(IRBuilderBase::InsertPoint(Block, Point)); + Builder.SetCurrentDebugLocation(DbgLoc); + } + + BasicBlock::iterator GetInsertPoint() const { return Point; } + void SetInsertPoint(BasicBlock::iterator I) { Point = I; } + }; + + /// Stack of pointers to saved insert points, used to keep insert points + /// consistent when instructions are moved. 
+ SmallVector<SCEVInsertPointGuard *, 8> InsertPointGuards; + #ifndef NDEBUG const char *DebugType; #endif @@ -101,6 +141,11 @@ namespace llvm { #endif } + ~SCEVExpander() { + // Make sure the insert point guard stack is consistent. + assert(InsertPointGuards.empty()); + } + #ifndef NDEBUG void setDebugType(const char* s) { DebugType = s; } #endif @@ -162,6 +207,15 @@ namespace llvm { Value *expandEqualPredicate(const SCEVEqualPredicate *Pred, Instruction *Loc); + /// \brief Generates code that evaluates if the \p AR expression will + /// overflow. + Value *generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, + bool Signed); + + /// \brief A specialized variant of expandCodeForPredicate, handling the + /// case when we are expanding code for a SCEVWrapPredicate. + Value *expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc); + /// \brief A specialized variant of expandCodeForPredicate, handling the /// case when we are expanding code for a SCEVUnionPredicate. Value *expandUnionPredicate(const SCEVUnionPredicate *Pred, @@ -254,6 +308,9 @@ namespace llvm { const SCEV *const *op_end, PointerType *PTy, Type *Ty, Value *V); + /// \brief Find a previous Value in ExprValueMap for expand. 
+ Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); + Value *expand(const SCEV *S); /// \brief Insert code to directly compute the specified SCEV expression @@ -306,6 +363,11 @@ namespace llvm { bool &InvertStep); Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, Type *ExpandTy, Type *IntTy, bool useSubtract); + + void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, + Instruction *Pos, PHINode *LoopPhi); + + void fixupInsertPoints(Instruction *I); }; } diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index 16992680577c..ff24cafbe680 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -32,9 +32,7 @@ namespace llvm { scUnknown, scCouldNotCompute }; - //===--------------------------------------------------------------------===// - /// SCEVConstant - This class represents a constant integer value. - /// + /// This class represents a constant integer value. class SCEVConstant : public SCEV { friend class ScalarEvolution; @@ -53,9 +51,7 @@ namespace llvm { } }; - //===--------------------------------------------------------------------===// - /// SCEVCastExpr - This is the base class for unary cast operator classes. - /// + /// This is the base class for unary cast operator classes. class SCEVCastExpr : public SCEV { protected: const SCEV *Op; @@ -76,10 +72,8 @@ namespace llvm { } }; - //===--------------------------------------------------------------------===// - /// SCEVTruncateExpr - This class represents a truncation of an integer value - /// to a smaller integer value. - /// + /// This class represents a truncation of an integer value to a + /// smaller integer value. 
class SCEVTruncateExpr : public SCEVCastExpr { friend class ScalarEvolution; @@ -93,10 +87,8 @@ namespace llvm { } }; - //===--------------------------------------------------------------------===// - /// SCEVZeroExtendExpr - This class represents a zero extension of a small - /// integer value to a larger integer value. - /// + /// This class represents a zero extension of a small integer value + /// to a larger integer value. class SCEVZeroExtendExpr : public SCEVCastExpr { friend class ScalarEvolution; @@ -110,10 +102,8 @@ namespace llvm { } }; - //===--------------------------------------------------------------------===// - /// SCEVSignExtendExpr - This class represents a sign extension of a small - /// integer value to a larger integer value. - /// + /// This class represents a sign extension of a small integer value + /// to a larger integer value. class SCEVSignExtendExpr : public SCEVCastExpr { friend class ScalarEvolution; @@ -128,10 +118,8 @@ namespace llvm { }; - //===--------------------------------------------------------------------===// - /// SCEVNAryExpr - This node is a base class providing common - /// functionality for n'ary operators. - /// + /// This node is a base class providing common functionality for + /// n'ary operators. 
class SCEVNAryExpr : public SCEV { protected: // Since SCEVs are immutable, ScalarEvolution allocates operand @@ -166,6 +154,18 @@ namespace llvm { return (NoWrapFlags)(SubclassData & Mask); } + bool hasNoUnsignedWrap() const { + return getNoWrapFlags(FlagNUW) != FlagAnyWrap; + } + + bool hasNoSignedWrap() const { + return getNoWrapFlags(FlagNSW) != FlagAnyWrap; + } + + bool hasNoSelfWrap() const { + return getNoWrapFlags(FlagNW) != FlagAnyWrap; + } + /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr || @@ -176,10 +176,7 @@ namespace llvm { } }; - //===--------------------------------------------------------------------===// - /// SCEVCommutativeExpr - This node is the base class for n'ary commutative - /// operators. - /// + /// This node is the base class for n'ary commutative operators. class SCEVCommutativeExpr : public SCEVNAryExpr { protected: SCEVCommutativeExpr(const FoldingSetNodeIDRef ID, @@ -202,9 +199,7 @@ namespace llvm { }; - //===--------------------------------------------------------------------===// - /// SCEVAddExpr - This node represents an addition of some number of SCEVs. - /// + /// This node represents an addition of some number of SCEVs. class SCEVAddExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; @@ -227,9 +222,8 @@ namespace llvm { } }; - //===--------------------------------------------------------------------===// - /// SCEVMulExpr - This node represents multiplication of some number of SCEVs. - /// + + /// This node represents multiplication of some number of SCEVs. class SCEVMulExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; @@ -246,9 +240,7 @@ namespace llvm { }; - //===--------------------------------------------------------------------===// - /// SCEVUDivExpr - This class represents a binary unsigned division operation. - /// + /// This class represents a binary unsigned division operation. 
class SCEVUDivExpr : public SCEV { friend class ScalarEvolution; @@ -277,12 +269,11 @@ namespace llvm { }; - //===--------------------------------------------------------------------===// - /// SCEVAddRecExpr - This node represents a polynomial recurrence on the trip - /// count of the specified loop. This is the primary focus of the - /// ScalarEvolution framework; all the other SCEV subclasses are mostly just - /// supporting infrastructure to allow SCEVAddRecExpr expressions to be - /// created and analyzed. + /// This node represents a polynomial recurrence on the trip count + /// of the specified loop. This is the primary focus of the + /// ScalarEvolution framework; all the other SCEV subclasses are + /// mostly just supporting infrastructure to allow SCEVAddRecExpr + /// expressions to be created and analyzed. /// /// All operands of an AddRec are required to be loop invariant. /// @@ -299,10 +290,10 @@ namespace llvm { const SCEV *getStart() const { return Operands[0]; } const Loop *getLoop() const { return L; } - /// getStepRecurrence - This method constructs and returns the recurrence - /// indicating how much this expression steps by. If this is a polynomial - /// of degree N, it returns a chrec of degree N-1. - /// We cannot determine whether the step recurrence has self-wraparound. + /// Constructs and returns the recurrence indicating how much this + /// expression steps by. If this is a polynomial of degree N, it + /// returns a chrec of degree N-1. We cannot determine whether + /// the step recurrence has self-wraparound. const SCEV *getStepRecurrence(ScalarEvolution &SE) const { if (isAffine()) return getOperand(1); return SE.getAddRecExpr(SmallVector<const SCEV *, 3>(op_begin()+1, @@ -310,17 +301,17 @@ namespace llvm { getLoop(), FlagAnyWrap); } - /// isAffine - Return true if this represents an expression - /// A + B*x where A and B are loop invariant values. 
+ /// Return true if this represents an expression A + B*x where A + /// and B are loop invariant values. bool isAffine() const { // We know that the start value is invariant. This expression is thus // affine iff the step is also invariant. return getNumOperands() == 2; } - /// isQuadratic - Return true if this represents an expression - /// A + B*x + C*x^2 where A, B and C are loop invariant values. - /// This corresponds to an addrec of the form {L,+,M,+,N} + /// Return true if this represents an expression A + B*x + C*x^2 + /// where A, B and C are loop invariant values. This corresponds + /// to an addrec of the form {L,+,M,+,N} bool isQuadratic() const { return getNumOperands() == 3; } @@ -334,21 +325,21 @@ namespace llvm { SubclassData |= Flags; } - /// evaluateAtIteration - Return the value of this chain of recurrences at - /// the specified iteration number. + /// Return the value of this chain of recurrences at the specified + /// iteration number. const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const; - /// getNumIterationsInRange - Return the number of iterations of this loop - /// that produce values in the specified constant range. Another way of - /// looking at this is that it returns the first iteration number where the - /// value is not in the condition, thus computing the exit count. If the - /// iteration count can't be computed, an instance of SCEVCouldNotCompute is - /// returned. - const SCEV *getNumIterationsInRange(ConstantRange Range, - ScalarEvolution &SE) const; - - /// getPostIncExpr - Return an expression representing the value of - /// this expression one iteration of the loop ahead. + /// Return the number of iterations of this loop that produce + /// values in the specified constant range. Another way of + /// looking at this is that it returns the first iteration number + /// where the value is not in the condition, thus computing the + /// exit count. 
If the iteration count can't be computed, an + /// instance of SCEVCouldNotCompute is returned. + const SCEV *getNumIterationsInRange(const ConstantRange &Range, + ScalarEvolution &SE) const; + + /// Return an expression representing the value of this expression + /// one iteration of the loop ahead. const SCEVAddRecExpr *getPostIncExpr(ScalarEvolution &SE) const { return cast<SCEVAddRecExpr>(SE.getAddExpr(this, getStepRecurrence(SE))); } @@ -359,9 +350,7 @@ namespace llvm { } }; - //===--------------------------------------------------------------------===// - /// SCEVSMaxExpr - This class represents a signed maximum selection. - /// + /// This class represents a signed maximum selection. class SCEVSMaxExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; @@ -380,9 +369,7 @@ namespace llvm { }; - //===--------------------------------------------------------------------===// - /// SCEVUMaxExpr - This class represents an unsigned maximum selection. - /// + /// This class represents an unsigned maximum selection. class SCEVUMaxExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; @@ -400,11 +387,9 @@ namespace llvm { } }; - //===--------------------------------------------------------------------===// - /// SCEVUnknown - This means that we are dealing with an entirely unknown SCEV - /// value, and only represent it as its LLVM Value. This is the "bottom" - /// value for the analysis. - /// + /// This means that we are dealing with an entirely unknown SCEV + /// value, and only represent it as its LLVM Value. This is the + /// "bottom" value for the analysis. class SCEVUnknown final : public SCEV, private CallbackVH { friend class ScalarEvolution; @@ -412,13 +397,13 @@ namespace llvm { void deleted() override; void allUsesReplacedWith(Value *New) override; - /// SE - The parent ScalarEvolution value. This is used to update - /// the parent's maps when the value associated with a SCEVUnknown - /// is deleted or RAUW'd. 
+ /// The parent ScalarEvolution value. This is used to update the + /// parent's maps when the value associated with a SCEVUnknown is + /// deleted or RAUW'd. ScalarEvolution *SE; - /// Next - The next pointer in the linked list of all - /// SCEVUnknown instances owned by a ScalarEvolution. + /// The next pointer in the linked list of all SCEVUnknown + /// instances owned by a ScalarEvolution. SCEVUnknown *Next; SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, @@ -428,15 +413,17 @@ namespace llvm { public: Value *getValue() const { return getValPtr(); } - /// isSizeOf, isAlignOf, isOffsetOf - Test whether this is a special - /// constant representing a type size, alignment, or field offset in - /// a target-independent manner, and hasn't happened to have been - /// folded with other operations into something unrecognizable. This - /// is mainly only useful for pretty-printing and other situations - /// where it isn't absolutely required for these to succeed. + /// @{ + /// Test whether this is a special constant representing a type + /// size, alignment, or field offset in a target-independent + /// manner, and hasn't happened to have been folded with other + /// operations into something unrecognizable. This is mainly only + /// useful for pretty-printing and other situations where it isn't + /// absolutely required for these to succeed. bool isSizeOf(Type *&AllocTy) const; bool isAlignOf(Type *&AllocTy) const; bool isOffsetOf(Type *&STy, Constant *&FieldNo) const; + /// @} Type *getType() const { return getValPtr()->getType(); } @@ -446,8 +433,8 @@ namespace llvm { } }; - /// SCEVVisitor - This class defines a simple visitor class that may be used - /// for various SCEV analysis purposes. + /// This class defines a simple visitor class that may be used for + /// various SCEV analysis purposes. 
template<typename SC, typename RetVal=void> struct SCEVVisitor { RetVal visit(const SCEV *S) { @@ -524,14 +511,10 @@ namespace llvm { case scMulExpr: case scSMaxExpr: case scUMaxExpr: - case scAddRecExpr: { - const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), - E = NAry->op_end(); I != E; ++I) { - push(*I); - } + case scAddRecExpr: + for (const auto *Op : cast<SCEVNAryExpr>(S)->operands()) + push(Op); break; - } case scUDivExpr: { const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); push(UDiv->getLHS()); @@ -697,13 +680,6 @@ namespace llvm { private: LoopToScevMapT &Map; }; - -/// Applies the Map (Loop -> SCEV) to the given Scev. -static inline const SCEV *apply(const SCEV *Scev, LoopToScevMapT &Map, - ScalarEvolution &SE) { - return SCEVLoopAddRecRewriter::rewrite(Scev, Map, SE); -} - } #endif diff --git a/include/llvm/Analysis/ScopedNoAliasAA.h b/include/llvm/Analysis/ScopedNoAliasAA.h index 175561687157..87b85d4e6635 100644 --- a/include/llvm/Analysis/ScopedNoAliasAA.h +++ b/include/llvm/Analysis/ScopedNoAliasAA.h @@ -27,8 +27,7 @@ class ScopedNoAliasAAResult : public AAResultBase<ScopedNoAliasAAResult> { friend AAResultBase<ScopedNoAliasAAResult>; public: - explicit ScopedNoAliasAAResult(const TargetLibraryInfo &TLI) - : AAResultBase(TLI) {} + explicit ScopedNoAliasAAResult() : AAResultBase() {} ScopedNoAliasAAResult(ScopedNoAliasAAResult &&Arg) : AAResultBase(std::move(Arg)) {} @@ -48,20 +47,14 @@ private: }; /// Analysis pass providing a never-invalidated alias analysis result. -class ScopedNoAliasAA { +class ScopedNoAliasAA : public AnalysisInfoMixin<ScopedNoAliasAA> { + friend AnalysisInfoMixin<ScopedNoAliasAA>; + static char PassID; + public: typedef ScopedNoAliasAAResult Result; - /// \brief Opaque, unique identifier for this analysis pass.
- static void *ID() { return (void *)&PassID; } - - ScopedNoAliasAAResult run(Function &F, AnalysisManager<Function> *AM); - - /// \brief Provide access to a name for this pass for debugging purposes. - static StringRef name() { return "ScopedNoAliasAA"; } - -private: - static char PassID; + ScopedNoAliasAAResult run(Function &F, AnalysisManager<Function> &AM); }; /// Legacy wrapper pass to provide the ScopedNoAliasAAResult object. diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h index 2c7f5dd73547..d1a54171d8bd 100644 --- a/include/llvm/Analysis/SparsePropagation.h +++ b/include/llvm/Analysis/SparsePropagation.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/BasicBlock.h" #include <set> #include <vector> diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index 7798e3c88248..b2a593d67dca 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -195,6 +195,11 @@ TLI_DEFINE_STRING_INTERNAL("__memmove_chk") /// void *__memset_chk(void *s, char v, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memset_chk) TLI_DEFINE_STRING_INTERNAL("__memset_chk") + +// int __nvvm_reflect(const char *) +TLI_DEFINE_ENUM_INTERNAL(nvvm_reflect) +TLI_DEFINE_STRING_INTERNAL("__nvvm_reflect") + /// double __sincospi_stret(double x); TLI_DEFINE_ENUM_INTERNAL(sincospi_stret) TLI_DEFINE_STRING_INTERNAL("__sincospi_stret") diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index 7becdf033dd2..7efa6f059707 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -11,15 +11,17 @@ #define LLVM_ANALYSIS_TARGETLIBRARYINFO_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Function.h" #include 
"llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" namespace llvm { -/// VecDesc - Describes a possible vectorization of a function. +template <typename T> class ArrayRef; + +/// Describes a possible vectorization of a function. /// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized /// by a factor 'VectorizationFactor'. struct VecDesc { @@ -27,7 +29,6 @@ struct VecDesc { const char *VectorFnName; unsigned VectorizationFactor; }; -class PreservedAnalyses; namespace LibFunc { enum Func { @@ -38,7 +39,7 @@ class PreservedAnalyses; }; } -/// \brief Implementation of the target library information. +/// Implementation of the target library information. /// /// This class constructs tables that hold the target library information and /// make it available. However, it is somewhat expensive to compute and only @@ -70,8 +71,13 @@ class TargetLibraryInfoImpl { /// on VectorFnName rather than ScalarFnName. std::vector<VecDesc> ScalarDescs; + /// Return true if the function type FTy is valid for the library function + /// F, regardless of whether the function is available. + bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc::Func F, + const DataLayout *DL) const; + public: - /// \brief List of known vector-functions libraries. + /// List of known vector-functions libraries. /// /// The vector-functions library defines, which functions are vectorizable /// and with which factor. The library can be specified by either frontend, @@ -92,24 +98,31 @@ public: TargetLibraryInfoImpl &operator=(const TargetLibraryInfoImpl &TLI); TargetLibraryInfoImpl &operator=(TargetLibraryInfoImpl &&TLI); - /// \brief Searches for a particular function name. + /// Searches for a particular function name. /// /// If it is one of the known library functions, return true and set F to the /// corresponding value. bool getLibFunc(StringRef funcName, LibFunc::Func &F) const; - /// \brief Forces a function to be marked as unavailable. 
+ /// Searches for a particular function name, also checking that its type is + /// valid for the library function matching that name. + /// + /// If it is one of the known library functions, return true and set F to the + /// corresponding value. + bool getLibFunc(const Function &FDecl, LibFunc::Func &F) const; + + /// Forces a function to be marked as unavailable. void setUnavailable(LibFunc::Func F) { setState(F, Unavailable); } - /// \brief Forces a function to be marked as available. + /// Forces a function to be marked as available. void setAvailable(LibFunc::Func F) { setState(F, StandardName); } - /// \brief Forces a function to be marked as available and provide an - /// alternate name that must be used. + /// Forces a function to be marked as available and provide an alternate name + /// that must be used. void setAvailableWithName(LibFunc::Func F, StringRef Name) { if (StandardNames[F] != Name) { setState(F, CustomName); @@ -120,48 +133,47 @@ public: } } - /// \brief Disables all builtins. + /// Disables all builtins. /// /// This can be used for options like -fno-builtin. void disableAllFunctions(); - /// addVectorizableFunctions - Add a set of scalar -> vector mappings, - /// queryable via getVectorizedFunction and getScalarizedFunction. + /// Add a set of scalar -> vector mappings, queryable via + /// getVectorizedFunction and getScalarizedFunction. void addVectorizableFunctions(ArrayRef<VecDesc> Fns); /// Calls addVectorizableFunctions with a known preset of functions for the /// given vector library. void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib); - /// isFunctionVectorizable - Return true if the function F has a - /// vector equivalent with vectorization factor VF. + /// Return true if the function F has a vector equivalent with vectorization + /// factor VF. 
bool isFunctionVectorizable(StringRef F, unsigned VF) const { return !getVectorizedFunction(F, VF).empty(); } - /// isFunctionVectorizable - Return true if the function F has a - /// vector equivalent with any vectorization factor. + /// Return true if the function F has a vector equivalent with any + /// vectorization factor. bool isFunctionVectorizable(StringRef F) const; - /// getVectorizedFunction - Return the name of the equivalent of - /// F, vectorized with factor VF. If no such mapping exists, - /// return the empty string. + /// Return the name of the equivalent of F, vectorized with factor VF. If no + /// such mapping exists, return the empty string. StringRef getVectorizedFunction(StringRef F, unsigned VF) const; - /// isFunctionScalarizable - Return true if the function F has a - /// scalar equivalent, and set VF to be the vectorization factor. + /// Return true if the function F has a scalar equivalent, and set VF to be + /// the vectorization factor. bool isFunctionScalarizable(StringRef F, unsigned &VF) const { return !getScalarizedFunction(F, VF).empty(); } - /// getScalarizedFunction - Return the name of the equivalent of - /// F, scalarized. If no such mapping exists, return the empty string. + /// Return the name of the equivalent of F, scalarized. If no such mapping + /// exists, return the empty string. /// /// Set VF to the vectorization factor. StringRef getScalarizedFunction(StringRef F, unsigned &VF) const; }; -/// \brief Provides information about what library functions are available for +/// Provides information about what library functions are available for /// the current target. /// /// This both allows optimizations to handle them specially and frontends to @@ -187,7 +199,7 @@ public: return *this; } - /// \brief Searches for a particular function name. + /// Searches for a particular function name. /// /// If it is one of the known library functions, return true and set F to the /// corresponding value. 
@@ -195,7 +207,11 @@ public: return Impl->getLibFunc(funcName, F); } - /// \brief Tests whether a library function is available. + bool getLibFunc(const Function &FDecl, LibFunc::Func &F) const { + return Impl->getLibFunc(FDecl, F); + } + + /// Tests whether a library function is available. bool has(LibFunc::Func F) const { return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable; } @@ -209,8 +225,8 @@ public: return Impl->getVectorizedFunction(F, VF); } - /// \brief Tests if the function is both available and a candidate for - /// optimized code generation. + /// Tests if the function is both available and a candidate for optimized code + /// generation. bool hasOptimizedCodeGen(LibFunc::Func F) const { if (Impl->getState(F) == TargetLibraryInfoImpl::Unavailable) return false; @@ -251,31 +267,28 @@ public: return Impl->CustomNames.find(F)->second; } - /// \brief Handle invalidation from the pass manager. + /// Handle invalidation from the pass manager. /// /// If we try to invalidate this info, just return false. It cannot become /// invalid even if the module changes. bool invalidate(Module &, const PreservedAnalyses &) { return false; } }; -/// \brief Analysis pass providing the \c TargetLibraryInfo. +/// Analysis pass providing the \c TargetLibraryInfo. /// /// Note that this pass's result cannot be invalidated, it is immutable for the /// life of the module. -class TargetLibraryAnalysis { +class TargetLibraryAnalysis : public AnalysisInfoMixin<TargetLibraryAnalysis> { public: typedef TargetLibraryInfo Result; - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - /// \brief Default construct the library analysis. + /// Default construct the library analysis. /// /// This will use the module's triple to construct the library info for that /// module. TargetLibraryAnalysis() {} - /// \brief Construct a library analysis with preset info. + /// Construct a library analysis with preset info. 
/// /// This will directly copy the preset info into the result without /// consulting the module's triple. @@ -291,20 +304,18 @@ public: return *this; } - TargetLibraryInfo run(Module &M); - TargetLibraryInfo run(Function &F); - - /// \brief Provide access to a name for this pass for debugging purposes. - static StringRef name() { return "TargetLibraryAnalysis"; } + TargetLibraryInfo run(Module &M, ModuleAnalysisManager &); + TargetLibraryInfo run(Function &F, FunctionAnalysisManager &); private: + friend AnalysisInfoMixin<TargetLibraryAnalysis>; static char PassID; Optional<TargetLibraryInfoImpl> PresetInfoImpl; StringMap<std::unique_ptr<TargetLibraryInfoImpl>> Impls; - TargetLibraryInfoImpl &lookupInfoImpl(Triple T); + TargetLibraryInfoImpl &lookupInfoImpl(const Triple &T); }; class TargetLibraryInfoWrapperPass : public ImmutablePass { diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 3913cc3f107c..7570d22a803c 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -25,6 +25,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/DataTypes.h" #include <functional> @@ -34,7 +36,6 @@ namespace llvm { class Function; class GlobalValue; class Loop; -class PreservedAnalyses; class Type; class User; class Value; @@ -165,6 +166,14 @@ public: /// This overload allows specifying a set of candidate argument values. int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const; + /// \returns A value by which our inlining threshold should be multiplied. + /// This is primarily used to bump up the inlining threshold wholesale on + /// targets where calls are unusually expensive. + /// + /// TODO: This is a rather blunt instrument. 
Perhaps altering the costs of + /// individual classes of instructions would be better. + unsigned getInliningThresholdMultiplier() const; + /// \brief Estimate the cost of an intrinsic when lowered. /// /// Mirrors the \c getCallCost method but uses an intrinsic identifier. @@ -260,6 +269,10 @@ public: // (set to UINT_MAX to disable). This does not apply in cases where the // loop is being fully unrolled. unsigned MaxCount; + /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but + /// applies even if full unrolling is selected. This allows a target to fall + /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. + unsigned FullUnrollMaxCount; /// Allow partial unrolling (unrolling of loops to expand the size of the /// loop body, not only to eliminate small constant-trip-count loops). bool Partial; @@ -267,9 +280,14 @@ public: /// loop body even when the number of loop iterations is not known at /// compile time). bool Runtime; + /// Allow generation of a loop remainder (extra iterations after unroll). + bool AllowRemainder; /// Allow emitting expensive instructions (such as divisions) when computing /// the trip count of a loop for runtime unrolling. bool AllowExpensiveTripCount; + /// Apply loop unroll on any kind of loop + /// (mainly to loops that fail runtime unrolling). + bool Force; }; /// \brief Get target-customized preferences for the generic loop unrolling @@ -313,8 +331,7 @@ public: unsigned AddrSpace = 0) const; /// \brief Return true if the target supports masked load/store - /// AVX2 and AVX-512 targets allow masks for consecutive load and store for - /// 32 and 64 bit elements. + /// AVX2 and AVX-512 targets allow masks for consecutive load and store bool isLegalMaskedStore(Type *DataType) const; bool isLegalMaskedLoad(Type *DataType) const; @@ -362,6 +379,20 @@ public: /// \brief Enable matching of interleaved access groups. 
bool enableInterleavedAccessVectorization() const; + /// \brief Indicate that it is potentially unsafe to automatically vectorize + /// floating-point operations because the semantics of vector and scalar + /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math + /// does not support IEEE-754 denormal numbers, while depending on the + /// platform, scalar floating-point math does. + /// This applies to floating-point math operations and calls, not memory + /// operations, shuffles, or casts. + bool isFPVectorizationPotentiallyUnsafe() const; + + /// \brief Determine if the target supports unaligned memory accesses. + bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0, + unsigned Alignment = 1, + bool *Fast = nullptr) const; + /// \brief Return hardware support for population count. PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; @@ -383,6 +414,16 @@ public: Type *Ty) const; int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const; + + /// \brief Return the expected cost for the given integer when optimising + /// for size. This is different than the other integer immediate cost + /// functions in that it is subtarget agnostic. This is useful when you e.g. + /// target one ISA such as Aarch32 but smaller encodings could be possible + /// with another such as Thumb. This return value is used as a penalty when + /// the total costs for a constant is calculated (the bigger the cost, the + /// more beneficial constant hoisting is). + int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) const; /// @} /// \name Vector Target Information @@ -416,6 +457,27 @@ public: /// \return The width of the largest scalar or vector register type. unsigned getRegisterBitWidth(bool Vector) const; + /// \return The bitwidth of the largest vector type that should be used to + /// load/store in the given address space. 
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; + + /// \return The size of a cache line in bytes. + unsigned getCacheLineSize() const; + + /// \return How much before a load we should place the prefetch instruction. + /// This is currently measured in number of instructions. + unsigned getPrefetchDistance() const; + + /// \return Some HW prefetchers can handle accesses up to a certain constant + /// stride. This is the minimum stride in bytes where it makes sense to start + /// adding SW prefetches. The default is 1, i.e. prefetch with any stride. + unsigned getMinPrefetchStride() const; + + /// \return The maximum number of iterations to prefetch ahead. If the + /// required number of iterations is more than this number, no prefetching is + /// performed. + unsigned getMaxPrefetchIterationsAhead() const; + /// \return The maximum interleave factor that any transform should try to /// perform for this target. This number depends on the level of parallelism /// and the number of execution units in the CPU. @@ -438,6 +500,11 @@ public: /// zext, etc. int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; + /// \return The expected cost of a sign- or zero-extended vector extract. Use + /// -1 to indicate that there is no information about the index value. + int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, + unsigned Index = -1) const; + /// \return The expected cost of control-flow related instructions such as /// Phi, Ret, Br. int getCFInstrCost(unsigned Opcode) const; @@ -497,11 +564,11 @@ public: /// \returns The cost of Intrinsic instructions. Types analysis only. int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys) const; + ArrayRef<Type *> Tys, FastMathFlags FMF) const; /// \returns The cost of Intrinsic instructions. Analyses the real arguments. 
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args) const; + ArrayRef<Value *> Args, FastMathFlags FMF) const; /// \returns The cost of Call instructions. int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const; @@ -569,6 +636,7 @@ public: virtual int getCallCost(const Function *F, int NumArgs) = 0; virtual int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) = 0; + virtual unsigned getInliningThresholdMultiplier() = 0; virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) = 0; virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, @@ -599,9 +667,16 @@ public: virtual bool shouldBuildLookupTables() = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; virtual bool enableInterleavedAccessVectorization() = 0; + virtual bool isFPVectorizationPotentiallyUnsafe() = 0; + virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth, + unsigned AddressSpace, + unsigned Alignment, + bool *Fast) = 0; virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; virtual int getFPOpCost(Type *Ty) = 0; + virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) = 0; virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) = 0; @@ -609,6 +684,11 @@ public: Type *Ty) = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; virtual unsigned getRegisterBitWidth(bool Vector) = 0; + virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) = 0; + virtual unsigned getCacheLineSize() = 0; + virtual unsigned getPrefetchDistance() = 0; + virtual unsigned getMinPrefetchStride() = 0; + virtual unsigned getMaxPrefetchIterationsAhead() = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind 
Opd1Info, @@ -618,6 +698,8 @@ public: virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) = 0; virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0; + virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, unsigned Index) = 0; virtual int getCFInstrCost(unsigned Opcode) = 0; virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) = 0; @@ -639,9 +721,11 @@ public: virtual int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) = 0; virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys) = 0; + ArrayRef<Type *> Tys, + FastMathFlags FMF) = 0; virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args) = 0; + ArrayRef<Value *> Args, + FastMathFlags FMF) = 0; virtual int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) = 0; virtual unsigned getNumberOfParts(Type *Tp) = 0; @@ -684,6 +768,9 @@ public: ArrayRef<const Value *> Arguments) override { return Impl.getCallCost(F, Arguments); } + unsigned getInliningThresholdMultiplier() override { + return Impl.getInliningThresholdMultiplier(); + } int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) override { return Impl.getIntrinsicCost(IID, RetTy, ParamTys); @@ -751,6 +838,14 @@ public: bool enableInterleavedAccessVectorization() override { return Impl.enableInterleavedAccessVectorization(); } + bool isFPVectorizationPotentiallyUnsafe() override { + return Impl.isFPVectorizationPotentiallyUnsafe(); + } + bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace, + unsigned Alignment, bool *Fast) override { + return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace, + Alignment, Fast); + } PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { return Impl.getPopcntSupport(IntTyWidthInBit); } @@ -758,6 +853,10 @@ public: int getFPOpCost(Type *Ty) override { return 
Impl.getFPOpCost(Ty); } + int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) override { + return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); + } int getIntImmCost(const APInt &Imm, Type *Ty) override { return Impl.getIntImmCost(Imm, Ty); } @@ -775,6 +874,21 @@ public: unsigned getRegisterBitWidth(bool Vector) override { return Impl.getRegisterBitWidth(Vector); } + + unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) override { + return Impl.getLoadStoreVecRegBitWidth(AddrSpace); + } + + unsigned getCacheLineSize() override { + return Impl.getCacheLineSize(); + } + unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); } + unsigned getMinPrefetchStride() override { + return Impl.getMinPrefetchStride(); + } + unsigned getMaxPrefetchIterationsAhead() override { + return Impl.getMaxPrefetchIterationsAhead(); + } unsigned getMaxInterleaveFactor(unsigned VF) override { return Impl.getMaxInterleaveFactor(VF); } @@ -793,6 +907,10 @@ public: int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override { return Impl.getCastInstrCost(Opcode, Dst, Src); } + int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, + unsigned Index) override { + return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); + } int getCFInstrCost(unsigned Opcode) override { return Impl.getCFInstrCost(Opcode); } @@ -826,13 +944,14 @@ public: bool IsPairwiseForm) override { return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm); } - int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys) override { - return Impl.getIntrinsicInstrCost(ID, RetTy, Tys); + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, + FastMathFlags FMF) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF); } int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args) override { - return Impl.getIntrinsicInstrCost(ID, RetTy, Args); + 
ArrayRef<Value *> Args, + FastMathFlags FMF) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF); } int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) override { @@ -876,16 +995,10 @@ TargetTransformInfo::TargetTransformInfo(T Impl) /// is done in a subtarget specific way and LLVM supports compiling different /// functions targeting different subtargets in order to support runtime /// dispatch according to the observed subtarget. -class TargetIRAnalysis { +class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { public: typedef TargetTransformInfo Result; - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - /// \brief Provide access to a name for this pass for debugging purposes. - static StringRef name() { return "TargetIRAnalysis"; } - /// \brief Default construct a target IR analysis. /// /// This will use the module's datalayout to construct a baseline @@ -912,9 +1025,10 @@ public: return *this; } - Result run(const Function &F); + Result run(const Function &F, AnalysisManager<Function> &); private: + friend AnalysisInfoMixin<TargetIRAnalysis>; static char PassID; /// \brief The callback used to produce a result. diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 43815234051e..a97624bc2ab0 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -102,8 +102,8 @@ public: } } - unsigned getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands) { + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands) { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. 
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -128,6 +128,8 @@ public: return TTI::TCC_Basic * (NumArgs + 1); } + unsigned getInliningThresholdMultiplier() { return 1; } + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) { switch (IID) { @@ -240,6 +242,13 @@ public: bool enableInterleavedAccessVectorization() { return false; } + bool isFPVectorizationPotentiallyUnsafe() { return false; } + + bool allowsMisalignedMemoryAccesses(unsigned BitWidth, + unsigned AddressSpace, + unsigned Alignment, + bool *Fast) { return false; } + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { return TTI::PSK_Software; } @@ -248,6 +257,11 @@ public: unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } + int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) { + return 0; + } + unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, @@ -264,6 +278,16 @@ public: unsigned getRegisterBitWidth(bool Vector) { return 32; } + unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) { return 128; } + + unsigned getCacheLineSize() { return 0; } + + unsigned getPrefetchDistance() { return 0; } + + unsigned getMinPrefetchStride() { return 1; } + + unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; } + unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, @@ -281,6 +305,11 @@ public: unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { return 1; } + unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, unsigned Index) { + return 1; + } + unsigned getCFInstrCost(unsigned Opcode) { return 1; } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { @@ -316,11 +345,11 @@ public: } unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - 
ArrayRef<Type *> Tys) { + ArrayRef<Type *> Tys, FastMathFlags FMF) { return 1; } unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args) { + ArrayRef<Value *> Args, FastMathFlags FMF) { return 1; } @@ -404,8 +433,8 @@ public: using BaseT::getGEPCost; - unsigned getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands) { + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands) { const GlobalValue *BaseGV = nullptr; if (Ptr != nullptr) { // TODO: will remove this when pointers have an opaque type. @@ -421,7 +450,7 @@ public: // Assumes the address space is 0 when Ptr is nullptr. unsigned AS = (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); - auto GTI = gep_type_begin(PointerType::get(PointeeType, AS), Operands); + auto GTI = gep_type_begin(PointeeType, AS, Operands); for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { // We assume that the cost of Scalar GEP with constant index and the // cost of Vector GEP with splat constant index are the same. diff --git a/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/include/llvm/Analysis/TypeBasedAliasAnalysis.h index 7b44ac73f1fa..229b0f97b983 100644 --- a/include/llvm/Analysis/TypeBasedAliasAnalysis.h +++ b/include/llvm/Analysis/TypeBasedAliasAnalysis.h @@ -27,8 +27,7 @@ class TypeBasedAAResult : public AAResultBase<TypeBasedAAResult> { friend AAResultBase<TypeBasedAAResult>; public: - explicit TypeBasedAAResult(const TargetLibraryInfo &TLI) - : AAResultBase(TLI) {} + explicit TypeBasedAAResult() {} TypeBasedAAResult(TypeBasedAAResult &&Arg) : AAResultBase(std::move(Arg)) {} /// Handle invalidation events from the new pass manager. @@ -49,20 +48,14 @@ private: }; /// Analysis pass providing a never-invalidated alias analysis result. 
-class TypeBasedAA { +class TypeBasedAA : public AnalysisInfoMixin<TypeBasedAA> { + friend AnalysisInfoMixin<TypeBasedAA>; + static char PassID; + public: typedef TypeBasedAAResult Result; - /// \brief Opaque, unique identifier for this analysis pass. - static void *ID() { return (void *)&PassID; } - - TypeBasedAAResult run(Function &F, AnalysisManager<Function> *AM); - - /// \brief Provide access to a name for this pass for debugging purposes. - static StringRef name() { return "TypeBasedAA"; } - -private: - static char PassID; + TypeBasedAAResult run(Function &F, AnalysisManager<Function> &AM); }; /// Legacy wrapper pass to provide the TypeBasedAAResult object. diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h new file mode 100644 index 000000000000..c3f688f5d7f1 --- /dev/null +++ b/include/llvm/Analysis/TypeMetadataUtils.h @@ -0,0 +1,45 @@ +//===- TypeMetadataUtils.h - Utilities related to type metadata --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions that make it easier to manipulate type metadata +// for devirtualization. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TYPEMETADATAUTILS_H +#define LLVM_ANALYSIS_TYPEMETADATAUTILS_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CallSite.h" + +namespace llvm { + +/// A call site that could be devirtualized. +struct DevirtCallSite { + /// The offset from the address point to the virtual function. + uint64_t Offset; + /// The call site itself. + CallSite CS; +}; + +/// Given a call to the intrinsic @llvm.type.test, find all devirtualizable +/// call sites based on the call and return them in DevirtCalls. 
+void findDevirtualizableCallsForTypeTest( + SmallVectorImpl<DevirtCallSite> &DevirtCalls, + SmallVectorImpl<CallInst *> &Assumes, CallInst *CI); + +/// Given a call to the intrinsic @llvm.type.checked.load, find all +/// devirtualizable call sites based on the call and return them in DevirtCalls. +void findDevirtualizableCallsForTypeCheckedLoad( + SmallVectorImpl<DevirtCallSite> &DevirtCalls, + SmallVectorImpl<Instruction *> &LoadedPtrs, + SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, CallInst *CI); +} + +#endif diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 8e0291068472..2c6221d4933f 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -15,17 +15,20 @@ #ifndef LLVM_ANALYSIS_VALUETRACKING_H #define LLVM_ANALYSIS_VALUETRACKING_H -#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/DataTypes.h" namespace llvm { +template <typename T> class ArrayRef; class APInt; class AddOperator; class AssumptionCache; class DataLayout; class DominatorTree; + class GEPOperator; class Instruction; class Loop; class LoopInfo; @@ -34,6 +37,10 @@ namespace llvm { class TargetLibraryInfo; class Value; + namespace Intrinsic { + enum ID : unsigned; + } + /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. /// @@ -58,29 +65,29 @@ namespace llvm { const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); - /// ComputeSignBit - Determine whether the sign bit is known to be zero or - /// one. Convenience wrapper around computeKnownBits. + /// Determine whether the sign bit is known to be zero or one. Convenience + /// wrapper around computeKnownBits. 
void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); - /// isKnownToBeAPowerOfTwo - Return true if the given value is known to have - /// exactly one bit set when defined. For vectors return true if every - /// element is known to be a power of two when defined. Supports values with - /// integer or pointer type and vectors of integers. If 'OrZero' is set then - /// return true if the given value is either a power of two or zero. + /// Return true if the given value is known to have exactly one bit set when + /// defined. For vectors return true if every element is known to be a power + /// of two when defined. Supports values with integer or pointer type and + /// vectors of integers. If 'OrZero' is set, then return true if the given + /// value is either a power of two or zero. bool isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero = false, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); - /// isKnownNonZero - Return true if the given value is known to be non-zero - /// when defined. For vectors return true if every element is known to be - /// non-zero when defined. Supports values with integer or pointer type and - /// vectors of integers. + /// Return true if the given value is known to be non-zero when defined. For + /// vectors, return true if every element is known to be non-zero when + /// defined. Supports values with integer or pointer type and vectors of + /// integers. bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, @@ -92,16 +99,30 @@ namespace llvm { const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); - /// isKnownNonEqual - Return true if the given values are known to be - /// non-equal when defined. 
Supports scalar integer types only. + /// Returns true if the given value is known to be positive (i.e. non-negative + /// and non-zero). + bool isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); + + /// Returns true if the given value is known to be negative (i.e. non-positive + /// and non-zero). + bool isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); + + /// Return true if the given values are known to be non-equal when defined. + /// Supports scalar integer types only. bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); - /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use - /// this predicate to simplify operations downstream. Mask is known to be - /// zero for bits that V cannot have. + /// Return true if 'V & Mask' is known to be zero. We use this predicate to + /// simplify operations downstream. Mask is known to be zero for bits that V + /// cannot have. /// /// This function is defined on values with integer type, values with pointer /// type, and vectors of integers. In the case @@ -113,48 +134,52 @@ namespace llvm { const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); - /// ComputeNumSignBits - Return the number of times the sign bit of the - /// register is replicated into the other bits. We know that at least 1 bit - /// is always equal to the sign bit (itself), but other cases can give us - /// information. For example, immediately after an "ashr X, 2", we know that - /// the top 3 bits are all equal to each other, so we return 3. - /// - /// 'Op' must have a scalar integer type.
- /// + /// Return the number of times the sign bit of the register is replicated into + /// the other bits. We know that at least 1 bit is always equal to the sign + /// bit (itself), but other cases can give us information. For example, + /// immediately after an "ashr X, 2", we know that the top 3 bits are all + /// equal to each other, so we return 3. For vectors, return the number of + /// sign bits for the vector element with the minimum number of known sign + /// bits. unsigned ComputeNumSignBits(Value *Op, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); - /// ComputeMultiple - This function computes the integer multiple of Base that - /// equals V. If successful, it returns true and returns the multiple in - /// Multiple. If unsuccessful, it returns false. Also, if V can be - /// simplified to an integer, then the simplified V is returned in Val. Look - /// through sext only if LookThroughSExt=true. + /// This function computes the integer multiple of Base that equals V. If + /// successful, it returns true and returns the multiple in Multiple. If + /// unsuccessful, it returns false. Also, if V can be simplified to an + /// integer, then the simplified V is returned in Val. Look through sext only + /// if LookThroughSExt=true. bool ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool LookThroughSExt = false, unsigned Depth = 0); - /// CannotBeNegativeZero - Return true if we can prove that the specified FP - /// value is never equal to -0.0. - /// - bool CannotBeNegativeZero(const Value *V, unsigned Depth = 0); - - /// CannotBeOrderedLessThanZero - Return true if we can prove that the - /// specified FP value is either a NaN or never less than 0.0.
- /// - bool CannotBeOrderedLessThanZero(const Value *V, unsigned Depth = 0); - - /// isBytewiseValue - If the specified value can be set by repeating the same - /// byte in memory, return the i8 value that it is represented with. This is - /// true for all i8 values obviously, but is also true for i32 0, i32 -1, - /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated - /// byte store (e.g. i16 0x1234), return null. + /// Map a call instruction to an intrinsic ID. Libcalls which have equivalent + /// intrinsics are treated as-if they were intrinsics. + Intrinsic::ID getIntrinsicForCallSite(ImmutableCallSite ICS, + const TargetLibraryInfo *TLI); + + /// Return true if we can prove that the specified FP value is never equal to + /// -0.0. + bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + + /// Return true if we can prove that the specified FP value is either a NaN or + /// never less than 0.0. + bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + + /// If the specified value can be set by repeating the same byte in memory, + /// return the i8 value that it is represented with. This is true for all i8 + /// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double + /// 0.0 etc. If the value can't be handled with a repeated byte store (e.g. + /// i16 0x1234), return null. Value *isBytewiseValue(Value *V); - /// FindInsertedValue - Given an aggregrate and an sequence of indices, see if - /// the scalar value indexed is already around as a register, for example if - /// it were inserted directly into the aggregrate. + /// Given an aggregate and a sequence of indices, see if the scalar value + /// indexed is already around as a register, for example if it were inserted + /// directly into the aggregate.
/// /// If InsertBefore is not null, this function will duplicate (modified) /// insertvalues when a part of a nested struct is extracted. @@ -162,9 +187,8 @@ namespace llvm { ArrayRef<unsigned> idx_range, Instruction *InsertBefore = nullptr); - /// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if - /// it can be expressed as a base pointer plus a constant offset. Return the - /// base and offset to the caller. + /// Analyze the specified pointer to see if it can be expressed as a base + /// pointer plus a constant offset. Return the base and offset to the caller. Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL); static inline const Value * @@ -174,24 +198,28 @@ namespace llvm { DL); } - /// getConstantStringInfo - This function computes the length of a - /// null-terminated C string pointed to by V. If successful, it returns true - /// and returns the string in Str. If unsuccessful, it returns false. This - /// does not include the trailing nul character by default. If TrimAtNul is - /// set to false, then this returns any trailing nul characters as well as any - /// other characters that come after it. + /// Returns true if the GEP is based on a pointer to a string (array of i8), + /// and is indexing into this string. + bool isGEPBasedOnPointerToString(const GEPOperator *GEP); + + /// This function computes the length of a null-terminated C string pointed to + /// by V. If successful, it returns true and returns the string in Str. If + /// unsuccessful, it returns false. This does not include the trailing null + /// character by default. If TrimAtNul is set to false, then this returns any + /// trailing null characters as well as any other characters that come after + /// it. 
bool getConstantStringInfo(const Value *V, StringRef &Str, uint64_t Offset = 0, bool TrimAtNul = true); - /// GetStringLength - If we can compute the length of the string pointed to by - /// the specified pointer, return 'len+1'. If we can't, return 0. + /// If we can compute the length of the string pointed to by the specified + /// pointer, return 'len+1'. If we can't, return 0. uint64_t GetStringLength(Value *V); - /// GetUnderlyingObject - This method strips off any GEP address adjustments - /// and pointer casts from the specified value, returning the original object - /// being addressed. Note that the returned value has pointer type if the - /// specified value does. If the MaxLookup value is non-zero, it limits the - /// number of instructions to be stripped off. + /// This method strips off any GEP address adjustments and pointer casts from + /// the specified value, returning the original object being addressed. Note + /// that the returned value has pointer type if the specified value does. If + /// the MaxLookup value is non-zero, it limits the number of instructions to + /// be stripped off. Value *GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup = 6); static inline const Value *GetUnderlyingObject(const Value *V, @@ -232,32 +260,11 @@ namespace llvm { const DataLayout &DL, LoopInfo *LI = nullptr, unsigned MaxLookup = 6); - /// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer - /// are lifetime markers. + /// Return true if the only users of this pointer are lifetime markers. bool onlyUsedByLifetimeMarkers(const Value *V); - /// isDereferenceablePointer - Return true if this is always a dereferenceable - /// pointer. If the context instruction is specified perform context-sensitive - /// analysis and return true if the pointer is dereferenceable at the - /// specified instruction. 
- bool isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr, - const TargetLibraryInfo *TLI = nullptr); - - /// Returns true if V is always a dereferenceable pointer with alignment - /// greater or equal than requested. If the context instruction is specified - /// performs context-sensitive analysis and returns true if the pointer is - /// dereferenceable at the specified instruction. - bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, - const DataLayout &DL, - const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr, - const TargetLibraryInfo *TLI = nullptr); - - /// isSafeToSpeculativelyExecute - Return true if the instruction does not - /// have any effects besides calculating the result and does not have - /// undefined behavior. + /// Return true if the instruction does not have any effects besides + /// calculating the result and does not have undefined behavior. /// /// This method never returns true for an instruction that returns true for /// mayHaveSideEffects; however, this method also does some other checks in @@ -281,8 +288,7 @@ namespace llvm { /// for such instructions, moving them may change the resulting value. bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr, - const TargetLibraryInfo *TLI = nullptr); + const DominatorTree *DT = nullptr); /// Returns true if the result or effects of the given instructions \p I /// depend on or influence global memory. @@ -294,19 +300,18 @@ namespace llvm { /// operands are not memory dependent. bool mayBeMemoryDependent(const Instruction &I); - /// isKnownNonNull - Return true if this pointer couldn't possibly be null by - /// its definition. This returns true for allocas, non-extern-weak globals - /// and byval arguments. 
- bool isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI = nullptr); + /// Return true if this pointer couldn't possibly be null by its definition. + /// This returns true for allocas, non-extern-weak globals, and byval + /// arguments. + bool isKnownNonNull(const Value *V); - /// isKnownNonNullAt - Return true if this pointer couldn't possibly be null. - /// If the context instruction is specified perform context-sensitive analysis - /// and return true if the pointer couldn't possibly be null at the specified + /// Return true if this pointer couldn't possibly be null. If the context + /// instruction is specified, perform context-sensitive analysis and return + /// true if the pointer couldn't possibly be null at the specified /// instruction. bool isKnownNonNullAt(const Value *V, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr, - const TargetLibraryInfo *TLI = nullptr); + const DominatorTree *DT = nullptr); /// Return true if it is valid to use the assumptions provided by an /// assume intrinsic, I, at the point in the control-flow identified by the @@ -337,6 +342,11 @@ namespace llvm { const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); + /// Returns true if the arithmetic part of the \p II 's result is + /// used only along the paths control dependent on the computation + /// not overflowing, \p II being an <op>.with.overflow intrinsic. + bool isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT); + /// Return true if this function can prove that the instruction I will /// always transfer execution to one of its successors (including the next /// instruction that follows within a basic block). E.g. this is not @@ -441,18 +451,20 @@ namespace llvm { /// E.g. if RangeMD is !{i32 0, i32 10, i32 15, i32 20} then return [0, 20). ConstantRange getConstantRangeFromMetadata(MDNode &RangeMD); - /// Return true if RHS is known to be implied by LHS. 
A & B must be i1 - /// (boolean) values or a vector of such values. Note that the truth table for - /// implication is the same as <=u on i1 values (but not <=s!). The truth - /// table for both is: + /// Return true if RHS is known to be implied true by LHS. Return false if + /// RHS is known to be implied false by LHS. Otherwise, return None if no + /// implication can be made. + /// A & B must be i1 (boolean) values or a vector of such values. Note that + /// the truth table for implication is the same as <=u on i1 values (but not + /// <=s!). The truth table for both is: /// | T | F (B) /// T | T | F /// F | T | T /// (A) - bool isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL, - unsigned Depth = 0, AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + Optional<bool> isImpliedCondition( + Value *LHS, Value *RHS, const DataLayout &DL, bool InvertAPred = false, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); } // end namespace llvm #endif diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h index 531803adf5e4..eaa068b89c77 100644 --- a/include/llvm/Analysis/VectorUtils.h +++ b/include/llvm/Analysis/VectorUtils.h @@ -14,15 +14,13 @@ #ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H #define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" namespace llvm { -struct DemandedBits; +template <typename T> class ArrayRef; +class DemandedBits; class GetElementPtrInst; class Loop; class ScalarEvolution; @@ -30,6 +28,10 @@ class TargetTransformInfo; class Type; class Value; +namespace Intrinsic { +enum ID : unsigned; +} + /// \brief Identify if the intrinsic is trivially vectorizable. 
/// This method returns true if the intrinsic's argument types are all /// scalars for the scalar form of the intrinsic and all vectors for @@ -40,26 +42,11 @@ bool isTriviallyVectorizable(Intrinsic::ID ID); /// ctlz,cttz and powi special intrinsics whose argument is scalar. bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx); -/// \brief Identify if call has a unary float signature -/// It returns input intrinsic ID if call has a single argument, -/// argument type and call instruction type should be floating -/// point type and call should only reads memory. -/// else return not_intrinsic. -Intrinsic::ID checkUnaryFloatSignature(const CallInst &I, - Intrinsic::ID ValidIntrinsicID); - -/// \brief Identify if call has a binary float signature -/// It returns input intrinsic ID if call has two arguments, -/// arguments type and call instruction type should be floating -/// point type and call should only reads memory. -/// else return not_intrinsic. -Intrinsic::ID checkBinaryFloatSignature(const CallInst &I, - Intrinsic::ID ValidIntrinsicID); - /// \brief Returns intrinsic ID for call. /// For the input call instruction it finds mapping intrinsic and returns /// its intrinsic ID, in case it does not found it return not_intrinsic. -Intrinsic::ID getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI); +Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, + const TargetLibraryInfo *TLI); /// \brief Find the operand of the GEP that should be checked for consecutive /// stores. This ignores trailing indices that have no effect on the final @@ -126,7 +113,16 @@ MapVector<Instruction*, uint64_t> computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr); - + +/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, +/// MD_nontemporal]. 
For K in Kinds, we get the MDNode for K from each of the +/// elements of VL, compute their "intersection" (i.e., the most generic +/// metadata value that covers all of the individual values), and set I's +/// metadata for K equal to the intersection value. +/// +/// This function always sets a (possibly null) value for each K in Kinds. +Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL); + } // llvm namespace #endif |