Diffstat (limited to 'llvm/include/llvm/Analysis')
65 files changed, 2316 insertions, 1267 deletions
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 9f7461243f35..7fec0feb09d5 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -78,21 +78,64 @@ class Value; /// /// See docs/AliasAnalysis.html for more information on the specific meanings /// of these values. -enum AliasResult : uint8_t { - /// The two locations do not alias at all. - /// - /// This value is arranged to convert to false, while all other values - /// convert to true. This allows a boolean context to convert the result to - /// a binary flag indicating whether there is the possibility of aliasing. - NoAlias = 0, - /// The two locations may or may not alias. This is the least precise result. - MayAlias, - /// The two locations alias, but only due to a partial overlap. - PartialAlias, - /// The two locations precisely alias each other. - MustAlias, +class AliasResult { +private: + static const int OffsetBits = 23; + static const int AliasBits = 8; + static_assert(AliasBits + 1 + OffsetBits <= 32, + "AliasResult size is intended to be 4 bytes!"); + + unsigned int Alias : AliasBits; + unsigned int HasOffset : 1; + signed int Offset : OffsetBits; + +public: + enum Kind : uint8_t { + /// The two locations do not alias at all. + /// + /// This value is arranged to convert to false, while all other values + /// convert to true. This allows a boolean context to convert the result to + /// a binary flag indicating whether there is the possibility of aliasing. + NoAlias = 0, + /// The two locations may or may not alias. This is the least precise + /// result. + MayAlias, + /// The two locations alias, but only due to a partial overlap. + PartialAlias, + /// The two locations precisely alias each other. + MustAlias, + }; + static_assert(MustAlias < (1 << AliasBits), + "Not enough bit field size for the enum!"); + + explicit AliasResult() = delete; + constexpr AliasResult(const Kind &Alias) + : Alias(Alias), HasOffset(false), Offset(0) {} + + operator Kind() const { return static_cast<Kind>(Alias); } + + constexpr bool hasOffset() const { return HasOffset; } + constexpr int32_t getOffset() const { + assert(HasOffset && "No offset!"); + return Offset; + } + void setOffset(int32_t NewOffset) { + if (isInt<OffsetBits>(NewOffset)) { + HasOffset = true; + Offset = NewOffset; + } + } + + /// Helper for processing AliasResult for swapped memory location pairs. + void swap(bool DoSwap = true) { + if (DoSwap && hasOffset()) + setOffset(-getOffset()); + } }; +static_assert(sizeof(AliasResult) == 4, + "AliasResult size is intended to be 4 bytes!"); + /// << operator for AliasResult. raw_ostream &operator<<(raw_ostream &OS, AliasResult AR); @@ -335,6 +378,31 @@ createModRefInfo(const FunctionModRefBehavior FMRB) { return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef)); } +/// Reduced version of MemoryLocation that only stores a pointer and size. +/// Used for caching AATags independent BasicAA results. 
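The enum-to-class change above is a space/precision trade: the alias kind now shares one 32-bit word with an optional signed offset (the known distance between the two locations, used with PartialAlias), so richer results cost no extra cache space. A minimal standalone sketch of the same packing idea, with a hypothetical PackedAliasResult name (this mirrors, but is not, the LLVM class):

#include <cassert>
#include <cstdint>

// Hypothetical re-creation of the packing above: 8 bits of alias kind,
// 1 "has offset" flag, and a 23-bit signed offset in one 32-bit word.
class PackedAliasResult {
  static const int OffsetBits = 23;
  static const int AliasBits = 8;
  unsigned Alias : AliasBits;
  unsigned HasOffset : 1;
  int Offset : OffsetBits;

public:
  enum Kind : uint8_t { NoAlias = 0, MayAlias, PartialAlias, MustAlias };
  PackedAliasResult(Kind K) : Alias(K), HasOffset(false), Offset(0) {}

  // Mirrors the isInt<OffsetBits>() guard: out-of-range offsets are dropped,
  // so callers must check hasOffset() before reading the value back.
  void setOffset(int32_t NewOffset) {
    if (NewOffset >= -(1 << (OffsetBits - 1)) &&
        NewOffset < (1 << (OffsetBits - 1))) {
      HasOffset = true;
      Offset = NewOffset;
    }
  }
  bool hasOffset() const { return HasOffset; }
  int32_t getOffset() const {
    assert(HasOffset && "No offset!");
    return Offset;
  }
};
static_assert(sizeof(PackedAliasResult) == 4, "fits the intended 4 bytes");

int main() {
  PackedAliasResult R(PackedAliasResult::PartialAlias);
  R.setOffset(-8); // the first location starts 8 bytes before the second
  assert(R.hasOffset() && R.getOffset() == -8);
}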
+struct AACacheLoc { + const Value *Ptr; + LocationSize Size; +}; + +template <> struct DenseMapInfo<AACacheLoc> { + static inline AACacheLoc getEmptyKey() { + return {DenseMapInfo<const Value *>::getEmptyKey(), + DenseMapInfo<LocationSize>::getEmptyKey()}; + } + static inline AACacheLoc getTombstoneKey() { + return {DenseMapInfo<const Value *>::getTombstoneKey(), + DenseMapInfo<LocationSize>::getTombstoneKey()}; + } + static unsigned getHashValue(const AACacheLoc &Val) { + return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^ + DenseMapInfo<LocationSize>::getHashValue(Val.Size); + } + static bool isEqual(const AACacheLoc &LHS, const AACacheLoc &RHS) { + return LHS.Ptr == RHS.Ptr && LHS.Size == RHS.Size; + } +}; + /// This class stores info we want to provide to or retain within an alias /// query. By default, the root query is stateless and starts with a freshly /// constructed info object. Specific alias analyses can use this query info to @@ -345,7 +413,7 @@ createModRefInfo(const FunctionModRefBehavior FMRB) { /// caches used by BasicAA, but can further be extended to fit other AA needs. class AAQueryInfo { public: - using LocPair = std::pair<MemoryLocation, MemoryLocation>; + using LocPair = std::pair<AACacheLoc, AACacheLoc>; struct CacheEntry { AliasResult Result; /// Number of times a NoAlias assumption has been used. @@ -360,6 +428,9 @@ public: using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>; IsCapturedCacheT IsCapturedCache; + /// Query depth used to distinguish recursive queries. + unsigned Depth = 0; + /// How many active NoAlias assumption uses there are. int NumAssumptionUses = 0; @@ -369,6 +440,15 @@ public: SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults; AAQueryInfo() : AliasCache(), IsCapturedCache() {} + + /// Create a new AAQueryInfo based on this one, but with the cache cleared. + /// This is used for recursive queries across phis, where cache results may + /// not be valid. + AAQueryInfo withEmptyCache() { + AAQueryInfo NewAAQI; + NewAAQI.Depth = Depth; + return NewAAQI; + } }; class BatchAAResults; @@ -428,7 +508,7 @@ public: /// A trivial helper function to check to see if the specified pointers are /// no-alias. bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - return alias(LocA, LocB) == NoAlias; + return alias(LocA, LocB) == AliasResult::NoAlias; } /// A convenience wrapper around the \c isNoAlias helper interface. @@ -446,13 +526,13 @@ public: /// A trivial helper function to check to see if the specified pointers are /// must-alias. bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - return alias(LocA, LocB) == MustAlias; + return alias(LocA, LocB) == AliasResult::MustAlias; } /// A convenience wrapper around the \c isMustAlias helper interface. bool isMustAlias(const Value *V1, const Value *V2) { return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) == - MustAlias; + AliasResult::MustAlias; } /// Checks whether the given location points to constant memory, or if @@ -715,7 +795,11 @@ public: /// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being /// set. ModRefInfo callCapturesBefore(const Instruction *I, - const MemoryLocation &MemLoc, DominatorTree *DT); + const MemoryLocation &MemLoc, + DominatorTree *DT) { + AAQueryInfo AAQIP; + return callCapturesBefore(I, MemLoc, DT, AAQIP); + } /// A convenience wrapper to synthesize a memory location. 
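The DenseMapInfo<AACacheLoc> specialization above supplies the four pieces every DenseMap key needs: an empty key, a tombstone key, a hash, and equality. With it, the alias cache is keyed on (pointer, size) pairs, so one AATags-independent BasicAA result can serve queries that differ only in their metadata tags. A rough standard-library analogue of the same custom-key pattern (hypothetical names; std::unordered_map needs no sentinel keys, so only hash and equality appear):

#include <cstdint>
#include <functional>
#include <unordered_map>
#include <utility>

// Analogue of AACacheLoc: identity is the (pointer, size) pair only.
struct CacheLoc {
  const void *Ptr;
  uint64_t Size;
  bool operator==(const CacheLoc &O) const {
    return Ptr == O.Ptr && Size == O.Size;
  }
};

// Analogue of getHashValue(): combine the hashes of both members.
struct CacheLocHash {
  size_t operator()(const CacheLoc &L) const {
    return std::hash<const void *>()(L.Ptr) ^ std::hash<uint64_t>()(L.Size);
  }
};

// Analogue of AAQueryInfo::LocPair keying the alias cache.
using LocPair = std::pair<CacheLoc, CacheLoc>;
struct LocPairHash {
  size_t operator()(const LocPair &P) const {
    return CacheLocHash()(P.first) * 31 ^ CacheLocHash()(P.second);
  }
};

int main() {
  std::unordered_map<LocPair, int, LocPairHash> Cache; // int stands in for CacheEntry
  CacheLoc A{nullptr, 4}, B{nullptr, 8};
  Cache[{A, B}] = 1; // the same (ptr, size) pair later hits this entry
  return Cache.count({A, B}) == 1 ? 0 : 1;
}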
ModRefInfo callCapturesBefore(const Instruction *I, const Value *P, @@ -784,6 +868,9 @@ private: ModRefInfo getModRefInfo(const Instruction *I, const Optional<MemoryLocation> &OptLoc, AAQueryInfo &AAQIP); + ModRefInfo callCapturesBefore(const Instruction *I, + const MemoryLocation &MemLoc, DominatorTree *DT, + AAQueryInfo &AAQIP); class Concept; @@ -797,9 +884,6 @@ private: std::vector<AnalysisKey *> AADeps; - /// Query depth used to distinguish recursive queries. - unsigned Depth = 0; - friend class BatchAAResults; }; @@ -841,11 +925,17 @@ public: return AA.getModRefBehavior(Call); } bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - return alias(LocA, LocB) == MustAlias; + return alias(LocA, LocB) == AliasResult::MustAlias; } bool isMustAlias(const Value *V1, const Value *V2) { return alias(MemoryLocation(V1, LocationSize::precise(1)), - MemoryLocation(V2, LocationSize::precise(1))) == MustAlias; + MemoryLocation(V2, LocationSize::precise(1))) == + AliasResult::MustAlias; + } + ModRefInfo callCapturesBefore(const Instruction *I, + const MemoryLocation &MemLoc, + DominatorTree *DT) { + return AA.callCapturesBefore(I, MemLoc, DT, AAQI); } }; @@ -1073,7 +1163,7 @@ protected: public: AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI) { - return MayAlias; + return AliasResult::MayAlias; } bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index b27fd5aa92a7..b66ff395454d 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -35,20 +35,17 @@ namespace llvm { class AAResults; +class AliasResult; class AliasSetTracker; -class BasicBlock; -class LoadInst; -class Loop; -class MemorySSA; class AnyMemSetInst; class AnyMemTransferInst; +class BasicBlock; +class LoadInst; class raw_ostream; class StoreInst; class VAArgInst; class Value; -enum AliasResult : uint8_t; - class AliasSet : public ilist_node<AliasSet> { friend class AliasSetTracker; @@ -235,11 +232,16 @@ public: void dump() const; /// Define an iterator for alias sets... this is just a forward iterator. - class iterator : public std::iterator<std::forward_iterator_tag, - PointerRec, ptrdiff_t> { + class iterator { PointerRec *CurNode; public: + using iterator_category = std::forward_iterator_tag; + using value_type = PointerRec; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + explicit iterator(PointerRec *CN = nullptr) : CurNode(CN) {} bool operator==(const iterator& x) const { @@ -343,8 +345,6 @@ class AliasSetTracker { struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {}; AAResults &AA; - MemorySSA *MSSA = nullptr; - Loop *L = nullptr; ilist<AliasSet> AliasSets; using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *, @@ -357,8 +357,6 @@ public: /// Create an empty collection of AliasSets, and use the specified alias /// analysis object to disambiguate load and store addresses. 
explicit AliasSetTracker(AAResults &AA) : AA(AA) {} - explicit AliasSetTracker(AAResults &AA, MemorySSA *MSSA, Loop *L) - : AA(AA), MSSA(MSSA), L(L) {} ~AliasSetTracker() { clear(); } /// These methods are used to add different types of instructions to the alias @@ -383,7 +381,6 @@ public: void add(BasicBlock &BB); // Add all instructions in basic block void add(const AliasSetTracker &AST); // Add alias relations from another AST void addUnknown(Instruction *I); - void addAllInstructionsInLoopUsingMSSA(); void clear(); diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h index 4d2884284d67..49c0cd89a4db 100644 --- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h +++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h @@ -11,11 +11,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEQUERIES_H -#define LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEQUERIES_H +#ifndef LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H +#define LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H #include "llvm/IR/Attributes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/ADT/DenseMap.h" namespace llvm { @@ -39,12 +40,12 @@ enum AssumeBundleArg { /// /// Return true iff the queried attribute was found. /// If ArgVal is set, the argument will be stored to ArgVal. -bool hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, StringRef AttrName, +bool hasAttributeInAssume(AssumeInst &Assume, Value *IsOn, StringRef AttrName, uint64_t *ArgVal = nullptr); -inline bool hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, +inline bool hasAttributeInAssume(AssumeInst &Assume, Value *IsOn, Attribute::AttrKind Kind, uint64_t *ArgVal = nullptr) { - return hasAttributeInAssume(AssumeCI, IsOn, + return hasAttributeInAssume(Assume, IsOn, Attribute::getNameFromAttrKind(Kind), ArgVal); } @@ -87,7 +88,7 @@ using RetainedKnowledgeMap = /// many queries are going to be made on the same llvm.assume. /// String attributes are not inserted in the map. /// If the IR changes the map will be outdated. -void fillMapFromAssume(CallInst &AssumeCI, RetainedKnowledgeMap &Result); +void fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result); /// Represent one information held inside an operand bundle of an llvm.assume. /// AttrKind is the property that holds. @@ -106,19 +107,28 @@ struct RetainedKnowledge { ArgValue == Other.ArgValue; } bool operator!=(RetainedKnowledge Other) const { return !(*this == Other); } + /// This is only intended for use in std::min/std::max between attributes that + /// only differ in ArgValue. + bool operator<(RetainedKnowledge Other) const { + assert(((AttrKind == Other.AttrKind && WasOn == Other.WasOn) || + AttrKind == Attribute::None || Other.AttrKind == Attribute::None) && + "This is only intended for use in min/max to select the best for " + "RetainedKnowledge that is otherwise equal"); + return ArgValue < Other.ArgValue; + } operator bool() const { return AttrKind != Attribute::None; } static RetainedKnowledge none() { return RetainedKnowledge{}; } }; /// Retrieve the information held by Assume on the operand at index Idx. /// Assume should be an llvm.assume and Idx should be in the operand bundle. -RetainedKnowledge getKnowledgeFromOperandInAssume(CallInst &Assume, +RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx); /// Retrieve the information held by the Use U of an llvm.assume. The use should /// be in the operand bundle.
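The operator< added to RetainedKnowledge above exists solely so std::min/std::max can merge two pieces of knowledge that differ only in their argument value, for example keeping the stronger of two alignment assumptions on the same pointer. A small sketch of that intended use, with a mock struct standing in for the real type:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Mock of the RetainedKnowledge fields relevant here; the real struct lives
// in llvm/Analysis/AssumeBundleQueries.h.
struct Knowledge {
  int AttrKind;      // e.g. Attribute::Alignment
  const void *WasOn; // the value the attribute applies to
  uint64_t ArgValue; // e.g. the alignment in bytes
  // As documented above: only meaningful when everything but ArgValue matches.
  bool operator<(const Knowledge &O) const { return ArgValue < O.ArgValue; }
};

int main() {
  const void *Ptr = nullptr;
  Knowledge A{1, Ptr, 8};  // "Ptr is aligned to 8"
  Knowledge B{1, Ptr, 32}; // "Ptr is aligned to 32"
  // Merge by keeping the strongest claim, the documented min/max use case.
  assert(std::max(A, B).ArgValue == 32);
}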
inline RetainedKnowledge getKnowledgeFromUseInAssume(const Use *U) { - return getKnowledgeFromOperandInAssume(*cast<CallInst>(U->getUser()), + return getKnowledgeFromOperandInAssume(*cast<AssumeInst>(U->getUser()), U->getOperandNo()); } @@ -133,7 +143,7 @@ constexpr StringRef IgnoreBundleTag = "ignore"; /// /// the argument to the call of llvm.assume may still be useful even if the /// function returned true. -bool isAssumeWithEmptyBundle(CallInst &Assume); +bool isAssumeWithEmptyBundle(AssumeInst &Assume); /// Return a valid Knowledge associated to the Use U if its Attribute kind is /// in AttrKinds. @@ -159,7 +169,7 @@ RetainedKnowledge getKnowledgeValidInContext( /// This extracts the Knowledge from an element of an operand bundle. /// This is mostly for use in the assume builder. -RetainedKnowledge getKnowledgeFromBundle(CallInst &Assume, +RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI); } // namespace llvm diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h index 0ef63dc68e1c..51d04bd8cf02 100644 --- a/llvm/include/llvm/Analysis/AssumptionCache.h +++ b/llvm/include/llvm/Analysis/AssumptionCache.h @@ -26,7 +26,7 @@ namespace llvm { -class CallInst; +class AssumeInst; class Function; class raw_ostream; class Value; @@ -45,7 +45,7 @@ public: enum : unsigned { ExprResultIdx = std::numeric_limits<unsigned>::max() }; struct ResultElem { - WeakTrackingVH Assume; + WeakVH Assume; /// contains either ExprResultIdx or the index of the operand bundle /// containing the knowledge. @@ -116,15 +116,15 @@ public: /// /// The call passed in must be an instruction within this function and must /// not already be in the cache. - void registerAssumption(CallInst *CI); + void registerAssumption(AssumeInst *CI); /// Remove an \@llvm.assume intrinsic from this function's cache if it has /// been added to the cache earlier. - void unregisterAssumption(CallInst *CI); + void unregisterAssumption(AssumeInst *CI); /// Update the cache of values being affected by this assumption (i.e. /// the values about which this assumption provides information). - void updateAffectedValues(CallInst *CI); + void updateAffectedValues(AssumeInst *CI); /// Clear the cache of \@llvm.assume intrinsics for a function. 
/// diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index 46b8cd1f3a88..991c0cbb642a 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -35,7 +35,6 @@ class DataLayout; class DominatorTree; class Function; class GEPOperator; -class LoopInfo; class PHINode; class SelectInst; class TargetLibraryInfo; @@ -56,23 +55,20 @@ class BasicAAResult : public AAResultBase<BasicAAResult> { const TargetLibraryInfo &TLI; AssumptionCache &AC; DominatorTree *DT; - LoopInfo *LI; PhiValues *PV; public: BasicAAResult(const DataLayout &DL, const Function &F, const TargetLibraryInfo &TLI, AssumptionCache &AC, - DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, - PhiValues *PV = nullptr) - : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), PV(PV) - {} + DominatorTree *DT = nullptr, PhiValues *PV = nullptr) + : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), PV(PV) {} BasicAAResult(const BasicAAResult &Arg) : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), - DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {} + DT(Arg.DT), PV(Arg.PV) {} BasicAAResult(BasicAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), - AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {} + AC(Arg.AC), DT(Arg.DT), PV(Arg.PV) {} /// Handle invalidation events in the new pass manager. bool invalidate(Function &Fn, const PreservedAnalyses &PA, @@ -120,14 +116,8 @@ private: // Context instruction to use when querying information about this index. const Instruction *CxtI; - bool operator==(const VariableGEPIndex &Other) const { - return V == Other.V && ZExtBits == Other.ZExtBits && - SExtBits == Other.SExtBits && Scale == Other.Scale; - } - - bool operator!=(const VariableGEPIndex &Other) const { - return !operator==(Other); - } + /// True if all operations in this expression are NSW. + bool IsNSW; void dump() const { print(dbgs()); @@ -152,6 +142,9 @@ private: SmallVector<VariableGEPIndex, 4> VarIndices; // Is GEP index scale compile-time constant. bool HasCompileTimeConstantScale; + // Are all operations inbounds GEPs or non-indexing operations? + // (None iff expression doesn't involve any geps) + Optional<bool> InBounds; void dump() const { print(dbgs()); @@ -159,15 +152,15 @@ private: } void print(raw_ostream &OS) const { OS << "(DecomposedGEP Base=" << Base->getName() - << ", Offset=" << Offset - << ", VarIndices=["; + << ", Offset=" << Offset + << ", VarIndices=["; for (size_t i = 0; i < VarIndices.size(); i++) { - if (i != 0) - OS << ", "; - VarIndices[i].print(OS); + if (i != 0) + OS << ", "; + VarIndices[i].print(OS); } OS << "], HasCompileTimeConstantScale=" << HasCompileTimeConstantScale - << ")"; + << ")"; } }; @@ -190,12 +183,6 @@ private: /// Tracks instructions visited by pointsToConstantMemory. 
SmallPtrSet<const Value *, 16> Visited; - static const Value * - GetLinearExpression(const Value *V, APInt &Scale, APInt &Offset, - unsigned &ZExtBits, unsigned &SExtBits, - const DataLayout &DL, unsigned Depth, AssumptionCache *AC, - DominatorTree *DT, bool &NSW, bool &NUW); - static DecomposedGEP DecomposeGEPExpression(const Value *V, const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT); @@ -224,29 +211,23 @@ private: const SmallVectorImpl<VariableGEPIndex> &Src); AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size, - const AAMDNodes &V1AAInfo, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *V2, LocationSize V2Size, const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI); AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize, - const AAMDNodes &PNAAInfo, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AAInfo, - AAQueryInfo &AAQI); + const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI); AliasResult aliasSelect(const SelectInst *SI, LocationSize SISize, - const AAMDNodes &SIAAInfo, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI); AliasResult aliasCheck(const Value *V1, LocationSize V1Size, - const AAMDNodes &V1AATag, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AATag, + const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI); AliasResult aliasCheckRecursive(const Value *V1, LocationSize V1Size, - const AAMDNodes &V1AATag, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AATag, + const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI, const Value *O1, const Value *O2); }; diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index c22787531117..f581b18bff17 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -42,15 +42,20 @@ #include <iterator> #include <limits> #include <list> +#include <queue> #include <string> +#include <unordered_set> #include <utility> #include <vector> #define DEBUG_TYPE "block-freq" +namespace llvm { extern llvm::cl::opt<bool> CheckBFIUnknownBlockQueries; -namespace llvm { +extern llvm::cl::opt<bool> UseIterativeBFIInference; +extern llvm::cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock; +extern llvm::cl::opt<double> IterativeBFIPrecision; class BranchProbabilityInfo; class Function; @@ -450,12 +455,6 @@ public: bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight); - LoopData &getLoopPackage(const BlockNode &Head) { - assert(Head.Index < Working.size()); - assert(Working[Head.Index].isLoopHeader()); - return *Working[Head.Index].Loop; - } - /// Analyze irreducible SCCs. /// /// Separate irreducible SCCs from \c G, which is an explicit graph of \c @@ -968,6 +967,45 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { return bfi_detail::getBlockName(getBlock(Node)); } + /// The current implementation for computing relative block frequencies does + /// not correctly handle control-flow graphs containing irreducible loops. To + /// resolve the problem, we apply a post-processing step, which iteratively + /// updates block frequencies based on the frequencies of their predecessors. + /// This corresponds to finding the stationary point of the Markov chain by + /// an iterative method aka "PageRank computation".
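In matrix terms (notation mine, matching the ProbMatrix layout introduced below, where p(J, I) = Pr[J -> I | J]), the post-processing looks for a fixed point of the transition matrix built from branch probabilities, the same stationary condition PageRank solves:

    f(I) = \sum_{J} f(J) \, p(J, I), \qquad \sum_{I} f(I) = 1,

and folding a self-loop probability p(I, I) into a single closed-form step gives the update each iteration applies to one block:

    f_{\mathrm{new}}(I) = \Big( \sum_{J \neq I} f(J) \, p(J, I) \Big) \Big/ \big( 1 - p(I, I) \big).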
+ /// The algorithm takes at most O(|E| * IterativeBFIMaxIterations) steps but + /// typically converges faster. + /// + /// Decide whether we want to apply iterative inference for a given function. + bool needIterativeInference() const; + + /// Apply an iterative post-processing to infer correct counts for + /// irreducible loops. + void applyIterativeInference(); + + using ProbMatrixType = std::vector<std::vector<std::pair<size_t, Scaled64>>>; + + /// Run iterative inference for a probability matrix and initial frequencies. + void iterativeInference(const ProbMatrixType &ProbMatrix, + std::vector<Scaled64> &Freq) const; + + /// Find all blocks to apply inference on, that is, reachable from the entry + /// and backward reachable from exits along edges with positive probability. + void findReachableBlocks(std::vector<const BlockT *> &Blocks) const; + + /// Build a matrix of probabilities with transitions (edges) between the + /// blocks: ProbMatrix[I] holds pairs (J, P), where Pr[J -> I | J] = P. + void initTransitionProbabilities( + const std::vector<const BlockT *> &Blocks, + const DenseMap<const BlockT *, size_t> &BlockIndex, + ProbMatrixType &ProbMatrix) const; + +#ifndef NDEBUG + /// Compute the discrepancy between current block frequencies and the + /// probability matrix. + Scaled64 discrepancy(const ProbMatrixType &ProbMatrix, + const std::vector<Scaled64> &Freq) const; +#endif + public: BlockFrequencyInfoImpl() = default; @@ -1094,6 +1132,10 @@ void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F, computeMassInLoops(); computeMassInFunction(); unwrapLoops(); + // Apply a post-processing step improving computed frequencies for functions + // with irreducible loops. + if (needIterativeInference()) + applyIterativeInference(); finalizeMetrics(); if (CheckBFIUnknownBlockQueries) { @@ -1314,6 +1356,294 @@ template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() { llvm_unreachable("unhandled irreducible control flow"); } +template <class BT> +bool BlockFrequencyInfoImpl<BT>::needIterativeInference() const { + if (!UseIterativeBFIInference) + return false; + if (!F->getFunction().hasProfileData()) + return false; + // Apply iterative inference only if the function contains irreducible loops; + // otherwise, computed block frequencies are reasonably correct. + for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) { + if (L->isIrreducible()) + return true; + } + return false; +} + +template <class BT> void BlockFrequencyInfoImpl<BT>::applyIterativeInference() { + // Extract blocks for processing: a block is considered for inference iff it + // can be reached from the entry by edges with a positive probability.
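A compact, self-contained sketch of the active-set iteration declared above, using double in place of Scaled64 (assumptions: row I of Prob holds pairs (J, p) with p = Pr[J -> I | J], every block starts active where the source seeds only positive-frequency ones, and all names are mine):

#include <cmath>
#include <cstdio>
#include <queue>
#include <utility>
#include <vector>

// Row I holds pairs (J, p) with p = Pr[J -> I | J], as in ProbMatrixType.
using ProbRow = std::vector<std::pair<size_t, double>>;

void iterate(const std::vector<ProbRow> &Prob, std::vector<double> &Freq,
             double Eps = 1e-9, size_t MaxIter = 1000) {
  size_t N = Freq.size();
  // Succ[J] lists the blocks whose frequency depends on block J.
  std::vector<std::vector<size_t>> Succ(N);
  for (size_t I = 0; I < N; ++I)
    for (const auto &In : Prob[I])
      Succ[In.first].push_back(I);

  std::vector<bool> Active(N, true);
  std::queue<size_t> Work;
  for (size_t I = 0; I < N; ++I)
    Work.push(I);

  size_t It = 0;
  while (It++ < MaxIter && !Work.empty()) {
    size_t I = Work.front();
    Work.pop();
    Active[I] = false;

    // Weighted sum over incoming edges, rescaled by (1 - selfProb) to fold
    // any self-loop in closed form.
    double New = 0, OneMinusSelf = 1.0;
    for (const auto &In : Prob[I]) {
      if (In.first == I)
        OneMinusSelf -= In.second;
      else
        New += Freq[In.first] * In.second;
    }
    New /= OneMinusSelf;

    // Re-activate this block and its dependents only on a real change; this
    // is what keeps the pass close to linear in practice.
    if (std::abs(New - Freq[I]) > Eps) {
      if (!Active[I]) { Work.push(I); Active[I] = true; }
      for (size_t S : Succ[I])
        if (!Active[S]) { Work.push(S); Active[S] = true; }
    }
    Freq[I] = New;
  }
}

int main() {
  // Entry block 0 feeds block 1, which self-loops half the time and returns
  // to the entry (mirroring the sink-to-source edges the real matrix adds).
  std::vector<ProbRow> Prob(2);
  Prob[1] = {{0, 1.0}, {1, 0.5}};
  Prob[0] = {{1, 0.5}};
  std::vector<double> Freq = {0.5, 0.5};
  iterate(Prob, Freq);
  std::printf("f0=%g f1=%g\n", Freq[0], Freq[1]); // converges to 0.25, 0.5
}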
+ // Non-processed blocks are assigned zero frequency and are ignored + // in the computation. + std::vector<const BlockT *> ReachableBlocks; + findReachableBlocks(ReachableBlocks); + if (ReachableBlocks.empty()) + return; + + // The map is used to index successors/predecessors of reachable blocks in + // the ReachableBlocks vector. + DenseMap<const BlockT *, size_t> BlockIndex; + // Extract initial frequencies for the reachable blocks. + auto Freq = std::vector<Scaled64>(ReachableBlocks.size()); + Scaled64 SumFreq; + for (size_t I = 0; I < ReachableBlocks.size(); I++) { + const BlockT *BB = ReachableBlocks[I]; + BlockIndex[BB] = I; + Freq[I] = getFloatingBlockFreq(BB); + SumFreq += Freq[I]; + } + assert(!SumFreq.isZero() && "empty initial block frequencies"); + + LLVM_DEBUG(dbgs() << "Applying iterative inference for " << F->getName() + << " with " << ReachableBlocks.size() << " blocks\n"); + + // Normalizing frequencies so they sum up to 1.0 + for (auto &Value : Freq) { + Value /= SumFreq; + } + + // Setting up edge probabilities using sparse matrix representation: + // ProbMatrix[I] holds a vector of pairs (J, P) where Pr[J -> I | J] = P + ProbMatrixType ProbMatrix; + initTransitionProbabilities(ReachableBlocks, BlockIndex, ProbMatrix); + + // Run the propagation + iterativeInference(ProbMatrix, Freq); + + // Assign computed frequency values + for (const BlockT &BB : *F) { + auto Node = getNode(&BB); + if (!Node.isValid()) + continue; + if (BlockIndex.count(&BB)) { + Freqs[Node.Index].Scaled = Freq[BlockIndex[&BB]]; + } else { + Freqs[Node.Index].Scaled = Scaled64::getZero(); + } + } +} + +template <class BT> +void BlockFrequencyInfoImpl<BT>::iterativeInference( + const ProbMatrixType &ProbMatrix, std::vector<Scaled64> &Freq) const { + assert(0.0 < IterativeBFIPrecision && IterativeBFIPrecision < 1.0 && + "incorrectly specified precision"); + // Convert double precision to Scaled64 + const auto Precision = + Scaled64::getInverse(static_cast<uint64_t>(1.0 / IterativeBFIPrecision)); + const size_t MaxIterations = IterativeBFIMaxIterationsPerBlock * Freq.size(); + +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "  Initial discrepancy = " + << discrepancy(ProbMatrix, Freq).toString() << "\n"); +#endif + + // Successors[I] holds unique successors of the I-th block + auto Successors = std::vector<std::vector<size_t>>(Freq.size()); + for (size_t I = 0; I < Freq.size(); I++) { + for (auto &Jump : ProbMatrix[I]) { + Successors[Jump.first].push_back(I); + } + } + + // To speed up computation, we maintain a set of "active" blocks whose + // frequencies need to be updated based on the incoming edges. + // The set is dynamic and changes after every update. Initially all blocks + // with a positive frequency are active. + auto IsActive = std::vector<bool>(Freq.size(), false); + std::queue<size_t> ActiveSet; + for (size_t I = 0; I < Freq.size(); I++) { + if (Freq[I] > 0) { + ActiveSet.push(I); + IsActive[I] = true; + } + } + + // Iterate over the blocks propagating frequencies + size_t It = 0; + while (It++ < MaxIterations && !ActiveSet.empty()) { + size_t I = ActiveSet.front(); + ActiveSet.pop(); + IsActive[I] = false; + + // Compute a new frequency for the block: NewFreq := Freq \times ProbMatrix.
+ // Special care is taken for self-edges, which need to be scaled by + // (1.0 - SelfProb), where SelfProb is the sum of probabilities on the edges. + Scaled64 NewFreq; + Scaled64 OneMinusSelfProb = Scaled64::getOne(); + for (auto &Jump : ProbMatrix[I]) { + if (Jump.first == I) { + OneMinusSelfProb -= Jump.second; + } else { + NewFreq += Freq[Jump.first] * Jump.second; + } + } + if (OneMinusSelfProb != Scaled64::getOne()) + NewFreq /= OneMinusSelfProb; + + // If the block's frequency has changed enough, then + // make sure the block and its successors are in the active set + auto Change = Freq[I] >= NewFreq ? Freq[I] - NewFreq : NewFreq - Freq[I]; + if (Change > Precision) { + ActiveSet.push(I); + IsActive[I] = true; + for (size_t Succ : Successors[I]) { + if (!IsActive[Succ]) { + ActiveSet.push(Succ); + IsActive[Succ] = true; + } + } + } + + // Update the frequency for the block + Freq[I] = NewFreq; + } + + LLVM_DEBUG(dbgs() << "  Completed " << It << " inference iterations" + << format(" (%0.0f per block)", double(It) / Freq.size()) + << "\n"); +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "  Final discrepancy = " + << discrepancy(ProbMatrix, Freq).toString() << "\n"); +#endif +} + +template <class BT> +void BlockFrequencyInfoImpl<BT>::findReachableBlocks( + std::vector<const BlockT *> &Blocks) const { + // Find all blocks to apply inference on, that is, reachable from the entry + // along edges with non-zero probabilities. + std::queue<const BlockT *> Queue; + std::unordered_set<const BlockT *> Reachable; + const BlockT *Entry = &F->front(); + Queue.push(Entry); + Reachable.insert(Entry); + while (!Queue.empty()) { + const BlockT *SrcBB = Queue.front(); + Queue.pop(); + for (const BlockT *DstBB : children<const BlockT *>(SrcBB)) { + auto EP = BPI->getEdgeProbability(SrcBB, DstBB); + if (EP.isZero()) + continue; + if (Reachable.find(DstBB) == Reachable.end()) { + Queue.push(DstBB); + Reachable.insert(DstBB); + } + } + } + + // Find all blocks to apply inference on, that is, backward reachable from + // the exit blocks along (backward) edges with non-zero probabilities. + std::unordered_set<const BlockT *> InverseReachable; + for (const BlockT &BB : *F) { + // An exit block is a block without any successors. + bool HasSucc = GraphTraits<const BlockT *>::child_begin(&BB) != + GraphTraits<const BlockT *>::child_end(&BB); + if (!HasSucc && Reachable.count(&BB)) { + Queue.push(&BB); + InverseReachable.insert(&BB); + } + } + while (!Queue.empty()) { + const BlockT *SrcBB = Queue.front(); + Queue.pop(); + for (const BlockT *DstBB : children<Inverse<const BlockT *>>(SrcBB)) { + auto EP = BPI->getEdgeProbability(DstBB, SrcBB); + if (EP.isZero()) + continue; + if (InverseReachable.find(DstBB) == InverseReachable.end()) { + Queue.push(DstBB); + InverseReachable.insert(DstBB); + } + } + } + + // Collect the result + Blocks.reserve(F->size()); + for (const BlockT &BB : *F) { + if (Reachable.count(&BB) && InverseReachable.count(&BB)) { + Blocks.push_back(&BB); + } + } +} + +template <class BT> +void BlockFrequencyInfoImpl<BT>::initTransitionProbabilities( + const std::vector<const BlockT *> &Blocks, + const DenseMap<const BlockT *, size_t> &BlockIndex, + ProbMatrixType &ProbMatrix) const { + const size_t NumBlocks = Blocks.size(); + auto Succs = std::vector<std::vector<std::pair<size_t, Scaled64>>>(NumBlocks); + auto SumProb = std::vector<Scaled64>(NumBlocks); + + // Find unique successors and corresponding probabilities for every block. + for (size_t Src = 0; Src < NumBlocks; Src++) { + const BlockT *BB = Blocks[Src]; +
std::unordered_set<const BlockT *> UniqueSuccs; + for (const auto SI : children<const BlockT *>(BB)) { + // Ignore cold blocks + if (BlockIndex.find(SI) == BlockIndex.end()) + continue; + // Ignore parallel edges between BB and SI blocks + if (UniqueSuccs.find(SI) != UniqueSuccs.end()) + continue; + UniqueSuccs.insert(SI); + // Ignore jumps with zero probability + auto EP = BPI->getEdgeProbability(BB, SI); + if (EP.isZero()) + continue; + + auto EdgeProb = + Scaled64::getFraction(EP.getNumerator(), EP.getDenominator()); + size_t Dst = BlockIndex.find(SI)->second; + Succs[Src].push_back(std::make_pair(Dst, EdgeProb)); + SumProb[Src] += EdgeProb; + } + } + + // Add transitions for every jump with positive branch probability + ProbMatrix = ProbMatrixType(NumBlocks); + for (size_t Src = 0; Src < NumBlocks; Src++) { + // Ignore blocks w/o successors + if (Succs[Src].empty()) + continue; + + assert(!SumProb[Src].isZero() && "Zero sum probability of non-exit block"); + for (auto &Jump : Succs[Src]) { + size_t Dst = Jump.first; + Scaled64 Prob = Jump.second; + ProbMatrix[Dst].push_back(std::make_pair(Src, Prob / SumProb[Src])); + } + } + + // Add transitions from sinks to the source + size_t EntryIdx = BlockIndex.find(&F->front())->second; + for (size_t Src = 0; Src < NumBlocks; Src++) { + if (Succs[Src].empty()) { + ProbMatrix[EntryIdx].push_back(std::make_pair(Src, Scaled64::getOne())); + } + } +} + +#ifndef NDEBUG +template <class BT> +BlockFrequencyInfoImplBase::Scaled64 BlockFrequencyInfoImpl<BT>::discrepancy( + const ProbMatrixType &ProbMatrix, const std::vector<Scaled64> &Freq) const { + assert(Freq[0] > 0 && "Incorrectly computed frequency of the entry block"); + Scaled64 Discrepancy; + for (size_t I = 0; I < ProbMatrix.size(); I++) { + Scaled64 Sum; + for (const auto &Jump : ProbMatrix[I]) { + Sum += Freq[Jump.first] * Jump.second; + } + Discrepancy += Freq[I] >= Sum ? Freq[I] - Sum : Sum - Freq[I]; + } + // Normalizing by the frequency of the entry block + return Discrepancy / Freq[0]; +} +#endif + /// \note This should be a lambda, but that crashes GCC 4.7. namespace bfi_detail { diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 6a286236a80e..e2099eba0f65 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -168,12 +168,6 @@ public: /// as having a relative probability >= 80%. bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const; - /// Retrieve the hot successor of a block if one exists. - /// - /// Given a basic block, look through its successors and if one exists for - /// which \see isEdgeHot would return true, return that successor block. - const BasicBlock *getHotSucc(const BasicBlock *BB) const; - /// Print an edge's probability. /// /// Retrieves an edge's probability similarly to \see getEdgeProbability, but diff --git a/llvm/include/llvm/Analysis/CFG.h b/llvm/include/llvm/Analysis/CFG.h index a36ceb484f14..b90258f8efff 100644 --- a/llvm/include/llvm/Analysis/CFG.h +++ b/llvm/include/llvm/Analysis/CFG.h @@ -77,21 +77,10 @@ bool isPotentiallyReachable( /// Determine whether there is a path from From to To within a single function. /// Returns false only if we can prove that once 'From' has been reached then /// 'To' can not be executed. Conservatively returns true. 
-bool isPotentiallyReachable(const BasicBlock *From, const BasicBlock *To, - const DominatorTree *DT = nullptr, - const LoopInfo *LI = nullptr); - -/// Determine whether there is at least one path from a block in -/// 'Worklist' to 'StopBB', returning true if uncertain. -/// -/// Determine whether there is a path from at least one block in Worklist to -/// StopBB within a single function. Returns false only if we can prove that -/// once any block in 'Worklist' has been reached then 'StopBB' can not be -/// executed. Conservatively returns true. -bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist, - BasicBlock *StopBB, - const DominatorTree *DT = nullptr, - const LoopInfo *LI = nullptr); +bool isPotentiallyReachable( + const BasicBlock *From, const BasicBlock *To, + const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr, + const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr); /// Determine whether there is at least one path from a block in /// 'Worklist' to 'StopBB' without passing through any blocks in diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index 53700798b6b3..c0cabceb4a54 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -72,15 +72,15 @@ public: RawWeights = !!BFI; // Print RawWeights when BFI is available. } - const BlockFrequencyInfo *getBFI() { return BFI; } + const BlockFrequencyInfo *getBFI() const { return BFI; } - const BranchProbabilityInfo *getBPI() { return BPI; } + const BranchProbabilityInfo *getBPI() const { return BPI; } - const Function *getFunction() { return this->F; } + const Function *getFunction() const { return this->F; } - uint64_t getMaxFreq() { return MaxFreq; } + uint64_t getMaxFreq() const { return MaxFreq; } - uint64_t getFreq(const BasicBlock *BB) { + uint64_t getFreq(const BasicBlock *BB) const { return BFI->getBlockFreq(BB).getFrequency(); } @@ -123,7 +123,7 @@ template <> struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits { // Cache for is hidden property - llvm::DenseMap<const BasicBlock *, bool> isHiddenBasicBlock; + llvm::DenseMap<const BasicBlock *, bool> isOnDeoptOrUnreachablePath; DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} @@ -296,7 +296,7 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits { return Attrs; } bool isNodeHidden(const BasicBlock *Node, const DOTFuncInfo *CFGInfo); - void computeHiddenNodes(const Function *F); + void computeDeoptOrUnreachablePaths(const Function *F); }; } // End llvm namespace diff --git a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h index 135321616b7c..ec05b3706ca3 100644 --- a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -73,7 +73,7 @@ public: AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI) { if (LocA.Ptr == LocB.Ptr) - return MustAlias; + return AliasResult::MustAlias; // Comparisons between global variables and other constants should be // handled by BasicAA. 
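The CFG.h change above folds the exclusion-set variant into the main isPotentiallyReachable overload. A hedged usage sketch against the new signature (the helper name and setup are hypothetical; only the declared API is taken from the hunk):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CFG.h"

using namespace llvm;

// Is To reachable from From when control may never pass through Blocker?
// Conservatively true; a false result is a proof To cannot execute.
static bool reachableAvoiding(const BasicBlock *From, const BasicBlock *To,
                              BasicBlock *Blocker, const DominatorTree *DT,
                              const LoopInfo *LI) {
  SmallPtrSet<BasicBlock *, 1> Exclude;
  Exclude.insert(Blocker);
  return isPotentiallyReachable(From, To, &Exclude, DT, LI);
}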
@@ -84,7 +84,7 @@ public: return AAResultBase::alias(LocA, LocB, AAQI); AliasResult QueryResult = query(LocA, LocB); - if (QueryResult == MayAlias) + if (QueryResult == AliasResult::MayAlias) return AAResultBase::alias(LocA, LocB, AAQI); return QueryResult; diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index 985424a74054..e361cccef960 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -373,12 +373,12 @@ private: /// templated adaptor. template <typename CGSCCPassT> ModuleToPostOrderCGSCCPassAdaptor -createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) { +createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass) { using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT, PreservedAnalyses, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult &>; return ModuleToPostOrderCGSCCPassAdaptor( - std::make_unique<PassModelT>(std::move(Pass))); + std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass))); } /// A proxy from a \c FunctionAnalysisManager to an \c SCC. @@ -491,12 +491,12 @@ private: /// templated adaptor. template <typename FunctionPassT> CGSCCToFunctionPassAdaptor -createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) { +createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass) { using PassModelT = detail::PassModel<Function, FunctionPassT, PreservedAnalyses, FunctionAnalysisManager>; return CGSCCToFunctionPassAdaptor( - std::make_unique<PassModelT>(std::move(Pass))); + std::make_unique<PassModelT>(std::forward<FunctionPassT>(Pass))); } /// A helper that repeats an SCC pass each time an indirect call is refined to @@ -536,13 +536,14 @@ private: /// A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename CGSCCPassT> -DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT Pass, +DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT &&Pass, int MaxIterations) { using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT, PreservedAnalyses, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult &>; - return DevirtSCCRepeatedPass(std::make_unique<PassModelT>(std::move(Pass)), - MaxIterations); + return DevirtSCCRepeatedPass( + std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass)), + MaxIterations); } // Clear out the debug logging macro. diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h index ef6e66b2b88e..62742fdf9a91 100644 --- a/llvm/include/llvm/Analysis/ConstantFolding.h +++ b/llvm/include/llvm/Analysis/ConstantFolding.h @@ -136,7 +136,9 @@ Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout & /// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a /// getelementptr constantexpr, return the constant value being addressed by the /// constant expression, or null if something is funny and we can't decide. 
-Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE); +Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE, + Type *Ty, + const DataLayout &DL); /// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr /// indices (with an *implied* zero pointer index that is not in the list), diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h index 83c1fb4485fd..d5b8f208172b 100644 --- a/llvm/include/llvm/Analysis/ConstraintSystem.h +++ b/llvm/include/llvm/Analysis/ConstraintSystem.h @@ -30,9 +30,6 @@ class ConstraintSystem { // Eliminate constraints from the system using Fourier–Motzkin elimination. bool eliminateUsingFM(); - /// Print the constraints in the system, using \p Names as variable names. - void dump(ArrayRef<std::string> Names) const; - /// Print the constraints in the system, using x0...xn as variable names. void dump() const; @@ -82,6 +79,9 @@ public: /// Returns the number of rows in the constraint system. unsigned size() const { return Constraints.size(); } + + /// Print the constraints in the system, using \p Names as variable names. + void dump(ArrayRef<std::string> Names) const; }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index e3bef33e55c3..51dd4a738f00 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -275,7 +275,7 @@ public: virtual ~DependenceGraphInfo() {} /// Return the label that is used to name this graph. - const StringRef getName() const { return Name; } + StringRef getName() const { return Name; } /// Return the root node of the graph. NodeType &getRoot() const { @@ -293,8 +293,8 @@ public: /// Return a string representing the type of dependence that the dependence /// analysis identified between the two given nodes. This function assumes /// that there is a memory dependence between the given two nodes. - const std::string getDependenceString(const NodeType &Src, - const NodeType &Dst) const; + std::string getDependenceString(const NodeType &Src, + const NodeType &Dst) const; protected: // Name of the graph. 
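The CGSCC adaptor factories in the CGSCCPassManager.h hunk above switch from a by-value pass parameter to a forwarding reference plus std::forward, so an lvalue pass is copied once into the type-erased model and an rvalue is moved. A generic standalone sketch of that type-erasure-with-forwarding pattern (hypothetical names, not the LLVM classes):

#include <memory>
#include <type_traits>
#include <utility>

struct Concept {
  virtual ~Concept() = default;
  virtual void run() = 0;
};

template <typename PassT> struct Model : Concept {
  PassT Pass;
  explicit Model(PassT P) : Pass(std::move(P)) {}
  void run() override { Pass.run(); }
};

struct Adaptor {
  std::unique_ptr<Concept> Impl;
};

// Forwarding factory: lvalues are copied into the model, rvalues are moved,
// matching the std::forward change in the hunk above.
template <typename PassT> Adaptor makeAdaptor(PassT &&Pass) {
  using ModelT = Model<std::decay_t<PassT>>;
  return Adaptor{std::make_unique<ModelT>(std::forward<PassT>(Pass))};
}

struct MyPass {
  void run() {}
};

int main() {
  MyPass P;
  Adaptor A = makeAdaptor(P);        // copy; P remains usable
  Adaptor B = makeAdaptor(MyPass()); // move from the temporary
  A.Impl->run();
  B.Impl->run();
}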
@@ -470,7 +470,7 @@ bool DependenceGraphInfo<NodeType>::getDependencies( } template <typename NodeType> -const std::string +std::string DependenceGraphInfo<NodeType>::getDependenceString(const NodeType &Src, const NodeType &Dst) const { std::string Str; diff --git a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h index ecf54cd8a680..59737744f576 100644 --- a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -97,7 +97,7 @@ public: errs() << "Writing '" << Filename << "'..."; - raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF); std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph); std::string Title = GraphName + " for '" + F.getName().str() + "' function"; @@ -160,7 +160,7 @@ public: errs() << "Writing '" << Filename << "'..."; - raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF); std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph); if (!EC) diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h index 7a8618a27ce7..5a68fcbebfea 100644 --- a/llvm/include/llvm/Analysis/DemandedBits.h +++ b/llvm/include/llvm/Analysis/DemandedBits.h @@ -18,8 +18,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_DEMANDED_BITS_H -#define LLVM_ANALYSIS_DEMANDED_BITS_H +#ifndef LLVM_ANALYSIS_DEMANDEDBITS_H +#define LLVM_ANALYSIS_DEMANDEDBITS_H #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" @@ -53,6 +53,9 @@ public: /// accepted, but will always produce a mask with all bits set. APInt getDemandedBits(Instruction *I); + /// Return the bits demanded from use U. + APInt getDemandedBits(Use *U); + /// Return true if, during analysis, I could not be reached. 
bool isInstructionDead(Instruction *I); @@ -146,4 +149,4 @@ FunctionPass *createDemandedBitsWrapperPass(); } // end namespace llvm -#endif // LLVM_ANALYSIS_DEMANDED_BITS_H +#endif // LLVM_ANALYSIS_DEMANDEDBITS_H diff --git a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h index 6f4e1be94164..332829cbc8a9 100644 --- a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h +++ b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H -#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H +#ifndef LLVM_ANALYSIS_DEPENDENCEGRAPHBUILDER_H +#define LLVM_ANALYSIS_DEPENDENCEGRAPHBUILDER_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" @@ -200,4 +200,4 @@ protected: } // namespace llvm -#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H +#endif // LLVM_ANALYSIS_DEPENDENCEGRAPHBUILDER_H diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h index 2e4ae65d0981..6f759a81fdef 100644 --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H -#define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H +#ifndef LLVM_ANALYSIS_DIVERGENCEANALYSIS_H +#define LLVM_ANALYSIS_DIVERGENCEANALYSIS_H #include "llvm/ADT/DenseSet.h" #include "llvm/Analysis/SyncDependenceAnalysis.h" @@ -34,7 +34,7 @@ class TargetTransformInfo; /// This analysis propagates divergence in a data-parallel context from sources /// of divergence to all users. It requires reducible CFGs. All assignments /// should be in SSA form. -class DivergenceAnalysis { +class DivergenceAnalysisImpl { public: /// \brief This instance will analyze the whole function \p F or the loop \p /// RegionLoop. /// /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop. /// Otherwise the whole function is analyzed. /// \param IsLCSSAForm whether the analysis may assume that the IR in the /// region is in LCSSA form. - DivergenceAnalysis(const Function &F, const Loop *RegionLoop, - const DominatorTree &DT, const LoopInfo &LI, - SyncDependenceAnalysis &SDA, bool IsLCSSAForm); + DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop, + const DominatorTree &DT, const LoopInfo &LI, + SyncDependenceAnalysis &SDA, bool IsLCSSAForm); /// \brief The loop that defines the analyzed region (if any). const Loop *getRegionLoop() const { return RegionLoop; } @@ -82,8 +82,6 @@ public: /// divergent. bool isDivergentUse(const Use &U) const; - void print(raw_ostream &OS, const Module *) const; - private: /// \brief Mark \p Term as divergent and push all Instructions that become /// divergent as a result on the worklist. @@ -114,13 +112,6 @@ private: bool isTemporalDivergent(const BasicBlock &ObservingBlock, const Value &Val) const; - /// \brief Whether \p Block is join divergent - /// - /// (see markBlockJoinDivergent). - bool isJoinDivergent(const BasicBlock &Block) const { - return DivergentJoinBlocks.contains(&Block); - } - private: const Function &F; // If regionLoop != nullptr, analysis is only performed within \p RegionLoop. @@ -142,9 +133,6 @@ private: // Set of known-uniform values. DenseSet<const Value *> UniformOverrides; - // Blocks with joining divergent control from different predecessors.
- DenseSet<const BasicBlock *> DivergentJoinBlocks; // FIXME Deprecated - // Detected/marked divergent values. DenseSet<const Value *> DivergentValues; @@ -152,28 +140,39 @@ private: std::vector<const Instruction *> Worklist; }; -/// \brief Divergence analysis frontend for GPU kernels. -class GPUDivergenceAnalysis { - SyncDependenceAnalysis SDA; - DivergenceAnalysis DA; +class DivergenceInfo { + Function &F; + + // If the function contains an irreducible region the divergence + // analysis can run indefinitely. We set ContainsIrreducible and no + // analysis is actually performed on the function. All values in + // this function are conservatively reported as divergent instead. + bool ContainsIrreducible; + std::unique_ptr<SyncDependenceAnalysis> SDA; + std::unique_ptr<DivergenceAnalysisImpl> DA; public: - /// Runs the divergence analysis on @F, a GPU kernel - GPUDivergenceAnalysis(Function &F, const DominatorTree &DT, - const PostDominatorTree &PDT, const LoopInfo &LI, - const TargetTransformInfo &TTI); + DivergenceInfo(Function &F, const DominatorTree &DT, + const PostDominatorTree &PDT, const LoopInfo &LI, + const TargetTransformInfo &TTI, bool KnownReducible); /// Whether any divergence was detected. - bool hasDivergence() const { return DA.hasDetectedDivergence(); } + bool hasDivergence() const { + return ContainsIrreducible || DA->hasDetectedDivergence(); + } /// The GPU kernel this analysis result is for - const Function &getFunction() const { return DA.getFunction(); } + const Function &getFunction() const { return F; } /// Whether \p V is divergent at its definition. - bool isDivergent(const Value &V) const; + bool isDivergent(const Value &V) const { + return ContainsIrreducible || DA->isDivergent(V); + } /// Whether \p U is divergent. Uses of a uniform value can be divergent. - bool isDivergentUse(const Use &U) const; + bool isDivergentUse(const Use &U) const { + return ContainsIrreducible || DA->isDivergentUse(U); + } /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } @@ -181,11 +180,32 @@ public: /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be /// divergent. bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } +}; + +/// \brief Divergence analysis frontend for GPU kernels. +class DivergenceAnalysis : public AnalysisInfoMixin<DivergenceAnalysis> { + friend AnalysisInfoMixin<DivergenceAnalysis>; + + static AnalysisKey Key; + +public: + using Result = DivergenceInfo; - /// Print all divergent values in the kernel. - void print(raw_ostream &OS, const Module *) const; + /// Runs the divergence analysis on @F, a GPU kernel + Result run(Function &F, FunctionAnalysisManager &AM); }; +/// Printer pass to dump divergence analysis results. 
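With the restructuring above, divergence is exposed as an ordinary new-pass-manager analysis whose Result is DivergenceInfo, and irreducible CFGs degrade to "everything divergent" rather than non-termination. A hedged sketch of querying it from a function pass (the pass name is hypothetical; the analysis API is as declared above):

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical consumer pass; only the analysis API is taken from above.
struct UniformityGatedPass : PassInfoMixin<UniformityGatedPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    DivergenceInfo &DI = FAM.getResult<DivergenceAnalysis>(F);
    // On irreducible CFGs DivergenceInfo reports everything divergent, so
    // this stays conservatively correct without special casing.
    bool AllArgsUniform = true;
    for (const auto &Arg : F.args())
      AllArgsUniform &= DI.isUniform(Arg);
    (void)AllArgsUniform; // gate a uniformity-dependent rewrite here
    return PreservedAnalyses::all();
  }
};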
+struct DivergenceAnalysisPrinterPass + : public PassInfoMixin<DivergenceAnalysisPrinterPass> { + DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + +private: + raw_ostream &OS; +}; // class DivergenceAnalysisPrinterPass + } // namespace llvm -#endif // LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H +#endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h index a5f96e72ce97..cf07c873b17c 100644 --- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h +++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_FUNCTIONPROPERTIESANALYSIS_H_ -#define LLVM_FUNCTIONPROPERTIESANALYSIS_H_ +#ifndef LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H +#define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/PassManager.h" @@ -83,4 +83,4 @@ public: }; } // namespace llvm -#endif // LLVM_FUNCTIONPROPERTIESANALYSIS_H_ +#endif // LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 9e97541e542b..b623b9ca58d8 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -654,12 +654,6 @@ public: IRSimilarityIdentifier() : Mapper(&InstDataAllocator, &InstDataListAllocator) {} - /// \param M the module to find similarity in. - explicit IRSimilarityIdentifier(Module &M) - : Mapper(&InstDataAllocator, &InstDataListAllocator) { - findSimilarity(M); - } - private: /// Map the instructions in the module to unsigned integers, using mapping /// already present in the Mapper if possible. diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 28546110ba04..82e1b14960bd 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -14,6 +14,7 @@ #define LLVM_ANALYSIS_IVDESCRIPTORS_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -68,29 +69,31 @@ public: RecurrenceDescriptor() = default; RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K, - FastMathFlags FMF, Instruction *UAI, Type *RT, - bool Signed, SmallPtrSetImpl<Instruction *> &CI) + FastMathFlags FMF, Instruction *ExactFP, Type *RT, + bool Signed, bool Ordered, + SmallPtrSetImpl<Instruction *> &CI) : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), - UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { + ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed), + IsOrdered(Ordered) { CastInsts.insert(CI.begin(), CI.end()); } /// This POD struct holds information about a potential recurrence operation. 
class InstDesc { public: - InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) + InstDesc(bool IsRecur, Instruction *I, Instruction *ExactFP = nullptr) : IsRecurrence(IsRecur), PatternLastInst(I), - RecKind(RecurKind::None), UnsafeAlgebraInst(UAI) {} + RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {} - InstDesc(Instruction *I, RecurKind K, Instruction *UAI = nullptr) + InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr) : IsRecurrence(true), PatternLastInst(I), RecKind(K), - UnsafeAlgebraInst(UAI) {} + ExactFPMathInst(ExactFP) {} bool isRecurrence() const { return IsRecurrence; } - bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; } + bool needsExactFPMath() const { return ExactFPMathInst != nullptr; } - Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; } + Instruction *getExactFPMathInst() const { return ExactFPMathInst; } RecurKind getRecKind() const { return RecKind; } @@ -104,8 +107,8 @@ public: Instruction *PatternLastInst; // If this is a min/max pattern. RecurKind RecKind; - // Recurrence has unsafe algebra. - Instruction *UnsafeAlgebraInst; + // Recurrence does not allow floating-point reassociation. + Instruction *ExactFPMathInst; }; /// Returns a struct describing if the instruction 'I' can be a recurrence @@ -114,7 +117,7 @@ public: /// compare instruction to the select instruction and stores this pointer in /// 'PatternLastInst' member of the returned struct. static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind, - InstDesc &Prev, bool HasFunNoNaNAttr); + InstDesc &Prev, FastMathFlags FMF); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, @@ -136,7 +139,8 @@ public: static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I); /// Returns identity corresponding to the RecurrenceKind. - static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp); + static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp, + FastMathFlags FMF); /// Returns the opcode corresponding to the RecurrenceKind. static unsigned getOpcode(RecurKind Kind); @@ -146,7 +150,7 @@ public: /// non-null, the minimal bit width needed to compute the reduction will be /// computed. static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, - bool HasFunNoNaNAttr, + FastMathFlags FMF, RecurrenceDescriptor &RedDes, DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr, @@ -171,7 +175,7 @@ public: /// to handle Phi as a first-order recurrence. static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, - DenseMap<Instruction *, Instruction *> &SinkAfter, + MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT); RecurKind getRecurrenceKind() const { return Kind; } @@ -184,12 +188,12 @@ public: Instruction *getLoopExitInstr() const { return LoopExitInstr; } - /// Returns true if the recurrence has unsafe algebra which requires a relaxed - /// floating-point model. - bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; } + /// Returns true if the recurrence has floating-point math that requires + /// precise (ordered) operations. + bool hasExactFPMath() const { return ExactFPMathInst != nullptr; } - /// Returns first unsafe algebra instruction in the PHI node's use-chain. - Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; } + /// Returns 1st non-reassociative FP instruction in the PHI node's use-chain. 
+ Instruction *getExactFPMathInst() const { return ExactFPMathInst; } /// Returns true if the recurrence kind is an integer kind. static bool isIntegerRecurrenceKind(RecurKind Kind); @@ -227,6 +231,9 @@ public: /// Returns true if all source operands of the recurrence are SExtInsts. bool isSigned() const { return IsSigned; } + /// Expose an ordered FP reduction to the instance users. + bool isOrdered() const { return IsOrdered; } + /// Attempts to find a chain of operations from Phi to LoopExitInst that can /// be treated as a set of reductions instructions for in-loop reductions. SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi, @@ -243,12 +250,16 @@ private: // The fast-math flags on the recurrent instructions. We propagate these // fast-math flags into the vectorized FP instructions we generate. FastMathFlags FMF; - // First occurrence of unasfe algebra in the PHI's use-chain. - Instruction *UnsafeAlgebraInst = nullptr; + // First instance of non-reassociative floating-point in the PHI's use-chain. + Instruction *ExactFPMathInst = nullptr; // The type of the recurrence. Type *RecurrenceType = nullptr; // True if all source operands of the recurrence are SExtInsts. bool IsSigned = false; + // True if this recurrence can be treated as an in-order reduction. + // Currently only a non-reassociative FAdd can be considered in-order, + // if it is also the only FAdd in the PHI's use chain. + bool IsOrdered = false; // Instructions used for type-promoting the recurrence. SmallPtrSet<Instruction *, 8> CastInsts; }; @@ -302,23 +313,14 @@ public: PredicatedScalarEvolution &PSE, InductionDescriptor &D, bool Assume = false); - /// Returns true if the induction type is FP and the binary operator does - /// not have the "fast-math" property. Such operation requires a relaxed FP - /// mode. - bool hasUnsafeAlgebra() { - return (IK == IK_FpInduction) && InductionBinOp && - !cast<FPMathOperator>(InductionBinOp)->isFast(); - } - - /// Returns induction operator that does not have "fast-math" property - /// and requires FP unsafe mode. - Instruction *getUnsafeAlgebraInst() { - if (IK != IK_FpInduction) - return nullptr; - - if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast()) - return nullptr; - return InductionBinOp; + /// Returns floating-point induction operator that does not allow + /// reassociation (transforming the induction requires an override of normal + /// floating-point rules). + Instruction *getExactFPMathInst() { + if (IK == IK_FpInduction && InductionBinOp && + !InductionBinOp->hasAllowReassoc()) + return InductionBinOp; + return nullptr; } /// Returns binary opcode of the induction operator. 
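With hasUnsafeAlgebra/getUnsafeAlgebraInst renamed to hasExactFPMath/getExactFPMathInst and the new isOrdered() bit, a client can distinguish FP reductions that merely forbid reassociation from ones it must give up on. A minimal sketch of a consumer, assuming only the RecurrenceDescriptor API shown above (the helper itself is hypothetical):

  // Hypothetical helper: can this reduction be vectorized at all?
  static bool canVectorizeReduction(const RecurrenceDescriptor &RdxDesc) {
    // No non-reassociative FP instruction in the PHI's use-chain, so
    // nothing constrains the transform (e.g. an integer reduction).
    if (!RdxDesc.hasExactFPMath())
      return true;
    // Exact FP math is required; only an in-order FAdd chain, advertised
    // via isOrdered(), can still be vectorized (as a serialized reduction).
    return RdxDesc.isOrdered();
  }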
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index c39fae13d3b8..c27aaf0db8f2 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -6,13 +6,13 @@ // //===----------------------------------------------------------------------===// // -#ifndef LLVM_INLINEADVISOR_H_ -#define LLVM_INLINEADVISOR_H_ +#ifndef LLVM_ANALYSIS_INLINEADVISOR_H +#define LLVM_ANALYSIS_INLINEADVISOR_H #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/PassManager.h" -#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include <memory> #include <unordered_set> @@ -36,11 +36,7 @@ class OptimizationRemarkEmitter; /// requires the full C Tensorflow API library, and evaluates models /// dynamically. This mode also permits generating training logs, for offline /// training. -enum class InliningAdvisorMode : int { - Default, - Release, - Development -}; +enum class InliningAdvisorMode : int { Default, Release, Development }; class InlineAdvisor; /// Capture state between an inlining decision having had been made, and @@ -283,4 +279,4 @@ void setInlineRemark(CallBase &CB, StringRef Message); /// Utility for extracting the inline cost message to a string. std::string inlineCostStr(const InlineCost &IC); } // namespace llvm -#endif // LLVM_INLINEADVISOR_H_ +#endif // LLVM_ANALYSIS_INLINEADVISOR_H diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 7f04a8ce8f5f..4e1b28d4633f 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -15,6 +15,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/InlineModelFeatureMaps.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include <cassert> #include <climits> @@ -43,7 +44,7 @@ const int OptAggressiveThreshold = 250; // Various magic constants used to adjust heuristics. const int InstrCost = 5; const int IndirectCallThreshold = 100; -const int CallPenalty = 25; +const int LoopPenalty = 25; const int LastCallToStaticBonus = 15000; const int ColdccPenalty = 2000; /// Do not inline functions which allocate this many bytes on the stack @@ -54,6 +55,20 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024; const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536; } // namespace InlineConstants +// The cost-benefit pair computed by cost-benefit analysis. +class CostBenefitPair { +public: + CostBenefitPair(APInt Cost, APInt Benefit) : Cost(Cost), Benefit(Benefit) {} + + const APInt &getCost() const { return Cost; } + + const APInt &getBenefit() const { return Benefit; } + +private: + APInt Cost; + APInt Benefit; +}; + /// Represents the cost of inlining a function. /// /// This supports special values for functions which should "always" or @@ -76,9 +91,14 @@ class InlineCost { /// Must be set for Always and Never instances. const char *Reason = nullptr; + /// The cost-benefit pair computed by cost-benefit analysis. + Optional<CostBenefitPair> CostBenefit = None; + // Trivial constructor, interesting logic in the factory functions below. 
- InlineCost(int Cost, int Threshold, const char *Reason = nullptr) - : Cost(Cost), Threshold(Threshold), Reason(Reason) { + InlineCost(int Cost, int Threshold, const char *Reason = nullptr, + Optional<CostBenefitPair> CostBenefit = None) + : Cost(Cost), Threshold(Threshold), Reason(Reason), + CostBenefit(CostBenefit) { assert((isVariable() || Reason) && "Reason must be provided for Never or Always"); } @@ -89,11 +109,13 @@ public: assert(Cost < NeverInlineCost && "Cost crosses sentinel value"); return InlineCost(Cost, Threshold); } - static InlineCost getAlways(const char *Reason) { - return InlineCost(AlwaysInlineCost, 0, Reason); + static InlineCost getAlways(const char *Reason, + Optional<CostBenefitPair> CostBenefit = None) { + return InlineCost(AlwaysInlineCost, 0, Reason, CostBenefit); } - static InlineCost getNever(const char *Reason) { - return InlineCost(NeverInlineCost, 0, Reason); + static InlineCost getNever(const char *Reason, + Optional<CostBenefitPair> CostBenefit = None) { + return InlineCost(NeverInlineCost, 0, Reason, CostBenefit); } /// Test whether the inline cost is low enough for inlining. @@ -116,6 +138,9 @@ public: return Threshold; } + /// Get the cost-benefit pair which was computed by cost-benefit analysis + Optional<CostBenefitPair> getCostBenefit() const { return CostBenefit; } + /// Get the reason of Always or Never. const char *getReason() const { assert((Reason || isVariable()) && @@ -270,6 +295,15 @@ Optional<int> getInliningCostEstimate( ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr); +/// Get the expanded cost features. The features are returned unconditionally, +/// even if inlining is impossible. +Optional<InlineCostFeatures> getInliningCostFeatures( + CallBase &Call, TargetTransformInfo &CalleeTTI, + function_ref<AssumptionCache &(Function &)> GetAssumptionCache, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, + ProfileSummaryInfo *PSI = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); + /// Minimal filter to detect invalid constructs for inlining. InlineResult isInlineViable(Function &Callee); diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h index 8da442cc4a53..1afa8a825f15 100644 --- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h +++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h @@ -16,6 +16,61 @@ namespace llvm { +// List of cost features. A "cost" feature is a summand of the heuristic-based +// inline cost, and we define them separately to preserve the original heuristic +// behavior. 
+#define INLINE_COST_FEATURE_ITERATOR(M) \ + M(SROASavings, "sroa_savings") \ + M(SROALosses, "sroa_losses") \ + M(LoadElimination, "load_elimination") \ + M(CallPenalty, "call_penalty") \ + M(CallArgumentSetup, "call_argument_setup") \ + M(LoadRelativeIntrinsic, "load_relative_intrinsic") \ + M(LoweredCallArgSetup, "lowered_call_arg_setup") \ + M(IndirectCallPenalty, "indirect_call_penalty") \ + M(JumpTablePenalty, "jump_table_penalty") \ + M(CaseClusterPenalty, "case_cluster_penalty") \ + M(SwitchPenalty, "switch_penalty") \ + M(UnsimplifiedCommonInstructions, "unsimplified_common_instructions") \ + M(NumLoops, "num_loops") \ + M(DeadBlocks, "dead_blocks") \ + M(SimplifiedInstructions, "simplified_instructions") \ + M(ConstantArgs, "constant_args") \ + M(ConstantOffsetPtrArgs, "constant_offset_ptr_args") \ + M(CallSiteCost, "callsite_cost") \ + M(ColdCcPenalty, "cold_cc_penalty") \ + M(LastCallToStaticBonus, "last_call_to_static_bonus") \ + M(IsMultipleBlocks, "is_multiple_blocks") \ + M(NestedInlines, "nested_inlines") \ + M(NestedInlineCostEstimate, "nested_inline_cost_estimate") \ + M(Threshold, "threshold") + +// clang-format off +enum class InlineCostFeatureIndex : size_t { +#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) +#undef POPULATE_INDICES + + NumberOfFeatures +}; +// clang-format on + +using InlineCostFeatures = + std::array<int, + static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures)>; + +constexpr bool isHeuristicInlineCostFeature(InlineCostFeatureIndex Feature) { + return Feature != InlineCostFeatureIndex::SROASavings && + Feature != InlineCostFeatureIndex::IsMultipleBlocks && + Feature != InlineCostFeatureIndex::DeadBlocks && + Feature != InlineCostFeatureIndex::SimplifiedInstructions && + Feature != InlineCostFeatureIndex::ConstantArgs && + Feature != InlineCostFeatureIndex::ConstantOffsetPtrArgs && + Feature != InlineCostFeatureIndex::NestedInlines && + Feature != InlineCostFeatureIndex::NestedInlineCostEstimate && + Feature != InlineCostFeatureIndex::Threshold; +} + // List of features. 
Each feature is defined through a triple: // - the name of an enum member, which will be the feature index // - a textual name, used for Tensorflow model binding (so it needs to match the @@ -34,11 +89,10 @@ namespace llvm { M(NrCtantParams, "nr_ctant_params", \ "number of parameters in the call site that are constants") \ M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)") \ - M(EdgeCount, "edge_count", \ + M(EdgeCount, "edge_count", "total number of calls in the module") \ + M(CallerUsers, "caller_users", \ "number of module-internal users of the caller, +1 if the caller is " \ "exposed externally") \ - M(CallerUsers, "caller_users", \ - "number of blocks reached from a conditional instruction, in the caller") \ M(CallerConditionallyExecutedBlocks, "caller_conditionally_executed_blocks", \ "number of blocks reached from a conditional instruction, in the caller") \ M(CallerBasicBlockCount, "caller_basic_block_count", \ @@ -46,14 +100,29 @@ namespace llvm { M(CalleeConditionallyExecutedBlocks, "callee_conditionally_executed_blocks", \ "number of blocks reached from a conditional instruction, in the callee") \ M(CalleeUsers, "callee_users", \ - "number of blocks reached from a conditional instruction, in the callee") + "number of module-internal users of the callee, +1 if the callee is " \ + "exposed externally") +// clang-format off enum class FeatureIndex : size_t { +// InlineCost features - these must come first +#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) +#undef POPULATE_INDICES + +// Non-cost features #define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME, INLINE_FEATURE_ITERATOR(POPULATE_INDICES) #undef POPULATE_INDICES - NumberOfFeatures + + NumberOfFeatures }; +// clang-format on + +constexpr FeatureIndex +inlineCostFeatureToMlFeature(InlineCostFeatureIndex Feature) { + return static_cast<FeatureIndex>(static_cast<size_t>(Feature)); +} constexpr size_t NumberOfFeatures = static_cast<size_t>(FeatureIndex::NumberOfFeatures); diff --git a/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h b/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h index 46bc974c4a7f..192630e62a54 100644 --- a/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h +++ b/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h @@ -80,6 +80,11 @@ public: /// It makes all necessary updates to internal caches to keep them consistent. void removeInstruction(const Instruction *Inst); + /// Notifies this tracking that we are going to replace all uses of \p Inst. + /// It makes all necessary updates to internal caches to keep them consistent. + /// Should typically be called before a RAUW. + void removeUsersOf(const Instruction *Inst); + /// Invalidates all information from this tracking. void clear(); }; diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 17d6f30a35cb..efaf1847276b 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -37,6 +37,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" namespace llvm { @@ -133,7 +134,9 @@ struct SimplifyQuery { bool isUndefValue(Value *V) const { if (!CanUseUndef) return false; - return isa<UndefValue>(V); + + using namespace PatternMatch; + return match(V, m_Undef()); } }; @@ -142,8 +145,7 @@ struct SimplifyQuery { // Please use the SimplifyQuery versions in new code. 
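The comment above asks new code to go through the SimplifyQuery overloads. A sketch of what that looks like for a caller that already has the standard analyses on hand; the helper name and the plumbing are illustrative, not part of this patch:

  // Illustrative only: try to fold `A + B` at context instruction CxtI.
  static Value *tryFoldAdd(Value *A, Value *B, const DataLayout &DL,
                           const TargetLibraryInfo *TLI,
                           const DominatorTree *DT, AssumptionCache *AC,
                           const Instruction *CxtI) {
    const SimplifyQuery Q(DL, TLI, DT, AC, CxtI);
    // Returns the simplified value (often a constant) or null if no fold
    // applies. The FP entry points below additionally take an exception
    // behavior and rounding mode, defaulted so that existing callers keep
    // the default FP environment.
    return SimplifyAddInst(A, B, /*isNSW=*/false, /*isNUW=*/false, Q);
  }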
/// Given operand for an FNeg, fold the result or return null. -Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF, - const SimplifyQuery &Q); +Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q); /// Given operands for an Add, fold the result or return null. Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, @@ -154,23 +156,34 @@ Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const SimplifyQuery &Q); /// Given operands for an FAdd, fold the result or return null. -Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for an FSub, fold the result or return null. -Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for an FMul, fold the result or return null. -Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for the multiplication of a FMA, fold the result or return /// null. In contrast to SimplifyFMulInst, this function will not perform /// simplifications whose unrounded results differ when rounded to the argument /// type. Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for a Mul, fold the result or return null. Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); @@ -182,8 +195,11 @@ Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an FDiv, fold the result or return null. -Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for an SRem, fold the result or return null. Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); @@ -192,8 +208,11 @@ Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an FRem, fold the result or return null. -Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for a Shl, fold the result or return null. 
Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, @@ -277,8 +296,8 @@ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// Given operands for a BinaryOperator, fold the result or return null. /// Try to use FastMathFlags when folding the result. -Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q); +Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); /// Given a callsite, fold the result or return null. Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q); @@ -292,11 +311,18 @@ Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q); Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE = nullptr); +/// Like \p SimplifyInstruction but the operands of \p I are replaced with +/// \p NewOps. Returns a simplified value, or null if none was found. +Value * +SimplifyInstructionWithOperands(Instruction *I, ArrayRef<Value *> NewOps, + const SimplifyQuery &Q, + OptimizationRemarkEmitter *ORE = nullptr); + /// See if V simplifies when its operand Op is replaced with RepOp. If not, /// return null. -/// AllowRefinement specifies whether the simplification can be a refinement, -/// or whether it needs to be strictly identical. -Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, +/// AllowRefinement specifies whether the simplification can be a refinement +/// (e.g. 0 instead of poison), or whether it needs to be strictly identical. +Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, const SimplifyQuery &Q, bool AllowRefinement); /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. @@ -325,4 +351,3 @@ const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &, } // end namespace llvm #endif - diff --git a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h index 8166b52aa226..542a741ee07e 100644 --- a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h +++ b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_IDF_H -#define LLVM_ANALYSIS_IDF_H +#ifndef LLVM_ANALYSIS_ITERATEDDOMINANCEFRONTIER_H +#define LLVM_ANALYSIS_ITERATEDDOMINANCEFRONTIER_H #include "llvm/Support/CFGDiff.h" #include "llvm/Support/GenericIteratedDominanceFrontier.h" diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index f7a5adac2b43..ca276d2f3cf8 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -60,6 +60,7 @@ namespace llvm { +template <class GraphType> struct GraphTraits; class Module; class Value; @@ -115,8 +116,6 @@ public: class EdgeSequence; class SCC; class RefSCC; - class edge_iterator; - class call_edge_iterator; /// A class used to represent edges in the call graph. /// @@ -464,7 +463,7 @@ public: /// Dump a short description of this SCC to stderr. void dump() const; -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS) /// Verify invariants about the SCC. /// /// This will attempt to validate all of the basic invariants within an @@ -585,7 +584,7 @@ public: /// Dump a short description of this RefSCC to stderr. void dump() const; -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS) /// Verify invariants about the RefSCC and all its SCCs. 
/// /// This will attempt to validate all of the invariants *within* the diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h index 363cb49af382..57f732cc854b 100644 --- a/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -75,7 +75,15 @@ public: /// \p Pred is a CmpInst predicate. If \p UseBlockValue is true, the block /// value is also taken into account. Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, - Instruction *CxtI, bool UseBlockValue = false); + Instruction *CxtI, bool UseBlockValue); + + /// Determine whether the specified value comparison is known to be true + /// or false at the specified instruction. While this takes two Value's, + /// it still requires that one of them is a constant. + /// \p Pred is a CmpInst predicate. + /// If \p UseBlockValue is true, the block value is also taken into account. + Tristate getPredicateAt(unsigned Pred, Value *LHS, Value *RHS, + Instruction *CxtI, bool UseBlockValue); /// Determine whether the specified value is known to be a constant at the /// specified instruction. Return null if not. diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h index 15400f5e07ff..0132c88077d2 100644 --- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -12,16 +12,16 @@ // better decisions. // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H -#define LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H +#ifndef LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H +#define LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H #include "llvm/ADT/DenseSet.h" #include "llvm/Pass.h" #include <memory> namespace llvm { +class DivergenceInfo; class Function; -class GPUDivergenceAnalysis; class Module; class raw_ostream; class TargetTransformInfo; @@ -63,7 +63,7 @@ private: const TargetTransformInfo &TTI) const; // (optional) handle to new DivergenceAnalysis - std::unique_ptr<GPUDivergenceAnalysis> gpuDA; + std::unique_ptr<DivergenceInfo> gpuDA; // Stores all divergent values. DenseSet<const Value *> DivergentValues; @@ -73,4 +73,4 @@ private: }; } // End llvm namespace -#endif //LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H +#endif // LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 24a05610e68d..ced1943b81d9 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -25,7 +25,9 @@ class Instruction; class LoadInst; class Loop; class MDNode; +class MemoryLocation; class ScalarEvolution; +class TargetLibraryInfo; /// Return true if this is always a dereferenceable pointer. If the context /// instruction is specified perform context-sensitive analysis and return true @@ -33,7 +35,8 @@ class ScalarEvolution; bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Returns true if V is always a dereferenceable pointer with alignment /// greater or equal than requested. 
If the context instruction is specified @@ -43,7 +46,8 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Returns true if V is always dereferenceable for Size byte with alignment /// greater or equal than requested. If the context instruction is specified @@ -52,7 +56,8 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Return true if we know that executing a load from this value cannot trap. /// @@ -65,7 +70,8 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Return true if we can prove that the given load (which is assumed to be /// within the specified loop) would access only dereferenceable memory, and @@ -89,7 +95,8 @@ bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, bool isSafeToLoadUnconditionally(Value *V, Type *Ty, Align Alignment, const DataLayout &DL, Instruction *ScanFrom = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// The default number of maximum instructions to scan in the block, used by /// FindAvailableLoadedValue(). @@ -127,6 +134,13 @@ Value *FindAvailableLoadedValue(LoadInst *Load, bool *IsLoadCSE = nullptr, unsigned *NumScanedInst = nullptr); +/// This overload provides a more efficient implementation of +/// FindAvailableLoadedValue() for the case where we are not interested in +/// finding the closest clobbering instruction if no available load is found. +/// This overload cannot be used to scan across multiple blocks. +Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, + unsigned MaxInstsToScan = DefMaxInstsToScan); + /// Scan backwards to see if we have the value of the given pointer available /// locally within a small number of instructions. /// @@ -134,7 +148,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, /// this function, if ScanFrom points at the beginning of the block, it's safe /// to continue scanning the predecessors. /// -/// \param Ptr The pointer we want the load and store to originate from. +/// \param Loc The location we want the load and store to originate from. /// \param AccessTy The access type of the pointer. /// \param AtLeastAtomic Are we looking for at-least an atomic load/store ? In /// case it is false, we can return an atomic or non-atomic load or store. In @@ -150,8 +164,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load, /// location in memory, as opposed to the value operand of a store. /// /// \returns The found value, or nullptr if no value is found. 
-Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, - BasicBlock *ScanBB, +Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, + bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst); diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 13fbe884eddf..0a0ef1536caf 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -341,17 +341,21 @@ struct RuntimeCheckingPtrGroup { /// pointer, with index \p Index in RtCheck. RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck); + RuntimeCheckingPtrGroup(unsigned Index, const SCEV *Start, const SCEV *End, + unsigned AS) + : High(End), Low(Start), AddressSpace(AS) { + Members.push_back(Index); + } + /// Tries to add the pointer recorded in RtCheck at index /// \p Index to this pointer checking group. We can only add a pointer /// to a checking group if we will still be able to get /// the upper and lower bounds of the check. Returns true in case /// of success, false otherwise. - bool addPointer(unsigned Index); + bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck); + bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End, + unsigned AS, ScalarEvolution &SE); - /// Constitutes the context of this pointer checking group. For each - /// pointer that is a member of this group we will retain the index - /// at which it appears in RtCheck. - RuntimePointerChecking &RtCheck; /// The SCEV expression which represents the upper bound of all the /// pointers in this group. const SCEV *High; @@ -360,6 +364,8 @@ struct RuntimeCheckingPtrGroup { const SCEV *Low; /// Indices of all the pointers that constitute this grouping. SmallVector<unsigned, 2> Members; + /// Address space of the involved pointers. + unsigned AddressSpace; }; /// A memcheck which made up of a pair of grouped pointers. @@ -679,6 +685,16 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap = ValueToValueMap(), bool Assume = false, bool ShouldCheckWrap = true); +/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are +/// compatible and it is possible to calculate the distance between them. This +/// is a simple API that does not depend on the analysis pass. +/// \param StrictCheck Ensure that the calculated distance matches the +/// type-based one after all the bitcasts removal in the provided pointers. +Optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, + Value *PtrB, const DataLayout &DL, + ScalarEvolution &SE, bool StrictCheck = false, + bool CheckType = true); + /// Attempt to sort the pointers in \p VL and return the sorted indices /// in \p SortedIndices, if reordering is required. 
/// @@ -689,7 +705,7 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, /// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and /// saves the mask for actual memory accesses in program order in /// \p SortedIndices as <1,2,0,3> -bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, +bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl<unsigned> &SortedIndices); diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h index 11dbd15c8678..92db1d67fc4e 100644 --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -47,7 +47,7 @@ class TargetTransformInfo; /// The adaptor from a function pass to a loop pass computes these analyses and /// makes them available to the loop passes "for free". Each loop pass is -/// expected expected to update these analyses if necessary to ensure they're +/// expected to update these analyses if necessary to ensure they're /// valid after it runs. struct LoopStandardAnalysisResults { AAResults &AA; diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h index e8f2205545eb..21882ebd0087 100644 --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -205,7 +205,7 @@ public: } /// Return the estimated ordered loop costs. - const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; } + ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; } private: /// Calculate the cache footprint of each loop in the nest (when it is diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index a5717bae12c3..164ec50e47bc 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -479,7 +479,8 @@ public: bool isAnnotatedParallel() const { return false; } /// Print loop with all the BBs inside it. - void print(raw_ostream &OS, unsigned Depth = 0, bool Verbose = false) const; + void print(raw_ostream &OS, bool Verbose = false, bool PrintNested = true, + unsigned Depth = 0) const; protected: friend class LoopInfoBase<BlockT, LoopT>; @@ -588,6 +589,9 @@ public: /// PHINode *getCanonicalInductionVariable() const; + /// Get the latch condition instruction. + ICmpInst *getLatchCmpInst() const; + /// Obtain the unique incoming and back edge. Return false if they are /// non-unique or the loop is dead; otherwise, return true. bool getIncomingAndBackEdge(BasicBlock *&Incoming, @@ -1199,6 +1203,14 @@ public: return true; } + + // Return true if a new use of V added in ExitBB would require an LCSSA PHI + // to be inserted at the beginning of the block. Note that V is assumed to + // dominate ExitBB, and ExitBB must be the exit block of some loop. The + // IR is assumed to be in LCSSA form before the planned insertion. + bool wouldBeOutOfLoopUseRequiringLCSSA(const Value *V, + const BasicBlock *ExitBB) const; + }; // Allow clients to walk the list of nested loops... @@ -1283,6 +1295,33 @@ MDNode *findOptionMDForLoopID(MDNode *LoopID, StringRef Name); /// found, return nullptr. MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name); +Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop, + StringRef Name); + +/// Returns true if Name is applied to TheLoop and enabled.
+bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name); + +/// Find named metadata for a loop with an integer value. +llvm::Optional<int> +getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name); + +/// Find string metadata for loop. +/// +/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an +/// operand or null otherwise. If the string metadata is not found return +/// Optional's not-a-value. +Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop, + StringRef Name); + +/// Look for the loop attribute that requires progress within the loop. +/// Note: Most consumers probably want "isMustProgress" which checks +/// the containing function attribute too. +bool hasMustProgress(const Loop *L); + +/// Return true if this loop can be assumed to make progress. (i.e. can't +/// be infinite without side effects without also being undefined) +bool isMustProgress(const Loop *L); + /// Return whether an MDNode might represent an access group. /// /// Access group metadata nodes have to be distinct and empty. Being diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h index 426b349c6b8a..2cc9afb7c2cd 100644 --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" @@ -380,8 +381,8 @@ void LoopBase<BlockT, LoopT>::verifyLoopNest( } template <class BlockT, class LoopT> -void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth, - bool Verbose) const { +void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, bool Verbose, + bool PrintNested, unsigned Depth) const { OS.indent(Depth * 2); if (static_cast<const LoopT *>(this)->isAnnotatedParallel()) OS << "Parallel "; @@ -406,10 +407,13 @@ void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth, if (Verbose) BB->print(OS); } - OS << "\n"; - for (iterator I = begin(), E = end(); I != E; ++I) - (*I)->print(OS, Depth + 2); + if (PrintNested) { + OS << "\n"; + + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->print(OS, /*Verbose*/ false, PrintNested, Depth + 2); + } } //===----------------------------------------------------------------------===// @@ -676,10 +680,7 @@ static void compareLoops(const LoopT *L, const LoopT *OtherL, const SmallPtrSetImpl<const BlockT *> &OtherBlocksSet = OtherL->getBlocksSet(); assert(BlocksSet.size() == OtherBlocksSet.size() && - llvm::all_of(BlocksSet, - [&OtherBlocksSet](const BlockT *BB) { - return OtherBlocksSet.count(BB); - }) && + llvm::set_is_subset(BlocksSet, OtherBlocksSet) && "Mismatched basic blocks in BlocksSets!"); } #endif diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 9c4fb4dbc29b..9a749a1c8eae 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -30,7 +30,6 @@ public: LoopNest(Loop &Root, ScalarEvolution &SE); LoopNest() = delete; - LoopNest &operator=(const LoopNest &) = delete; /// Construct a LoopNest object.
static std::unique_ptr<LoopNest> getLoopNest(Loop &Root, ScalarEvolution &SE); @@ -61,10 +60,12 @@ public: static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE); /// Recursivelly traverse all empty 'single successor' basic blocks of \p From - /// (if there are any). Return the last basic block found or \p End if it was - /// reached during the search. + /// (if there are any). When \p CheckUniquePred is set to true, check if + /// each of the empty single successors has a unique predecessor. Return + /// the last basic block found or \p End if it was reached during the search. static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From, - const BasicBlock *End); + const BasicBlock *End, + bool CheckUniquePred = false); /// Return the outermost loop in the loop nest. Loop &getOutermostLoop() const { return *Loops.front(); } @@ -139,6 +140,11 @@ public: return all_of(Loops, [](const Loop *L) { return L->isRotatedForm(); }); } + /// Return the function to which the loop-nest belongs. + Function *getParent() const { + return Loops.front()->getHeader()->getParent(); + } + StringRef getName() const { return Loops.front()->getName(); } protected: diff --git a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h index 5f332e3cac16..7cf8a081f9a2 100644 --- a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h +++ b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h @@ -46,7 +46,7 @@ class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> { public: UnrolledInstAnalyzer(unsigned Iteration, - DenseMap<Value *, Constant *> &SimplifiedValues, + DenseMap<Value *, Value *> &SimplifiedValues, ScalarEvolution &SE, const Loop *L) : SimplifiedValues(SimplifiedValues), SE(SE), L(L) { IterationNumber = SE.getConstant(APInt(64, Iteration)); @@ -68,22 +68,19 @@ private: /// iteration. const SCEV *IterationNumber; - /// A Value->Constant map for keeping values that we managed to - /// constant-fold on the given iteration. - /// /// While we walk the loop instructions, we build up and maintain a mapping /// of simplified values specific to this iteration. The idea is to propagate /// any special information we have about loads that can be replaced with /// constants after complete unrolling, and account for likely simplifications /// post-unrolling. - DenseMap<Value *, Constant *> &SimplifiedValues; + DenseMap<Value *, Value *> &SimplifiedValues; ScalarEvolution &SE; const Loop *L; bool simplifyInstWithSCEV(Instruction *I); - bool visitInstruction(Instruction &I) { return simplifyInstWithSCEV(&I); } + bool visitInstruction(Instruction &I); bool visitBinaryOperator(BinaryOperator &I); bool visitLoad(LoadInst &I); bool visitCastInst(CastInst &I); diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h index c5428726995e..39ade20df53f 100644 --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -212,6 +212,10 @@ struct ObjectSizeOpts { /// object size in Size if successful, and false otherwise. In this context, by /// object we mean the region of memory starting at Ptr to the end of the /// underlying object pointed to by Ptr. +/// +/// WARNING: The object size returned is the allocation size. This does not +/// imply dereferenceability at site of use since the object may be freed in +/// between.
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {}); diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index efde00f82d57..cb522cf731d3 100644 --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -20,30 +20,18 @@ #include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Pass.h" -#include "llvm/Support/ErrorHandling.h" -#include <cassert> -#include <cstdint> -#include <utility> -#include <vector> namespace llvm { class AAResults; class AssumptionCache; +class BatchAAResults; class DominatorTree; -class Function; -class Instruction; -class LoadInst; class PHITransAddr; -class TargetLibraryInfo; class PhiValues; -class Value; /// A memory dependence query can return one of three different answers. class MemDepResult { @@ -343,7 +331,7 @@ private: // A map from instructions to their non-local dependencies. using NonLocalDepMapType = DenseMap<Instruction *, PerInstNLInfo>; - NonLocalDepMapType NonLocalDeps; + NonLocalDepMapType NonLocalDepsMap; // A reverse mapping from dependencies to the dependees. This is // used when removing instructions to keep the cache coherent. @@ -364,6 +352,10 @@ private: unsigned DefaultBlockScanLimit; + /// Offsets to dependent clobber loads. + using ClobberOffsetsMapType = DenseMap<LoadInst *, int32_t>; + ClobberOffsetsMapType ClobberOffsets; + public: MemoryDependenceResults(AAResults &AA, AssumptionCache &AC, const TargetLibraryInfo &TLI, DominatorTree &DT, @@ -452,10 +444,18 @@ public: Instruction *QueryInst = nullptr, unsigned *Limit = nullptr); + MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB, + Instruction *QueryInst, + unsigned *Limit, + BatchAAResults &BatchAA); + MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, - Instruction *QueryInst, unsigned *Limit); + Instruction *QueryInst, unsigned *Limit, + BatchAAResults &BatchAA); /// This analysis looks for other loads and stores with invariant.group /// metadata and the same pointer operand. Returns Unknown if it does not @@ -468,6 +468,14 @@ public: /// Release memory in caches. void releaseMemory(); + /// Return the clobber offset to the dependent instruction.
+ Optional<int32_t> getClobberOffset(LoadInst *DepInst) const { + const auto Off = ClobberOffsets.find(DepInst); + if (Off != ClobberOffsets.end()) + return Off->getSecond(); + return None; + } + private: MemDepResult getCallDependencyFrom(CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt, @@ -480,12 +488,13 @@ private: DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock = false, bool IsIncomplete = false); - MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst, + MemDepResult getNonLocalInfoForBlock(Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, - unsigned NumSortedEntries); + unsigned NumSortedEntries, + BatchAAResults &BatchAA); - void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P); + void removeCachedNonLocalPointerDependencies(ValueIsLoadPair P); void verifyRemoved(Instruction *Inst) const; }; diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 63c031b1921f..f40b99968fd3 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -288,7 +288,7 @@ protected: DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB, unsigned NumOperands) : MemoryAccess(C, Vty, DeleteValue, BB, NumOperands), - MemoryInstruction(MI), OptimizedAccessAlias(MayAlias) { + MemoryInstruction(MI), OptimizedAccessAlias(AliasResult::MayAlias) { setDefiningAccess(DMA); } @@ -299,8 +299,9 @@ protected: OptimizedAccessAlias = AR; } - void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false, - Optional<AliasResult> AR = MayAlias) { + void setDefiningAccess( + MemoryAccess *DMA, bool Optimized = false, + Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias)) { if (!Optimized) { setOperand(0, DMA); return; @@ -328,7 +329,8 @@ public: /*NumOperands=*/1) {} // allocate space for exactly one operand - void *operator new(size_t s) { return User::operator new(s, 1); } + void *operator new(size_t S) { return User::operator new(S, 1); } + void operator delete(void *Ptr) { User::operator delete(Ptr); } static bool classof(const Value *MA) { return MA->getValueID() == MemoryUseVal; @@ -388,7 +390,8 @@ public: ID(Ver) {} // allocate space for exactly two operands - void *operator new(size_t s) { return User::operator new(s, 2); } + void *operator new(size_t S) { return User::operator new(S, 2); } + void operator delete(void *Ptr) { User::operator delete(Ptr); } static bool classof(const Value *MA) { return MA->getValueID() == MemoryDefVal; @@ -483,9 +486,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess) /// issue. class MemoryPhi final : public MemoryAccess { // allocate space for exactly zero operands - void *operator new(size_t s) { return User::operator new(s); } + void *operator new(size_t S) { return User::operator new(S); } public: + void operator delete(void *Ptr) { User::operator delete(Ptr); } + /// Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); @@ -1099,7 +1104,7 @@ public: return MP->getIncomingBlock(ArgNo); } - typename BaseT::iterator::pointer operator*() const { + typename std::iterator_traits<BaseT>::pointer operator*() const { assert(Access && "Tried to access past the end of our iterator"); // Go to the first argument for phis, and the defining access for everything // else. 
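Returning to the clobber-offset cache that MemoryDependenceResults gains above: when a load is clobbered by a wider load at a known distance, getClobberOffset() exposes that distance so a client such as GVN can salvage the earlier value instead of giving up. A hedged sketch, assuming `MD` is a MemoryDependenceResults and `Dep` came from an ordinary dependence query for `Load`:

  if (Dep.isClobber()) {
    if (auto *DepLoad = dyn_cast<LoadInst>(Dep.getInst())) {
      if (Optional<int32_t> Off = MD.getClobberOffset(DepLoad)) {
        // *Off is the byte offset of Load's location within DepLoad's, so
        // the value could be carved out of DepLoad instead of re-loaded.
      }
    }
  }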
@@ -1195,7 +1200,7 @@ public: return DefIterator == Other.DefIterator; } - BaseT::iterator::reference operator*() const { + typename std::iterator_traits<BaseT>::reference operator*() const { assert(DefIterator != OriginalAccess->defs_end() && "Tried to access past the end of our iterator"); return CurrentPair; diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index b0bf2e5ead62..659e6aff6e28 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -240,11 +240,6 @@ public: /// successors. void changeToUnreachable(const Instruction *I); - /// Conditional branch BI is changed or replaced with an unconditional branch - /// to `To`. Update Phis in BI's successors to remove BI's BB. - void changeCondBranchToUnconditionalTo(const BranchInst *BI, - const BasicBlock *To); - /// Get handle on MemorySSA. MemorySSA* getMemorySSA() const { return MSSA; } diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h index 16c5f6701da0..62bdade95d96 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H -#define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H +#ifndef LLVM_ANALYSIS_OBJCARCANALYSISUTILS_H +#define LLVM_ANALYSIS_OBJCARCANALYSISUTILS_H #include "llvm/ADT/Optional.h" #include "llvm/Analysis/ObjCARCInstKind.h" diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h new file mode 100644 index 000000000000..2566bfbcf61c --- /dev/null +++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h @@ -0,0 +1,59 @@ +//===- ObjCARCUtil.h - ObjC ARC Utility Functions ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines ARC utility functions which are used by various parts of +/// the compiler. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_OBJCARCUTIL_H +#define LLVM_IR_OBJCARCUTIL_H + +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { +namespace objcarc { + +inline const char *getRVMarkerModuleFlagStr() { + return "clang.arc.retainAutoreleasedReturnValueMarker"; +} + +enum AttachedCallOperandBundle : unsigned { RVOB_Retain, RVOB_Claim }; + +inline AttachedCallOperandBundle +getAttachedCallOperandBundleEnum(bool IsRetain) { + return IsRetain ? RVOB_Retain : RVOB_Claim; +} + +inline bool hasAttachedCallOpBundle(const CallBase *CB) { + // Ignore the bundle if the return type is void. Global optimization passes + // can turn the called function's return type to void. That should happen only + // if the call doesn't return and the call to @llvm.objc.clang.arc.noop.use + // no longer consumes the function return or is deleted. In that case, it's + // not necessary to emit the marker instruction or calls to the ARC runtime + // functions. 
+ return !CB->getFunctionType()->getReturnType()->isVoidTy() && + CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall) + .hasValue(); +} + +inline bool hasAttachedCallOpBundle(const CallBase *CB, bool IsRetain) { + assert(hasAttachedCallOpBundle(CB) && + "call doesn't have operand bundle clang_arc_attachedcall"); + auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall); + if (!B.hasValue()) + return false; + return cast<ConstantInt>(B->Inputs[0])->getZExtValue() == + getAttachedCallOperandBundleEnum(IsRetain); +} + +} // end namespace objcarc +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h index 9815dd05cd1c..ff706e91f3c4 100644 --- a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h +++ b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h @@ -11,8 +11,8 @@ // used to compute the "hotness" of the diagnostic message. //===----------------------------------------------------------------------===// -#ifndef LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H -#define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H +#ifndef LLVM_ANALYSIS_OPTIMIZATIONREMARKEMITTER_H +#define LLVM_ANALYSIS_OPTIMIZATIONREMARKEMITTER_H #include "llvm/ADT/Optional.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -61,6 +61,12 @@ public: bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv); + /// Return true iff at least *some* remarks are enabled. + bool enabled() const { + return F->getContext().getLLVMRemarkStreamer() || + F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(); + } + /// Output the remark via the diagnostic handler and to the /// optimization record file. void emit(DiagnosticInfoOptimizationBase &OptDiag); @@ -73,9 +79,11 @@ public: // remarks enabled. We can't currently check whether remarks are requested // for the calling pass since that requires actually building the remark. - if (F->getContext().getLLVMRemarkStreamer() || - F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) { + if (enabled()) { auto R = RemarkBuilder(); + static_assert( + std::is_base_of<DiagnosticInfoOptimizationBase, decltype(R)>::value, + "the lambda passed to emit() must return a remark"); emit((DiagnosticInfoOptimizationBase &)R); } } @@ -166,4 +174,4 @@ public: Result run(Function &F, FunctionAnalysisManager &AM); }; } -#endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H +#endif // LLVM_ANALYSIS_OPTIMIZATIONREMARKEMITTER_H diff --git a/llvm/include/llvm/Analysis/OverflowInstAnalysis.h b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h new file mode 100644 index 000000000000..7523fb9392cd --- /dev/null +++ b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h @@ -0,0 +1,45 @@ +//===-- OverflowInstAnalysis.h - Utils to fold overflow insts ----*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file holds routines to help analyse overflow instructions +// and fold them into constants or other overflow instructions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H +#define LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H + +#include "llvm/IR/InstrTypes.h" + +namespace llvm { +class Value; +class Use; + +/// Match one of the patterns up to the select/logic op: +/// %Op0 = icmp ne i4 %X, 0 +/// %Agg = call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %Y) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %ret = select i1 %Op0, i1 %Op1, i1 false / %ret = and i1 %Op0, %Op1 +/// +/// %Op0 = icmp eq i4 %X, 0 +/// %Agg = call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %Y) +/// %NotOp1 = extractvalue { i4, i1 } %Agg, 1 +/// %Op1 = xor i1 %NotOp1, true +/// %ret = select i1 %Op0, i1 true, i1 %Op1 / %ret = or i1 %Op0, %Op1 +/// +/// Callers are expected to align that with the operands of the select/logic. +/// IsAnd is set to true if the Op0 and Op1 are used as the first pattern. +/// If Op0 and Op1 match one of the patterns above, return true and fill Y's +/// use. + +bool isCheckForZeroAndMulWithOverflow(Value *Op0, Value *Op1, bool IsAnd, + Use *&Y); +bool isCheckForZeroAndMulWithOverflow(Value *Op0, Value *Op1, bool IsAnd); +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index a4e6ffc3dd58..c95404d96f4e 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H -#define LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H +#ifndef LLVM_ANALYSIS_PROFILESUMMARYINFO_H +#define LLVM_ANALYSIS_PROFILESUMMARYINFO_H #include "llvm/ADT/DenseMap.h" #include "llvm/IR/PassManager.h" @@ -38,7 +38,7 @@ class Function; // units. This would require making this depend on BFI. class ProfileSummaryInfo { private: - const Module &M; + const Module *M; std::unique_ptr<ProfileSummary> Summary; void computeThresholds(); // Count thresholds to answer isHotCount and isColdCount queries. @@ -58,8 +58,7 @@ private: mutable DenseMap<int, uint64_t> ThresholdCache; public: - ProfileSummaryInfo(const Module &M) : M(M) { refresh(); } - + ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); } ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default; /// If no summary is present, attempt to refresh. diff --git a/llvm/include/llvm/Analysis/RegionIterator.h b/llvm/include/llvm/Analysis/RegionIterator.h index 72bc5bbcb506..fecb28725dcc 100644 --- a/llvm/include/llvm/Analysis/RegionIterator.h +++ b/llvm/include/llvm/Analysis/RegionIterator.h @@ -35,10 +35,15 @@ class BasicBlock; /// /// For a subregion RegionNode there is just one successor. The RegionNode /// representing the exit of the subregion. 
-template <class NodeRef, class BlockT, class RegionT> -class RNSuccIterator - : public std::iterator<std::forward_iterator_tag, NodeRef> { - using super = std::iterator<std::forward_iterator_tag, NodeRef>; +template <class NodeRef, class BlockT, class RegionT> class RNSuccIterator { +public: + using iterator_category = std::forward_iterator_tag; + using value_type = NodeRef; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + +private: using BlockTraits = GraphTraits<BlockT *>; using SuccIterTy = typename BlockTraits::ChildIteratorType; @@ -99,7 +104,6 @@ class RNSuccIterator public: using Self = RNSuccIterator<NodeRef, BlockT, RegionT>; - using value_type = typename super::value_type; /// Create begin iterator of a RegionNode. inline RNSuccIterator(NodeRef node) @@ -163,9 +167,7 @@ public: /// are contained in the Region and its subregions. This is close to a virtual /// control flow graph of the Region. template <class NodeRef, class BlockT, class RegionT> -class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> - : public std::iterator<std::forward_iterator_tag, NodeRef> { - using super = std::iterator<std::forward_iterator_tag, NodeRef>; +class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> { using BlockTraits = GraphTraits<BlockT *>; using SuccIterTy = typename BlockTraits::ChildIteratorType; @@ -173,8 +175,13 @@ class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> SuccIterTy Itor; public: + using iterator_category = std::forward_iterator_tag; + using value_type = NodeRef; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + using Self = RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>; - using value_type = typename super::value_type; /// Create the iterator from a RegionNode. /// diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index b3f199de2cfa..ae9c73fede96 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -505,6 +505,17 @@ public: /// Erase Value from ValueExprMap and ExprValueMap. void eraseValueFromMap(Value *V); + /// Does operation \p BinOp between \p LHS and \p RHS provably not have + /// a signed/unsigned overflow (\p Signed)? + bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, + const SCEV *LHS, const SCEV *RHS); + + /// Parse NSW/NUW flags from add/sub/mul IR binary operation \p Op into + /// SCEV no-wrap flags, and deduce flag[s] that aren't known yet. + /// Does not mutate the original instruction. + std::pair<SCEV::NoWrapFlags, bool /*Deduced*/> + getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO); + /// Return a SCEV expression for the full generality of the specified /// expression.
   /// Return a SCEV expression for the full generality of the specified
   /// expression.
   const SCEV *getSCEV(Value *V);
 
@@ -512,7 +523,8 @@ public:
   const SCEV *getConstant(ConstantInt *V);
   const SCEV *getConstant(const APInt &Val);
   const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
-  const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
+  const SCEV *getLosslessPtrToIntExpr(const SCEV *Op, unsigned Depth = 0);
+  const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty);
   const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
   const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
   const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
@@ -575,7 +587,6 @@ public:
   const SCEV *getGEPExpr(GEPOperator *GEP,
                          const SmallVectorImpl<const SCEV *> &IndexExprs);
   const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW);
-  const SCEV *getSignumExpr(const SCEV *Op);
   const SCEV *getMinMaxExpr(SCEVTypes Kind,
                             SmallVectorImpl<const SCEV *> &Operands);
   const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
@@ -622,10 +633,26 @@ public:
   const SCEV *getNotSCEV(const SCEV *V);
 
   /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+  ///
+  /// If the LHS and RHS are pointers which don't share a common base
+  /// (according to getPointerBase()), this returns a SCEVCouldNotCompute.
+  /// To compute the difference between two unrelated pointers, you can
+  /// explicitly convert the arguments using getPtrToIntExpr(), for pointer
+  /// types that support it.
   const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
                            SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
                            unsigned Depth = 0);
 
+  /// Compute ceil(N / D). N and D are treated as unsigned values.
+  ///
+  /// Since SCEV doesn't have native ceiling division, this generates a
+  /// SCEV expression of the following form:
+  ///
+  /// umin(N, 1) + floor((N - umin(N, 1)) / D)
+  ///
+  /// A denominator of zero or poison is handled the same way as getUDivExpr().
+  const SCEV *getUDivCeilSCEV(const SCEV *N, const SCEV *D);
+
   /// Return a SCEV corresponding to a conversion of the input value to the
   /// specified type. If the type must be extended, it is zero extended.
   const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
@@ -705,17 +732,25 @@ public:
   bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
                                    const SCEV *LHS, const SCEV *RHS);
 
-  /// Returns the maximum trip count of the loop if it is a single-exit
-  /// loop and we can compute a small maximum for that loop.
-  ///
-  /// Implemented in terms of the \c getSmallConstantTripCount overload with
-  /// the single exiting block passed to it. See that routine for details.
+  /// Convert from an "exit count" (i.e. "backedge taken count") to a "trip
+  /// count". A "trip count" is the number of times the header of the loop
+  /// will execute if an exit is taken after the specified number of backedges
+  /// have been taken (e.g. TripCount = ExitCount + 1). A zero result
+  /// must be interpreted as a loop having an unknown trip count.
+  const SCEV *getTripCountFromExitCount(const SCEV *ExitCount);
+
+  /// Returns the exact trip count of the loop if we can compute it, and
+  /// the result is a small constant. '0' is used to represent an unknown
+  /// or non-constant trip count. Note that a trip count is simply one more
+  /// than the backedge taken count for the loop.
   unsigned getSmallConstantTripCount(const Loop *L);
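A worked instance of the umin-based identity documented above, with SE, N and D assumed to be in scope (N and D being unsigned SCEVs of the same type):

#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

// N = 7, D = 2: umin(7,1) = 1; 1 + floor((7-1)/2) = 1 + 3 = 4 = ceil(7/2).
// N = 0, D = 2: umin(0,1) = 0; 0 + floor((0-0)/2) = 0 = ceil(0/2).
// The umin(N, 1) term is what lets N = 0 avoid the unsigned wrap that a
// naive 1 + floor((N - 1) / D) rewrite would produce.
static const SCEV *ceilOfUDiv(ScalarEvolution &SE, const SCEV *N,
                              const SCEV *D) {
  return SE.getUDivCeilSCEV(N, D);
}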
-  /// Returns the maximum trip count of this loop as a normal unsigned
-  /// value. Returns 0 if the trip count is unknown or not constant. This
-  /// "trip count" assumes that control exits via ExitingBlock. More
-  /// precisely, it is the number of times that control may reach ExitingBlock
+  /// Return the exact trip count for this loop if we exit through ExitingBlock.
+  /// '0' is used to represent an unknown or non-constant trip count. Note
+  /// that a trip count is simply one more than the backedge taken count for
+  /// the same exit.
+  /// This "trip count" assumes that control exits via ExitingBlock. More
+  /// precisely, it is the number of times that control will reach ExitingBlock
   /// before taking the branch. For loops with multiple exits, it may not be
   /// the number of times that the loop header executes if the loop exits
   /// prematurely via another branch.
@@ -727,12 +762,18 @@ public:
   /// Returns 0 if the trip count is unknown or not constant.
   unsigned getSmallConstantMaxTripCount(const Loop *L);
 
+  /// Returns the largest constant divisor of the trip count as a normal
+  /// unsigned value, if possible. This means that the actual trip count is
+  /// always a multiple of the returned value. Returns 1 if the trip count is
+  /// unknown or not guaranteed to be the multiple of a constant. Will also
+  /// return 1 if the trip count is very large (>= 2^32).
+  /// Note that the argument is an exit count for loop L, NOT a trip count.
+  unsigned getSmallConstantTripMultiple(const Loop *L,
+                                        const SCEV *ExitCount);
+
   /// Returns the largest constant divisor of the trip count of the
-  /// loop if it is a single-exit loop and we can compute a small maximum for
-  /// that loop.
-  ///
-  /// Implemented in terms of the \c getSmallConstantTripMultiple overload with
-  /// the single exiting block passed to it. See that routine for details.
+  /// loop. Will return 1 if no trip count could be computed, or if a
+  /// divisor could not be found.
   unsigned getSmallConstantTripMultiple(const Loop *L);
 
   /// Returns the largest constant divisor of the trip count of this loop as a
@@ -938,11 +979,24 @@ public:
   bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
                         const SCEV *RHS);
 
+  /// Check whether the condition described by Pred, LHS, and RHS is true or
+  /// false. If we know it, return the evaluation of this condition. If neither
+  /// is proved, return None.
+  Optional<bool> evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                   const SCEV *RHS);
+
   /// Test if the given expression is known to satisfy the condition described
   /// by Pred, LHS, and RHS in the given Context.
   bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
                           const SCEV *RHS, const Instruction *Context);
 
+  /// Check whether the condition described by Pred, LHS, and RHS is true or
+  /// false in the given \p Context. If we know it, return the evaluation of
+  /// this condition. If neither is proved, return None.
+  Optional<bool> evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                     const SCEV *RHS,
+                                     const Instruction *Context);
+
   /// Test if the condition described by Pred, LHS, RHS is known to be true on
   /// every iteration of the loop of the recurrence LHS.
   bool isKnownOnEveryIteration(ICmpInst::Predicate Pred,
@@ -1177,6 +1231,9 @@ public:
   /// sharpen it.
   void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags);
 
+  /// Try to apply information from loop guards for \p L to \p Expr.
+  const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L);
+
 private:
   /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
   /// Value is deleted.
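The two predicate helpers added above compose naturally with applyLoopGuards(). A minimal sketch, assuming the caller provides SE, the loop L and the two operands; the wrapper name is illustrative:

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Tighten LHS with facts from the loop's guarding conditions, then ask for a
// definitive three-way answer instead of the one-sided isKnownPredicate().
static bool provablyULT(ScalarEvolution &SE, const Loop *L, const SCEV *LHS,
                        const SCEV *RHS) {
  const SCEV *Guarded = SE.applyLoopGuards(LHS, L);
  if (Optional<bool> Res =
          SE.evaluatePredicate(ICmpInst::ICMP_ULT, Guarded, RHS))
    return *Res; // proven true or proven false
  return false;  // neither side proved; stay conservative
}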
@@ -1225,7 +1282,8 @@ private:
   /// The type for ExprValueMap.
   using ValueOffsetPair = std::pair<Value *, ConstantInt *>;
-  using ExprValueMapType = DenseMap<const SCEV *, SetVector<ValueOffsetPair>>;
+  using ValueOffsetPairSetVector = SmallSetVector<ValueOffsetPair, 4>;
+  using ExprValueMapType = DenseMap<const SCEV *, ValueOffsetPairSetVector>;
 
   /// ExprValueMap -- This map records the original values from which
   /// the SCEV expr is generated.
@@ -1277,7 +1335,7 @@ private:
   DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;
 
   /// Return the Value set from which the SCEV expr is generated.
-  SetVector<ValueOffsetPair> *getSCEVValues(const SCEV *S);
+  ValueOffsetPairSetVector *getSCEVValues(const SCEV *S);
 
   /// Private helper method for the GetMinTrailingZeros method
   uint32_t GetMinTrailingZerosImpl(const SCEV *S);
@@ -1324,8 +1382,6 @@ private:
              !isa<SCEVCouldNotCompute>(MaxNotTaken);
     }
 
-    bool hasOperand(const SCEV *S) const;
-
     /// Test whether this ExitLimit contains all information.
     bool hasFullInfo() const {
       return !isa<SCEVCouldNotCompute>(ExactNotTaken);
     }
@@ -1376,6 +1432,9 @@ private:
     /// True iff the backedge is taken either exactly Max or zero times.
     bool MaxOrZero = false;
 
+    /// SCEV expressions used in any of the ExitNotTakenInfo counts.
+    SmallPtrSet<const SCEV *, 4> Operands;
+
     bool isComplete() const { return IsComplete; }
     const SCEV *getConstantMax() const { return ConstantMax; }
 
@@ -1444,10 +1503,7 @@ private:
     /// Return true if any backedge taken count expressions refer to the given
     /// subexpression.
-    bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
-
-    /// Invalidate this result and free associated memory.
-    void clear();
+    bool hasOperand(const SCEV *S) const;
   };
 
   /// Cache the backedge-taken count of the loops for this function as they
@@ -1502,6 +1558,10 @@ private:
     return getLoopProperties(L).HasNoAbnormalExits;
   }
 
+  /// Return true if this loop is finite by assumption. That is, for the loop
+  /// to be infinite, it must also have undefined behavior.
+  bool loopIsFiniteByAssumption(const Loop *L);
+
   /// Compute a LoopDisposition value.
   LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
 
@@ -1558,6 +1618,12 @@ private:
   ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop,
                                      const SCEV *MaxBECount, unsigned BitWidth);
 
+  /// If the unknown expression U corresponds to a simple recurrence, return
+  /// a constant range which represents the entire recurrence. Note that
+  /// *add* recurrences with loop invariant steps aren't represented by
+  /// SCEVUnknowns and thus don't use this mechanism.
+  ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U);
+
   /// We know that there is no SCEV for the specified value. Analyze the
   /// expression.
   const SCEV *createSCEV(Value *V);
@@ -1966,11 +2032,6 @@ private:
   Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
   createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI);
 
-  /// Compute the backedge taken count knowing the interval difference, the
-  /// stride and presence of the equality in the comparison.
-  const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride,
-                             bool Equality);
-
   /// Compute the maximum backedge count based on the range of values
   /// permitted by Start, End, and Stride. This is for loops of the form
   /// {Start, +, Stride} LT End.
@@ -1983,15 +2044,13 @@ private:
   /// Verify if a linear IV with positive stride can overflow when in a
   /// less-than comparison, knowing the invariant term of the comparison,
-  /// the stride and the knowledge of NSW/NUW flags on the recurrence.
-  bool doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
-                          bool NoWrap);
+  /// the stride.
+  bool canIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned);
 
   /// Verify if a linear IV with negative stride can overflow when in a
   /// greater-than comparison, knowing the invariant term of the comparison,
-  /// the stride and the knowledge of NSW/NUW flags on the recurrence.
-  bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
-                          bool NoWrap);
+  /// the stride.
+  bool canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned);
 
   /// Get add expr already created or create a new one.
   const SCEV *getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
@@ -2021,9 +2080,6 @@ private:
   /// Assign A and B to LHS and RHS, respectively.
   bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
 
-  /// Try to apply information from loop guards for \p L to \p Expr.
-  const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L);
-
   /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
   /// `UniqueSCEVs`.
   ///
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
index 98d53237d4a0..20acb407ead0 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
@@ -33,6 +33,9 @@ public:
   AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
                     AAQueryInfo &AAQI);
 
+  bool invalidate(Function &F, const PreservedAnalyses &PA,
+                  FunctionAnalysisManager::Invalidator &Inv);
+
 private:
   Value *GetBaseValue(const SCEV *S);
 };
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 37e675f08afc..c0da311e4e48 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -210,8 +208,6 @@ class Type;
       return make_range(op_begin(), op_end());
     }
 
-    Type *getType() const { return getOperand(0)->getType(); }
-
     NoWrapFlags getNoWrapFlags(NoWrapFlags Mask = NoWrapMask) const {
       return (NoWrapFlags)(SubclassData & Mask);
     }
@@ -293,6 +291,8 @@ class Type;
       : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
 
   public:
+    Type *getType() const { return getOperand(0)->getType(); }
+
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
       return S->getSCEVType() == scMulExpr;
@@ -359,6 +359,7 @@ class Type;
       : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {}
 
   public:
+    Type *getType() const { return getStart()->getType(); }
    const SCEV *getStart() const { return Operands[0]; }
    const Loop *getLoop() const { return L; }
 
@@ -401,6 +402,11 @@ class Type;
     /// iteration number.
     const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const;
 
+    /// Return the value of this chain of recurrences at the specified iteration
+    /// number. Takes an explicit list of operands to represent an AddRec.
+    static const SCEV *evaluateAtIteration(ArrayRef<const SCEV *> Operands,
+                                           const SCEV *It, ScalarEvolution &SE);
+
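The static overload is what lets SCEVLoopAddRecRewriter (changed below) evaluate a recurrence whose operands were just rewritten, without first materializing a new SCEVAddRecExpr. A sketch for the affine case, with SE and It assumed to come from a real caller:

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

// For the AddRec {A,+,B}, the value at iteration It is A + It * B (higher
// order recurrences add binomial-coefficient terms).
static const SCEV *affineAtIteration(ScalarEvolution &SE, const SCEV *A,
                                     const SCEV *B, const SCEV *It) {
  SmallVector<const SCEV *, 2> Ops = {A, B};
  return SCEVAddRecExpr::evaluateAtIteration(Ops, It, SE);
}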
    /// Return the number of iterations of this loop that produce
    /// values in the specified constant range. Another way of
    /// looking at this is that it returns the first iteration number
@@ -440,6 +446,8 @@ class Type;
     }
 
   public:
+    Type *getType() const { return getOperand(0)->getType(); }
+
     static bool classof(const SCEV *S) {
       return isMinMaxType(S->getSCEVType());
     }
@@ -895,13 +903,10 @@ class Type;
         Operands.push_back(visit(Op));
 
       const Loop *L = Expr->getLoop();
-      const SCEV *Res = SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags());
       if (0 == Map.count(L))
-        return Res;
+        return SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags());
 
-      const SCEVAddRecExpr *Rec = cast<SCEVAddRecExpr>(Res);
-      return Rec->evaluateAtIteration(Map[L], SE);
+      return SCEVAddRecExpr::evaluateAtIteration(Operands, Map[L], SE);
     }
 
   private:
diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h
index 81a2533152de..27c58c0afa8a 100644
--- a/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -470,8 +470,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::Solve() {
   while (!BBWorkList.empty() || !ValueWorkList.empty()) {
     // Process the value work list.
     while (!ValueWorkList.empty()) {
-      Value *V = ValueWorkList.back();
-      ValueWorkList.pop_back();
+      Value *V = ValueWorkList.pop_back_val();
 
       LLVM_DEBUG(dbgs() << "\nPopped off V-WL: " << *V << "\n");
diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
index 9838d629e93e..92459ea79ab4 100644
--- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
@@ -13,8 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
-#define LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+#ifndef LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
+#define LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
@@ -89,4 +89,4 @@ private:
 } // namespace llvm
 
-#endif // LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+#endif // LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
index 358f757314ee..f9bac739cee6 100644
--- a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
+++ b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
-#define LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
+#ifndef LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H
+#define LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/CallGraph.h"
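The SparsePropagation change above is a small idiom swap: SmallVector::pop_back_val() combines back() and pop_back() into one call. A self-contained illustration:

#include "llvm/ADT/SmallVector.h"

using namespace llvm;

static int drainWorklist(SmallVector<int, 8> &Worklist) {
  int Sum = 0;
  while (!Worklist.empty())
    Sum += Worklist.pop_back_val(); // fetch and remove the tail element
  return Sum;
}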
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index defc95d0062a..ded53617b304 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -329,6 +329,12 @@ TLI_DEFINE_STRING_INTERNAL("__isoc99_scanf")
 /// int __isoc99_sscanf(const char *s, const char *format, ...)
 TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_sscanf)
 TLI_DEFINE_STRING_INTERNAL("__isoc99_sscanf")
+/// void *__kmpc_alloc_shared(size_t nbyte);
+TLI_DEFINE_ENUM_INTERNAL(__kmpc_alloc_shared)
+TLI_DEFINE_STRING_INTERNAL("__kmpc_alloc_shared")
+/// void __kmpc_free_shared(void *ptr, size_t nbyte);
+TLI_DEFINE_ENUM_INTERNAL(__kmpc_free_shared)
+TLI_DEFINE_STRING_INTERNAL("__kmpc_free_shared")
 /// double __log10_finite(double x);
 TLI_DEFINE_ENUM_INTERNAL(log10_finite)
 TLI_DEFINE_STRING_INTERNAL("__log10_finite")
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 34a8a1e3407c..22bfeda0efd0 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -28,7 +28,7 @@ class Triple;
 struct VecDesc {
   StringRef ScalarFnName;
   StringRef VectorFnName;
-  unsigned VectorizationFactor;
+  ElementCount VectorizationFactor;
 };
 
 enum LibFunc : unsigned {
@@ -52,6 +52,7 @@ class TargetLibraryInfoImpl {
   llvm::DenseMap<unsigned, std::string> CustomNames;
   static StringLiteral const StandardNames[NumLibFuncs];
   bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
+  unsigned SizeOfInt;
 
   enum AvailabilityState {
     StandardName = 3, // (memset to all ones)
@@ -86,11 +87,12 @@ public:
   /// addVectorizableFunctionsFromVecLib for filling up the tables of
   /// vectorizable functions.
   enum VectorLibrary {
-    NoLibrary,  // Don't use any vector library.
-    Accelerate, // Use Accelerate framework.
-    LIBMVEC_X86,// GLIBC Vector Math library.
-    MASSV,      // IBM MASS vector library.
-    SVML        // Intel short vector math library.
+    NoLibrary,        // Don't use any vector library.
+    Accelerate,       // Use Accelerate framework.
+    DarwinLibSystemM, // Use Darwin's libsystem_m.
+    LIBMVEC_X86,      // GLIBC Vector Math library.
+    MASSV,            // IBM MASS vector library.
+    SVML              // Intel short vector math library.
   };
 
   TargetLibraryInfoImpl();
@@ -152,7 +154,7 @@ public:
   /// Return true if the function F has a vector equivalent with vectorization
   /// factor VF.
-  bool isFunctionVectorizable(StringRef F, unsigned VF) const {
+  bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const {
     return !getVectorizedFunction(F, VF).empty();
   }
 
@@ -162,19 +164,7 @@ public:
   /// Return the name of the equivalent of F, vectorized with factor VF. If no
   /// such mapping exists, return the empty string.
-  StringRef getVectorizedFunction(StringRef F, unsigned VF) const;
-
-  /// Return true if the function F has a scalar equivalent, and set VF to be
-  /// the vectorization factor.
-  bool isFunctionScalarizable(StringRef F, unsigned &VF) const {
-    return !getScalarizedFunction(F, VF).empty();
-  }
-
-  /// Return the name of the equivalent of F, scalarized. If no such mapping
-  /// exists, return the empty string.
-  ///
-  /// Set VF to the vectorization factor.
-  StringRef getScalarizedFunction(StringRef F, unsigned &VF) const;
+  StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const;
 
   /// Set to true iff i32 parameters to library functions should have signext
   /// or zeroext attributes if they correspond to C-level int or unsigned int,
@@ -200,9 +190,25 @@ public:
   /// This queries the 'wchar_size' metadata.
   unsigned getWCharSize(const Module &M) const;
 
+  /// Get size of a C-level int or unsigned int, in bits.
+  unsigned getIntSize() const {
+    return SizeOfInt;
+  }
+
+  /// Initialize the C-level size of an integer.
+ void setIntSize(unsigned Bits) { + SizeOfInt = Bits; + } + /// Returns the largest vectorization factor used in the list of /// vector functions. - unsigned getWidestVF(StringRef ScalarF) const; + void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, + ElementCount &Scalable) const; + + /// Returns true if call site / callee has cdecl-compatible calling + /// conventions. + static bool isCallingConvCCompatible(CallBase *CI); + static bool isCallingConvCCompatible(Function *Callee); }; /// Provides information about what library functions are available for @@ -317,13 +323,13 @@ public: bool has(LibFunc F) const { return getState(F) != TargetLibraryInfoImpl::Unavailable; } - bool isFunctionVectorizable(StringRef F, unsigned VF) const { + bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const { return Impl->isFunctionVectorizable(F, VF); } bool isFunctionVectorizable(StringRef F) const { return Impl->isFunctionVectorizable(F); } - StringRef getVectorizedFunction(StringRef F, unsigned VF) const { + StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const { return Impl->getVectorizedFunction(F, VF); } @@ -395,6 +401,11 @@ public: return Impl->getWCharSize(M); } + /// \copydoc TargetLibraryInfoImpl::getIntSize() + unsigned getIntSize() const { + return Impl->getIntSize(); + } + /// Handle invalidation from the pass manager. /// /// If we try to invalidate this info, just return false. It cannot become @@ -409,8 +420,9 @@ public: } /// Returns the largest vectorization factor used in the list of /// vector functions. - unsigned getWidestVF(StringRef ScalarF) const { - return Impl->getWidestVF(ScalarF); + void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, + ElementCount &ScalableVF) const { + Impl->getWidestVF(ScalarF, FixedVF, ScalableVF); } /// Check if the function "F" is listed in a library known to LLVM. diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index cdfb04424e56..628058142e48 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -21,11 +21,13 @@ #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H +#include "llvm/Analysis/IVDescriptors.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/InstructionCost.h" #include <functional> @@ -59,6 +61,7 @@ class TargetLibraryInfo; class Type; class User; class Value; +class VPIntrinsic; struct KnownBits; template <typename T> class Optional; @@ -118,46 +121,34 @@ class IntrinsicCostAttributes { SmallVector<Type *, 4> ParamTys; SmallVector<const Value *, 4> Arguments; FastMathFlags FMF; - ElementCount VF = ElementCount::getFixed(1); // If ScalarizationCost is UINT_MAX, the cost of scalarizing the // arguments and the return value will be computed based on types. 
- unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); + InstructionCost ScalarizationCost = InstructionCost::getInvalid(); public: - IntrinsicCostAttributes(const IntrinsicInst &I); + IntrinsicCostAttributes( + Intrinsic::ID Id, const CallBase &CI, + InstructionCost ScalarCost = InstructionCost::getInvalid()); - IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI); - - IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, - ElementCount Factor); - - IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, - ElementCount Factor, unsigned ScalarCost); - - IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, - ArrayRef<Type *> Tys, FastMathFlags Flags); - - IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, - ArrayRef<Type *> Tys, FastMathFlags Flags, - unsigned ScalarCost); - - IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, - ArrayRef<Type *> Tys, FastMathFlags Flags, - unsigned ScalarCost, - const IntrinsicInst *I); - - IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, - ArrayRef<Type *> Tys); + IntrinsicCostAttributes( + Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, + FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, + InstructionCost ScalarCost = InstructionCost::getInvalid()); IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args); + IntrinsicCostAttributes( + Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args, + ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(), + const IntrinsicInst *I = nullptr, + InstructionCost ScalarCost = InstructionCost::getInvalid()); + Intrinsic::ID getID() const { return IID; } const IntrinsicInst *getInst() const { return II; } Type *getReturnType() const { return RetTy; } - ElementCount getVectorFactor() const { return VF; } FastMathFlags getFlags() const { return FMF; } - unsigned getScalarizationCost() const { return ScalarizationCost; } + InstructionCost getScalarizationCost() const { return ScalarizationCost; } const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } @@ -165,9 +156,7 @@ public: return Arguments.empty(); } - bool skipScalarizationCost() const { - return ScalarizationCost != std::numeric_limits<unsigned>::max(); - } + bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } }; class TargetTransformInfo; @@ -247,8 +236,6 @@ public: Cost = getUserCost(I, kind); break; } - if (Cost == -1) - Cost.setInvalid(); return Cost; } @@ -277,9 +264,10 @@ public: }; /// Estimate the cost of a GEP operation when lowered. - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TargetCostKind CostKind = TCK_SizeAndLatency) const; + InstructionCost + getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TargetCostKind CostKind = TCK_SizeAndLatency) const; /// \returns A value by which our inlining threshold should be multiplied. /// This is primarily used to bump up the inlining threshold wholesale on @@ -306,7 +294,7 @@ public: /// \return the expected cost of a memcpy, which could e.g. depend on the /// source/destination type and alignment and the number of bytes copied. - int getMemcpyCost(const Instruction *I) const; + InstructionCost getMemcpyCost(const Instruction *I) const; /// \return The estimated number of case clusters when lowering \p 'SI'. 
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump @@ -329,16 +317,20 @@ public: /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - int getUserCost(const User *U, ArrayRef<const Value *> Operands, - TargetCostKind CostKind) const; + InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, + TargetCostKind CostKind) const; /// This is a helper function which calls the two-argument getUserCost /// with \p Operands which are the current operands U has. - int getUserCost(const User *U, TargetCostKind CostKind) const { + InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const { SmallVector<const Value *, 4> Operands(U->operand_values()); return getUserCost(U, Operands, CostKind); } + /// If a branch or a select condition is skewed in one direction by more than + /// this factor, it is very likely to be predicted correctly. + BranchProbability getPredictableBranchThreshold() const; + /// Return true if branch divergence exists. /// /// Branch divergence has a significantly negative impact on GPU performance @@ -638,13 +630,15 @@ public: DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const; - /// \return True is LSR should make efforts to create/preserve post-inc - /// addressing mode expressions. - bool shouldFavorPostInc() const; + enum AddressingModeKind { + AMK_PreIndexed, + AMK_PostIndexed, + AMK_None + }; - /// Return true if LSR should make efforts to generate indexed addressing - /// modes that operate across loop iterations. - bool shouldFavorBackedgeIndex(const Loop *L) const; + /// Return the preferred addressing mode LSR should make efforts to generate. + AddressingModeKind getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const; /// Return true if the target supports masked store. bool isLegalMaskedStore(Type *DataType, Align Alignment) const; @@ -689,9 +683,10 @@ public: /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. /// TODO: Handle pre/postinc as well. - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale, - unsigned AddrSpace = 0) const; + InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale, + unsigned AddrSpace = 0) const; /// Return true if the loop strength reduce pass should make /// Instruction* based TTI queries to isLegalAddressingMode(). This is @@ -714,7 +709,7 @@ public: bool isTypeLegal(Type *Ty) const; /// Returns the estimated number of registers required to represent \p Ty. - unsigned getRegUsageForType(Type *Ty) const; + InstructionCost getRegUsageForType(Type *Ty) const; /// Return true if switches should be turned into lookup tables for the /// target. @@ -724,6 +719,9 @@ public: /// containing this constant value for the target. bool shouldBuildLookupTablesForConstant(Constant *C) const; + /// Return true if lookup tables should be turned into relative lookup tables. + bool shouldBuildRelLookupTables() const; + /// Return true if the input function which is cold at all call sites, /// should use coldcc calling convention. bool useColdCCForColdCall(Function &F) const; @@ -731,14 +729,15 @@ public: /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or /// extracted from vectors. 
-  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
-                                    bool Insert, bool Extract) const;
+  InstructionCost getScalarizationOverhead(VectorType *Ty,
+                                           const APInt &DemandedElts,
+                                           bool Insert, bool Extract) const;
 
   /// Estimate the overhead of scalarizing an instruction's unique
-  /// non-constant operands. The types of the arguments are ordinarily
-  /// scalar, in which case the costs are multiplied with VF.
-  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
-                                            unsigned VF) const;
+  /// non-constant operands. The (potentially vector) types to use for each
+  /// argument are passed via Tys.
+  InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                                   ArrayRef<Type *> Tys) const;
 
   /// If target has efficient vector element load/store instructions, it can
   /// return true here so that insertion/extraction costs are not added to
@@ -798,7 +797,7 @@ public:
   /// Determine if the target supports unaligned memory accesses.
   bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                       unsigned AddressSpace = 0,
-                                      unsigned Alignment = 1,
+                                      Align Alignment = Align(1),
                                       bool *Fast = nullptr) const;
 
   /// Return hardware support for population count.
@@ -815,20 +814,23 @@ public:
   /// Return the expected cost of supporting the floating point operation
   /// of the specified type.
-  int getFPOpCost(Type *Ty) const;
+  InstructionCost getFPOpCost(Type *Ty) const;
 
   /// Return the expected cost of materializing for the given integer
   /// immediate of the specified type.
-  int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;
+  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
+                                TargetCostKind CostKind) const;
 
   /// Return the expected cost of materialization for the given integer
   /// immediate of the specified type for a given instruction. The cost can be
   /// zero if the immediate can be folded into the specified instruction.
-  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
-                        TargetCostKind CostKind,
-                        Instruction *Inst = nullptr) const;
-  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty, TargetCostKind CostKind) const;
+  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
+                                    const APInt &Imm, Type *Ty,
+                                    TargetCostKind CostKind,
+                                    Instruction *Inst = nullptr) const;
+  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+                                      const APInt &Imm, Type *Ty,
+                                      TargetCostKind CostKind) const;
 
   /// Return the expected cost for the given integer when optimising
   /// for size. This is different than the other integer immediate cost
@@ -837,8 +839,8 @@ public:
   /// with another such as Thumb. This return value is used as a penalty when
   /// the total cost for a constant is calculated (the bigger the cost, the
   /// more beneficial constant hoisting is).
-  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
-                            Type *Ty) const;
+  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
+                                        const APInt &Imm, Type *Ty) const;
   /// @}
 
   /// \name Vector Target Information
   /// @{
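Many interfaces in this file switch from plain int (with -1 or UINT_MAX sentinels) to InstructionCost, which carries an explicit invalid state. A short, self-contained sketch of its semantics:

#include "llvm/Support/InstructionCost.h"
#include <cassert>

using namespace llvm;

static void instructionCostBasics() {
  InstructionCost C = InstructionCost::getInvalid();
  assert(!C.isValid() && "callers test isValid() instead of comparing to -1");
  C = 4;  // implicit construction from an integer cost
  C += 2; // arithmetic is defined on valid costs
  assert(C.isValid() && *C.getValue() == 6);
}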
@@ -856,44 +858,13 @@ public:
     SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
     SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                          ///< with any shuffle mask.
-    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
+    SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
                          ///< shuffle mask.
+    SK_Splice            ///< Concatenates elements from the first input vector
+                         ///< with elements of the second input vector, returning
+                         ///< a vector of the same type as the input vectors.
   };
 
-  /// Kind of the reduction data.
-  enum ReductionKind {
-    RK_None,           /// Not a reduction.
-    RK_Arithmetic,     /// Binary reduction data.
-    RK_MinMax,         /// Min/max reduction data.
-    RK_UnsignedMinMax, /// Unsigned min/max reduction data.
-  };
-
-  /// Contains opcode + LHS/RHS parts of the reduction operations.
-  struct ReductionData {
-    ReductionData() = delete;
-    ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
-        : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
-      assert(Kind != RK_None && "expected binary or min/max reduction only.");
-    }
-    unsigned Opcode = 0;
-    Value *LHS = nullptr;
-    Value *RHS = nullptr;
-    ReductionKind Kind = RK_None;
-    bool hasSameData(ReductionData &RD) const {
-      return Kind == RD.Kind && Opcode == RD.Opcode;
-    }
-  };
-
-  static ReductionKind matchPairwiseReduction(
-      const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
-
-  static ReductionKind matchVectorSplittingReduction(
-      const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
-
-  static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot,
-                                            unsigned &Opcode, VectorType *&Ty,
-                                            bool &IsPairwise);
-
   /// Additional information about an operand's possible values.
   enum OperandValueKind {
     OK_AnyValue, // Operand can have any value.
@@ -924,8 +895,10 @@ public:
   /// \return the target-provided register class name
   const char *getRegisterClassName(unsigned ClassID) const;
 
+  enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
+
   /// \return The width of the largest scalar or vector register type.
-  unsigned getRegisterBitWidth(bool Vector) const;
+  TypeSize getRegisterBitWidth(RegisterKind K) const;
 
   /// \return The width of the smallest vector register type.
   unsigned getMinVectorRegisterBitWidth() const;
@@ -940,12 +913,13 @@ public:
   /// creating vectors that span multiple vector registers.
   /// If false, the vectorization factor will be chosen based on the
   /// size of the widest element type.
-  bool shouldMaximizeVectorBandwidth(bool OptSize) const;
+  bool shouldMaximizeVectorBandwidth() const;
 
   /// \return The minimum vectorization factor for types of given element
   /// bit width, or 0 if there is no minimum VF. The returned value only
   /// applies when shouldMaximizeVectorBandwidth returns true.
-  unsigned getMinimumVF(unsigned ElemWidth) const;
+  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
+  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
 
   /// \return The maximum vectorization factor for types of given element
   /// bit width and opcode, or 0 if there is no maximum VF.
@@ -1036,7 +1010,7 @@ public:
   /// cases or optimizations based on those values.
   /// \p CxtI is the optional original context instruction, if one exists, to
   /// provide even more information.
-  int getArithmeticInstrCost(
+  InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       OperandValueKind Opd1Info = OK_AnyValue,
@@ -1047,12 +1021,14 @@ public:
       const Instruction *CxtI = nullptr) const;
 
   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
+  /// The exact mask may be passed as Mask, or else the array will be empty.
/// The index and subtype parameters are used by the subvector insertion and /// extraction shuffle kinds to show the insert/extract point and the type of /// the subvector being inserted/extracted. /// NOTE: For subvector extractions Tp represents the source type. - int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0, - VectorType *SubTp = nullptr) const; + InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, + ArrayRef<int> Mask = None, int Index = 0, + VectorType *SubTp = nullptr) const; /// Represents a hint about the context in which a cast is used. /// @@ -1093,44 +1069,50 @@ public: /// \return The expected cost of cast instructions, such as bitcast, trunc, /// zext, etc. If there is an existing instruction that holds Opcode, it /// may be passed in the 'I' parameter. - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, - const Instruction *I = nullptr) const; + InstructionCost + getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, + const Instruction *I = nullptr) const; /// \return The expected cost of a sign- or zero-extended vector extract. Use /// -1 to indicate that there is no information about the index value. - int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, - unsigned Index = -1) const; + InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, + unsigned Index = -1) const; /// \return The expected cost of control-flow related instructions such as - /// Phi, Ret, Br. - int getCFInstrCost(unsigned Opcode, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; + /// Phi, Ret, Br, Switch. + InstructionCost + getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, + const Instruction *I = nullptr) const; /// \returns The expected cost of compare and select instructions. If there /// is an existing instruction that holds Opcode, it may be passed in the /// 'I' parameter. The \p VecPred parameter can be used to indicate the select /// is using a compare with the specified predicate as condition. When vector /// types are passed, \p VecPred must be used for all lanes. - int getCmpSelInstrCost( - unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, - CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, - const Instruction *I = nullptr) const; + InstructionCost + getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, + CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. - int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index = -1) const; /// \return The cost of Load and Store instructions. 
- int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, - const Instruction *I = nullptr) const; + InstructionCost + getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \return The cost of masked Load and Store instructions. - int getMaskedMemoryOpCost( + InstructionCost getMaskedMemoryOpCost( unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; @@ -1143,7 +1125,7 @@ public: /// \p Alignment - alignment of single element /// \p I - the optional original context instruction, if one exists, e.g. the /// load/store to transform or the call to the gather/scatter intrinsic - int getGatherScatterOpCost( + InstructionCost getGatherScatterOpCost( unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr) const; @@ -1158,32 +1140,49 @@ public: /// \p AddressSpace is address space of the pointer. /// \p UseMaskForCond indicates if the memory access is predicated. /// \p UseMaskForGaps indicates if gaps should be masked. - int getInterleavedMemoryOpCost( + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, bool UseMaskForCond = false, bool UseMaskForGaps = false) const; - /// Calculate the cost of performing a vector reduction. + /// A helper function to determine the type of reduction algorithm used + /// for a given \p Opcode and set of FastMathFlags \p FMF. + static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) { + return FMF != None && !(*FMF).allowReassoc(); + } + + /// Calculate the cost of vector reduction intrinsics. /// /// This is the cost of reducing the vector value of type \p Ty to a scalar - /// value using the operation denoted by \p Opcode. The form of the reduction - /// can either be a pairwise reduction or a reduction that splits the vector - /// at every reduction level. + /// value using the operation denoted by \p Opcode. The FastMathFlags + /// parameter \p FMF indicates what type of reduction we are performing: + /// 1. Tree-wise. This is the typical 'fast' reduction performed that + /// involves successively splitting a vector into half and doing the + /// operation on the pair of halves until you have a scalar value. For + /// example: + /// (v0, v1, v2, v3) + /// ((v0+v2), (v1+v3), undef, undef) + /// ((v0+v2+v1+v3), undef, undef, undef) + /// This is the default behaviour for integer operations, whereas for + /// floating point we only do this if \p FMF indicates that + /// reassociation is allowed. + /// 2. Ordered. For a vector with N elements this involves performing N + /// operations in lane order, starting with an initial scalar value, i.e. + /// result = InitVal + v0 + /// result = result + v1 + /// result = result + v2 + /// result = result + v3 + /// This is only the case for FP operations and when reassociation is not + /// allowed. 
/// - /// Pairwise: - /// (v0, v1, v2, v3) - /// ((v0+v1), (v2+v3), undef, undef) - /// Split: - /// (v0, v1, v2, v3) - /// ((v0+v2), (v1+v3), undef, undef) - int getArithmeticReductionCost( - unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; - - int getMinMaxReductionCost( - VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; + InstructionCost getArithmeticReductionCost( + unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; + + InstructionCost getMinMaxReductionCost( + VectorType *Ty, VectorType *CondTy, bool IsUnsigned, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; /// Calculate the cost of an extended reduction pattern, similar to /// getArithmeticReductionCost of an Add reduction with an extension and @@ -1198,12 +1197,13 @@ public: /// \returns The cost of Intrinsic instructions. Analyses the real arguments. /// Three cases are handled: 1. scalar instruction 2. vector instruction /// 3. scalar instruction which is to be vectorized. - int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) const; + InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) const; /// \returns The cost of Call instructions. - int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; + InstructionCost getCallInstrCost( + Function *F, Type *RetTy, ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; /// \returns The number of pieces into which the provided type must be /// split during legalization. Zero is returned when the answer is unknown. @@ -1216,15 +1216,16 @@ public: /// The 'SE' parameter holds pointer for the scalar evolution object which /// is used in order to get the Ptr step value in case of constant stride. /// The 'Ptr' parameter holds SCEV of the access pointer. - int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr, - const SCEV *Ptr = nullptr) const; + InstructionCost getAddressComputationCost(Type *Ty, + ScalarEvolution *SE = nullptr, + const SCEV *Ptr = nullptr) const; /// \returns The cost, if any, of keeping values of the given types alive /// over a callsite. /// /// Some types may require the use of register classes that do not have /// any callee-saved registers, so would require a spill and fill. - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; + InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; /// \returns True if the intrinsic is a supported memory intrinsic. Info /// will contain additional information - whether the intrinsic may write @@ -1305,6 +1306,13 @@ public: bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; + /// \returns True if it is legal to vectorize the given reduction kind. + bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, + ElementCount VF) const; + + /// \returns True if the given type is supported for scalable vectors + bool isElementTypeLegalForScalableVector(Type *Ty) const; + /// \returns The new vector factor value if the target doesn't support \p /// SizeInBytes loads or has a better vector factor. 
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, @@ -1325,11 +1333,6 @@ public: bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present. }; - /// \returns True if the target wants to handle the given reduction idiom in - /// the intrinsics form instead of the shuffle form. - bool useReductionIntrinsic(unsigned Opcode, Type *Ty, - ReductionFlags Flags) const; - /// \returns True if the target prefers reductions in loop. bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const; @@ -1366,6 +1369,38 @@ public: /// Intrinsics") Use of %evl is discouraged when that is not the case. bool hasActiveVectorLength() const; + struct VPLegalization { + enum VPTransform { + // keep the predicating parameter + Legal = 0, + // where legal, discard the predicate parameter + Discard = 1, + // transform into something else that is also predicating + Convert = 2 + }; + + // How to transform the EVL parameter. + // Legal: keep the EVL parameter as it is. + // Discard: Ignore the EVL parameter where it is safe to do so. + // Convert: Fold the EVL into the mask parameter. + VPTransform EVLParamStrategy; + + // How to transform the operator. + // Legal: The target supports this operator. + // Convert: Convert this to a non-VP operation. + // The 'Discard' strategy is invalid. + VPTransform OpStrategy; + + bool shouldDoNothing() const { + return (EVLParamStrategy == Legal) && (OpStrategy == Legal); + } + VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy) + : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {} + }; + + /// \returns How the target needs this vector-predicated operation to be + /// transformed. + VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; /// @} /// @} @@ -1373,11 +1408,11 @@ public: private: /// Estimate the latency of specified instruction. /// Returns 1 as the default value. - int getInstructionLatency(const Instruction *I) const; + InstructionCost getInstructionLatency(const Instruction *I) const; /// Returns the expected throughput cost of the instruction. /// Returns -1 if the cost is unknown. - int getInstructionThroughput(const Instruction *I) const; + InstructionCost getInstructionThroughput(const Instruction *I) const; /// The abstract base class used to type erase specific TTI /// implementations. 
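A sketch of consuming the VPLegalization query above, assuming TTI and a vector-predicated intrinsic call VPI are supplied by the caller:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

static bool needsVPExpansion(const TargetTransformInfo &TTI,
                             const VPIntrinsic &VPI) {
  TargetTransformInfo::VPLegalization VPLegal =
      TTI.getVPLegalizationStrategy(VPI);
  if (VPLegal.shouldDoNothing())
    return false; // both the %evl parameter and the opcode are legal as-is
  // Otherwise %evl must be discarded or folded into the mask, and/or the
  // operation rewritten to a non-VP equivalent.
  return true;
}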
@@ -1394,19 +1429,21 @@ class TargetTransformInfo::Concept { public: virtual ~Concept() = 0; virtual const DataLayout &getDataLayout() const = 0; - virtual int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind) = 0; virtual unsigned getInliningThresholdMultiplier() = 0; virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; virtual int getInlinerVectorBonusPercent() = 0; - virtual int getMemcpyCost(const Instruction *I) = 0; + virtual InstructionCost getMemcpyCost(const Instruction *I) = 0; virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) = 0; - virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands, - TargetCostKind CostKind) = 0; + virtual InstructionCost getUserCost(const User *U, + ArrayRef<const Value *> Operands, + TargetCostKind CostKind) = 0; + virtual BranchProbability getPredictableBranchThreshold() = 0; virtual bool hasBranchDivergence() = 0; virtual bool useGPUDivergenceAnalysis() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; @@ -1458,8 +1495,8 @@ public: virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) = 0; - virtual bool shouldFavorPostInc() const = 0; - virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; + virtual AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; @@ -1471,24 +1508,27 @@ public: virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; virtual bool prefersVectorizedAddressing() = 0; - virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale, unsigned AddrSpace) = 0; + virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, + bool HasBaseReg, int64_t Scale, + unsigned AddrSpace) = 0; virtual bool LSRWithInstrQueries() = 0; virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool useAA() = 0; virtual bool isTypeLegal(Type *Ty) = 0; - virtual unsigned getRegUsageForType(Type *Ty) = 0; + virtual InstructionCost getRegUsageForType(Type *Ty) = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; + virtual bool shouldBuildRelLookupTables() = 0; virtual bool useColdCCForColdCall(Function &F) = 0; - virtual unsigned getScalarizationOverhead(VectorType *Ty, - const APInt &DemandedElts, - bool Insert, bool Extract) = 0; - virtual unsigned + virtual InstructionCost getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, + bool Extract) = 0; + virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, - unsigned VF) = 0; + ArrayRef<Type *> Tys) = 0; virtual bool supportsEfficientVectorElementLoadStore() = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; virtual MemCmpExpansionOptions @@ -1499,31 
+1539,33 @@ public: virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, - unsigned Alignment, + Align Alignment, bool *Fast) = 0; virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; - virtual int getFPOpCost(Type *Ty) = 0; - virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, - const APInt &Imm, Type *Ty) = 0; - virtual int getIntImmCost(const APInt &Imm, Type *Ty, - TargetCostKind CostKind) = 0; - virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind, - Instruction *Inst = nullptr) = 0; - virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty, - TargetCostKind CostKind) = 0; + virtual InstructionCost getFPOpCost(Type *Ty) = 0; + virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty) = 0; + virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, + TargetCostKind CostKind) = 0; + virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty, + TargetCostKind CostKind, + Instruction *Inst = nullptr) = 0; + virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TargetCostKind CostKind) = 0; virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0; virtual const char *getRegisterClassName(unsigned ClassID) const = 0; - virtual unsigned getRegisterBitWidth(bool Vector) const = 0; - virtual unsigned getMinVectorRegisterBitWidth() = 0; + virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; + virtual unsigned getMinVectorRegisterBitWidth() const = 0; virtual Optional<unsigned> getMaxVScale() const = 0; - virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; - virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; + virtual bool shouldMaximizeVectorBandwidth() const = 0; + virtual ElementCount getMinimumVF(unsigned ElemWidth, + bool IsScalable) const = 0; virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; @@ -1556,64 +1598,71 @@ public: virtual bool enableWritePrefetching() const = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; - virtual unsigned getArithmeticInstrCost( - unsigned Opcode, Type *Ty, - TTI::TargetCostKind CostKind, - OperandValueKind Opd1Info, - OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, - const Instruction *CxtI = nullptr) = 0; - virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index, - VectorType *SubTp) = 0; - virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - CastContextHint CCH, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; - virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst, - VectorType *VecTy, unsigned Index) = 0; - virtual int getCFInstrCost(unsigned Opcode, - TTI::TargetCostKind CostKind) = 0; - virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; - virtual int getVectorInstrCost(unsigned Opcode, Type *Val, - 
unsigned Index) = 0; - virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; - virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind) = 0; - virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - const Value *Ptr, bool VariableMask, - Align Alignment, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) = 0; + virtual InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + OperandValueKind Opd1Info, OperandValueKind Opd2Info, + OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0; + virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, + ArrayRef<int> Mask, int Index, + VectorType *SubTp) = 0; + virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src, CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I) = 0; + virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, + unsigned Index) = 0; + virtual InstructionCost getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) = 0; + virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) = 0; + virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) = 0; + virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) = 0; + virtual InstructionCost + getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, + bool VariableMask, Align Alignment, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) = 0; - virtual int getInterleavedMemoryOpCost( + virtual InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; - virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) = 0; - virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, bool IsUnsigned, - TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, + Optional<FastMathFlags> FMF, + TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, + TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getExtendedAddReductionCost( bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; - virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) = 0; - virtual int getCallInstrCost(Function *F, Type *RetTy, - ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) = 0; virtual unsigned getNumberOfParts(Type *Tp) = 0; - virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, - const SCEV *Ptr) = 0; - virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; + virtual InstructionCost + getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0; + virtual InstructionCost + getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) = 0; virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; @@ -1644,14 +1693,15 @@ public: virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const = 0; + virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, + ElementCount VF) const = 0; + virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0; virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const = 0; virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const = 0; - virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, - ReductionFlags) const = 0; virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const = 0; virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, @@ -1660,7 +1710,9 @@ public: virtual unsigned getGISelRematGlobalCost() const = 0; virtual bool supportsScalableVectors() const = 0; virtual bool hasActiveVectorLength() const = 0; - virtual int getInstructionLatency(const Instruction *I) = 0; + virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; + virtual VPLegalization + getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; }; template <typename T> @@ -1675,9 +1727,10 @@ public: return Impl.getDataLayout(); } - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - enum TargetTransformInfo::TargetCostKind CostKind) override { + InstructionCost + getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + enum TargetTransformInfo::TargetCostKind CostKind) override { return Impl.getGEPCost(PointeeType, Ptr, Operands); } unsigned getInliningThresholdMultiplier() override { @@ -1689,13 +1742,16 @@ public: int getInlinerVectorBonusPercent() override { return Impl.getInlinerVectorBonusPercent(); } - int getMemcpyCost(const Instruction *I) override { + InstructionCost getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } - int getUserCost(const User *U, ArrayRef<const Value *> Operands, - TargetCostKind CostKind) override { + InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, + TargetCostKind CostKind) override { return Impl.getUserCost(U, Operands, CostKind); } + BranchProbability getPredictableBranchThreshold() override { + return Impl.getPredictableBranchThreshold(); + } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } bool useGPUDivergenceAnalysis() override { return Impl.useGPUDivergenceAnalysis(); @@ -1801,9 +1857,10 @@ public: TargetLibraryInfo *LibInfo) override { return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } - bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); } - bool 
shouldFavorBackedgeIndex(const Loop *L) const override { - return Impl.shouldFavorBackedgeIndex(L); + AddressingModeKind + getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const override { + return Impl.getPreferredAddressingMode(L, SE); } bool isLegalMaskedStore(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedStore(DataType, Alignment); @@ -1838,9 +1895,10 @@ public: bool prefersVectorizedAddressing() override { return Impl.prefersVectorizedAddressing(); } - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) override { + InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale, + unsigned AddrSpace) override { return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); } @@ -1853,7 +1911,7 @@ public: } bool useAA() override { return Impl.useAA(); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } - unsigned getRegUsageForType(Type *Ty) override { + InstructionCost getRegUsageForType(Type *Ty) override { return Impl.getRegUsageForType(Ty); } bool shouldBuildLookupTables() override { @@ -1862,17 +1920,22 @@ public: bool shouldBuildLookupTablesForConstant(Constant *C) override { return Impl.shouldBuildLookupTablesForConstant(C); } + bool shouldBuildRelLookupTables() override { + return Impl.shouldBuildRelLookupTables(); + } bool useColdCCForColdCall(Function &F) override { return Impl.useColdCCForColdCall(F); } - unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, - bool Insert, bool Extract) override { + InstructionCost getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, bool Extract) override { return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } - unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, - unsigned VF) override { - return Impl.getOperandsScalarizationOverhead(Args, VF); + InstructionCost + getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + ArrayRef<Type *> Tys) override { + return Impl.getOperandsScalarizationOverhead(Args, Tys); } bool supportsEfficientVectorElementLoadStore() override { @@ -1896,7 +1959,7 @@ public: return Impl.isFPVectorizationPotentiallyUnsafe(); } bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, - unsigned AddressSpace, unsigned Alignment, + unsigned AddressSpace, Align Alignment, bool *Fast) override { return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, Alignment, Fast); @@ -1910,23 +1973,27 @@ public: return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); } - int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } + InstructionCost getFPOpCost(Type *Ty) override { + return Impl.getFPOpCost(Ty); + } - int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) override { + InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty) override { return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); } - int getIntImmCost(const APInt &Imm, Type *Ty, - TargetCostKind CostKind) override { + InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, + TargetCostKind CostKind) override { return Impl.getIntImmCost(Imm, Ty, CostKind); } - int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, - TargetCostKind CostKind, - Instruction *Inst = nullptr) override { + InstructionCost 
getIntImmCostInst(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty, + TargetCostKind CostKind, + Instruction *Inst = nullptr) override { return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst); } - int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind) override { + InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TargetCostKind CostKind) override { return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); } unsigned getNumberOfRegisters(unsigned ClassID) const override { @@ -1939,20 +2006,21 @@ public: const char *getRegisterClassName(unsigned ClassID) const override { return Impl.getRegisterClassName(ClassID); } - unsigned getRegisterBitWidth(bool Vector) const override { - return Impl.getRegisterBitWidth(Vector); + TypeSize getRegisterBitWidth(RegisterKind K) const override { + return Impl.getRegisterBitWidth(K); } - unsigned getMinVectorRegisterBitWidth() override { + unsigned getMinVectorRegisterBitWidth() const override { return Impl.getMinVectorRegisterBitWidth(); } Optional<unsigned> getMaxVScale() const override { return Impl.getMaxVScale(); } - bool shouldMaximizeVectorBandwidth(bool OptSize) const override { - return Impl.shouldMaximizeVectorBandwidth(OptSize); + bool shouldMaximizeVectorBandwidth() const override { + return Impl.shouldMaximizeVectorBandwidth(); } - unsigned getMinimumVF(unsigned ElemWidth) const override { - return Impl.getMinimumVF(ElemWidth); + ElementCount getMinimumVF(unsigned ElemWidth, + bool IsScalable) const override { + return Impl.getMinimumVF(ElemWidth, IsScalable); } unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { return Impl.getMaximumVF(ElemWidth, Opcode); @@ -2008,82 +2076,84 @@ public: BlockFrequencyInfo *BFI) override { return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - TTI::TargetCostKind CostKind, - OperandValueKind Opd1Info, - OperandValueKind Opd2Info, - OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args, - const Instruction *CxtI = nullptr) override { + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + OperandValueKind Opd1Info, OperandValueKind Opd2Info, + OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args, + const Instruction *CxtI = nullptr) override { return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); } - int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index, - VectorType *SubTp) override { - return Impl.getShuffleCost(Kind, Tp, Index, SubTp); + InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, + ArrayRef<int> Mask, int Index, + VectorType *SubTp) override { + return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp); } - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - CastContextHint CCH, TTI::TargetCostKind CostKind, - const Instruction *I) override { + InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I) override { return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); } - int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, - unsigned Index) override { + InstructionCost getExtractWithExtendCost(unsigned 
Opcode, Type *Dst, + VectorType *VecTy, + unsigned Index) override { return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); } - int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override { - return Impl.getCFInstrCost(Opcode, CostKind); + InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) override { + return Impl.getCFInstrCost(Opcode, CostKind, I); } - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) override { + InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) override { return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } - int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } - int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, TTI::TargetCostKind CostKind, - const Instruction *I) override { + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) override { return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, I); } - int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind) override { + InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind) override { return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); } - int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, - bool VariableMask, Align Alignment, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) override { + InstructionCost + getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, + bool VariableMask, Align Alignment, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) override { return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I); } - int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, - ArrayRef<unsigned> Indices, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - bool UseMaskForCond, - bool UseMaskForGaps) override { + InstructionCost getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, + bool UseMaskForCond, bool UseMaskForGaps) override { return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } - int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) override { - return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, - CostKind); - } - int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, bool IsUnsigned, + InstructionCost + getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, + Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) override { - return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, 
IsUnsigned, - CostKind); + return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); + } + InstructionCost + getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, + TTI::TargetCostKind CostKind) override { + return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind); } InstructionCost getExtendedAddReductionCost( bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, @@ -2091,23 +2161,23 @@ public: return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty, CostKind); } - int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) override { + InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) override { return Impl.getIntrinsicInstrCost(ICA, CostKind); } - int getCallInstrCost(Function *F, Type *RetTy, - ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind) override { + InstructionCost getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) override { return Impl.getCallInstrCost(F, RetTy, Tys, CostKind); } unsigned getNumberOfParts(Type *Tp) override { return Impl.getNumberOfParts(Tp); } - int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, - const SCEV *Ptr) override { + InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, + const SCEV *Ptr) override { return Impl.getAddressComputationCost(Ty, SE, Ptr); } - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { + InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { return Impl.getCostOfKeepingLiveOverCall(Tys); } bool getTgtMemIntrinsic(IntrinsicInst *Inst, @@ -2170,6 +2240,13 @@ public: return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, AddrSpace); } + bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, + ElementCount VF) const override { + return Impl.isLegalToVectorizeReduction(RdxDesc, VF); + } + bool isElementTypeLegalForScalableVector(Type *Ty) const override { + return Impl.isElementTypeLegalForScalableVector(Ty); + } unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override { @@ -2180,10 +2257,6 @@ public: VectorType *VecTy) const override { return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } - bool useReductionIntrinsic(unsigned Opcode, Type *Ty, - ReductionFlags Flags) const override { - return Impl.useReductionIntrinsic(Opcode, Ty, Flags); - } bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const override { return Impl.preferInLoopReduction(Opcode, Ty, Flags); @@ -2208,9 +2281,14 @@ public: return Impl.hasActiveVectorLength(); } - int getInstructionLatency(const Instruction *I) override { + InstructionCost getInstructionLatency(const Instruction *I) override { return Impl.getInstructionLatency(I); } + + VPLegalization + getVPLegalizationStrategy(const VPIntrinsic &PI) const override { + return Impl.getVPLegalizationStrategy(PI); + } }; template <typename T> diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 7e31cb365a87..c07a33c9f155 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -22,8 +22,11 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" 
+using namespace llvm::PatternMatch; + namespace llvm { /// Base class for use as a mix-in that aids implementing @@ -44,9 +47,10 @@ public: const DataLayout &getDataLayout() const { return DL; } - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const { + InstructionCost + getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -71,10 +75,20 @@ public: int getInlinerVectorBonusPercent() const { return 150; } - unsigned getMemcpyCost(const Instruction *I) const { + InstructionCost getMemcpyCost(const Instruction *I) const { return TTI::TCC_Expensive; } + // Although this default value is arbitrary, it is not random. It is assumed + // that a condition that evaluates the same way by a higher percentage than + // this is best represented as control flow. Therefore, the default value N + // should be set such that the win from N% correct executions is greater than + // the loss from (100 - N)% mispredicted executions for the majority of + // intended targets. + BranchProbability getPredictableBranchThreshold() const { + return BranchProbability(99, 100); + } + bool hasBranchDivergence() const { return false; } bool useGPUDivergenceAnalysis() const { return false; } @@ -209,9 +223,10 @@ public: return false; } - bool shouldFavorPostInc() const { return false; } - - bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } + TTI::AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { + return TTI::AMK_None; + } bool isLegalMaskedStore(Type *DataType, Align Alignment) const { return false; @@ -255,9 +270,10 @@ public: bool prefersVectorizedAddressing() const { return true; } - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) const { + InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale, + unsigned AddrSpace) const { // Guess that all legal addressing modes are free.
if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace)) @@ -275,20 +291,24 @@ public: bool isTypeLegal(Type *Ty) const { return false; } - unsigned getRegUsageForType(Type *Ty) const { return 1; } + InstructionCost getRegUsageForType(Type *Ty) const { return 1; } bool shouldBuildLookupTables() const { return true; } + bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; } + bool shouldBuildRelLookupTables() const { return false; } + bool useColdCCForColdCall(Function &F) const { return false; } - unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, - bool Insert, bool Extract) const { + InstructionCost getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, bool Extract) const { return 0; } - unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, - unsigned VF) const { + InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + ArrayRef<Type *> Tys) const { return 0; } @@ -310,7 +330,7 @@ public: bool isFPVectorizationPotentiallyUnsafe() const { return false; } bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, - unsigned AddressSpace, unsigned Alignment, + unsigned AddressSpace, Align Alignment, bool *Fast) const { return false; } @@ -323,29 +343,30 @@ public: bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; } - unsigned getFPOpCost(Type *Ty) const { + InstructionCost getFPOpCost(Type *Ty) const { return TargetTransformInfo::TCC_Basic; } - int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) const { + InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) const { return 0; } - unsigned getIntImmCost(const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) const { + InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { return TTI::TCC_Basic; } - unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind, - Instruction *Inst = nullptr) const { + InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr) const { return TTI::TCC_Free; } - unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) const { + InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { return TTI::TCC_Free; } @@ -366,15 +387,19 @@ public: } } - unsigned getRegisterBitWidth(bool Vector) const { return 32; } + TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { + return TypeSize::getFixed(32); + } unsigned getMinVectorRegisterBitWidth() const { return 128; } Optional<unsigned> getMaxVScale() const { return None; } - bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } + bool shouldMaximizeVectorBandwidth() const { return false; } - unsigned getMinimumVF(unsigned ElemWidth) const { return 0; } + ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const { + return ElementCount::get(0, IsScalable); + } unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; } @@ -420,14 +445,12 @@ public: unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; } - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - TTI::TargetCostKind CostKind, - 
TTI::OperandValueKind Opd1Info, - TTI::OperandValueKind Opd2Info, - TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args, - const Instruction *CxtI = nullptr) const { + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, + const Instruction *CxtI = nullptr) const { // FIXME: A number of transformation tests seem to require these values // which seems a little odd for how arbitrary they are. switch (Opcode) { default: break; @@ -445,15 +468,16 @@ public: return 1; } - unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index, - VectorType *SubTp) const { + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, + ArrayRef<int> Mask, int Index, + VectorType *SubTp) const { return 1; } - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, - TTI::TargetCostKind CostKind, - const Instruction *I) const { + InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I) const { switch (Opcode) { default: break; @@ -488,12 +512,14 @@ public: return 1; } - unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, - VectorType *VecTy, unsigned Index) const { + InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, + unsigned Index) const { return 1; } - unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const { + InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) const { // A phi would be free, unless we're costing the throughput because it // will require a register.
if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput) @@ -501,34 +527,36 @@ public: return 1; } - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) const { + InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) const { return 1; } - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { return 1; } - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, TTI::TargetCostKind CostKind, - const Instruction *I) const { + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) const { return 1; } - unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind) const { + InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind) const { return 1; } - unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - const Value *Ptr, bool VariableMask, - Align Alignment, TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) const { + InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + const Value *Ptr, bool VariableMask, + Align Alignment, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) const { return 1; } @@ -539,8 +567,8 @@ public: return 1; } - unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) const { + InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) const { switch (ICA.getID()) { default: break; @@ -548,6 +576,7 @@ public: case Intrinsic::assume: case Intrinsic::sideeffect: case Intrinsic::pseudoprobe: + case Intrinsic::arithmetic_fence: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: @@ -579,25 +608,27 @@ public: return 1; } - unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind) const { + InstructionCost getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) const { return 1; } unsigned getNumberOfParts(Type *Tp) const { return 0; } - unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, - const SCEV *) const { + InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, + const SCEV *) const { return 0; } - unsigned getArithmeticReductionCost(unsigned, VectorType *, bool, - TTI::TargetCostKind) const { + InstructionCost getArithmeticReductionCost(unsigned, VectorType *, + Optional<FastMathFlags> FMF, + TTI::TargetCostKind) const { return 1; } - unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool, - TTI::TargetCostKind) const { + InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool, + TTI::TargetCostKind) const { return 1; } @@ -607,7 +638,7 @@ public: return 1; } - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { + InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { return 0; } @@ -686,6 +717,13 @@ public: return true; } + bool isLegalToVectorizeReduction(const 
RecurrenceDescriptor &RdxDesc, + ElementCount VF) const { + return true; + } + + bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; } + unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const { @@ -698,11 +736,6 @@ public: return VF; } - bool useReductionIntrinsic(unsigned Opcode, Type *Ty, - TTI::ReductionFlags Flags) const { - return false; - } - bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { return false; @@ -721,6 +754,13 @@ public: bool hasActiveVectorLength() const { return false; } + TargetTransformInfo::VPLegalization + getVPLegalizationStrategy(const VPIntrinsic &PI) const { + return TargetTransformInfo::VPLegalization( + /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard, + /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert); + } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. @@ -816,13 +856,13 @@ protected: public: using BaseT::getGEPCost; - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { + InstructionCost + getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { assert(PointeeType && Ptr && "can't get GEPCost of nullptr"); - // TODO: will remove this when pointers have an opaque type. - assert(Ptr->getType()->getScalarType()->getPointerElementType() == - PointeeType && + assert(cast<PointerType>(Ptr->getType()->getScalarType()) + ->isOpaqueOrPointeeTypeMatches(PointeeType) && "explicit pointee type doesn't match operand's pointee type"); auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); bool HasBaseReg = (BaseGV == nullptr); @@ -880,8 +920,8 @@ public: return TTI::TCC_Basic; } - int getUserCost(const User *U, ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind) { + InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind) { auto *TargetTTI = static_cast<T *>(this); // Handle non-intrinsic calls, invokes, and callbr. // FIXME: Unlikely to be true for anything but CodeSize. @@ -914,7 +954,8 @@ public: case Instruction::Br: case Instruction::Ret: case Instruction::PHI: - return TargetTTI->getCFInstrCost(Opcode, CostKind); + case Instruction::Switch: + return TargetTTI->getCFInstrCost(Opcode, CostKind, I); case Instruction::ExtractValue: case Instruction::Freeze: return TTI::TCC_Free; @@ -987,6 +1028,23 @@ public: CostKind, I); } case Instruction::Select: { + const Value *Op0, *Op1; + if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) || + match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { + // select x, y, false --> x & y + // select x, true, y --> x | y + TTI::OperandValueProperties Op1VP = TTI::OP_None; + TTI::OperandValueProperties Op2VP = TTI::OP_None; + TTI::OperandValueKind Op1VK = TTI::getOperandInfo(Op0, Op1VP); + TTI::OperandValueKind Op2VK = TTI::getOperandInfo(Op1, Op2VP); + assert(Op0->getType()->getScalarSizeInBits() == 1 && + Op1->getType()->getScalarSizeInBits() == 1); + + SmallVector<const Value *, 2> Operands{Op0, Op1}; + return TargetTTI->getArithmeticInstrCost( + match(U, m_LogicalOr()) ? 
Instruction::Or : Instruction::And, Ty, + CostKind, Op1VK, Op2VK, Op1VP, Op2VP, Operands, I); + } Type *CondTy = U->getOperand(0)->getType(); return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy, CmpInst::BAD_ICMP_PREDICATE, @@ -1020,25 +1078,30 @@ public: int SubIndex; if (Shuffle->isExtractSubvectorMask(SubIndex)) return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, - SubIndex, VecTy); + Shuffle->getShuffleMask(), SubIndex, + VecTy); else if (Shuffle->changesLength()) return CostKind == TTI::TCK_RecipThroughput ? -1 : 1; else if (Shuffle->isIdentity()) return 0; else if (Shuffle->isReverse()) - return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, 0, nullptr); + return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); else if (Shuffle->isSelect()) - return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, 0, nullptr); + return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); else if (Shuffle->isTranspose()) - return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, 0, nullptr); + return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); else if (Shuffle->isZeroEltSplat()) - return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, 0, nullptr); + return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); else if (Shuffle->isSingleSource()) - return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, 0, - nullptr); + return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); - return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, 0, - nullptr); + return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); } case Instruction::ExtractElement: { unsigned Idx = -1; @@ -1050,26 +1113,6 @@ public: if (CI) Idx = CI->getZExtValue(); - // Try to match a reduction (a series of shufflevector and vector ops - // followed by an extractelement). - unsigned RdxOpcode; - VectorType *RdxType; - bool IsPairwise; - switch (TTI::matchVectorReduction(EEI, RdxOpcode, RdxType, IsPairwise)) { - case TTI::RK_Arithmetic: - return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType, - IsPairwise, CostKind); - case TTI::RK_MinMax: - return TargetTTI->getMinMaxReductionCost( - RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)), - IsPairwise, /*IsUnsigned=*/false, CostKind); - case TTI::RK_UnsignedMinMax: - return TargetTTI->getMinMaxReductionCost( - RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)), - IsPairwise, /*IsUnsigned=*/true, CostKind); - case TTI::RK_None: - break; - } return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(), Idx); } @@ -1078,7 +1121,7 @@ public: return TTI::TCC_Basic; } - int getInstructionLatency(const Instruction *I) { + InstructionCost getInstructionLatency(const Instruction *I) { SmallVector<const Value *, 4> Operands(I->operand_values()); if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free) return 0; diff --git a/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h b/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h index d02bcd0e335b..45ef4dbe2155 100644 --- a/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h +++ b/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h @@ -9,13 +9,13 @@ // ThinLTO. 
//===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H -#define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H +#ifndef LLVM_ANALYSIS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H +#define LLVM_ANALYSIS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include <string> +#include <memory> #include <vector> namespace llvm { @@ -109,4 +109,4 @@ enum class InlinerFunctionImportStatsOpts { } // llvm -#endif // LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H +#endif // LLVM_ANALYSIS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H diff --git a/llvm/include/llvm/Analysis/Utils/Local.h b/llvm/include/llvm/Analysis/Utils/Local.h index bd82b34165d6..031938c6f9c7 100644 --- a/llvm/include/llvm/Analysis/Utils/Local.h +++ b/llvm/include/llvm/Analysis/Utils/Local.h @@ -100,4 +100,4 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, } -#endif // LLVM_TRANSFORMS_UTILS_LOCAL_H +#endif // LLVM_ANALYSIS_UTILS_LOCAL_H diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h index ea6bc2cf19ee..47ee23e06000 100644 --- a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -12,6 +12,7 @@ #include "llvm/Config/llvm-config.h" #ifdef LLVM_HAVE_TF_API +#include "llvm/ADT/StringMap.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/JSON.h" @@ -120,56 +121,62 @@ loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName, /// The assumption is that, for an event to be logged (i.e. a set of feature /// values and a reward), the user calls the log* API for each feature exactly /// once, providing the index matching the position in the feature spec list -/// provided at construction: +/// provided at construction. The example assumes the first feature's element +/// type is float, the second is int64, and the reward is float: +/// /// event 0: -/// logTensorValue(0, ...) -/// logTensorValue(1, ...) +/// logFloatValue(0, ...) +/// logInt64Value(1, ...) /// ... -/// logReward(...) +/// logFloatReward(...) /// event 1: -/// logTensorValue(0, ...) -/// logTensorValue(1, ...) +/// logFloatValue(0, ...) +/// logInt64Value(1, ...) /// ... -/// logReward(...) +/// logFloatReward(...) /// /// At the end, call print to generate the protobuf. +/// Alternatively, don't call logReward at the end of each event, just +/// log{Float|Int32|Int64}FinalReward at the end. +class LoggerDataImpl; class Logger final { public: - /// Construct a Logger. If IncludeReward is false, then logReward shouldn't - /// be called, and the reward feature won't be printed out. + /// Construct a Logger. If IncludeReward is false, then logReward or + /// logFinalReward shouldn't be called, and the reward feature won't be + /// printed out. 
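+ /// For illustration only (a hedged sketch, not a full program: a Logger `L`,
+ /// an output stream `OS`, and the concrete values are assumed), one event
+ /// with the two features above could be logged with the typed API as:
+ /// \code
+ ///   float F = 0.5f;
+ ///   int64_t N = 42;
+ ///   L.logFloatValue(0, &F);  // feature 0 (float element type)
+ ///   L.logInt64Value(1, &N);  // feature 1 (int64 element type)
+ ///   L.logFloatReward(1.0f);  // reward for this event
+ ///   L.print(OS);             // after all events, emit the protobuf
+ /// \endcode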
Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs, - const TensorSpec &RewardSpec, bool IncludeReward) - : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec), - RawLogData(FeatureSpecs.size() + IncludeReward), - IncludeReward(IncludeReward) {} - - template <typename T> void logReward(T Value) { - assert(IncludeReward); - logTensorValue(RawLogData.size() - 1, &Value); - } + const TensorSpec &RewardSpec, bool IncludeReward); - template <typename T> void logFinalReward(T Value) { - assert(RawLogData.back().empty()); - logReward(Value); - } + ~Logger(); - template <typename T> - void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) { - const char *Start = reinterpret_cast<const char *>(Value); - const char *End = Start + sizeof(T) * Size; - RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End); - } + void logFloatReward(float Value); + void logInt32Reward(int32_t Value); + void logInt64Reward(int64_t Value); + + void logFloatFinalReward(float Value); + void logInt32FinalReward(int32_t Value); + void logInt64FinalReward(int64_t Value); + + void logFloatValue(size_t FeatureID, const float *Value); + void logInt32Value(size_t FeatureID, const int32_t *Value); + void logInt64Value(size_t FeatureID, const int64_t *Value); + + void logSpecifiedTensorValue(size_t FeatureID, const char *RawData); + + // Warning! For int32_t, the returned buffer is set up for int64_t, so the + // caller needs to cast their int32_t values piecemeal. + // FIXME: let's drop int32_t support. While it's supported by the evaluator, + // it's not supported by the tensorflow::SequenceExample proto. For small + // values, we can consider using bytes. + char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID); void print(raw_ostream &OS); private: std::vector<LoggedFeatureSpec> FeatureSpecs; TensorSpec RewardSpec; - /// RawData has one entry per feature, plus one more for the reward. - /// Each feature's values are then stored in a vector, in succession. - /// This means the ith event is stored at [*][i] - std::vector<std::vector<char>> RawLogData; const bool IncludeReward; + std::unique_ptr<LoggerDataImpl> LoggerData; }; class TFModelEvaluator final { diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index 108d08033ac3..1b32fca50697 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -17,13 +17,13 @@ // ValueLatticeElement //===----------------------------------------------------------------------===// +namespace llvm { + /// This class represents lattice values for constants. /// /// FIXME: This is basically just for bringup, this can be made a lot more rich /// in the future. /// - -namespace llvm { class ValueLatticeElement { enum ValueLatticeElementTy { /// This Value has no known value yet.
As a result, this implies the @@ -474,11 +474,9 @@ public: const auto &CR = getConstantRange(); const auto &OtherCR = Other.getConstantRange(); - if (ConstantRange::makeSatisfyingICmpRegion(Pred, OtherCR).contains(CR)) + if (CR.icmp(Pred, OtherCR)) return ConstantInt::getTrue(Ty); - if (ConstantRange::makeSatisfyingICmpRegion( - CmpInst::getInversePredicate(Pred), OtherCR) - .contains(CR)) + if (CR.icmp(CmpInst::getInversePredicate(Pred), OtherCR)) return ConstantInt::getFalse(Ty); return nullptr; diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 86c0991451c5..90ec742f18e6 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -370,10 +370,11 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// that the returned value has pointer type if the specified value does. If /// the MaxLookup value is non-zero, it limits the number of instructions to /// be stripped off. - Value *getUnderlyingObject(Value *V, unsigned MaxLookup = 6); - inline const Value *getUnderlyingObject(const Value *V, - unsigned MaxLookup = 6) { - return getUnderlyingObject(const_cast<Value *>(V), MaxLookup); + const Value *getUnderlyingObject(const Value *V, unsigned MaxLookup = 6); + inline Value *getUnderlyingObject(Value *V, unsigned MaxLookup = 6) { + // Force const to avoid infinite recursion. + const Value *VConst = V; + return const_cast<Value *>(getUnderlyingObject(VConst, MaxLookup)); } /// This method is similar to getUnderlyingObject except that it can @@ -460,7 +461,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// for such instructions, moving them may change the resulting value. bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Returns true if the result or effects of the given instructions \p I /// depend on or influence global memory. @@ -582,6 +584,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// poison. /// Formally, given I = `r = op v1 v2 .. vN`, propagatesPoison returns true /// if, for all i, r is evaluated to poison or op raises UB if vi = poison. + /// If vi is a vector or an aggregate and r is a single value, any poison + /// element in vi should make r poison or raise UB. /// To filter out operands that raise UB on poison, you can use /// getGuaranteedNonPoisonOp. bool propagatesPoison(const Operator *I); @@ -590,6 +594,11 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// if I is executed and that operand has a poison value. void getGuaranteedNonPoisonOps(const Instruction *I, SmallPtrSetImpl<const Value *> &Ops); + /// Insert operands of I into Ops such that I will trigger undefined behavior + /// if I is executed and that operand is not a well-defined value + /// (i.e. has undef bits or poison). + void getGuaranteedWellDefinedOps(const Instruction *I, + SmallPtrSetImpl<const Value *> &Ops); /// Return true if the given instruction must trigger undefined behavior /// when I is executed with any operands which appear in KnownPoison holding @@ -729,6 +738,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// For example, signed minimum is the inverse of signed maximum. SelectPatternFlavor getInverseMinMaxFlavor(SelectPatternFlavor SPF); + Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID); + /// Return the canonical inverse comparison predicate for the specified /// minimum/maximum flavor. 
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF); @@ -741,6 +752,37 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; std::pair<Intrinsic::ID, bool> canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL); + /// Attempt to match a simple first order recurrence cycle of the form: + /// %iv = phi Ty [%Start, %Entry], [%Inc, %backedge] + /// %inc = binop %iv, %step + /// OR + /// %iv = phi Ty [%Start, %Entry], [%Inc, %backedge] + /// %inc = binop %step, %iv + /// + /// A first order recurrence is a formula of the form: X_n = f(X_(n-1)) + /// + /// A couple of notes on subtleties in that definition: + /// * The Step does not have to be loop invariant. In math terms, it can + /// be a free variable. We allow recurrences with both constant and + /// variable coefficients. Callers may wish to filter cases where Step + /// does not dominate P. + /// * For non-commutative operators, we will match both forms. This + /// results in some odd recurrence structures. Callers may wish to filter + /// out recurrences where the phi is not the LHS of the returned operator. + /// * Because of the structure matched, the caller can assume as a post + /// condition of the match the presence of a Loop with P's parent as its + /// header *except* in unreachable code. (Dominance decays in unreachable + /// code.) + /// + /// NOTE: This is intentionally simple. If you want the ability to analyze + /// non-trivial loop conditions, see ScalarEvolution instead. + bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, + Value *&Start, Value *&Step); + + /// Analogous to the above, but starting from the binary operator. + bool matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P, + Value *&Start, Value *&Step); + /// Return true if RHS is known to be implied true by LHS. Return false if /// RHS is known to be implied false by LHS. Otherwise, return None if no /// implication can be made.
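As a quick illustration of the matchSimpleRecurrence helper added above, a minimal caller could look like the following sketch (the includes and the enclosing pass are assumed; only the helper's signature comes from this header):

  // Returns true when P heads a simple add recurrence, i.e. an induction
  // variable of the shape:
  //   %iv = phi i64 [ %start, %entry ], [ %iv.next, %latch ]
  //   %iv.next = add i64 %iv, %step
  static bool isAddRecurrencePhi(const llvm::PHINode *P) {
    llvm::BinaryOperator *BO = nullptr;
    llvm::Value *Start = nullptr, *Step = nullptr;
    if (!llvm::matchSimpleRecurrence(P, BO, Start, Step))
      return false;
    return BO->getOpcode() == llvm::Instruction::Add;
  }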
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index cfc3d6115866..8a1ebec4c727 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -17,402 +17,454 @@ #define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) VEC, #endif +#define FIXED(NL) ElementCount::getFixed(NL) +#define SCALABLE(NL) ElementCount::getScalable(NL) + #if !(defined(TLI_DEFINE_VECFUNC)) #define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF}, -#endif +#endif #if defined(TLI_DEFINE_ACCELERATE_VECFUNCS) // Accelerate framework's Vector Functions // Floating-Point Arithmetic and Auxiliary Functions -TLI_DEFINE_VECFUNC("ceilf", "vceilf", 4) -TLI_DEFINE_VECFUNC("fabsf", "vfabsf", 4) -TLI_DEFINE_VECFUNC("llvm.fabs.f32", "vfabsf", 4) -TLI_DEFINE_VECFUNC("floorf", "vfloorf", 4) -TLI_DEFINE_VECFUNC("sqrtf", "vsqrtf", 4) -TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "vsqrtf", 4) +TLI_DEFINE_VECFUNC("ceilf", "vceilf", FIXED(4)) +TLI_DEFINE_VECFUNC("fabsf", "vfabsf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.fabs.f32", "vfabsf", FIXED(4)) +TLI_DEFINE_VECFUNC("floorf", "vfloorf", FIXED(4)) +TLI_DEFINE_VECFUNC("sqrtf", "vsqrtf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "vsqrtf", FIXED(4)) // Exponential and Logarithmic Functions -TLI_DEFINE_VECFUNC("expf", "vexpf", 4) -TLI_DEFINE_VECFUNC("llvm.exp.f32", "vexpf", 4) -TLI_DEFINE_VECFUNC("expm1f", "vexpm1f", 4) -TLI_DEFINE_VECFUNC("logf", "vlogf", 4) -TLI_DEFINE_VECFUNC("llvm.log.f32", "vlogf", 4) -TLI_DEFINE_VECFUNC("log1pf", "vlog1pf", 4) -TLI_DEFINE_VECFUNC("log10f", "vlog10f", 4) -TLI_DEFINE_VECFUNC("llvm.log10.f32", "vlog10f", 4) -TLI_DEFINE_VECFUNC("logbf", "vlogbf", 4) +TLI_DEFINE_VECFUNC("expf", "vexpf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.exp.f32", "vexpf", FIXED(4)) +TLI_DEFINE_VECFUNC("expm1f", "vexpm1f", FIXED(4)) +TLI_DEFINE_VECFUNC("logf", "vlogf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.log.f32", "vlogf", FIXED(4)) +TLI_DEFINE_VECFUNC("log1pf", "vlog1pf", FIXED(4)) +TLI_DEFINE_VECFUNC("log10f", "vlog10f", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.log10.f32", "vlog10f", FIXED(4)) +TLI_DEFINE_VECFUNC("logbf", "vlogbf", FIXED(4)) // Trigonometric Functions -TLI_DEFINE_VECFUNC("sinf", "vsinf", 4) -TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", 4) -TLI_DEFINE_VECFUNC("cosf", "vcosf", 4) -TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", 4) -TLI_DEFINE_VECFUNC("tanf", "vtanf", 4) -TLI_DEFINE_VECFUNC("asinf", "vasinf", 4) -TLI_DEFINE_VECFUNC("acosf", "vacosf", 4) -TLI_DEFINE_VECFUNC("atanf", "vatanf", 4) +TLI_DEFINE_VECFUNC("sinf", "vsinf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4)) +TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4)) +TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4)) +TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4)) +TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4)) +TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4)) // Hyperbolic Functions -TLI_DEFINE_VECFUNC("sinhf", "vsinhf", 4) -TLI_DEFINE_VECFUNC("coshf", "vcoshf", 4) -TLI_DEFINE_VECFUNC("tanhf", "vtanhf", 4) -TLI_DEFINE_VECFUNC("asinhf", "vasinhf", 4) -TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4) -TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4) +TLI_DEFINE_VECFUNC("sinhf", "vsinhf", FIXED(4)) +TLI_DEFINE_VECFUNC("coshf", "vcoshf", FIXED(4)) +TLI_DEFINE_VECFUNC("tanhf", "vtanhf", FIXED(4)) +TLI_DEFINE_VECFUNC("asinhf", "vasinhf", FIXED(4)) +TLI_DEFINE_VECFUNC("acoshf", "vacoshf", FIXED(4)) +TLI_DEFINE_VECFUNC("atanhf", "vatanhf", FIXED(4)) + +#elif defined(TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS) +// 
+// Darwin libsystem_m vector functions.
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("exp", "_simd_exp_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_simd_exp_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("expf", "_simd_exp_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_simd_exp_f4", FIXED(4))
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("acos", "_simd_acos_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("acosf", "_simd_acos_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("asin", "_simd_asin_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinf", "_simd_asin_f4", FIXED(4))
+
+TLI_DEFINE_VECFUNC("atan", "_simd_atan_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4))
+
+TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("cosf", "_simd_cos_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_simd_cos_f4", FIXED(4))
+
+TLI_DEFINE_VECFUNC("sin", "_simd_sin_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_simd_sin_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinf", "_simd_sin_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_simd_sin_f4", FIXED(4))
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("cbrt", "_simd_cbrt_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("cbrtf", "_simd_cbrt_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("erf", "_simd_erf_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("erff", "_simd_erf_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("pow", "_simd_pow_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_simd_pow_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("powf", "_simd_pow_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_simd_pow_f4", FIXED(4))
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinh", "_simd_sinh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinhf", "_simd_sinh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("cosh", "_simd_cosh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("coshf", "_simd_cosh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("tanh", "_simd_tanh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("tanhf", "_simd_tanh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("asinh", "_simd_asinh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinhf", "_simd_asinh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("acosh", "_simd_acosh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("acoshf", "_simd_acosh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("atanh", "_simd_atanh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanhf", "_simd_atanh_f4", FIXED(4))
 #elif defined(TLI_DEFINE_LIBMVEC_X86_VECFUNCS)
 // GLIBC Vector math Functions
-TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", 2)
-TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", 4)
+TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", FIXED(2))
+TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", FIXED(4))
-TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", 4)
-TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", 8)
+TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", FIXED(4))
+TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", 2)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", 8)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", FIXED(8))
-TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", 2)
-TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", 4)
+TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", FIXED(2))
+TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", FIXED(4))
-TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", 4)
-TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", 8)
+TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", FIXED(4))
+TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", 2)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", 8)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8))
-TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", 2)
-TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", 4)
+TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2))
+TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4))
-TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", 4)
-TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", 8)
+TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", FIXED(4))
+TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", FIXED(8))
-TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", 2)
-TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", 4)
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", FIXED(2))
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", FIXED(4))
-TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", 4)
-TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", 8)
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", FIXED(4))
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", 2)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", 8)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", FIXED(8))
-TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", 2)
-TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", 4)
+TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", FIXED(2))
+TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", FIXED(4))
-TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", 4)
-TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", 8)
+TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", FIXED(4))
+TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", FIXED(8))
-TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", 2)
-TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", 4)
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", FIXED(2))
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", FIXED(4))
-TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", 4)
-TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", 8)
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", FIXED(4))
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", 2)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", 8)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", FIXED(8))
-TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", 2)
-TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", 4)
+TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", FIXED(2))
+TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", FIXED(4))
-TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", 4)
-TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", 8)
+TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", FIXED(4))
+TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", FIXED(8))
-TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", 2)
-TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", 4)
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", FIXED(2))
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", FIXED(4))
-TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", 4)
-TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", 8)
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", FIXED(4))
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", 2)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", 8)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", FIXED(8))
 #elif defined(TLI_DEFINE_MASSV_VECFUNCS)
 // IBM MASS library's vector Functions
 // Floating-Point Arithmetic and Auxiliary Functions
-TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_massv", 2)
-TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_massv", 4)
-TLI_DEFINE_VECFUNC("pow", "__powd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_massv", 2)
-TLI_DEFINE_VECFUNC("powf", "__powf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_massv", 4)
-TLI_DEFINE_VECFUNC("sqrt", "__sqrtd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__sqrtd2_massv", 2)
-TLI_DEFINE_VECFUNC("sqrtf", "__sqrtf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__sqrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2", FIXED(2))
+TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4", FIXED(4))
+TLI_DEFINE_VECFUNC("pow", "__powd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2", FIXED(2))
+TLI_DEFINE_VECFUNC("powf", "__powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4", FIXED(4))
 // Exponential and Logarithmic Functions
-TLI_DEFINE_VECFUNC("exp", "__expd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_massv", 2)
-TLI_DEFINE_VECFUNC("expf", "__expf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_massv", 4)
-TLI_DEFINE_VECFUNC("exp2", "__exp2d2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_massv", 2)
-TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_massv", 4)
-TLI_DEFINE_VECFUNC("expm1", "__expm1d2_massv", 2)
-TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_massv", 4)
-TLI_DEFINE_VECFUNC("log", "__logd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_massv", 2)
-TLI_DEFINE_VECFUNC("logf", "__logf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_massv", 4)
-TLI_DEFINE_VECFUNC("log1p", "__log1pd2_massv", 2)
-TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_massv", 4)
-TLI_DEFINE_VECFUNC("log10", "__log10d2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_massv", 2)
-TLI_DEFINE_VECFUNC("log10f", "__log10f4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_massv", 4)
-TLI_DEFINE_VECFUNC("log2", "__log2d2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_massv", 2)
-TLI_DEFINE_VECFUNC("log2f", "__log2f4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_massv", 4)
+TLI_DEFINE_VECFUNC("exp", "__expd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2", FIXED(2))
+TLI_DEFINE_VECFUNC("expf", "__expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("exp2", "__exp2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("exp2f", "__exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("expm1", "__expm1d2", FIXED(2))
+TLI_DEFINE_VECFUNC("expm1f", "__expm1f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log", "__logd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2", FIXED(2))
+TLI_DEFINE_VECFUNC("logf", "__logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("log1p", "__log1pd2", FIXED(2))
+TLI_DEFINE_VECFUNC("log1pf", "__log1pf4", FIXED(4))
+TLI_DEFINE_VECFUNC("log10", "__log10d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2", FIXED(2))
+TLI_DEFINE_VECFUNC("log10f", "__log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log2", "__log2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("log2f", "__log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4", FIXED(4))
 // Trigonometric Functions
-TLI_DEFINE_VECFUNC("sin", "__sind2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_massv", 2)
-TLI_DEFINE_VECFUNC("sinf", "__sinf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_massv", 4)
-TLI_DEFINE_VECFUNC("cos", "__cosd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_massv", 2)
-TLI_DEFINE_VECFUNC("cosf", "__cosf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_massv", 4)
-TLI_DEFINE_VECFUNC("tan", "__tand2_massv", 2)
-TLI_DEFINE_VECFUNC("tanf", "__tanf4_massv", 4)
-TLI_DEFINE_VECFUNC("asin", "__asind2_massv", 2)
-TLI_DEFINE_VECFUNC("asinf", "__asinf4_massv", 4)
-TLI_DEFINE_VECFUNC("acos", "__acosd2_massv", 2)
-TLI_DEFINE_VECFUNC("acosf", "__acosf4_massv", 4)
-TLI_DEFINE_VECFUNC("atan", "__atand2_massv", 2)
-TLI_DEFINE_VECFUNC("atanf", "__atanf4_massv", 4)
-TLI_DEFINE_VECFUNC("atan2", "__atan2d2_massv", 2)
-TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_massv", 4)
+TLI_DEFINE_VECFUNC("sin", "__sind2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinf", "__sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("cos", "__cosd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2", FIXED(2))
+TLI_DEFINE_VECFUNC("cosf", "__cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("tan", "__tand2", FIXED(2))
+TLI_DEFINE_VECFUNC("tanf", "__tanf4", FIXED(4))
+TLI_DEFINE_VECFUNC("asin", "__asind2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinf", "__asinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("acos", "__acosd2", FIXED(2))
+TLI_DEFINE_VECFUNC("acosf", "__acosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("atan", "__atand2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanf", "__atanf4", FIXED(4))
+TLI_DEFINE_VECFUNC("atan2", "__atan2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atan2f", "__atan2f4", FIXED(4))
 // Hyperbolic Functions
-TLI_DEFINE_VECFUNC("sinh", "__sinhd2_massv", 2)
-TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_massv", 4)
-TLI_DEFINE_VECFUNC("cosh", "__coshd2_massv", 2)
-TLI_DEFINE_VECFUNC("coshf", "__coshf4_massv", 4)
-TLI_DEFINE_VECFUNC("tanh", "__tanhd2_massv", 2)
-TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_massv", 4)
-TLI_DEFINE_VECFUNC("asinh", "__asinhd2_massv", 2)
-TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_massv", 4)
-TLI_DEFINE_VECFUNC("acosh", "__acoshd2_massv", 2)
-TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_massv", 4)
-TLI_DEFINE_VECFUNC("atanh", "__atanhd2_massv", 2)
-TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_massv", 4)
+TLI_DEFINE_VECFUNC("sinh", "__sinhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinhf", "__sinhf4", FIXED(4))
+TLI_DEFINE_VECFUNC("cosh", "__coshd2", FIXED(2))
+TLI_DEFINE_VECFUNC("coshf", "__coshf4", FIXED(4))
+TLI_DEFINE_VECFUNC("tanh", "__tanhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("tanhf", "__tanhf4", FIXED(4))
+TLI_DEFINE_VECFUNC("asinh", "__asinhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinhf", "__asinhf4", FIXED(4))
+TLI_DEFINE_VECFUNC("acosh", "__acoshd2", FIXED(2))
+TLI_DEFINE_VECFUNC("acoshf", "__acoshf4", FIXED(4))
+TLI_DEFINE_VECFUNC("atanh", "__atanhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanhf", "__atanhf4", FIXED(4))
 #elif defined(TLI_DEFINE_SVML_VECFUNCS)
 // Intel SVM library's Vector Functions
-TLI_DEFINE_VECFUNC("sin", "__svml_sin2", 2)
-TLI_DEFINE_VECFUNC("sin", "__svml_sin4", 4)
-TLI_DEFINE_VECFUNC("sin", "__svml_sin8", 8)
+TLI_DEFINE_VECFUNC("sin", "__svml_sin2", FIXED(2))
+TLI_DEFINE_VECFUNC("sin", "__svml_sin4", FIXED(4))
+TLI_DEFINE_VECFUNC("sin", "__svml_sin8", FIXED(8))
-TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", 4)
-TLI_DEFINE_VECFUNC("sinf", "__svml_sinf8", 8)
-TLI_DEFINE_VECFUNC("sinf", "__svml_sinf16", 16)
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf8", FIXED(8))
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin2", 2)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin4", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin8", 8)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf4", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf8", 8)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf16", 16)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf16", FIXED(16))
-TLI_DEFINE_VECFUNC("cos", "__svml_cos2", 2)
-TLI_DEFINE_VECFUNC("cos", "__svml_cos4", 4)
-TLI_DEFINE_VECFUNC("cos", "__svml_cos8", 8)
+TLI_DEFINE_VECFUNC("cos", "__svml_cos2", FIXED(2))
+TLI_DEFINE_VECFUNC("cos", "__svml_cos4", FIXED(4))
+TLI_DEFINE_VECFUNC("cos", "__svml_cos8", FIXED(8))
-TLI_DEFINE_VECFUNC("cosf", "__svml_cosf4", 4)
-TLI_DEFINE_VECFUNC("cosf", "__svml_cosf8", 8)
-TLI_DEFINE_VECFUNC("cosf", "__svml_cosf16", 16)
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf8", FIXED(8))
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos2", 2)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos4", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos8", 8)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", 8)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", 16)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", FIXED(16))
-TLI_DEFINE_VECFUNC("pow", "__svml_pow2", 2)
-TLI_DEFINE_VECFUNC("pow", "__svml_pow4", 4)
-TLI_DEFINE_VECFUNC("pow", "__svml_pow8", 8)
+TLI_DEFINE_VECFUNC("pow", "__svml_pow2", FIXED(2))
+TLI_DEFINE_VECFUNC("pow", "__svml_pow4", FIXED(4))
+TLI_DEFINE_VECFUNC("pow", "__svml_pow8", FIXED(8))
-TLI_DEFINE_VECFUNC("powf", "__svml_powf4", 4)
-TLI_DEFINE_VECFUNC("powf", "__svml_powf8", 8)
-TLI_DEFINE_VECFUNC("powf", "__svml_powf16", 16)
+TLI_DEFINE_VECFUNC("powf", "__svml_powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("powf", "__svml_powf8", FIXED(8))
+TLI_DEFINE_VECFUNC("powf", "__svml_powf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow2", 2)
-TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow4", 4)
-TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow8", 8)
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow2", FIXED(2))
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow4", FIXED(4))
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow8", FIXED(8))
-TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf4", 4)
-TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf8", 8)
-TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf16", 16)
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow2", 2)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow4", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow8", 8)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf4", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf8", 8)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf16", 16)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf16", FIXED(16))
-TLI_DEFINE_VECFUNC("exp", "__svml_exp2", 2)
-TLI_DEFINE_VECFUNC("exp", "__svml_exp4", 4)
-TLI_DEFINE_VECFUNC("exp", "__svml_exp8", 8)
+TLI_DEFINE_VECFUNC("exp", "__svml_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC("exp", "__svml_exp4", FIXED(4))
+TLI_DEFINE_VECFUNC("exp", "__svml_exp8", FIXED(8))
-TLI_DEFINE_VECFUNC("expf", "__svml_expf4", 4)
-TLI_DEFINE_VECFUNC("expf", "__svml_expf8", 8)
-TLI_DEFINE_VECFUNC("expf", "__svml_expf16", 16)
+TLI_DEFINE_VECFUNC("expf", "__svml_expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("expf", "__svml_expf8", FIXED(8))
+TLI_DEFINE_VECFUNC("expf", "__svml_expf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp2", 2)
-TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp4", 4)
-TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp8", 8)
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp4", FIXED(4))
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp8", FIXED(8))
-TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf4", 4)
-TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf8", 8)
-TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf16", 16)
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp2", 2)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp4", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp8", 8)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf4", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf8", 8)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf16", 16)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf16", FIXED(16))
-TLI_DEFINE_VECFUNC("log", "__svml_log2", 2)
-TLI_DEFINE_VECFUNC("log", "__svml_log4", 4)
-TLI_DEFINE_VECFUNC("log", "__svml_log8", 8)
+TLI_DEFINE_VECFUNC("log", "__svml_log2", FIXED(2))
+TLI_DEFINE_VECFUNC("log", "__svml_log4", FIXED(4))
+TLI_DEFINE_VECFUNC("log", "__svml_log8", FIXED(8))
-TLI_DEFINE_VECFUNC("logf", "__svml_logf4", 4)
-TLI_DEFINE_VECFUNC("logf", "__svml_logf8", 8)
-TLI_DEFINE_VECFUNC("logf", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("logf", "__svml_logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("logf", "__svml_logf8", FIXED(8))
+TLI_DEFINE_VECFUNC("logf", "__svml_logf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__log_finite", "__svml_log2", 2)
-TLI_DEFINE_VECFUNC("__log_finite", "__svml_log4", 4)
-TLI_DEFINE_VECFUNC("__log_finite", "__svml_log8", 8)
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log2", FIXED(2))
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log4", FIXED(4))
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log8", FIXED(8))
-TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf4", 4)
-TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf8", 8)
-TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log2", 2)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log4", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log8", 8)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", FIXED(16))
-TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2)
-TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4)
-TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8)
+TLI_DEFINE_VECFUNC("log2", "__svml_log22", FIXED(2))
+TLI_DEFINE_VECFUNC("log2", "__svml_log24", FIXED(4))
+TLI_DEFINE_VECFUNC("log2", "__svml_log28", FIXED(8))
-TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4)
-TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8)
-TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16)
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2)
-TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4)
-TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8)
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", FIXED(2))
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", FIXED(4))
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", FIXED(8))
-TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4)
-TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8)
-TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16)
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2)
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4)
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4)
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("log10", "__svml_log102", 2)
-TLI_DEFINE_VECFUNC("log10", "__svml_log104", 4)
-TLI_DEFINE_VECFUNC("log10", "__svml_log108", 8)
+TLI_DEFINE_VECFUNC("log10", "__svml_log102", FIXED(2))
+TLI_DEFINE_VECFUNC("log10", "__svml_log104", FIXED(4))
+TLI_DEFINE_VECFUNC("log10", "__svml_log108", FIXED(8))
-TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", 4)
-TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", 8)
-TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", 16)
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", FIXED(8))
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", FIXED(16))
-TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", 2)
-TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", 4)
-TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", 8)
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", FIXED(2))
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", FIXED(4))
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", FIXED(8))
-TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", 4)
-TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", 8)
-TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", 16)
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", FIXED(8))
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", 2)
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", 8)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", 8)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", 16)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", FIXED(16))
-TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", 2)
-TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", 4)
-TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", 8)
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", FIXED(2))
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", FIXED(4))
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", FIXED(8))
-TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", 4)
-TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", 8)
-TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", 16)
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", FIXED(4))
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", FIXED(8))
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", 2)
-TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", 4)
-TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", 8)
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", FIXED(2))
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", FIXED(4))
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", FIXED(8))
-TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
-TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
-TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", FIXED(16))
-TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
-TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
-TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
+TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", FIXED(2))
+TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", FIXED(4))
+TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", FIXED(8))
-TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f4", 4)
-TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f8", 8)
-TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f16", 16)
+TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp22", 2)
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp24", 4)
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp28", 8)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp22", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp24", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp28", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f4", 4)
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f8", 8)
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f16", 16)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp22", 2)
-TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp24", 4)
-TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp28", 8)
+TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp22", FIXED(2))
+TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp24", FIXED(4))
+TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp28", FIXED(8))
-TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f4", 4)
-TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f8", 8)
-TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
+TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16))
 #else
 #error "Must choose which vector library functions are to be defined."
@@ -420,6 +472,7 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
 #undef TLI_DEFINE_VECFUNC
 #undef TLI_DEFINE_ACCELERATE_VECFUNCS
+#undef TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS
 #undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS
 #undef TLI_DEFINE_MASSV_VECFUNCS
 #undef TLI_DEFINE_SVML_VECFUNCS
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 26cb0e456ed4..c890216c9e01 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -31,7 +31,7 @@ enum class VFParamKind {
   OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
   OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
   OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
-  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c
+  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
   OMP_Uniform,       // declare simd uniform(i)
   GlobalPredicate,   // Global logical predicate that acts on all lanes
                      // of the input and output mask concurrently. For
@@ -80,13 +80,11 @@ struct VFParameter {
 /// represent vector functions. in particular, it is not attached to
 /// any target-specific ABI.
 struct VFShape {
-  unsigned VF;     // Vectorization factor.
-  bool IsScalable; // True if the function is a scalable function.
+  ElementCount VF;                        // Vectorization factor.
   SmallVector<VFParameter, 8> Parameters; // List of parameter information.
   // Comparison operator.
   bool operator==(const VFShape &Other) const {
-    return std::tie(VF, IsScalable, Parameters) ==
-           std::tie(Other.VF, Other.IsScalable, Other.Parameters);
+    return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
   }
 
   /// Update the parameter in position P.ParamPos to P.
@@ -115,7 +113,7 @@ struct VFShape {
     Parameters.push_back(
         VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));
 
-    return {EC.getKnownMinValue(), EC.isScalable(), Parameters};
+    return {EC, Parameters};
   }
   /// Sanity check on the Parameters in the VFShape.
   bool hasValidParameterList() const;
@@ -127,12 +125,6 @@ struct VFInfo {
   std::string ScalarName; /// Scalar Function Name.
   std::string VectorName; /// Vector Function Name associated to this VFInfo.
   VFISAKind ISA;          /// Instruction Set Architecture.
-
-  // Comparison operator.
-  bool operator==(const VFInfo &Other) const {
-    return std::tie(Shape, ScalarName, VectorName, ISA) ==
-           std::tie(Shape, Other.ScalarName, Other.VectorName, Other.ISA);
-  }
 };
 
 namespace VFABI {
@@ -186,12 +178,13 @@ Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);
 /// <isa> = "_LLVM_"
 /// <mask> = "N". Note: TLI does not support masked interfaces.
 /// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
-/// field of the `VecDesc` struct.
+/// field of the `VecDesc` struct. If the number of lanes is scalable
+/// then 'x' is printed instead.
 /// <vparams> = "v", as many as are the numArgs.
 /// <scalarname> = the name of the scalar function.
 /// <vectorname> = the name of the vector function.
 std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
-                                unsigned numArgs, unsigned VF);
+                                unsigned numArgs, ElementCount VF);
 
 /// Retrieve the `VFParamKind` from a string token.
 VFParamKind getVFParamKindFromString(const StringRef Token);
@@ -322,6 +315,11 @@ bool isTriviallyVectorizable(Intrinsic::ID ID);
 
 /// Identifies if the vector form of the intrinsic has a scalar operand.
 bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
 
+/// Identifies if the vector form of the intrinsic has a scalar operand that has
+/// an overloaded type.
+bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
+                                            unsigned ScalarOpdIdx);
+
 /// Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
 /// its intrinsic ID, in case it does not found it return not_intrinsic.
@@ -601,10 +599,6 @@ public:
   bool isReverse() const { return Reverse; }
   uint32_t getFactor() const { return Factor; }
-  LLVM_ATTRIBUTE_DEPRECATED(uint32_t getAlignment() const,
-                            "Use getAlign instead.") {
-    return Alignment.value();
-  }
   Align getAlign() const { return Alignment; }
   uint32_t getNumMembers() const { return Members.size(); }
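Tying the mangleTLIVectorName change above to its documented scheme: the name is composed as _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>), and with an ElementCount VF the <vlen> token becomes 'x' for scalable factors. A minimal sketch that re-derives the string locally from that comment block follows; mangleTLISketch and its parameters are illustrative, not the LLVM API.

#include <string>

// Local re-derivation of the documented TLI mangling, for illustration only.
std::string mangleTLISketch(const std::string &VectorName,
                            const std::string &ScalarName, unsigned NumArgs,
                            unsigned MinLanes, bool IsScalable) {
  std::string Out = "_ZGV_LLVM_N";                    // <isa> "_LLVM_", <mask> "N"
  Out += IsScalable ? "x" : std::to_string(MinLanes); // <vlen>, 'x' if scalable
  Out += std::string(NumArgs, 'v');                   // <vparams>, one "v" per arg
  Out += "_" + ScalarName + "(" + VectorName + ")";
  return Out;
}

// mangleTLISketch("__svml_sin4", "sin", 1, 4, false) == "_ZGV_LLVM_N4v_sin(__svml_sin4)"
// mangleTLISketch("custom_vsin", "sin", 1, 2, true)  == "_ZGV_LLVM_Nxv_sin(custom_vsin)"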