Diffstat (limited to 'llvm/include/llvm/Analysis')
-rw-r--r--  llvm/include/llvm/Analysis/AliasAnalysis.h | 138
-rw-r--r--  llvm/include/llvm/Analysis/AliasSetTracker.h | 23
-rw-r--r--  llvm/include/llvm/Analysis/AssumeBundleQueries.h | 30
-rw-r--r--  llvm/include/llvm/Analysis/AssumptionCache.h | 10
-rw-r--r--  llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 59
-rw-r--r--  llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h | 344
-rw-r--r--  llvm/include/llvm/Analysis/BranchProbabilityInfo.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/CFG.h | 19
-rw-r--r--  llvm/include/llvm/Analysis/CFGPrinter.h | 14
-rw-r--r--  llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/CGSCCPassManager.h | 15
-rw-r--r--  llvm/include/llvm/Analysis/ConstantFolding.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/ConstraintSystem.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/DDG.h | 8
-rw-r--r--  llvm/include/llvm/Analysis/DOTGraphTraitsPass.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/DemandedBits.h | 9
-rw-r--r--  llvm/include/llvm/Analysis/DependenceGraphBuilder.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/DivergenceAnalysis.h | 86
-rw-r--r--  llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/IRSimilarityIdentifier.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/IVDescriptors.h | 80
-rw-r--r--  llvm/include/llvm/Analysis/InlineAdvisor.h | 14
-rw-r--r--  llvm/include/llvm/Analysis/InlineCost.h | 48
-rw-r--r--  llvm/include/llvm/Analysis/InlineModelFeatureMaps.h | 79
-rw-r--r--  llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h | 5
-rw-r--r--  llvm/include/llvm/Analysis/InstructionSimplify.h | 65
-rw-r--r--  llvm/include/llvm/Analysis/IteratedDominanceFrontier.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/LazyCallGraph.h | 7
-rw-r--r--  llvm/include/llvm/Analysis/LazyValueInfo.h | 10
-rw-r--r--  llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h | 10
-rw-r--r--  llvm/include/llvm/Analysis/Loads.h | 30
-rw-r--r--  llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 28
-rw-r--r--  llvm/include/llvm/Analysis/LoopAnalysisManager.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/LoopCacheAnalysis.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/LoopInfo.h | 41
-rw-r--r--  llvm/include/llvm/Analysis/LoopInfoImpl.h | 19
-rw-r--r--  llvm/include/llvm/Analysis/LoopNestAnalysis.h | 14
-rw-r--r--  llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h | 9
-rw-r--r--  llvm/include/llvm/Analysis/MemoryBuiltins.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h | 45
-rw-r--r--  llvm/include/llvm/Analysis/MemorySSA.h | 21
-rw-r--r--  llvm/include/llvm/Analysis/MemorySSAUpdater.h | 5
-rw-r--r--  llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/ObjCARCUtil.h | 59
-rw-r--r--  llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h | 18
-rw-r--r--  llvm/include/llvm/Analysis/OverflowInstAnalysis.h | 45
-rw-r--r--  llvm/include/llvm/Analysis/ProfileSummaryInfo.h | 9
-rw-r--r--  llvm/include/llvm/Analysis/RegionIterator.h | 25
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolution.h | 132
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h | 19
-rw-r--r--  llvm/include/llvm/Analysis/SparsePropagation.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/SyncDependenceAnalysis.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/SyntheticCountsUtils.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/TargetLibraryInfo.def | 6
-rw-r--r--  llvm/include/llvm/Analysis/TargetLibraryInfo.h | 62
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfo.h | 764
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 279
-rw-r--r--  llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h | 8
-rw-r--r--  llvm/include/llvm/Analysis/Utils/Local.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/Utils/TFUtils.h | 71
-rw-r--r--  llvm/include/llvm/Analysis/ValueLattice.h | 10
-rw-r--r--  llvm/include/llvm/Analysis/ValueTracking.h | 52
-rw-r--r--  llvm/include/llvm/Analysis/VecFuncs.def | 633
-rw-r--r--  llvm/include/llvm/Analysis/VectorUtils.h | 30
65 files changed, 2316 insertions, 1267 deletions
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index 9f7461243f35..7fec0feb09d5 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -78,21 +78,64 @@ class Value;
///
/// See docs/AliasAnalysis.html for more information on the specific meanings
/// of these values.
-enum AliasResult : uint8_t {
- /// The two locations do not alias at all.
- ///
- /// This value is arranged to convert to false, while all other values
- /// convert to true. This allows a boolean context to convert the result to
- /// a binary flag indicating whether there is the possibility of aliasing.
- NoAlias = 0,
- /// The two locations may or may not alias. This is the least precise result.
- MayAlias,
- /// The two locations alias, but only due to a partial overlap.
- PartialAlias,
- /// The two locations precisely alias each other.
- MustAlias,
+class AliasResult {
+private:
+ static const int OffsetBits = 23;
+ static const int AliasBits = 8;
+ static_assert(AliasBits + 1 + OffsetBits <= 32,
+ "AliasResult size is intended to be 4 bytes!");
+
+ unsigned int Alias : AliasBits;
+ unsigned int HasOffset : 1;
+ signed int Offset : OffsetBits;
+
+public:
+ enum Kind : uint8_t {
+ /// The two locations do not alias at all.
+ ///
+ /// This value is arranged to convert to false, while all other values
+ /// convert to true. This allows a boolean context to convert the result to
+ /// a binary flag indicating whether there is the possibility of aliasing.
+ NoAlias = 0,
+ /// The two locations may or may not alias. This is the least precise
+ /// result.
+ MayAlias,
+ /// The two locations alias, but only due to a partial overlap.
+ PartialAlias,
+ /// The two locations precisely alias each other.
+ MustAlias,
+ };
+ static_assert(MustAlias < (1 << AliasBits),
+ "Not enough bit field size for the enum!");
+
+ explicit AliasResult() = delete;
+ constexpr AliasResult(const Kind &Alias)
+ : Alias(Alias), HasOffset(false), Offset(0) {}
+
+ operator Kind() const { return static_cast<Kind>(Alias); }
+
+ constexpr bool hasOffset() const { return HasOffset; }
+ constexpr int32_t getOffset() const {
+ assert(HasOffset && "No offset!");
+ return Offset;
+ }
+ void setOffset(int32_t NewOffset) {
+ if (isInt<OffsetBits>(NewOffset)) {
+ HasOffset = true;
+ Offset = NewOffset;
+ }
+ }
+
+ /// Helper for processing AliasResult for swapped memory location pairs.
+ void swap(bool DoSwap = true) {
+ if (DoSwap && hasOffset())
+ setOffset(-getOffset());
+ }
};
+static_assert(sizeof(AliasResult) == 4,
+ "AliasResult size is intended to be 4 bytes!");
+
/// << operator for AliasResult.
raw_ostream &operator<<(raw_ostream &OS, AliasResult AR);
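
A minimal usage sketch of the offset-carrying AliasResult introduced above; only AliasResult's own members come from this header, the helper itself is hypothetical:

    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    // Report that two locations partially overlap, with the second one starting
    // 4 bytes past the first.
    static AliasResult partialAliasAtOffset4() {
      AliasResult AR = AliasResult::PartialAlias;
      AR.setOffset(4);        // silently ignored if the value does not fit in 23 bits
      if (AR.hasOffset())
        (void)AR.getOffset(); // == 4
      AR.swap();              // negate the offset when the location pair is swapped
      return AR;
    }

Callers can keep comparing against the Kind values (for example AR == AliasResult::PartialAlias), because the class converts back to Kind implicitly.
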
@@ -335,6 +378,31 @@ createModRefInfo(const FunctionModRefBehavior FMRB) {
return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef));
}
+/// Reduced version of MemoryLocation that only stores a pointer and size.
+/// Used for caching AATags independent BasicAA results.
+struct AACacheLoc {
+ const Value *Ptr;
+ LocationSize Size;
+};
+
+template <> struct DenseMapInfo<AACacheLoc> {
+ static inline AACacheLoc getEmptyKey() {
+ return {DenseMapInfo<const Value *>::getEmptyKey(),
+ DenseMapInfo<LocationSize>::getEmptyKey()};
+ }
+ static inline AACacheLoc getTombstoneKey() {
+ return {DenseMapInfo<const Value *>::getTombstoneKey(),
+ DenseMapInfo<LocationSize>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const AACacheLoc &Val) {
+ return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^
+ DenseMapInfo<LocationSize>::getHashValue(Val.Size);
+ }
+ static bool isEqual(const AACacheLoc &LHS, const AACacheLoc &RHS) {
+ return LHS.Ptr == RHS.Ptr && LHS.Size == RHS.Size;
+ }
+};
+
/// This class stores info we want to provide to or retain within an alias
/// query. By default, the root query is stateless and starts with a freshly
/// constructed info object. Specific alias analyses can use this query info to
@@ -345,7 +413,7 @@ createModRefInfo(const FunctionModRefBehavior FMRB) {
/// caches used by BasicAA, but can further be extended to fit other AA needs.
class AAQueryInfo {
public:
- using LocPair = std::pair<MemoryLocation, MemoryLocation>;
+ using LocPair = std::pair<AACacheLoc, AACacheLoc>;
struct CacheEntry {
AliasResult Result;
/// Number of times a NoAlias assumption has been used.
@@ -360,6 +428,9 @@ public:
using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>;
IsCapturedCacheT IsCapturedCache;
+ /// Query depth used to distinguish recursive queries.
+ unsigned Depth = 0;
+
/// How many active NoAlias assumption uses there are.
int NumAssumptionUses = 0;
@@ -369,6 +440,15 @@ public:
SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults;
AAQueryInfo() : AliasCache(), IsCapturedCache() {}
+
+ /// Create a new AAQueryInfo based on this one, but with the cache cleared.
+ /// This is used for recursive queries across phis, where cache results may
+ /// not be valid.
+ AAQueryInfo withEmptyCache() {
+ AAQueryInfo NewAAQI;
+ NewAAQI.Depth = Depth;
+ return NewAAQI;
+ }
};
class BatchAAResults;
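
Because the cache key is now AATags-independent, two queries that differ only in their metadata share one cache entry. A sketch of how such a key could be built from two MemoryLocations; AACacheLoc and LocPair come from the hunks above, while the helper and its use for AAQueryInfo's internal alias cache are assumptions:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/MemoryLocation.h"
    using namespace llvm;

    // Hypothetical helper: keep only (pointer, size) per side, dropping the AATags.
    static AAQueryInfo::LocPair makeCacheKey(const MemoryLocation &LocA,
                                             const MemoryLocation &LocB) {
      return AAQueryInfo::LocPair(AACacheLoc{LocA.Ptr, LocA.Size},
                                  AACacheLoc{LocB.Ptr, LocB.Size});
    }

The DenseMapInfo<AACacheLoc> specialization above is what allows AACacheLoc to participate in DenseMap keys.
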
@@ -428,7 +508,7 @@ public:
/// A trivial helper function to check to see if the specified pointers are
/// no-alias.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
- return alias(LocA, LocB) == NoAlias;
+ return alias(LocA, LocB) == AliasResult::NoAlias;
}
/// A convenience wrapper around the \c isNoAlias helper interface.
@@ -446,13 +526,13 @@ public:
/// A trivial helper function to check to see if the specified pointers are
/// must-alias.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
- return alias(LocA, LocB) == MustAlias;
+ return alias(LocA, LocB) == AliasResult::MustAlias;
}
/// A convenience wrapper around the \c isMustAlias helper interface.
bool isMustAlias(const Value *V1, const Value *V2) {
return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) ==
- MustAlias;
+ AliasResult::MustAlias;
}
/// Checks whether the given location points to constant memory, or if
@@ -715,7 +795,11 @@ public:
/// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being
/// set.
ModRefInfo callCapturesBefore(const Instruction *I,
- const MemoryLocation &MemLoc, DominatorTree *DT);
+ const MemoryLocation &MemLoc,
+ DominatorTree *DT) {
+ AAQueryInfo AAQIP;
+ return callCapturesBefore(I, MemLoc, DT, AAQIP);
+ }
/// A convenience wrapper to synthesize a memory location.
ModRefInfo callCapturesBefore(const Instruction *I, const Value *P,
@@ -784,6 +868,9 @@ private:
ModRefInfo getModRefInfo(const Instruction *I,
const Optional<MemoryLocation> &OptLoc,
AAQueryInfo &AAQIP);
+ ModRefInfo callCapturesBefore(const Instruction *I,
+ const MemoryLocation &MemLoc, DominatorTree *DT,
+ AAQueryInfo &AAQIP);
class Concept;
@@ -797,9 +884,6 @@ private:
std::vector<AnalysisKey *> AADeps;
- /// Query depth used to distinguish recursive queries.
- unsigned Depth = 0;
-
friend class BatchAAResults;
};
@@ -841,11 +925,17 @@ public:
return AA.getModRefBehavior(Call);
}
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
- return alias(LocA, LocB) == MustAlias;
+ return alias(LocA, LocB) == AliasResult::MustAlias;
}
bool isMustAlias(const Value *V1, const Value *V2) {
return alias(MemoryLocation(V1, LocationSize::precise(1)),
- MemoryLocation(V2, LocationSize::precise(1))) == MustAlias;
+ MemoryLocation(V2, LocationSize::precise(1))) ==
+ AliasResult::MustAlias;
+ }
+ ModRefInfo callCapturesBefore(const Instruction *I,
+ const MemoryLocation &MemLoc,
+ DominatorTree *DT) {
+ return AA.callCapturesBefore(I, MemLoc, DT, AAQI);
}
};
@@ -1073,7 +1163,7 @@ protected:
public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
AAQueryInfo &AAQI) {
- return MayAlias;
+ return AliasResult::MayAlias;
}
bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h
index b27fd5aa92a7..b66ff395454d 100644
--- a/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -35,20 +35,17 @@
namespace llvm {
class AAResults;
+class AliasResult;
class AliasSetTracker;
-class BasicBlock;
-class LoadInst;
-class Loop;
-class MemorySSA;
class AnyMemSetInst;
class AnyMemTransferInst;
+class BasicBlock;
+class LoadInst;
class raw_ostream;
class StoreInst;
class VAArgInst;
class Value;
-enum AliasResult : uint8_t;
-
class AliasSet : public ilist_node<AliasSet> {
friend class AliasSetTracker;
@@ -235,11 +232,16 @@ public:
void dump() const;
/// Define an iterator for alias sets... this is just a forward iterator.
- class iterator : public std::iterator<std::forward_iterator_tag,
- PointerRec, ptrdiff_t> {
+ class iterator {
PointerRec *CurNode;
public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = PointerRec;
+ using difference_type = std::ptrdiff_t;
+ using pointer = value_type *;
+ using reference = value_type &;
+
explicit iterator(PointerRec *CN = nullptr) : CurNode(CN) {}
bool operator==(const iterator& x) const {
@@ -343,8 +345,6 @@ class AliasSetTracker {
struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {};
AAResults &AA;
- MemorySSA *MSSA = nullptr;
- Loop *L = nullptr;
ilist<AliasSet> AliasSets;
using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *,
@@ -357,8 +357,6 @@ public:
/// Create an empty collection of AliasSets, and use the specified alias
/// analysis object to disambiguate load and store addresses.
explicit AliasSetTracker(AAResults &AA) : AA(AA) {}
- explicit AliasSetTracker(AAResults &AA, MemorySSA *MSSA, Loop *L)
- : AA(AA), MSSA(MSSA), L(L) {}
~AliasSetTracker() { clear(); }
/// These methods are used to add different types of instructions to the alias
@@ -383,7 +381,6 @@ public:
void add(BasicBlock &BB); // Add all instructions in basic block
void add(const AliasSetTracker &AST); // Add alias relations from another AST
void addUnknown(Instruction *I);
- void addAllInstructionsInLoopUsingMSSA();
void clear();
diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
index 4d2884284d67..49c0cd89a4db 100644
--- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h
+++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
@@ -11,11 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEQUERIES_H
-#define LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEQUERIES_H
+#ifndef LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H
+#define LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/ADT/DenseMap.h"
namespace llvm {
@@ -39,12 +40,12 @@ enum AssumeBundleArg {
///
/// Return true iff the queried attribute was found.
/// If ArgVal is set, the argument will be stored to ArgVal.
-bool hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, StringRef AttrName,
+bool hasAttributeInAssume(AssumeInst &Assume, Value *IsOn, StringRef AttrName,
uint64_t *ArgVal = nullptr);
-inline bool hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn,
+inline bool hasAttributeInAssume(AssumeInst &Assume, Value *IsOn,
Attribute::AttrKind Kind,
uint64_t *ArgVal = nullptr) {
- return hasAttributeInAssume(AssumeCI, IsOn,
+ return hasAttributeInAssume(Assume, IsOn,
Attribute::getNameFromAttrKind(Kind), ArgVal);
}
@@ -87,7 +88,7 @@ using RetainedKnowledgeMap =
/// many queries are going to be made on the same llvm.assume.
/// String attributes are not inserted in the map.
/// If the IR changes the map will be outdated.
-void fillMapFromAssume(CallInst &AssumeCI, RetainedKnowledgeMap &Result);
+void fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result);
/// Represent one information held inside an operand bundle of an llvm.assume.
/// AttrKind is the property that holds.
@@ -106,19 +107,28 @@ struct RetainedKnowledge {
ArgValue == Other.ArgValue;
}
bool operator!=(RetainedKnowledge Other) const { return !(*this == Other); }
+ /// This is only intended for use in std::min/std::max between attributes that
+ /// only differ in ArgValue.
+ bool operator<(RetainedKnowledge Other) const {
+ assert(((AttrKind == Other.AttrKind && WasOn == Other.WasOn) ||
+ AttrKind == Attribute::None || Other.AttrKind == Attribute::None) &&
+ "This is only intend for use in min/max to select the best for "
+ "RetainedKnowledge that is otherwise equal");
+ return ArgValue < Other.ArgValue;
+ }
operator bool() const { return AttrKind != Attribute::None; }
static RetainedKnowledge none() { return RetainedKnowledge{}; }
};
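
The new operator< is intended for merging two knowledges that differ only in ArgValue, for example keeping the stronger of two alignment facts. A small sketch under that assumption (the helper is illustrative):

    #include "llvm/Analysis/AssumeBundleQueries.h"
    #include <algorithm>
    using namespace llvm;

    // Both inputs are assumed to describe the same attribute on the same value,
    // e.g. align 8 versus align 16 on one pointer.
    static RetainedKnowledge keepStronger(RetainedKnowledge A, RetainedKnowledge B) {
      return std::max(A, B); // keeps the knowledge with the larger ArgValue
    }
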
/// Retrieve the information held by Assume on the operand at index Idx.
/// Assume should be an llvm.assume and Idx should be in the operand bundle.
-RetainedKnowledge getKnowledgeFromOperandInAssume(CallInst &Assume,
+RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume,
unsigned Idx);
/// Retrieve the information held by the Use U of an llvm.assume. The use should
/// be in the operand bundle.
inline RetainedKnowledge getKnowledgeFromUseInAssume(const Use *U) {
- return getKnowledgeFromOperandInAssume(*cast<CallInst>(U->getUser()),
+ return getKnowledgeFromOperandInAssume(*cast<AssumeInst>(U->getUser()),
U->getOperandNo());
}
@@ -133,7 +143,7 @@ constexpr StringRef IgnoreBundleTag = "ignore";
///
/// the argument to the call of llvm.assume may still be useful even if the
/// function returned true.
-bool isAssumeWithEmptyBundle(CallInst &Assume);
+bool isAssumeWithEmptyBundle(AssumeInst &Assume);
/// Return a valid Knowledge associated to the Use U if its Attribute kind is
/// in AttrKinds.
@@ -159,7 +169,7 @@ RetainedKnowledge getKnowledgeValidInContext(
/// This extracts the Knowledge from an element of an operand bundle.
/// This is mostly for use in the assume builder.
-RetainedKnowledge getKnowledgeFromBundle(CallInst &Assume,
+RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume,
const CallBase::BundleOpInfo &BOI);
} // namespace llvm
diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h
index 0ef63dc68e1c..51d04bd8cf02 100644
--- a/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -26,7 +26,7 @@
namespace llvm {
-class CallInst;
+class AssumeInst;
class Function;
class raw_ostream;
class Value;
@@ -45,7 +45,7 @@ public:
enum : unsigned { ExprResultIdx = std::numeric_limits<unsigned>::max() };
struct ResultElem {
- WeakTrackingVH Assume;
+ WeakVH Assume;
/// contains either ExprResultIdx or the index of the operand bundle
/// containing the knowledge.
@@ -116,15 +116,15 @@ public:
///
/// The call passed in must be an instruction within this function and must
/// not already be in the cache.
- void registerAssumption(CallInst *CI);
+ void registerAssumption(AssumeInst *CI);
/// Remove an \@llvm.assume intrinsic from this function's cache if it has
/// been added to the cache earlier.
- void unregisterAssumption(CallInst *CI);
+ void unregisterAssumption(AssumeInst *CI);
/// Update the cache of values being affected by this assumption (i.e.
/// the values about which this assumption provides information).
- void updateAffectedValues(CallInst *CI);
+ void updateAffectedValues(AssumeInst *CI);
/// Clear the cache of \@llvm.assume intrinsics for a function.
///
diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index 46b8cd1f3a88..991c0cbb642a 100644
--- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -35,7 +35,6 @@ class DataLayout;
class DominatorTree;
class Function;
class GEPOperator;
-class LoopInfo;
class PHINode;
class SelectInst;
class TargetLibraryInfo;
@@ -56,23 +55,20 @@ class BasicAAResult : public AAResultBase<BasicAAResult> {
const TargetLibraryInfo &TLI;
AssumptionCache &AC;
DominatorTree *DT;
- LoopInfo *LI;
PhiValues *PV;
public:
BasicAAResult(const DataLayout &DL, const Function &F,
const TargetLibraryInfo &TLI, AssumptionCache &AC,
- DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
- PhiValues *PV = nullptr)
- : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), PV(PV)
- {}
+ DominatorTree *DT = nullptr, PhiValues *PV = nullptr)
+ : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), PV(PV) {}
BasicAAResult(const BasicAAResult &Arg)
: AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC),
- DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {}
+ DT(Arg.DT), PV(Arg.PV) {}
BasicAAResult(BasicAAResult &&Arg)
: AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI),
- AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {}
+ AC(Arg.AC), DT(Arg.DT), PV(Arg.PV) {}
/// Handle invalidation events in the new pass manager.
bool invalidate(Function &Fn, const PreservedAnalyses &PA,
@@ -120,14 +116,8 @@ private:
// Context instruction to use when querying information about this index.
const Instruction *CxtI;
- bool operator==(const VariableGEPIndex &Other) const {
- return V == Other.V && ZExtBits == Other.ZExtBits &&
- SExtBits == Other.SExtBits && Scale == Other.Scale;
- }
-
- bool operator!=(const VariableGEPIndex &Other) const {
- return !operator==(Other);
- }
+ /// True if all operations in this expression are NSW.
+ bool IsNSW;
void dump() const {
print(dbgs());
@@ -152,6 +142,9 @@ private:
SmallVector<VariableGEPIndex, 4> VarIndices;
// Is GEP index scale compile-time constant.
bool HasCompileTimeConstantScale;
+ // Are all operations inbounds GEPs or non-indexing operations?
+ // (None iff expression doesn't involve any geps)
+ Optional<bool> InBounds;
void dump() const {
print(dbgs());
@@ -159,15 +152,15 @@ private:
}
void print(raw_ostream &OS) const {
OS << "(DecomposedGEP Base=" << Base->getName()
- << ", Offset=" << Offset
- << ", VarIndices=[";
+ << ", Offset=" << Offset
+ << ", VarIndices=[";
for (size_t i = 0; i < VarIndices.size(); i++) {
- if (i != 0)
- OS << ", ";
- VarIndices[i].print(OS);
+ if (i != 0)
+ OS << ", ";
+ VarIndices[i].print(OS);
}
OS << "], HasCompileTimeConstantScale=" << HasCompileTimeConstantScale
- << ")";
+ << ")";
}
};
@@ -190,12 +183,6 @@ private:
/// Tracks instructions visited by pointsToConstantMemory.
SmallPtrSet<const Value *, 16> Visited;
- static const Value *
- GetLinearExpression(const Value *V, APInt &Scale, APInt &Offset,
- unsigned &ZExtBits, unsigned &SExtBits,
- const DataLayout &DL, unsigned Depth, AssumptionCache *AC,
- DominatorTree *DT, bool &NSW, bool &NUW);
-
static DecomposedGEP
DecomposeGEPExpression(const Value *V, const DataLayout &DL,
AssumptionCache *AC, DominatorTree *DT);
@@ -224,29 +211,23 @@ private:
const SmallVectorImpl<VariableGEPIndex> &Src);
AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size,
- const AAMDNodes &V1AAInfo, const Value *V2,
- LocationSize V2Size, const AAMDNodes &V2AAInfo,
+ const Value *V2, LocationSize V2Size,
const Value *UnderlyingV1, const Value *UnderlyingV2,
AAQueryInfo &AAQI);
AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize,
- const AAMDNodes &PNAAInfo, const Value *V2,
- LocationSize V2Size, const AAMDNodes &V2AAInfo,
- AAQueryInfo &AAQI);
+ const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI);
AliasResult aliasSelect(const SelectInst *SI, LocationSize SISize,
- const AAMDNodes &SIAAInfo, const Value *V2,
- LocationSize V2Size, const AAMDNodes &V2AAInfo,
+ const Value *V2, LocationSize V2Size,
AAQueryInfo &AAQI);
AliasResult aliasCheck(const Value *V1, LocationSize V1Size,
- const AAMDNodes &V1AATag, const Value *V2,
- LocationSize V2Size, const AAMDNodes &V2AATag,
+ const Value *V2, LocationSize V2Size,
AAQueryInfo &AAQI);
AliasResult aliasCheckRecursive(const Value *V1, LocationSize V1Size,
- const AAMDNodes &V1AATag, const Value *V2,
- LocationSize V2Size, const AAMDNodes &V2AATag,
+ const Value *V2, LocationSize V2Size,
AAQueryInfo &AAQI, const Value *O1,
const Value *O2);
};
diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index c22787531117..f581b18bff17 100644
--- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -42,15 +42,20 @@
#include <iterator>
#include <limits>
#include <list>
+#include <queue>
#include <string>
+#include <unordered_set>
#include <utility>
#include <vector>
#define DEBUG_TYPE "block-freq"
+namespace llvm {
extern llvm::cl::opt<bool> CheckBFIUnknownBlockQueries;
-namespace llvm {
+extern llvm::cl::opt<bool> UseIterativeBFIInference;
+extern llvm::cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock;
+extern llvm::cl::opt<double> IterativeBFIPrecision;
class BranchProbabilityInfo;
class Function;
@@ -450,12 +455,6 @@ public:
bool addToDist(Distribution &Dist, const LoopData *OuterLoop,
const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
- LoopData &getLoopPackage(const BlockNode &Head) {
- assert(Head.Index < Working.size());
- assert(Working[Head.Index].isLoopHeader());
- return *Working[Head.Index].Loop;
- }
-
/// Analyze irreducible SCCs.
///
/// Separate irreducible SCCs from \c G, which is an explicit graph of \c
@@ -968,6 +967,45 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
return bfi_detail::getBlockName(getBlock(Node));
}
+ /// The current implementation for computing relative block frequencies does
+ /// not correctly handle control-flow graphs containing irreducible loops. To
+ /// resolve the problem, we apply a post-processing step, which iteratively
+ /// updates block frequencies based on the frequencies of their predecessors.
+ /// This corresponds to finding the stationary point of the Markov chain by
+ /// an iterative method aka "PageRank computation".
+ /// The algorithm takes at most O(|E| * IterativeBFIMaxIterations) steps but
+ /// typically converges faster.
+ ///
+ /// Decide whether we want to apply iterative inference for a given function.
+ bool needIterativeInference() const;
+
+ /// Apply an iterative post-processing to infer correct counts for irr loops.
+ void applyIterativeInference();
+
+ using ProbMatrixType = std::vector<std::vector<std::pair<size_t, Scaled64>>>;
+
+ /// Run iterative inference for a probability matrix and initial frequencies.
+ void iterativeInference(const ProbMatrixType &ProbMatrix,
+ std::vector<Scaled64> &Freq) const;
+
+ /// Find all blocks to apply inference on, that is, reachable from the entry
+ /// and backward reachable from exists along edges with positive probability.
+ void findReachableBlocks(std::vector<const BlockT *> &Blocks) const;
+
+ /// Build a matrix of probabilities with transitions (edges) between the
+ /// blocks: ProbMatrix[I] holds pairs (J, P), where Pr[J -> I | J] = P
+ void initTransitionProbabilities(
+ const std::vector<const BlockT *> &Blocks,
+ const DenseMap<const BlockT *, size_t> &BlockIndex,
+ ProbMatrixType &ProbMatrix) const;
+
+#ifndef NDEBUG
+ /// Compute the discrepancy between current block frequencies and the
+ /// probability matrix.
+ Scaled64 discrepancy(const ProbMatrixType &ProbMatrix,
+ const std::vector<Scaled64> &Freq) const;
+#endif
+
public:
BlockFrequencyInfoImpl() = default;
@@ -1094,6 +1132,10 @@ void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F,
computeMassInLoops();
computeMassInFunction();
unwrapLoops();
+ // Apply a post-processing step improving computed frequencies for functions
+ // with irreducible loops.
+ if (needIterativeInference())
+ applyIterativeInference();
finalizeMetrics();
if (CheckBFIUnknownBlockQueries) {
@@ -1314,6 +1356,294 @@ template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() {
llvm_unreachable("unhandled irreducible control flow");
}
+template <class BT>
+bool BlockFrequencyInfoImpl<BT>::needIterativeInference() const {
+ if (!UseIterativeBFIInference)
+ return false;
+ if (!F->getFunction().hasProfileData())
+ return false;
+ // Apply iterative inference only if the function contains irreducible loops;
+ // otherwise, computed block frequencies are reasonably correct.
+ for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) {
+ if (L->isIrreducible())
+ return true;
+ }
+ return false;
+}
+
+template <class BT> void BlockFrequencyInfoImpl<BT>::applyIterativeInference() {
+ // Extract blocks for processing: a block is considered for inference iff it
+ // can be reached from the entry by edges with a positive probability.
+ // Non-processed blocks are assigned the zero frequency and are ignored
+ // in the computation.
+ std::vector<const BlockT *> ReachableBlocks;
+ findReachableBlocks(ReachableBlocks);
+ if (ReachableBlocks.empty())
+ return;
+
+ // The map is used to index successors/predecessors of reachable blocks in
+ // the ReachableBlocks vector.
+ DenseMap<const BlockT *, size_t> BlockIndex;
+ // Extract initial frequencies for the reachable blocks
+ auto Freq = std::vector<Scaled64>(ReachableBlocks.size());
+ Scaled64 SumFreq;
+ for (size_t I = 0; I < ReachableBlocks.size(); I++) {
+ const BlockT *BB = ReachableBlocks[I];
+ BlockIndex[BB] = I;
+ Freq[I] = getFloatingBlockFreq(BB);
+ SumFreq += Freq[I];
+ }
+ assert(!SumFreq.isZero() && "empty initial block frequencies");
+
+ LLVM_DEBUG(dbgs() << "Applying iterative inference for " << F->getName()
+ << " with " << ReachableBlocks.size() << " blocks\n");
+
+ // Normalizing frequencies so they sum up to 1.0
+ for (auto &Value : Freq) {
+ Value /= SumFreq;
+ }
+
+ // Setting up edge probabilities using sparse matrix representation:
+ // ProbMatrix[I] holds a vector of pairs (J, P) where Pr[J -> I | J] = P
+ ProbMatrixType ProbMatrix;
+ initTransitionProbabilities(ReachableBlocks, BlockIndex, ProbMatrix);
+
+ // Run the propagation
+ iterativeInference(ProbMatrix, Freq);
+
+ // Assign computed frequency values
+ for (const BlockT &BB : *F) {
+ auto Node = getNode(&BB);
+ if (!Node.isValid())
+ continue;
+ if (BlockIndex.count(&BB)) {
+ Freqs[Node.Index].Scaled = Freq[BlockIndex[&BB]];
+ } else {
+ Freqs[Node.Index].Scaled = Scaled64::getZero();
+ }
+ }
+}
+
+template <class BT>
+void BlockFrequencyInfoImpl<BT>::iterativeInference(
+ const ProbMatrixType &ProbMatrix, std::vector<Scaled64> &Freq) const {
+ assert(0.0 < IterativeBFIPrecision && IterativeBFIPrecision < 1.0 &&
+ "incorrectly specified precision");
+ // Convert double precision to Scaled64
+ const auto Precision =
+ Scaled64::getInverse(static_cast<uint64_t>(1.0 / IterativeBFIPrecision));
+ const size_t MaxIterations = IterativeBFIMaxIterationsPerBlock * Freq.size();
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << " Initial discrepancy = "
+ << discrepancy(ProbMatrix, Freq).toString() << "\n");
+#endif
+
+ // Successors[I] holds unique successors of the I-th block
+ auto Successors = std::vector<std::vector<size_t>>(Freq.size());
+ for (size_t I = 0; I < Freq.size(); I++) {
+ for (auto &Jump : ProbMatrix[I]) {
+ Successors[Jump.first].push_back(I);
+ }
+ }
+
+ // To speed up computation, we maintain a set of "active" blocks whose
+ // frequencies need to be updated based on the incoming edges.
+ // The set is dynamic and changes after every update. Initially all blocks
+ // with a positive frequency are active.
+ auto IsActive = std::vector<bool>(Freq.size(), false);
+ std::queue<size_t> ActiveSet;
+ for (size_t I = 0; I < Freq.size(); I++) {
+ if (Freq[I] > 0) {
+ ActiveSet.push(I);
+ IsActive[I] = true;
+ }
+ }
+
+ // Iterate over the blocks propagating frequencies
+ size_t It = 0;
+ while (It++ < MaxIterations && !ActiveSet.empty()) {
+ size_t I = ActiveSet.front();
+ ActiveSet.pop();
+ IsActive[I] = false;
+
+ // Compute a new frequency for the block: NewFreq := Freq \times ProbMatrix.
+ // Special care is taken for self-edges: the result is divided by
+ // (1.0 - SelfProb), where SelfProb is the sum of probabilities on the
+ // block's self-edges.
+ Scaled64 NewFreq;
+ Scaled64 OneMinusSelfProb = Scaled64::getOne();
+ for (auto &Jump : ProbMatrix[I]) {
+ if (Jump.first == I) {
+ OneMinusSelfProb -= Jump.second;
+ } else {
+ NewFreq += Freq[Jump.first] * Jump.second;
+ }
+ }
+ if (OneMinusSelfProb != Scaled64::getOne())
+ NewFreq /= OneMinusSelfProb;
+
+ // If the block's frequency has changed enough, then
+ // make sure the block and its successors are in the active set
+ auto Change = Freq[I] >= NewFreq ? Freq[I] - NewFreq : NewFreq - Freq[I];
+ if (Change > Precision) {
+ ActiveSet.push(I);
+ IsActive[I] = true;
+ for (size_t Succ : Successors[I]) {
+ if (!IsActive[Succ]) {
+ ActiveSet.push(Succ);
+ IsActive[Succ] = true;
+ }
+ }
+ }
+
+ // Update the frequency for the block
+ Freq[I] = NewFreq;
+ }
+
+ LLVM_DEBUG(dbgs() << " Completed " << It << " inference iterations"
+ << format(" (%0.0f per block)", double(It) / Freq.size())
+ << "\n");
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << " Final discrepancy = "
+ << discrepancy(ProbMatrix, Freq).toString() << "\n");
+#endif
+}
+
+template <class BT>
+void BlockFrequencyInfoImpl<BT>::findReachableBlocks(
+ std::vector<const BlockT *> &Blocks) const {
+ // Find all blocks to apply inference on, that is, reachable from the entry
+ // along edges with non-zero probabilities
+ std::queue<const BlockT *> Queue;
+ std::unordered_set<const BlockT *> Reachable;
+ const BlockT *Entry = &F->front();
+ Queue.push(Entry);
+ Reachable.insert(Entry);
+ while (!Queue.empty()) {
+ const BlockT *SrcBB = Queue.front();
+ Queue.pop();
+ for (const BlockT *DstBB : children<const BlockT *>(SrcBB)) {
+ auto EP = BPI->getEdgeProbability(SrcBB, DstBB);
+ if (EP.isZero())
+ continue;
+ if (Reachable.find(DstBB) == Reachable.end()) {
+ Queue.push(DstBB);
+ Reachable.insert(DstBB);
+ }
+ }
+ }
+
+ // Find all blocks to apply inference on, that is, backward reachable from
+ // the exit blocks along (backward) edges with non-zero probabilities
+ std::unordered_set<const BlockT *> InverseReachable;
+ for (const BlockT &BB : *F) {
+ // An exit block is a block without any successors
+ bool HasSucc = GraphTraits<const BlockT *>::child_begin(&BB) !=
+ GraphTraits<const BlockT *>::child_end(&BB);
+ if (!HasSucc && Reachable.count(&BB)) {
+ Queue.push(&BB);
+ InverseReachable.insert(&BB);
+ }
+ }
+ while (!Queue.empty()) {
+ const BlockT *SrcBB = Queue.front();
+ Queue.pop();
+ for (const BlockT *DstBB : children<Inverse<const BlockT *>>(SrcBB)) {
+ auto EP = BPI->getEdgeProbability(DstBB, SrcBB);
+ if (EP.isZero())
+ continue;
+ if (InverseReachable.find(DstBB) == InverseReachable.end()) {
+ Queue.push(DstBB);
+ InverseReachable.insert(DstBB);
+ }
+ }
+ }
+
+ // Collect the result
+ Blocks.reserve(F->size());
+ for (const BlockT &BB : *F) {
+ if (Reachable.count(&BB) && InverseReachable.count(&BB)) {
+ Blocks.push_back(&BB);
+ }
+ }
+}
+
+template <class BT>
+void BlockFrequencyInfoImpl<BT>::initTransitionProbabilities(
+ const std::vector<const BlockT *> &Blocks,
+ const DenseMap<const BlockT *, size_t> &BlockIndex,
+ ProbMatrixType &ProbMatrix) const {
+ const size_t NumBlocks = Blocks.size();
+ auto Succs = std::vector<std::vector<std::pair<size_t, Scaled64>>>(NumBlocks);
+ auto SumProb = std::vector<Scaled64>(NumBlocks);
+
+ // Find unique successors and corresponding probabilities for every block
+ for (size_t Src = 0; Src < NumBlocks; Src++) {
+ const BlockT *BB = Blocks[Src];
+ std::unordered_set<const BlockT *> UniqueSuccs;
+ for (const auto SI : children<const BlockT *>(BB)) {
+ // Ignore cold blocks
+ if (BlockIndex.find(SI) == BlockIndex.end())
+ continue;
+ // Ignore parallel edges between BB and SI blocks
+ if (UniqueSuccs.find(SI) != UniqueSuccs.end())
+ continue;
+ UniqueSuccs.insert(SI);
+ // Ignore jumps with zero probability
+ auto EP = BPI->getEdgeProbability(BB, SI);
+ if (EP.isZero())
+ continue;
+
+ auto EdgeProb =
+ Scaled64::getFraction(EP.getNumerator(), EP.getDenominator());
+ size_t Dst = BlockIndex.find(SI)->second;
+ Succs[Src].push_back(std::make_pair(Dst, EdgeProb));
+ SumProb[Src] += EdgeProb;
+ }
+ }
+
+ // Add transitions for every jump with positive branch probability
+ ProbMatrix = ProbMatrixType(NumBlocks);
+ for (size_t Src = 0; Src < NumBlocks; Src++) {
+ // Ignore blocks w/o successors
+ if (Succs[Src].empty())
+ continue;
+
+ assert(!SumProb[Src].isZero() && "Zero sum probability of non-exit block");
+ for (auto &Jump : Succs[Src]) {
+ size_t Dst = Jump.first;
+ Scaled64 Prob = Jump.second;
+ ProbMatrix[Dst].push_back(std::make_pair(Src, Prob / SumProb[Src]));
+ }
+ }
+
+ // Add transitions from sinks to the source
+ size_t EntryIdx = BlockIndex.find(&F->front())->second;
+ for (size_t Src = 0; Src < NumBlocks; Src++) {
+ if (Succs[Src].empty()) {
+ ProbMatrix[EntryIdx].push_back(std::make_pair(Src, Scaled64::getOne()));
+ }
+ }
+}
+
+#ifndef NDEBUG
+template <class BT>
+BlockFrequencyInfoImplBase::Scaled64 BlockFrequencyInfoImpl<BT>::discrepancy(
+ const ProbMatrixType &ProbMatrix, const std::vector<Scaled64> &Freq) const {
+ assert(Freq[0] > 0 && "Incorrectly computed frequency of the entry block");
+ Scaled64 Discrepancy;
+ for (size_t I = 0; I < ProbMatrix.size(); I++) {
+ Scaled64 Sum;
+ for (const auto &Jump : ProbMatrix[I]) {
+ Sum += Freq[Jump.first] * Jump.second;
+ }
+ Discrepancy += Freq[I] >= Sum ? Freq[I] - Sum : Sum - Freq[I];
+ }
+ // Normalizing by the frequency of the entry block
+ return Discrepancy / Freq[0];
+}
+#endif
+
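
At its core, the post-processing above is a power iteration that searches for the stationary distribution of the block-transition matrix. A standalone sketch of that idea using plain doubles; the real implementation instead uses Scaled64 arithmetic, an active-set worklist, the self-edge correction, and the sink-to-entry edges shown above:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // ProbMatrix[I] holds pairs (J, P) meaning Pr[J -> I | J] = P, as above.
    using ProbMatrix = std::vector<std::vector<std::pair<std::size_t, double>>>;

    // Repeatedly recompute every block's frequency from its predecessors until
    // the largest change drops below Eps (a simplified analogue of
    // iterativeInference).
    static void inferFrequencies(const ProbMatrix &M, std::vector<double> &Freq,
                                 double Eps = 1e-12, std::size_t MaxIter = 1000) {
      for (std::size_t It = 0; It < MaxIter; ++It) {
        std::vector<double> Next(Freq.size(), 0.0);
        double MaxChange = 0.0;
        for (std::size_t I = 0; I < M.size(); ++I) {
          for (const auto &Jump : M[I])
            Next[I] += Freq[Jump.first] * Jump.second;
          MaxChange = std::max(MaxChange, std::abs(Next[I] - Freq[I]));
        }
        Freq = Next;
        if (MaxChange < Eps)
          break;
      }
    }
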
/// \note This should be a lambda, but that crashes GCC 4.7.
namespace bfi_detail {
diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 6a286236a80e..e2099eba0f65 100644
--- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -168,12 +168,6 @@ public:
/// as having a relative probability >= 80%.
bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const;
- /// Retrieve the hot successor of a block if one exists.
- ///
- /// Given a basic block, look through its successors and if one exists for
- /// which \see isEdgeHot would return true, return that successor block.
- const BasicBlock *getHotSucc(const BasicBlock *BB) const;
-
/// Print an edge's probability.
///
/// Retrieves an edge's probability similarly to \see getEdgeProbability, but
diff --git a/llvm/include/llvm/Analysis/CFG.h b/llvm/include/llvm/Analysis/CFG.h
index a36ceb484f14..b90258f8efff 100644
--- a/llvm/include/llvm/Analysis/CFG.h
+++ b/llvm/include/llvm/Analysis/CFG.h
@@ -77,21 +77,10 @@ bool isPotentiallyReachable(
/// Determine whether there is a path from From to To within a single function.
/// Returns false only if we can prove that once 'From' has been reached then
/// 'To' can not be executed. Conservatively returns true.
-bool isPotentiallyReachable(const BasicBlock *From, const BasicBlock *To,
- const DominatorTree *DT = nullptr,
- const LoopInfo *LI = nullptr);
-
-/// Determine whether there is at least one path from a block in
-/// 'Worklist' to 'StopBB', returning true if uncertain.
-///
-/// Determine whether there is a path from at least one block in Worklist to
-/// StopBB within a single function. Returns false only if we can prove that
-/// once any block in 'Worklist' has been reached then 'StopBB' can not be
-/// executed. Conservatively returns true.
-bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist,
- BasicBlock *StopBB,
- const DominatorTree *DT = nullptr,
- const LoopInfo *LI = nullptr);
+bool isPotentiallyReachable(
+ const BasicBlock *From, const BasicBlock *To,
+ const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
+ const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
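
A usage sketch for the new ExclusionSet parameter; the helper and its arguments are illustrative:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/Analysis/CFG.h"
    #include "llvm/IR/BasicBlock.h"
    using namespace llvm;

    // Can control flow get from From to To without ever passing through Barrier?
    static bool reachableAvoiding(const BasicBlock *From, const BasicBlock *To,
                                  BasicBlock *Barrier, const DominatorTree *DT,
                                  const LoopInfo *LI) {
      SmallPtrSet<BasicBlock *, 1> ExclusionSet;
      ExclusionSet.insert(Barrier);
      return isPotentiallyReachable(From, To, &ExclusionSet, DT, LI);
    }
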
/// Determine whether there is at least one path from a block in
/// 'Worklist' to 'StopBB' without passing through any blocks in
diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h
index 53700798b6b3..c0cabceb4a54 100644
--- a/llvm/include/llvm/Analysis/CFGPrinter.h
+++ b/llvm/include/llvm/Analysis/CFGPrinter.h
@@ -72,15 +72,15 @@ public:
RawWeights = !!BFI; // Print RawWeights when BFI is available.
}
- const BlockFrequencyInfo *getBFI() { return BFI; }
+ const BlockFrequencyInfo *getBFI() const { return BFI; }
- const BranchProbabilityInfo *getBPI() { return BPI; }
+ const BranchProbabilityInfo *getBPI() const { return BPI; }
- const Function *getFunction() { return this->F; }
+ const Function *getFunction() const { return this->F; }
- uint64_t getMaxFreq() { return MaxFreq; }
+ uint64_t getMaxFreq() const { return MaxFreq; }
- uint64_t getFreq(const BasicBlock *BB) {
+ uint64_t getFreq(const BasicBlock *BB) const {
return BFI->getBlockFreq(BB).getFrequency();
}
@@ -123,7 +123,7 @@ template <>
struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits {
// Cache for is hidden property
- llvm::DenseMap<const BasicBlock *, bool> isHiddenBasicBlock;
+ llvm::DenseMap<const BasicBlock *, bool> isOnDeoptOrUnreachablePath;
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
@@ -296,7 +296,7 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits {
return Attrs;
}
bool isNodeHidden(const BasicBlock *Node, const DOTFuncInfo *CFGInfo);
- void computeHiddenNodes(const Function *F);
+ void computeDeoptOrUnreachablePaths(const Function *F);
};
} // End llvm namespace
diff --git a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index 135321616b7c..ec05b3706ca3 100644
--- a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -73,7 +73,7 @@ public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
AAQueryInfo &AAQI) {
if (LocA.Ptr == LocB.Ptr)
- return MustAlias;
+ return AliasResult::MustAlias;
// Comparisons between global variables and other constants should be
// handled by BasicAA.
@@ -84,7 +84,7 @@ public:
return AAResultBase::alias(LocA, LocB, AAQI);
AliasResult QueryResult = query(LocA, LocB);
- if (QueryResult == MayAlias)
+ if (QueryResult == AliasResult::MayAlias)
return AAResultBase::alias(LocA, LocB, AAQI);
return QueryResult;
diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 985424a74054..e361cccef960 100644
--- a/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -373,12 +373,12 @@ private:
/// templated adaptor.
template <typename CGSCCPassT>
ModuleToPostOrderCGSCCPassAdaptor
-createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) {
+createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass) {
using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT,
PreservedAnalyses, CGSCCAnalysisManager,
LazyCallGraph &, CGSCCUpdateResult &>;
return ModuleToPostOrderCGSCCPassAdaptor(
- std::make_unique<PassModelT>(std::move(Pass)));
+ std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass)));
}
/// A proxy from a \c FunctionAnalysisManager to an \c SCC.
@@ -491,12 +491,12 @@ private:
/// templated adaptor.
template <typename FunctionPassT>
CGSCCToFunctionPassAdaptor
-createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) {
+createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass) {
using PassModelT =
detail::PassModel<Function, FunctionPassT, PreservedAnalyses,
FunctionAnalysisManager>;
return CGSCCToFunctionPassAdaptor(
- std::make_unique<PassModelT>(std::move(Pass)));
+ std::make_unique<PassModelT>(std::forward<FunctionPassT>(Pass)));
}
/// A helper that repeats an SCC pass each time an indirect call is refined to
@@ -536,13 +536,14 @@ private:
/// A function to deduce a function pass type and wrap it in the
/// templated adaptor.
template <typename CGSCCPassT>
-DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT Pass,
+DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT &&Pass,
int MaxIterations) {
using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT,
PreservedAnalyses, CGSCCAnalysisManager,
LazyCallGraph &, CGSCCUpdateResult &>;
- return DevirtSCCRepeatedPass(std::make_unique<PassModelT>(std::move(Pass)),
- MaxIterations);
+ return DevirtSCCRepeatedPass(
+ std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass)),
+ MaxIterations);
}
// Clear out the debug logging macro.
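
The adaptor factories now take the pass by forwarding reference. A generic, standalone sketch (deliberately not the LLVM types themselves) of what std::forward buys over the previous take-by-value-then-move pattern:

    #include <memory>
    #include <string>
    #include <type_traits>
    #include <utility>

    template <typename PassT> struct PassModel {
      explicit PassModel(PassT P) : Pass(std::move(P)) {}
      PassT Pass;
    };

    // An lvalue argument is copied and an rvalue argument is moved, without the
    // factory forcing an extra move through its own by-value parameter.
    template <typename PassT> auto createAdaptor(PassT &&Pass) {
      using ModelT = PassModel<std::decay_t<PassT>>;
      return std::make_unique<ModelT>(std::forward<PassT>(Pass));
    }

    int main() {
      struct MyPass { std::string State; };
      MyPass Reused{std::string(1000, 'x')};
      auto A = createAdaptor(MyPass{Reused.State}); // rvalue: moved into the model
      auto B = createAdaptor(Reused);               // lvalue: copied, Reused stays usable
      return (A && B) ? 0 : 1;
    }
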
diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h
index ef6e66b2b88e..62742fdf9a91 100644
--- a/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -136,7 +136,9 @@ Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &
/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
/// getelementptr constantexpr, return the constant value being addressed by the
/// constant expression, or null if something is funny and we can't decide.
-Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE);
+Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE,
+ Type *Ty,
+ const DataLayout &DL);
/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
/// indices (with an *implied* zero pointer index that is not in the list),
diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index 83c1fb4485fd..d5b8f208172b 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -30,9 +30,6 @@ class ConstraintSystem {
// Eliminate constraints from the system using Fourier–Motzkin elimination.
bool eliminateUsingFM();
- /// Print the constraints in the system, using \p Names as variable names.
- void dump(ArrayRef<std::string> Names) const;
-
/// Print the constraints in the system, using x0...xn as variable names.
void dump() const;
@@ -82,6 +79,9 @@ public:
/// Returns the number of rows in the constraint system.
unsigned size() const { return Constraints.size(); }
+
+ /// Print the constraints in the system, using \p Names as variable names.
+ void dump(ArrayRef<std::string> Names) const;
};
} // namespace llvm
diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h
index e3bef33e55c3..51dd4a738f00 100644
--- a/llvm/include/llvm/Analysis/DDG.h
+++ b/llvm/include/llvm/Analysis/DDG.h
@@ -275,7 +275,7 @@ public:
virtual ~DependenceGraphInfo() {}
/// Return the label that is used to name this graph.
- const StringRef getName() const { return Name; }
+ StringRef getName() const { return Name; }
/// Return the root node of the graph.
NodeType &getRoot() const {
@@ -293,8 +293,8 @@ public:
/// Return a string representing the type of dependence that the dependence
/// analysis identified between the two given nodes. This function assumes
/// that there is a memory dependence between the given two nodes.
- const std::string getDependenceString(const NodeType &Src,
- const NodeType &Dst) const;
+ std::string getDependenceString(const NodeType &Src,
+ const NodeType &Dst) const;
protected:
// Name of the graph.
@@ -470,7 +470,7 @@ bool DependenceGraphInfo<NodeType>::getDependencies(
}
template <typename NodeType>
-const std::string
+std::string
DependenceGraphInfo<NodeType>::getDependenceString(const NodeType &Src,
const NodeType &Dst) const {
std::string Str;
diff --git a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h
index ecf54cd8a680..59737744f576 100644
--- a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -97,7 +97,7 @@ public:
errs() << "Writing '" << Filename << "'...";
- raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF);
std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
std::string Title = GraphName + " for '" + F.getName().str() + "' function";
@@ -160,7 +160,7 @@ public:
errs() << "Writing '" << Filename << "'...";
- raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF);
std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph);
if (!EC)
diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h
index 7a8618a27ce7..5a68fcbebfea 100644
--- a/llvm/include/llvm/Analysis/DemandedBits.h
+++ b/llvm/include/llvm/Analysis/DemandedBits.h
@@ -18,8 +18,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_DEMANDED_BITS_H
-#define LLVM_ANALYSIS_DEMANDED_BITS_H
+#ifndef LLVM_ANALYSIS_DEMANDEDBITS_H
+#define LLVM_ANALYSIS_DEMANDEDBITS_H
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
@@ -53,6 +53,9 @@ public:
/// accepted, but will always produce a mask with all bits set.
APInt getDemandedBits(Instruction *I);
+ /// Return the bits demanded from use U.
+ APInt getDemandedBits(Use *U);
+
/// Return true if, during analysis, I could not be reached.
bool isInstructionDead(Instruction *I);
@@ -146,4 +149,4 @@ FunctionPass *createDemandedBitsWrapperPass();
} // end namespace llvm
-#endif // LLVM_ANALYSIS_DEMANDED_BITS_H
+#endif // LLVM_ANALYSIS_DEMANDEDBITS_H
diff --git a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
index 6f4e1be94164..332829cbc8a9 100644
--- a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
+++ b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
-#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+#ifndef LLVM_ANALYSIS_DEPENDENCEGRAPHBUILDER_H
+#define LLVM_ANALYSIS_DEPENDENCEGRAPHBUILDER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
@@ -200,4 +200,4 @@ protected:
} // namespace llvm
-#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+#endif // LLVM_ANALYSIS_DEPENDENCEGRAPHBUILDER_H
diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
index 2e4ae65d0981..6f759a81fdef 100644
--- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
-#define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
+#ifndef LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
+#define LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/SyncDependenceAnalysis.h"
@@ -34,7 +34,7 @@ class TargetTransformInfo;
/// This analysis propagates divergence in a data-parallel context from sources
/// of divergence to all users. It requires reducible CFGs. All assignments
/// should be in SSA form.
-class DivergenceAnalysis {
+class DivergenceAnalysisImpl {
public:
/// \brief This instance will analyze the whole function \p F or the loop \p
/// RegionLoop.
@@ -43,9 +43,9 @@ public:
/// Otherwise the whole function is analyzed.
/// \param IsLCSSAForm whether the analysis may assume that the IR in the
/// region is in LCSSA form.
- DivergenceAnalysis(const Function &F, const Loop *RegionLoop,
- const DominatorTree &DT, const LoopInfo &LI,
- SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
+ DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop,
+ const DominatorTree &DT, const LoopInfo &LI,
+ SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
/// \brief The loop that defines the analyzed region (if any).
const Loop *getRegionLoop() const { return RegionLoop; }
@@ -82,8 +82,6 @@ public:
/// divergent.
bool isDivergentUse(const Use &U) const;
- void print(raw_ostream &OS, const Module *) const;
-
private:
/// \brief Mark \p Term as divergent and push all Instructions that become
/// divergent as a result on the worklist.
@@ -114,13 +112,6 @@ private:
bool isTemporalDivergent(const BasicBlock &ObservingBlock,
const Value &Val) const;
- /// \brief Whether \p Block is join divergent
- ///
- /// (see markBlockJoinDivergent).
- bool isJoinDivergent(const BasicBlock &Block) const {
- return DivergentJoinBlocks.contains(&Block);
- }
-
private:
const Function &F;
// If regionLoop != nullptr, analysis is only performed within \p RegionLoop.
@@ -142,9 +133,6 @@ private:
// Set of known-uniform values.
DenseSet<const Value *> UniformOverrides;
- // Blocks with joining divergent control from different predecessors.
- DenseSet<const BasicBlock *> DivergentJoinBlocks; // FIXME Deprecated
-
// Detected/marked divergent values.
DenseSet<const Value *> DivergentValues;
@@ -152,28 +140,39 @@ private:
std::vector<const Instruction *> Worklist;
};
-/// \brief Divergence analysis frontend for GPU kernels.
-class GPUDivergenceAnalysis {
- SyncDependenceAnalysis SDA;
- DivergenceAnalysis DA;
+class DivergenceInfo {
+ Function &F;
+
+ // If the function contains an irreducible region, the divergence
+ // analysis can run indefinitely. We set ContainsIrreducible and no
+ // analysis is actually performed on the function. All values in
+ // this function are conservatively reported as divergent instead.
+ bool ContainsIrreducible;
+ std::unique_ptr<SyncDependenceAnalysis> SDA;
+ std::unique_ptr<DivergenceAnalysisImpl> DA;
public:
- /// Runs the divergence analysis on @F, a GPU kernel
- GPUDivergenceAnalysis(Function &F, const DominatorTree &DT,
- const PostDominatorTree &PDT, const LoopInfo &LI,
- const TargetTransformInfo &TTI);
+ DivergenceInfo(Function &F, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI,
+ const TargetTransformInfo &TTI, bool KnownReducible);
/// Whether any divergence was detected.
- bool hasDivergence() const { return DA.hasDetectedDivergence(); }
+ bool hasDivergence() const {
+ return ContainsIrreducible || DA->hasDetectedDivergence();
+ }
/// The GPU kernel this analysis result is for
- const Function &getFunction() const { return DA.getFunction(); }
+ const Function &getFunction() const { return F; }
/// Whether \p V is divergent at its definition.
- bool isDivergent(const Value &V) const;
+ bool isDivergent(const Value &V) const {
+ return ContainsIrreducible || DA->isDivergent(V);
+ }
/// Whether \p U is divergent. Uses of a uniform value can be divergent.
- bool isDivergentUse(const Use &U) const;
+ bool isDivergentUse(const Use &U) const {
+ return ContainsIrreducible || DA->isDivergentUse(U);
+ }
/// Whether \p V is uniform/non-divergent.
bool isUniform(const Value &V) const { return !isDivergent(V); }
@@ -181,11 +180,32 @@ public:
/// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
/// divergent.
bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
+};
+
+/// \brief Divergence analysis frontend for GPU kernels.
+class DivergenceAnalysis : public AnalysisInfoMixin<DivergenceAnalysis> {
+ friend AnalysisInfoMixin<DivergenceAnalysis>;
+
+ static AnalysisKey Key;
+
+public:
+ using Result = DivergenceInfo;
- /// Print all divergent values in the kernel.
- void print(raw_ostream &OS, const Module *) const;
+ /// Runs the divergence analysis on @F, a GPU kernel
+ Result run(Function &F, FunctionAnalysisManager &AM);
};
+/// Printer pass to dump divergence analysis results.
+struct DivergenceAnalysisPrinterPass
+ : public PassInfoMixin<DivergenceAnalysisPrinterPass> {
+ DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+
+private:
+ raw_ostream &OS;
+}; // class DivergenceAnalysisPrinterPass
+
} // namespace llvm
-#endif // LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
+#endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
index a5f96e72ce97..cf07c873b17c 100644
--- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
+++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_FUNCTIONPROPERTIESANALYSIS_H_
-#define LLVM_FUNCTIONPROPERTIESANALYSIS_H_
+#ifndef LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
+#define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
@@ -83,4 +83,4 @@ public:
};
} // namespace llvm
-#endif // LLVM_FUNCTIONPROPERTIESANALYSIS_H_
+#endif // LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 9e97541e542b..b623b9ca58d8 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -654,12 +654,6 @@ public:
IRSimilarityIdentifier()
: Mapper(&InstDataAllocator, &InstDataListAllocator) {}
- /// \param M the module to find similarity in.
- explicit IRSimilarityIdentifier(Module &M)
- : Mapper(&InstDataAllocator, &InstDataListAllocator) {
- findSimilarity(M);
- }
-
private:
/// Map the instructions in the module to unsigned integers, using mapping
/// already present in the Mapper if possible.
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 28546110ba04..82e1b14960bd 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -14,6 +14,7 @@
#define LLVM_ANALYSIS_IVDESCRIPTORS_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -68,29 +69,31 @@ public:
RecurrenceDescriptor() = default;
RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K,
- FastMathFlags FMF, Instruction *UAI, Type *RT,
- bool Signed, SmallPtrSetImpl<Instruction *> &CI)
+ FastMathFlags FMF, Instruction *ExactFP, Type *RT,
+ bool Signed, bool Ordered,
+ SmallPtrSetImpl<Instruction *> &CI)
: StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
- UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
+ ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed),
+ IsOrdered(Ordered) {
CastInsts.insert(CI.begin(), CI.end());
}
/// This POD struct holds information about a potential recurrence operation.
class InstDesc {
public:
- InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)
+ InstDesc(bool IsRecur, Instruction *I, Instruction *ExactFP = nullptr)
: IsRecurrence(IsRecur), PatternLastInst(I),
- RecKind(RecurKind::None), UnsafeAlgebraInst(UAI) {}
+ RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
- InstDesc(Instruction *I, RecurKind K, Instruction *UAI = nullptr)
+ InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
: IsRecurrence(true), PatternLastInst(I), RecKind(K),
- UnsafeAlgebraInst(UAI) {}
+ ExactFPMathInst(ExactFP) {}
bool isRecurrence() const { return IsRecurrence; }
- bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; }
+ bool needsExactFPMath() const { return ExactFPMathInst != nullptr; }
- Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; }
+ Instruction *getExactFPMathInst() const { return ExactFPMathInst; }
RecurKind getRecKind() const { return RecKind; }
@@ -104,8 +107,8 @@ public:
Instruction *PatternLastInst;
// If this is a min/max pattern.
RecurKind RecKind;
- // Recurrence has unsafe algebra.
- Instruction *UnsafeAlgebraInst;
+ // Recurrence does not allow floating-point reassociation.
+ Instruction *ExactFPMathInst;
};
/// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -114,7 +117,7 @@ public:
/// compare instruction to the select instruction and stores this pointer in
/// 'PatternLastInst' member of the returned struct.
static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind,
- InstDesc &Prev, bool HasFunNoNaNAttr);
+ InstDesc &Prev, FastMathFlags FMF);
/// Returns true if instruction I has multiple uses in Insts
static bool hasMultipleUsesOf(Instruction *I,
@@ -136,7 +139,8 @@ public:
static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I);
/// Returns identity corresponding to the RecurrenceKind.
- static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp);
+ static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp,
+ FastMathFlags FMF);
/// Returns the opcode corresponding to the RecurrenceKind.
static unsigned getOpcode(RecurKind Kind);
@@ -146,7 +150,7 @@ public:
/// non-null, the minimal bit width needed to compute the reduction will be
/// computed.
static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop,
- bool HasFunNoNaNAttr,
+ FastMathFlags FMF,
RecurrenceDescriptor &RedDes,
DemandedBits *DB = nullptr,
AssumptionCache *AC = nullptr,
@@ -171,7 +175,7 @@ public:
/// to handle Phi as a first-order recurrence.
static bool
isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- DenseMap<Instruction *, Instruction *> &SinkAfter,
+ MapVector<Instruction *, Instruction *> &SinkAfter,
DominatorTree *DT);
RecurKind getRecurrenceKind() const { return Kind; }
@@ -184,12 +188,12 @@ public:
Instruction *getLoopExitInstr() const { return LoopExitInstr; }
- /// Returns true if the recurrence has unsafe algebra which requires a relaxed
- /// floating-point model.
- bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; }
+ /// Returns true if the recurrence has floating-point math that requires
+ /// precise (ordered) operations.
+ bool hasExactFPMath() const { return ExactFPMathInst != nullptr; }
- /// Returns first unsafe algebra instruction in the PHI node's use-chain.
- Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; }
+ /// Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
+ Instruction *getExactFPMathInst() const { return ExactFPMathInst; }
/// Returns true if the recurrence kind is an integer kind.
static bool isIntegerRecurrenceKind(RecurKind Kind);
@@ -227,6 +231,9 @@ public:
/// Returns true if all source operands of the recurrence are SExtInsts.
bool isSigned() const { return IsSigned; }
+ /// Expose an ordered FP reduction to the instance users.
+ bool isOrdered() const { return IsOrdered; }
+
/// Attempts to find a chain of operations from Phi to LoopExitInst that can
/// be treated as a set of reductions instructions for in-loop reductions.
SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi,
@@ -243,12 +250,16 @@ private:
// The fast-math flags on the recurrent instructions. We propagate these
// fast-math flags into the vectorized FP instructions we generate.
FastMathFlags FMF;
- // First occurrence of unasfe algebra in the PHI's use-chain.
- Instruction *UnsafeAlgebraInst = nullptr;
+ // First instance of non-reassociative floating-point in the PHI's use-chain.
+ Instruction *ExactFPMathInst = nullptr;
// The type of the recurrence.
Type *RecurrenceType = nullptr;
// True if all source operands of the recurrence are SExtInsts.
bool IsSigned = false;
+ // True if this recurrence can be treated as an in-order reduction.
+ // Currently only a non-reassociative FAdd can be considered in-order,
+ // if it is also the only FAdd in the PHI's use chain.
+ bool IsOrdered = false;
// Instructions used for type-promoting the recurrence.
SmallPtrSet<Instruction *, 8> CastInsts;
};
@@ -302,23 +313,14 @@ public:
PredicatedScalarEvolution &PSE,
InductionDescriptor &D, bool Assume = false);
- /// Returns true if the induction type is FP and the binary operator does
- /// not have the "fast-math" property. Such operation requires a relaxed FP
- /// mode.
- bool hasUnsafeAlgebra() {
- return (IK == IK_FpInduction) && InductionBinOp &&
- !cast<FPMathOperator>(InductionBinOp)->isFast();
- }
-
- /// Returns induction operator that does not have "fast-math" property
- /// and requires FP unsafe mode.
- Instruction *getUnsafeAlgebraInst() {
- if (IK != IK_FpInduction)
- return nullptr;
-
- if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast())
- return nullptr;
- return InductionBinOp;
+ /// Returns floating-point induction operator that does not allow
+ /// reassociation (transforming the induction requires an override of normal
+ /// floating-point rules).
+ Instruction *getExactFPMathInst() {
+ if (IK == IK_FpInduction && InductionBinOp &&
+ !InductionBinOp->hasAllowReassoc())
+ return InductionBinOp;
+ return nullptr;
}
/// Returns binary opcode of the induction operator.
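
The renaming above shifts the emphasis from "unsafe algebra" to "exact FP math": a non-null ExactFPMathInst now means the reduction's floating-point operations may not be reassociated, and the new IsOrdered flag records whether the reduction can still be emitted strictly in order. A hedged sketch of a legality check a client might build on that; the policy and the function name are illustrative, not taken from this patch.

#include "llvm/Analysis/IVDescriptors.h"

using namespace llvm;

// Illustrative policy: an FP reduction that requires exact FP math can only
// be handled if an in-order (sequential) reduction is possible.
static bool canHandleFPReduction(const RecurrenceDescriptor &RD) {
  if (!RD.hasExactFPMath())
    return true;         // reassociation allowed; a normal vector reduction is fine
  return RD.isOrdered(); // otherwise fall back to a strictly in-order reduction
}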
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index c39fae13d3b8..c27aaf0db8f2 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -6,13 +6,13 @@
//
//===----------------------------------------------------------------------===//
//
-#ifndef LLVM_INLINEADVISOR_H_
-#define LLVM_INLINEADVISOR_H_
+#ifndef LLVM_ANALYSIS_INLINEADVISOR_H
+#define LLVM_ANALYSIS_INLINEADVISOR_H
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include <memory>
#include <unordered_set>
@@ -36,11 +36,7 @@ class OptimizationRemarkEmitter;
/// requires the full C Tensorflow API library, and evaluates models
/// dynamically. This mode also permits generating training logs, for offline
/// training.
-enum class InliningAdvisorMode : int {
- Default,
- Release,
- Development
-};
+enum class InliningAdvisorMode : int { Default, Release, Development };
class InlineAdvisor;
/// Capture state between an inlining decision having had been made, and
@@ -283,4 +279,4 @@ void setInlineRemark(CallBase &CB, StringRef Message);
/// Utility for extracting the inline cost message to a string.
std::string inlineCostStr(const InlineCost &IC);
} // namespace llvm
-#endif // LLVM_INLINEADVISOR_H_
+#endif // LLVM_ANALYSIS_INLINEADVISOR_H
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index 7f04a8ce8f5f..4e1b28d4633f 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -15,6 +15,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include <cassert>
#include <climits>
@@ -43,7 +44,7 @@ const int OptAggressiveThreshold = 250;
// Various magic constants used to adjust heuristics.
const int InstrCost = 5;
const int IndirectCallThreshold = 100;
-const int CallPenalty = 25;
+const int LoopPenalty = 25;
const int LastCallToStaticBonus = 15000;
const int ColdccPenalty = 2000;
/// Do not inline functions which allocate this many bytes on the stack
@@ -54,6 +55,20 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024;
const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
} // namespace InlineConstants
+// The cost-benefit pair computed by cost-benefit analysis.
+class CostBenefitPair {
+public:
+ CostBenefitPair(APInt Cost, APInt Benefit) : Cost(Cost), Benefit(Benefit) {}
+
+ const APInt &getCost() const { return Cost; }
+
+ const APInt &getBenefit() const { return Benefit; }
+
+private:
+ APInt Cost;
+ APInt Benefit;
+};
+
/// Represents the cost of inlining a function.
///
/// This supports special values for functions which should "always" or
@@ -76,9 +91,14 @@ class InlineCost {
/// Must be set for Always and Never instances.
const char *Reason = nullptr;
+ /// The cost-benefit pair computed by cost-benefit analysis.
+ Optional<CostBenefitPair> CostBenefit = None;
+
// Trivial constructor, interesting logic in the factory functions below.
- InlineCost(int Cost, int Threshold, const char *Reason = nullptr)
- : Cost(Cost), Threshold(Threshold), Reason(Reason) {
+ InlineCost(int Cost, int Threshold, const char *Reason = nullptr,
+ Optional<CostBenefitPair> CostBenefit = None)
+ : Cost(Cost), Threshold(Threshold), Reason(Reason),
+ CostBenefit(CostBenefit) {
assert((isVariable() || Reason) &&
"Reason must be provided for Never or Always");
}
@@ -89,11 +109,13 @@ public:
assert(Cost < NeverInlineCost && "Cost crosses sentinel value");
return InlineCost(Cost, Threshold);
}
- static InlineCost getAlways(const char *Reason) {
- return InlineCost(AlwaysInlineCost, 0, Reason);
+ static InlineCost getAlways(const char *Reason,
+ Optional<CostBenefitPair> CostBenefit = None) {
+ return InlineCost(AlwaysInlineCost, 0, Reason, CostBenefit);
}
- static InlineCost getNever(const char *Reason) {
- return InlineCost(NeverInlineCost, 0, Reason);
+ static InlineCost getNever(const char *Reason,
+ Optional<CostBenefitPair> CostBenefit = None) {
+ return InlineCost(NeverInlineCost, 0, Reason, CostBenefit);
}
/// Test whether the inline cost is low enough for inlining.
@@ -116,6 +138,9 @@ public:
return Threshold;
}
+ /// Get the cost-benefit pair which was computed by cost-benefit analysis
+ Optional<CostBenefitPair> getCostBenefit() const { return CostBenefit; }
+
/// Get the reason of Always or Never.
const char *getReason() const {
assert((Reason || isVariable()) &&
@@ -270,6 +295,15 @@ Optional<int> getInliningCostEstimate(
ProfileSummaryInfo *PSI = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
+/// Get the expanded cost features. The features are returned unconditionally,
+/// even if inlining is impossible.
+Optional<InlineCostFeatures> getInliningCostFeatures(
+ CallBase &Call, TargetTransformInfo &CalleeTTI,
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
+ ProfileSummaryInfo *PSI = nullptr,
+ OptimizationRemarkEmitter *ORE = nullptr);
+
/// Minimal filter to detect invalid constructs for inlining.
InlineResult isInlineViable(Function &Callee);
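
A short sketch of how the new CostBenefitPair plumbing above can be used when forming an inlining decision; the bit widths, numeric values, and reason string are made up for illustration.

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/InlineCost.h"
#include <cassert>

using namespace llvm;

static InlineCost makeProfitableDecision() {
  // Hypothetical numbers: cost-benefit analysis decided the benefit clearly
  // outweighs the cost, so force inlining and attach the pair for reporting.
  CostBenefitPair CB(/*Cost=*/APInt(128, 500), /*Benefit=*/APInt(128, 12000));
  InlineCost IC = InlineCost::getAlways("benefit over cost", CB);
  assert(IC.isAlways() && IC.getCostBenefit().hasValue());
  return IC;
}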
diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
index 8da442cc4a53..1afa8a825f15 100644
--- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
+++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
@@ -16,6 +16,61 @@
namespace llvm {
+// List of cost features. A "cost" feature is a summand of the heuristic-based
+// inline cost, and we define them separately to preserve the original heuristic
+// behavior.
+#define INLINE_COST_FEATURE_ITERATOR(M) \
+ M(SROASavings, "sroa_savings") \
+ M(SROALosses, "sroa_losses") \
+ M(LoadElimination, "load_elimination") \
+ M(CallPenalty, "call_penalty") \
+ M(CallArgumentSetup, "call_argument_setup") \
+ M(LoadRelativeIntrinsic, "load_relative_intrinsic") \
+ M(LoweredCallArgSetup, "lowered_call_arg_setup") \
+ M(IndirectCallPenalty, "indirect_call_penalty") \
+ M(JumpTablePenalty, "jump_table_penalty") \
+ M(CaseClusterPenalty, "case_cluster_penalty") \
+ M(SwitchPenalty, "switch_penalty") \
+ M(UnsimplifiedCommonInstructions, "unsimplified_common_instructions") \
+ M(NumLoops, "num_loops") \
+ M(DeadBlocks, "dead_blocks") \
+ M(SimplifiedInstructions, "simplified_instructions") \
+ M(ConstantArgs, "constant_args") \
+ M(ConstantOffsetPtrArgs, "constant_offset_ptr_args") \
+ M(CallSiteCost, "callsite_cost") \
+ M(ColdCcPenalty, "cold_cc_penalty") \
+ M(LastCallToStaticBonus, "last_call_to_static_bonus") \
+ M(IsMultipleBlocks, "is_multiple_blocks") \
+ M(NestedInlines, "nested_inlines") \
+ M(NestedInlineCostEstimate, "nested_inline_cost_estimate") \
+ M(Threshold, "threshold")
+
+// clang-format off
+enum class InlineCostFeatureIndex : size_t {
+#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME,
+ INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES)
+#undef POPULATE_INDICES
+
+ NumberOfFeatures
+};
+// clang-format on
+
+using InlineCostFeatures =
+ std::array<int,
+ static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures)>;
+
+constexpr bool isHeuristicInlineCostFeature(InlineCostFeatureIndex Feature) {
+ return Feature != InlineCostFeatureIndex::SROASavings &&
+ Feature != InlineCostFeatureIndex::IsMultipleBlocks &&
+ Feature != InlineCostFeatureIndex::DeadBlocks &&
+ Feature != InlineCostFeatureIndex::SimplifiedInstructions &&
+ Feature != InlineCostFeatureIndex::ConstantArgs &&
+ Feature != InlineCostFeatureIndex::ConstantOffsetPtrArgs &&
+ Feature != InlineCostFeatureIndex::NestedInlines &&
+ Feature != InlineCostFeatureIndex::NestedInlineCostEstimate &&
+ Feature != InlineCostFeatureIndex::Threshold;
+}
+
// List of features. Each feature is defined through a triple:
// - the name of an enum member, which will be the feature index
// - a textual name, used for Tensorflow model binding (so it needs to match the
@@ -34,11 +89,10 @@ namespace llvm {
M(NrCtantParams, "nr_ctant_params", \
"number of parameters in the call site that are constants") \
M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)") \
- M(EdgeCount, "edge_count", \
+ M(EdgeCount, "edge_count", "total number of calls in the module") \
+ M(CallerUsers, "caller_users", \
"number of module-internal users of the caller, +1 if the caller is " \
"exposed externally") \
- M(CallerUsers, "caller_users", \
- "number of blocks reached from a conditional instruction, in the caller") \
M(CallerConditionallyExecutedBlocks, "caller_conditionally_executed_blocks", \
"number of blocks reached from a conditional instruction, in the caller") \
M(CallerBasicBlockCount, "caller_basic_block_count", \
@@ -46,14 +100,29 @@ namespace llvm {
M(CalleeConditionallyExecutedBlocks, "callee_conditionally_executed_blocks", \
"number of blocks reached from a conditional instruction, in the callee") \
M(CalleeUsers, "callee_users", \
- "number of blocks reached from a conditional instruction, in the callee")
+ "number of module-internal users of the callee, +1 if the callee is " \
+ "exposed externally")
+// clang-format off
enum class FeatureIndex : size_t {
+// InlineCost features - these must come first
+#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME,
+ INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES)
+#undef POPULATE_INDICES
+
+// Non-cost features
#define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME,
INLINE_FEATURE_ITERATOR(POPULATE_INDICES)
#undef POPULATE_INDICES
- NumberOfFeatures
+
+ NumberOfFeatures
};
+// clang-format on
+
+constexpr FeatureIndex
+inlineCostFeatureToMlFeature(InlineCostFeatureIndex Feature) {
+ return static_cast<FeatureIndex>(static_cast<size_t>(Feature));
+}
constexpr size_t NumberOfFeatures =
static_cast<size_t>(FeatureIndex::NumberOfFeatures);
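
The X-macro and the constexpr mapping above are meant to be expanded by clients; a small sketch of both uses follows. The array name and the static_assert are illustrative and not part of the header.

#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include <array>
#include <cstddef>

using namespace llvm;

constexpr size_t NumCostFeatures =
    static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures);

// Expands to {"sroa_savings", "sroa_losses", ..., "threshold"}.
static const std::array<const char *, NumCostFeatures> CostFeatureNames = {
#define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
    INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};

// Cost features occupy the leading FeatureIndex slots, so the mapping is a
// plain cast, as inlineCostFeatureToMlFeature shows.
static_assert(inlineCostFeatureToMlFeature(InlineCostFeatureIndex::SROASavings) ==
                  static_cast<FeatureIndex>(0),
              "cost features must come first");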
diff --git a/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h b/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h
index 46bc974c4a7f..192630e62a54 100644
--- a/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h
+++ b/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h
@@ -80,6 +80,11 @@ public:
/// It makes all necessary updates to internal caches to keep them consistent.
void removeInstruction(const Instruction *Inst);
+ /// Notifies this tracking that we are going to replace all uses of \p Inst.
+ /// It makes all necessary updates to internal caches to keep them consistent.
+ /// Should typically be called before a RAUW.
+ void removeUsersOf(const Instruction *Inst);
+
/// Invalidates all information from this tracking.
void clear();
};
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index 17d6f30a35cb..efaf1847276b 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -37,6 +37,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
namespace llvm {
@@ -133,7 +134,9 @@ struct SimplifyQuery {
bool isUndefValue(Value *V) const {
if (!CanUseUndef)
return false;
- return isa<UndefValue>(V);
+
+ using namespace PatternMatch;
+ return match(V, m_Undef());
}
};
@@ -142,8 +145,7 @@ struct SimplifyQuery {
// Please use the SimplifyQuery versions in new code.
/// Given operand for an FNeg, fold the result or return null.
-Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF,
- const SimplifyQuery &Q);
+Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q);
/// Given operands for an Add, fold the result or return null.
Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
@@ -154,23 +156,34 @@ Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
const SimplifyQuery &Q);
/// Given operands for an FAdd, fold the result or return null.
-Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
- const SimplifyQuery &Q);
+Value *
+SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const SimplifyQuery &Q,
+ fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
+ RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for an FSub, fold the result or return null.
-Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
- const SimplifyQuery &Q);
+Value *
+SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const SimplifyQuery &Q,
+ fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
+ RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for an FMul, fold the result or return null.
-Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
- const SimplifyQuery &Q);
+Value *
+SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const SimplifyQuery &Q,
+ fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
+ RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for the multiplication of a FMA, fold the result or return
/// null. In contrast to SimplifyFMulInst, this function will not perform
/// simplifications whose unrounded results differ when rounded to the argument
/// type.
Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF,
- const SimplifyQuery &Q);
+ const SimplifyQuery &Q,
+ fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
+ RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for a Mul, fold the result or return null.
Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
@@ -182,8 +195,11 @@ Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for an FDiv, fold the result or return null.
-Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF,
- const SimplifyQuery &Q);
+Value *
+SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const SimplifyQuery &Q,
+ fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
+ RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for an SRem, fold the result or return null.
Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
@@ -192,8 +208,11 @@ Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for an FRem, fold the result or return null.
-Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF,
- const SimplifyQuery &Q);
+Value *
+SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const SimplifyQuery &Q,
+ fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
+ RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for a Shl, fold the result or return null.
Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
@@ -277,8 +296,8 @@ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// Given operands for a BinaryOperator, fold the result or return null.
/// Try to use FastMathFlags when folding the result.
-Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- FastMathFlags FMF, const SimplifyQuery &Q);
+Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF,
+ const SimplifyQuery &Q);
/// Given a callsite, fold the result or return null.
Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q);
@@ -292,11 +311,18 @@ Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q);
Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
OptimizationRemarkEmitter *ORE = nullptr);
+/// Like \p SimplifyInstruction but the operands of \p I are replaced with
+/// \p NewOps. Returns a simplified value, or null if none was found.
+Value *
+SimplifyInstructionWithOperands(Instruction *I, ArrayRef<Value *> NewOps,
+ const SimplifyQuery &Q,
+ OptimizationRemarkEmitter *ORE = nullptr);
+
/// See if V simplifies when its operand Op is replaced with RepOp. If not,
/// return null.
-/// AllowRefinement specifies whether the simplification can be a refinement,
-/// or whether it needs to be strictly identical.
-Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+/// AllowRefinement specifies whether the simplification can be a refinement
+/// (e.g. 0 instead of poison), or whether it needs to be strictly identical.
+Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const SimplifyQuery &Q, bool AllowRefinement);
/// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
@@ -325,4 +351,3 @@ const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &,
} // end namespace llvm
#endif
-
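
The FP simplification entry points above gain defaulted exception-behavior and rounding parameters, so existing callers keep their behavior while constrained-FP callers can pass the actual semantics. A hedged usage sketch; the wrapper and its Constrained flag are illustrative.

#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/FPEnv.h"

using namespace llvm;

static Value *trySimplifyFAdd(Value *LHS, Value *RHS, FastMathFlags FMF,
                              const SimplifyQuery &Q, bool Constrained) {
  if (!Constrained)
    // Defaults (fp::ebIgnore, NearestTiesToEven) match the old behavior.
    return SimplifyFAddInst(LHS, RHS, FMF, Q);
  // For a constrained fadd, forbid assumptions about traps and rounding.
  return SimplifyFAddInst(LHS, RHS, FMF, Q, fp::ebStrict,
                          RoundingMode::Dynamic);
}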
diff --git a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
index 8166b52aa226..542a741ee07e 100644
--- a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_IDF_H
-#define LLVM_ANALYSIS_IDF_H
+#ifndef LLVM_ANALYSIS_ITERATEDDOMINANCEFRONTIER_H
+#define LLVM_ANALYSIS_ITERATEDDOMINANCEFRONTIER_H
#include "llvm/Support/CFGDiff.h"
#include "llvm/Support/GenericIteratedDominanceFrontier.h"
diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h
index f7a5adac2b43..ca276d2f3cf8 100644
--- a/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -60,6 +60,7 @@
namespace llvm {
+template <class GraphType> struct GraphTraits;
class Module;
class Value;
@@ -115,8 +116,6 @@ public:
class EdgeSequence;
class SCC;
class RefSCC;
- class edge_iterator;
- class call_edge_iterator;
/// A class used to represent edges in the call graph.
///
@@ -464,7 +463,7 @@ public:
/// Dump a short description of this SCC to stderr.
void dump() const;
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
/// Verify invariants about the SCC.
///
/// This will attempt to validate all of the basic invariants within an
@@ -585,7 +584,7 @@ public:
/// Dump a short description of this RefSCC to stderr.
void dump() const;
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
/// Verify invariants about the RefSCC and all its SCCs.
///
/// This will attempt to validate all of the invariants *within* the
diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h
index 363cb49af382..57f732cc854b 100644
--- a/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -75,7 +75,15 @@ public:
/// \p Pred is a CmpInst predicate. If \p UseBlockValue is true, the block
/// value is also taken into account.
Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C,
- Instruction *CxtI, bool UseBlockValue = false);
+ Instruction *CxtI, bool UseBlockValue);
+
+ /// Determine whether the specified value comparison is known to be true
+ /// or false at the specified instruction. While this takes two Value's,
+ /// it still requires that one of them is a constant.
+ /// \p Pred is a CmpInst predicate.
+ /// If \p UseBlockValue is true, the block value is also taken into account.
+ Tristate getPredicateAt(unsigned Pred, Value *LHS, Value *RHS,
+ Instruction *CxtI, bool UseBlockValue);
/// Determine whether the specified value is known to be a constant at the
/// specified instruction. Return null if not.
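
A brief sketch of the new two-operand getPredicateAt overload above; note that LVI still expects one side to be a constant, and the helper name here is made up.

#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

static bool isKnownNonEqualAt(LazyValueInfo &LVI, Value *V, Constant *C,
                              Instruction *CxtI) {
  // UseBlockValue no longer has a default, so callers state it explicitly.
  return LVI.getPredicateAt(CmpInst::ICMP_NE, V, C, CxtI,
                            /*UseBlockValue=*/true) == LazyValueInfo::True;
}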
diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
index 15400f5e07ff..0132c88077d2 100644
--- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -12,16 +12,16 @@
// better decisions.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
-#define LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
+#ifndef LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
+#define LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
#include "llvm/ADT/DenseSet.h"
#include "llvm/Pass.h"
#include <memory>
namespace llvm {
+class DivergenceInfo;
class Function;
-class GPUDivergenceAnalysis;
class Module;
class raw_ostream;
class TargetTransformInfo;
@@ -63,7 +63,7 @@ private:
const TargetTransformInfo &TTI) const;
// (optional) handle to new DivergenceAnalysis
- std::unique_ptr<GPUDivergenceAnalysis> gpuDA;
+ std::unique_ptr<DivergenceInfo> gpuDA;
// Stores all divergent values.
DenseSet<const Value *> DivergentValues;
@@ -73,4 +73,4 @@ private:
};
} // End llvm namespace
-#endif //LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
+#endif // LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 24a05610e68d..ced1943b81d9 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -25,7 +25,9 @@ class Instruction;
class LoadInst;
class Loop;
class MDNode;
+class MemoryLocation;
class ScalarEvolution;
+class TargetLibraryInfo;
/// Return true if this is always a dereferenceable pointer. If the context
/// instruction is specified perform context-sensitive analysis and return true
@@ -33,7 +35,8 @@ class ScalarEvolution;
bool isDereferenceablePointer(const Value *V, Type *Ty,
const DataLayout &DL,
const Instruction *CtxI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ const TargetLibraryInfo *TLI = nullptr);
/// Returns true if V is always a dereferenceable pointer with alignment
/// greater or equal than requested. If the context instruction is specified
@@ -43,7 +46,8 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
MaybeAlign Alignment,
const DataLayout &DL,
const Instruction *CtxI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ const TargetLibraryInfo *TLI = nullptr);
/// Returns true if V is always dereferenceable for Size byte with alignment
/// greater or equal than requested. If the context instruction is specified
@@ -52,7 +56,8 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
const APInt &Size, const DataLayout &DL,
const Instruction *CtxI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ const TargetLibraryInfo *TLI = nullptr);
/// Return true if we know that executing a load from this value cannot trap.
///
@@ -65,7 +70,8 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ const TargetLibraryInfo *TLI = nullptr);
/// Return true if we can prove that the given load (which is assumed to be
/// within the specified loop) would access only dereferenceable memory, and
@@ -89,7 +95,8 @@ bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
bool isSafeToLoadUnconditionally(Value *V, Type *Ty, Align Alignment,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ const TargetLibraryInfo *TLI = nullptr);
/// The default number of maximum instructions to scan in the block, used by
/// FindAvailableLoadedValue().
@@ -127,6 +134,13 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
bool *IsLoadCSE = nullptr,
unsigned *NumScanedInst = nullptr);
+/// This overload provides a more efficient implementation of
+/// FindAvailableLoadedValue() for the case where we are not interested in
+/// finding the closest clobbering instruction if no available load is found.
+/// This overload cannot be used to scan across multiple blocks.
+Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
+ unsigned MaxInstsToScan = DefMaxInstsToScan);
+
/// Scan backwards to see if we have the value of the given pointer available
/// locally within a small number of instructions.
///
@@ -134,7 +148,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
/// this function, if ScanFrom points at the beginning of the block, it's safe
/// to continue scanning the predecessors.
///
-/// \param Ptr The pointer we want the load and store to originate from.
+/// \param Loc The location we want the load and store to originate from.
/// \param AccessTy The access type of the pointer.
/// \param AtLeastAtomic Are we looking for at-least an atomic load/store ? In
/// case it is false, we can return an atomic or non-atomic load or store. In
@@ -150,8 +164,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
/// location in memory, as opposed to the value operand of a store.
///
/// \returns The found value, or nullptr if no value is found.
-Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic,
- BasicBlock *ScanBB,
+Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy,
+ bool AtLeastAtomic, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan, AAResults *AA,
bool *IsLoadCSE, unsigned *NumScanedInst);
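
The new FindAvailableLoadedValue overload above is the cheap, single-block form; a hedged sketch of a caller (the function name is illustrative):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static Value *tryForwardLoad(LoadInst *LI, AAResults &AA) {
  bool IsLoadCSE = false;
  // Scans backwards only within LI's block; no clobber tracking is done.
  if (Value *V = FindAvailableLoadedValue(LI, AA, &IsLoadCSE))
    return V;
  return nullptr;
}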
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 13fbe884eddf..0a0ef1536caf 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -341,17 +341,21 @@ struct RuntimeCheckingPtrGroup {
/// pointer, with index \p Index in RtCheck.
RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck);
+ RuntimeCheckingPtrGroup(unsigned Index, const SCEV *Start, const SCEV *End,
+ unsigned AS)
+ : High(End), Low(Start), AddressSpace(AS) {
+ Members.push_back(Index);
+ }
+
/// Tries to add the pointer recorded in RtCheck at index
/// \p Index to this pointer checking group. We can only add a pointer
/// to a checking group if we will still be able to get
/// the upper and lower bounds of the check. Returns true in case
/// of success, false otherwise.
- bool addPointer(unsigned Index);
+ bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
+ bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
+ unsigned AS, ScalarEvolution &SE);
- /// Constitutes the context of this pointer checking group. For each
- /// pointer that is a member of this group we will retain the index
- /// at which it appears in RtCheck.
- RuntimePointerChecking &RtCheck;
/// The SCEV expression which represents the upper bound of all the
/// pointers in this group.
const SCEV *High;
@@ -360,6 +364,8 @@ struct RuntimeCheckingPtrGroup {
const SCEV *Low;
/// Indices of all the pointers that constitute this grouping.
SmallVector<unsigned, 2> Members;
+ /// Address space of the involved pointers.
+ unsigned AddressSpace;
};
/// A memcheck which made up of a pair of grouped pointers.
@@ -679,6 +685,16 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
bool Assume = false, bool ShouldCheckWrap = true);
+/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
+/// compatible and it is possible to calculate the distance between them. This
+/// is a simple API that does not depend on the analysis pass.
+/// \param StrictCheck Ensure that the calculated distance matches the
+/// type-based one after stripping all bitcasts from the provided pointers.
+Optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
+ Value *PtrB, const DataLayout &DL,
+ ScalarEvolution &SE, bool StrictCheck = false,
+ bool CheckType = true);
+
/// Attempt to sort the pointers in \p VL and return the sorted indices
/// in \p SortedIndices, if reordering is required.
///
@@ -689,7 +705,7 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
/// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
/// saves the mask for actual memory accesses in program order in
/// \p SortedIndices as <1,2,0,3>
-bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
+bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
ScalarEvolution &SE,
SmallVectorImpl<unsigned> &SortedIndices);
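
getPointersDiff above is a standalone helper that needs no LoopAccessInfo; here is a sketch of the kind of consecutive-access check a client such as a vectorizer might build on it, assuming the distance is reported in elements. The helper name is illustrative.

#include "llvm/Analysis/LoopAccessAnalysis.h"

using namespace llvm;

static bool areConsecutiveAccesses(Type *ElemTy, Value *PtrA, Value *PtrB,
                                   const DataLayout &DL, ScalarEvolution &SE) {
  Optional<int> Diff =
      getPointersDiff(ElemTy, PtrA, ElemTy, PtrB, DL, SE, /*StrictCheck=*/true);
  // A distance of exactly one element means PtrB immediately follows PtrA.
  return Diff && *Diff == 1;
}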
diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
index 11dbd15c8678..92db1d67fc4e 100644
--- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
@@ -47,7 +47,7 @@ class TargetTransformInfo;
/// The adaptor from a function pass to a loop pass computes these analyses and
/// makes them available to the loop passes "for free". Each loop pass is
-/// expected expected to update these analyses if necessary to ensure they're
+/// expected to update these analyses if necessary to ensure they're
/// valid after it runs.
struct LoopStandardAnalysisResults {
AAResults &AA;
diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
index e8f2205545eb..21882ebd0087 100644
--- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -205,7 +205,7 @@ public:
}
/// Return the estimated ordered loop costs.
- const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
+ ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
private:
/// Calculate the cache footprint of each loop in the nest (when it is
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index a5717bae12c3..164ec50e47bc 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -479,7 +479,8 @@ public:
bool isAnnotatedParallel() const { return false; }
/// Print loop with all the BBs inside it.
- void print(raw_ostream &OS, unsigned Depth = 0, bool Verbose = false) const;
+ void print(raw_ostream &OS, bool Verbose = false, bool PrintNested = true,
+ unsigned Depth = 0) const;
protected:
friend class LoopInfoBase<BlockT, LoopT>;
@@ -588,6 +589,9 @@ public:
///
PHINode *getCanonicalInductionVariable() const;
+ /// Get the latch condition instruction.
+ ICmpInst *getLatchCmpInst() const;
+
/// Obtain the unique incoming and back edge. Return false if they are
/// non-unique or the loop is dead; otherwise, return true.
bool getIncomingAndBackEdge(BasicBlock *&Incoming,
@@ -1199,6 +1203,14 @@ public:
return true;
}
+
+ // Return true if a new use of V added in ExitBB would require an LCSSA PHI
+ // to be inserted at the beginning of the block. Note that V is assumed to
+ // dominate ExitBB, and ExitBB must be the exit block of some loop. The
+ // IR is assumed to be in LCSSA form before the planned insertion.
+ bool wouldBeOutOfLoopUseRequiringLCSSA(const Value *V,
+ const BasicBlock *ExitBB) const;
+
};
// Allow clients to walk the list of nested loops...
@@ -1283,6 +1295,33 @@ MDNode *findOptionMDForLoopID(MDNode *LoopID, StringRef Name);
/// found, return nullptr.
MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name);
+Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
+ StringRef Name);
+
+/// Returns true if Name is applied to TheLoop and enabled.
+bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name);
+
+/// Find named metadata for a loop with an integer value.
+llvm::Optional<int>
+getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name);
+
+/// Find string metadata for loop
+///
+/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
+/// operand or null otherwise. If the string metadata is not found return
+/// Optional's not-a-value.
+Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
+ StringRef Name);
+
+/// Look for the loop attribute that requires progress within the loop.
+/// Note: Most consumers probably want "isMustProgress" which checks
+/// the containing function attribute too.
+bool hasMustProgress(const Loop *L);
+
+/// Return true if this loop can be assumed to make progress. (i.e. can't
+/// be infinite without side effects without also being undefined)
+bool isMustProgress(const Loop *L);
+
/// Return whether an MDNode might represent an access group.
///
/// Access group metadata nodes have to be distinct and empty. Being
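
The loop-metadata helpers promoted into LoopInfo.h above compose naturally; a small sketch using a standard loop attribute ("llvm.loop.unroll.count" is an existing metadata name, the helpers below are illustrative):

#include "llvm/Analysis/LoopInfo.h"

using namespace llvm;

static int requestedUnrollCount(const Loop *L) {
  // {"llvm.loop.unroll.count", 4} -> 4; absent or malformed -> 0.
  return getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count").getValueOr(0);
}

static bool loopMustMakeProgress(const Loop *L) {
  // hasMustProgress only inspects loop metadata; isMustProgress additionally
  // honors the containing function's mustprogress attribute.
  return isMustProgress(L);
}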
diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h
index 426b349c6b8a..2cc9afb7c2cd 100644
--- a/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
@@ -380,8 +381,8 @@ void LoopBase<BlockT, LoopT>::verifyLoopNest(
}
template <class BlockT, class LoopT>
-void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth,
- bool Verbose) const {
+void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, bool Verbose,
+ bool PrintNested, unsigned Depth) const {
OS.indent(Depth * 2);
if (static_cast<const LoopT *>(this)->isAnnotatedParallel())
OS << "Parallel ";
@@ -406,10 +407,13 @@ void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth,
if (Verbose)
BB->print(OS);
}
- OS << "\n";
- for (iterator I = begin(), E = end(); I != E; ++I)
- (*I)->print(OS, Depth + 2);
+ if (PrintNested) {
+ OS << "\n";
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ (*I)->print(OS, /*Verbose*/ false, PrintNested, Depth + 2);
+ }
}
//===----------------------------------------------------------------------===//
@@ -676,10 +680,7 @@ static void compareLoops(const LoopT *L, const LoopT *OtherL,
const SmallPtrSetImpl<const BlockT *> &OtherBlocksSet =
OtherL->getBlocksSet();
assert(BlocksSet.size() == OtherBlocksSet.size() &&
- llvm::all_of(BlocksSet,
- [&OtherBlocksSet](const BlockT *BB) {
- return OtherBlocksSet.count(BB);
- }) &&
+ llvm::set_is_subset(BlocksSet, OtherBlocksSet) &&
"Mismatched basic blocks in BlocksSets!");
}
#endif
diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h
index 9c4fb4dbc29b..9a749a1c8eae 100644
--- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h
@@ -30,7 +30,6 @@ public:
LoopNest(Loop &Root, ScalarEvolution &SE);
LoopNest() = delete;
- LoopNest &operator=(const LoopNest &) = delete;
/// Construct a LoopNest object.
static std::unique_ptr<LoopNest> getLoopNest(Loop &Root, ScalarEvolution &SE);
@@ -61,10 +60,12 @@ public:
static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE);
/// Recursively traverse all empty 'single successor' basic blocks of \p From
- /// (if there are any). Return the last basic block found or \p End if it was
- /// reached during the search.
+ /// (if there are any). When \p CheckUniquePred is set to true, check if
+ /// each of the empty single successors has a unique predecessor. Return
+ /// the last basic block found or \p End if it was reached during the search.
static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From,
- const BasicBlock *End);
+ const BasicBlock *End,
+ bool CheckUniquePred = false);
/// Return the outermost loop in the loop nest.
Loop &getOutermostLoop() const { return *Loops.front(); }
@@ -139,6 +140,11 @@ public:
return all_of(Loops, [](const Loop *L) { return L->isRotatedForm(); });
}
+ /// Return the function to which the loop-nest belongs.
+ Function *getParent() const {
+ return Loops.front()->getHeader()->getParent();
+ }
+
StringRef getName() const { return Loops.front()->getName(); }
protected:
diff --git a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
index 5f332e3cac16..7cf8a081f9a2 100644
--- a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
+++ b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
@@ -46,7 +46,7 @@ class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
public:
UnrolledInstAnalyzer(unsigned Iteration,
- DenseMap<Value *, Constant *> &SimplifiedValues,
+ DenseMap<Value *, Value *> &SimplifiedValues,
ScalarEvolution &SE, const Loop *L)
: SimplifiedValues(SimplifiedValues), SE(SE), L(L) {
IterationNumber = SE.getConstant(APInt(64, Iteration));
@@ -68,22 +68,19 @@ private:
/// iteration.
const SCEV *IterationNumber;
- /// A Value->Constant map for keeping values that we managed to
- /// constant-fold on the given iteration.
- ///
/// While we walk the loop instructions, we build up and maintain a mapping
/// of simplified values specific to this iteration. The idea is to propagate
/// any special information we have about loads that can be replaced with
/// constants after complete unrolling, and account for likely simplifications
/// post-unrolling.
- DenseMap<Value *, Constant *> &SimplifiedValues;
+ DenseMap<Value *, Value *> &SimplifiedValues;
ScalarEvolution &SE;
const Loop *L;
bool simplifyInstWithSCEV(Instruction *I);
- bool visitInstruction(Instruction &I) { return simplifyInstWithSCEV(&I); }
+ bool visitInstruction(Instruction &I);
bool visitBinaryOperator(BinaryOperator &I);
bool visitLoad(LoadInst &I);
bool visitCastInst(CastInst &I);
diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h
index c5428726995e..39ade20df53f 100644
--- a/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -212,6 +212,10 @@ struct ObjectSizeOpts {
/// object size in Size if successful, and false otherwise. In this context, by
/// object we mean the region of memory starting at Ptr to the end of the
/// underlying object pointed to by Ptr.
+///
+/// WARNING: The object size returned is the allocation size. This does not
+/// imply dereferenceability at the site of use since the object may be freed in
+/// between.
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {});
diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index efde00f82d57..cb522cf731d3 100644
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -20,30 +20,18 @@
#include "llvm/ADT/PointerSumType.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cassert>
-#include <cstdint>
-#include <utility>
-#include <vector>
namespace llvm {
class AAResults;
class AssumptionCache;
+class BatchAAResults;
class DominatorTree;
-class Function;
-class Instruction;
-class LoadInst;
class PHITransAddr;
-class TargetLibraryInfo;
class PhiValues;
-class Value;
/// A memory dependence query can return one of three different answers.
class MemDepResult {
@@ -343,7 +331,7 @@ private:
// A map from instructions to their non-local dependencies.
using NonLocalDepMapType = DenseMap<Instruction *, PerInstNLInfo>;
- NonLocalDepMapType NonLocalDeps;
+ NonLocalDepMapType NonLocalDepsMap;
// A reverse mapping from dependencies to the dependees. This is
// used when removing instructions to keep the cache coherent.
@@ -364,6 +352,10 @@ private:
unsigned DefaultBlockScanLimit;
+ /// Offsets to dependent clobber loads.
+ using ClobberOffsetsMapType = DenseMap<LoadInst *, int32_t>;
+ ClobberOffsetsMapType ClobberOffsets;
+
public:
MemoryDependenceResults(AAResults &AA, AssumptionCache &AC,
const TargetLibraryInfo &TLI, DominatorTree &DT,
@@ -452,10 +444,18 @@ public:
Instruction *QueryInst = nullptr,
unsigned *Limit = nullptr);
+ MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad,
+ BasicBlock::iterator ScanIt,
+ BasicBlock *BB,
+ Instruction *QueryInst,
+ unsigned *Limit,
+ BatchAAResults &BatchAA);
+
MemDepResult
getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad,
BasicBlock::iterator ScanIt, BasicBlock *BB,
- Instruction *QueryInst, unsigned *Limit);
+ Instruction *QueryInst, unsigned *Limit,
+ BatchAAResults &BatchAA);
/// This analysis looks for other loads and stores with invariant.group
/// metadata and the same pointer operand. Returns Unknown if it does not
@@ -468,6 +468,14 @@ public:
/// Release memory in caches.
void releaseMemory();
+ /// Return the clobber offset to the dependent instruction.
+ Optional<int32_t> getClobberOffset(LoadInst *DepInst) const {
+ const auto Off = ClobberOffsets.find(DepInst);
+ if (Off != ClobberOffsets.end())
+ return Off->getSecond();
+ return None;
+ }
+
private:
MemDepResult getCallDependencyFrom(CallBase *Call, bool isReadOnlyCall,
BasicBlock::iterator ScanIt,
@@ -480,12 +488,13 @@ private:
DenseMap<BasicBlock *, Value *> &Visited,
bool SkipFirstBlock = false,
bool IsIncomplete = false);
- MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst,
+ MemDepResult getNonLocalInfoForBlock(Instruction *QueryInst,
const MemoryLocation &Loc, bool isLoad,
BasicBlock *BB, NonLocalDepInfo *Cache,
- unsigned NumSortedEntries);
+ unsigned NumSortedEntries,
+ BatchAAResults &BatchAA);
- void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P);
+ void removeCachedNonLocalPointerDependencies(ValueIsLoadPair P);
void verifyRemoved(Instruction *Inst) const;
};
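
A sketch of querying the new clobber-offset cache above once a dependency has been computed; the helper is illustrative and assumes the usual GVN-style pattern of inspecting a clobbering load.

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool clobberHasKnownOffset(MemoryDependenceResults &MD,
                                  MemDepResult Dep) {
  if (!Dep.isClobber())
    return false;
  // Only loads are recorded in the ClobberOffsets map.
  if (auto *DepLoad = dyn_cast<LoadInst>(Dep.getInst()))
    return MD.getClobberOffset(DepLoad).hasValue();
  return false;
}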
diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
index 63c031b1921f..f40b99968fd3 100644
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -288,7 +288,7 @@ protected:
DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB,
unsigned NumOperands)
: MemoryAccess(C, Vty, DeleteValue, BB, NumOperands),
- MemoryInstruction(MI), OptimizedAccessAlias(MayAlias) {
+ MemoryInstruction(MI), OptimizedAccessAlias(AliasResult::MayAlias) {
setDefiningAccess(DMA);
}
@@ -299,8 +299,9 @@ protected:
OptimizedAccessAlias = AR;
}
- void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false,
- Optional<AliasResult> AR = MayAlias) {
+ void setDefiningAccess(
+ MemoryAccess *DMA, bool Optimized = false,
+ Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias)) {
if (!Optimized) {
setOperand(0, DMA);
return;
@@ -328,7 +329,8 @@ public:
/*NumOperands=*/1) {}
// allocate space for exactly one operand
- void *operator new(size_t s) { return User::operator new(s, 1); }
+ void *operator new(size_t S) { return User::operator new(S, 1); }
+ void operator delete(void *Ptr) { User::operator delete(Ptr); }
static bool classof(const Value *MA) {
return MA->getValueID() == MemoryUseVal;
@@ -388,7 +390,8 @@ public:
ID(Ver) {}
// allocate space for exactly two operands
- void *operator new(size_t s) { return User::operator new(s, 2); }
+ void *operator new(size_t S) { return User::operator new(S, 2); }
+ void operator delete(void *Ptr) { User::operator delete(Ptr); }
static bool classof(const Value *MA) {
return MA->getValueID() == MemoryDefVal;
@@ -483,9 +486,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess)
/// issue.
class MemoryPhi final : public MemoryAccess {
// allocate space for exactly zero operands
- void *operator new(size_t s) { return User::operator new(s); }
+ void *operator new(size_t S) { return User::operator new(S); }
public:
+ void operator delete(void *Ptr) { User::operator delete(Ptr); }
+
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
@@ -1099,7 +1104,7 @@ public:
return MP->getIncomingBlock(ArgNo);
}
- typename BaseT::iterator::pointer operator*() const {
+ typename std::iterator_traits<BaseT>::pointer operator*() const {
assert(Access && "Tried to access past the end of our iterator");
// Go to the first argument for phis, and the defining access for everything
// else.
@@ -1195,7 +1200,7 @@ public:
return DefIterator == Other.DefIterator;
}
- BaseT::iterator::reference operator*() const {
+ typename std::iterator_traits<BaseT>::reference operator*() const {
assert(DefIterator != OriginalAccess->defs_end() &&
"Tried to access past the end of our iterator");
return CurrentPair;
diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index b0bf2e5ead62..659e6aff6e28 100644
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -240,11 +240,6 @@ public:
/// successors.
void changeToUnreachable(const Instruction *I);
- /// Conditional branch BI is changed or replaced with an unconditional branch
- /// to `To`. Update Phis in BI's successors to remove BI's BB.
- void changeCondBranchToUnconditionalTo(const BranchInst *BI,
- const BasicBlock *To);
-
/// Get handle on MemorySSA.
MemorySSA* getMemorySSA() const { return MSSA; }
diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
index 16c5f6701da0..62bdade95d96 100644
--- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
+++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -19,8 +19,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H
-#define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H
+#ifndef LLVM_ANALYSIS_OBJCARCANALYSISUTILS_H
+#define LLVM_ANALYSIS_OBJCARCANALYSISUTILS_H
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h
new file mode 100644
index 000000000000..2566bfbcf61c
--- /dev/null
+++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h
@@ -0,0 +1,59 @@
+//===- ObjCARCUtil.h - ObjC ARC Utility Functions ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines ARC utility functions which are used by various parts of
+/// the compiler.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_OBJCARCUTIL_H
+#define LLVM_IR_OBJCARCUTIL_H
+
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/LLVMContext.h"
+
+namespace llvm {
+namespace objcarc {
+
+inline const char *getRVMarkerModuleFlagStr() {
+ return "clang.arc.retainAutoreleasedReturnValueMarker";
+}
+
+enum AttachedCallOperandBundle : unsigned { RVOB_Retain, RVOB_Claim };
+
+inline AttachedCallOperandBundle
+getAttachedCallOperandBundleEnum(bool IsRetain) {
+ return IsRetain ? RVOB_Retain : RVOB_Claim;
+}
+
+inline bool hasAttachedCallOpBundle(const CallBase *CB) {
+ // Ignore the bundle if the return type is void. Global optimization passes
+ // can turn the called function's return type to void. That should happen only
+ // if the call doesn't return and the call to @llvm.objc.clang.arc.noop.use
+ // no longer consumes the function return or is deleted. In that case, it's
+ // not necessary to emit the marker instruction or calls to the ARC runtime
+ // functions.
+ return !CB->getFunctionType()->getReturnType()->isVoidTy() &&
+ CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall)
+ .hasValue();
+}
+
+inline bool hasAttachedCallOpBundle(const CallBase *CB, bool IsRetain) {
+ assert(hasAttachedCallOpBundle(CB) &&
+ "call doesn't have operand bundle clang_arc_attachedcall");
+ auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall);
+ if (!B.hasValue())
+ return false;
+ return cast<ConstantInt>(B->Inputs[0])->getZExtValue() ==
+ getAttachedCallOperandBundleEnum(IsRetain);
+}
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif
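
A minimal usage sketch for the new header above; the helper name is hypothetical.

#include "llvm/Analysis/ObjCARCUtil.h"

using namespace llvm;

static bool callRetainsItsResult(const CallBase *CB) {
  // True only for non-void calls that carry the clang_arc_attachedcall bundle
  // with the Retain marker (as opposed to Claim).
  return objcarc::hasAttachedCallOpBundle(CB) &&
         objcarc::hasAttachedCallOpBundle(CB, /*IsRetain=*/true);
}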
diff --git a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h
index 9815dd05cd1c..ff706e91f3c4 100644
--- a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h
+++ b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h
@@ -11,8 +11,8 @@
// used to compute the "hotness" of the diagnostic message.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
-#define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
+#ifndef LLVM_ANALYSIS_OPTIMIZATIONREMARKEMITTER_H
+#define LLVM_ANALYSIS_OPTIMIZATIONREMARKEMITTER_H
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -61,6 +61,12 @@ public:
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv);
+ /// Return true iff at least *some* remarks are enabled.
+ bool enabled() const {
+ return F->getContext().getLLVMRemarkStreamer() ||
+ F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled();
+ }
+
/// Output the remark via the diagnostic handler and to the
/// optimization record file.
void emit(DiagnosticInfoOptimizationBase &OptDiag);
@@ -73,9 +79,11 @@ public:
// remarks enabled. We can't currently check whether remarks are requested
// for the calling pass since that requires actually building the remark.
- if (F->getContext().getLLVMRemarkStreamer() ||
- F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) {
+ if (enabled()) {
auto R = RemarkBuilder();
+ static_assert(
+ std::is_base_of<DiagnosticInfoOptimizationBase, decltype(R)>::value,
+ "the lambda passed to emit() must return a remark");
emit((DiagnosticInfoOptimizationBase &)R);
}
}
@@ -166,4 +174,4 @@ public:
Result run(Function &F, FunctionAnalysisManager &AM);
};
}
-#endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
+#endif // LLVM_ANALYSIS_OPTIMIZATIONREMARKEMITTER_H
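
A typical call site for the lambda-based emit() overload guarded by the new enabled() check; the pass name, remark name, and helper function are illustrative only:

    #include "llvm/Analysis/OptimizationRemarkEmitter.h"
    #include "llvm/IR/DiagnosticInfo.h"

    static void reportVectorized(llvm::OptimizationRemarkEmitter &ORE,
                                 llvm::Instruction *I) {
      using namespace llvm;
      // The remark is only constructed when enabled() returns true; the new
      // static_assert verifies the lambda really returns a remark type.
      ORE.emit([&]() {
        return OptimizationRemark("my-pass", "Vectorized", I)
               << "operation was vectorized";
      });
    }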
diff --git a/llvm/include/llvm/Analysis/OverflowInstAnalysis.h b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h
new file mode 100644
index 000000000000..7523fb9392cd
--- /dev/null
+++ b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h
@@ -0,0 +1,45 @@
+//===-- OverflowInstAnalysis.h - Utils to fold overflow insts ----*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file holds routines to help analyse overflow instructions
+// and fold them into constants or other overflow instructions
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H
+#define LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H
+
+#include "llvm/IR/InstrTypes.h"
+
+namespace llvm {
+class Value;
+class Use;
+
+/// Match one of the patterns up to the select/logic op:
+/// %Op0 = icmp ne i4 %X, 0
+/// %Agg = call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %Y)
+/// %Op1 = extractvalue { i4, i1 } %Agg, 1
+/// %ret = select i1 %Op0, i1 %Op1, i1 false / %ret = and i1 %Op0, %Op1
+///
+/// %Op0 = icmp eq i4 %X, 0
+/// %Agg = call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %Y)
+/// %NotOp1 = extractvalue { i4, i1 } %Agg, 1
+/// %Op1 = xor i1 %NotOp1, true
+/// %ret = select i1 %Op0, i1 true, i1 %Op1 / %ret = or i1 %Op0, %Op1
+///
+/// Callers are expected to align that with the operands of the select/logic.
+/// IsAnd is set to true if the Op0 and Op1 are used as the first pattern.
+/// If Op0 and Op1 match one of the patterns above, return true and fill Y's
+/// use.
+
+bool isCheckForZeroAndMulWithOverflow(Value *Op0, Value *Op1, bool IsAnd,
+ Use *&Y);
+bool isCheckForZeroAndMulWithOverflow(Value *Op0, Value *Op1, bool IsAnd);
+} // end namespace llvm
+
+#endif
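
A hedged sketch of a caller; Op0/Op1 would be the two i1 operands of the and/or (or select) being simplified, and the wrapper name is illustrative only:

    #include "llvm/Analysis/OverflowInstAnalysis.h"

    static bool matchGuardedMulOverflow(llvm::Value *Op0, llvm::Value *Op1,
                                        bool IsAnd, llvm::Use *&Y) {
      // Returns true only when Op0 is the icmp of X against zero and Op1 is
      // the extracted overflow bit of the same @llvm.[us]mul.with.overflow
      // call; on success Y is set to the use of the multiply's other operand.
      return llvm::isCheckForZeroAndMulWithOverflow(Op0, Op1, IsAnd, Y);
    }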
diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index a4e6ffc3dd58..c95404d96f4e 100644
--- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H
-#define LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H
+#ifndef LLVM_ANALYSIS_PROFILESUMMARYINFO_H
+#define LLVM_ANALYSIS_PROFILESUMMARYINFO_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/PassManager.h"
@@ -38,7 +38,7 @@ class Function;
// units. This would require making this depend on BFI.
class ProfileSummaryInfo {
private:
- const Module &M;
+ const Module *M;
std::unique_ptr<ProfileSummary> Summary;
void computeThresholds();
// Count thresholds to answer isHotCount and isColdCount queries.
@@ -58,8 +58,7 @@ private:
mutable DenseMap<int, uint64_t> ThresholdCache;
public:
- ProfileSummaryInfo(const Module &M) : M(M) { refresh(); }
-
+ ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); }
ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default;
/// If no summary is present, attempt to refresh.
diff --git a/llvm/include/llvm/Analysis/RegionIterator.h b/llvm/include/llvm/Analysis/RegionIterator.h
index 72bc5bbcb506..fecb28725dcc 100644
--- a/llvm/include/llvm/Analysis/RegionIterator.h
+++ b/llvm/include/llvm/Analysis/RegionIterator.h
@@ -35,10 +35,15 @@ class BasicBlock;
///
/// For a subregion RegionNode there is just one successor. The RegionNode
/// representing the exit of the subregion.
-template <class NodeRef, class BlockT, class RegionT>
-class RNSuccIterator
- : public std::iterator<std::forward_iterator_tag, NodeRef> {
- using super = std::iterator<std::forward_iterator_tag, NodeRef>;
+template <class NodeRef, class BlockT, class RegionT> class RNSuccIterator {
+public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = NodeRef;
+ using difference_type = std::ptrdiff_t;
+ using pointer = value_type *;
+ using reference = value_type &;
+
+private:
using BlockTraits = GraphTraits<BlockT *>;
using SuccIterTy = typename BlockTraits::ChildIteratorType;
@@ -99,7 +104,6 @@ class RNSuccIterator
public:
using Self = RNSuccIterator<NodeRef, BlockT, RegionT>;
- using value_type = typename super::value_type;
/// Create begin iterator of a RegionNode.
inline RNSuccIterator(NodeRef node)
@@ -163,9 +167,7 @@ public:
/// are contained in the Region and its subregions. This is close to a virtual
/// control flow graph of the Region.
template <class NodeRef, class BlockT, class RegionT>
-class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>
- : public std::iterator<std::forward_iterator_tag, NodeRef> {
- using super = std::iterator<std::forward_iterator_tag, NodeRef>;
+class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> {
using BlockTraits = GraphTraits<BlockT *>;
using SuccIterTy = typename BlockTraits::ChildIteratorType;
@@ -173,8 +175,13 @@ class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>
SuccIterTy Itor;
public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = NodeRef;
+ using difference_type = std::ptrdiff_t;
+ using pointer = value_type *;
+ using reference = value_type &;
+
using Self = RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>;
- using value_type = typename super::value_type;
/// Create the iterator from a RegionNode.
///
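
The hunks above replace inheritance from std::iterator (deprecated in C++17) with explicit member typedefs. The same idiom in isolation, on a stand-alone iterator that is not LLVM code:

    #include <cstddef>
    #include <iterator>

    class MyFwdIterator {
    public:
      // The five nested types std::iterator used to provide.
      using iterator_category = std::forward_iterator_tag;
      using value_type = int;
      using difference_type = std::ptrdiff_t;
      using pointer = value_type *;
      using reference = value_type &;
      // ... operator*, operator++ and operator== as before ...
    };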
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index b3f199de2cfa..ae9c73fede96 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -505,6 +505,17 @@ public:
/// Erase Value from ValueExprMap and ExprValueMap.
void eraseValueFromMap(Value *V);
+ /// Is the operation \p BinOp between \p LHS and \p RHS provably free of
+ /// signed/unsigned overflow (\p Signed)?
+ bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
+ const SCEV *LHS, const SCEV *RHS);
+
+ /// Parse NSW/NUW flags from add/sub/mul IR binary operation \p OBO into
+ /// SCEV no-wrap flags, and deduce flag[s] that aren't known yet.
+ /// Does not mutate the original instruction.
+ std::pair<SCEV::NoWrapFlags, bool /*Deduced*/>
+ getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO);
+
/// Return a SCEV expression for the full generality of the specified
/// expression.
const SCEV *getSCEV(Value *V);
@@ -512,7 +523,8 @@ public:
const SCEV *getConstant(ConstantInt *V);
const SCEV *getConstant(const APInt &Val);
const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
- const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
+ const SCEV *getLosslessPtrToIntExpr(const SCEV *Op, unsigned Depth = 0);
+ const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty);
const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
@@ -575,7 +587,6 @@ public:
const SCEV *getGEPExpr(GEPOperator *GEP,
const SmallVectorImpl<const SCEV *> &IndexExprs);
const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW);
- const SCEV *getSignumExpr(const SCEV *Op);
const SCEV *getMinMaxExpr(SCEVTypes Kind,
SmallVectorImpl<const SCEV *> &Operands);
const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
@@ -622,10 +633,26 @@ public:
const SCEV *getNotSCEV(const SCEV *V);
/// Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+ ///
+ /// If the LHS and RHS are pointers which don't share a common base
+ /// (according to getPointerBase()), this returns a SCEVCouldNotCompute.
+ /// To compute the difference between two unrelated pointers, you can
+ /// explicitly convert the arguments using getPtrToIntExpr(), for pointer
+ /// types that support it.
const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
unsigned Depth = 0);
+ /// Compute ceil(N / D). N and D are treated as unsigned values.
+ ///
+ /// Since SCEV doesn't have native ceiling division, this generates a
+ /// SCEV expression of the following form:
+ ///
+ /// umin(N, 1) + floor((N - umin(N, 1)) / D)
+ ///
+ /// A denominator of zero or poison is handled the same way as getUDivExpr().
+ const SCEV *getUDivCeilSCEV(const SCEV *N, const SCEV *D);
+
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is zero extended.
const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
@@ -705,17 +732,25 @@ public:
bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
- /// Returns the maximum trip count of the loop if it is a single-exit
- /// loop and we can compute a small maximum for that loop.
- ///
- /// Implemented in terms of the \c getSmallConstantTripCount overload with
- /// the single exiting block passed to it. See that routine for details.
+ /// Convert from an "exit count" (i.e. "backedge taken count") to a "trip
+ /// count". A "trip count" is the number of times the header of the loop
+ /// will execute if an exit is taken after the specified number of backedges
+ /// have been taken (e.g. TripCount = ExitCount + 1). A zero result
+ /// must be interpreted as a loop having an unknown trip count.
+ const SCEV *getTripCountFromExitCount(const SCEV *ExitCount);
+
+ /// Returns the exact trip count of the loop if we can compute it, and
+ /// the result is a small constant. '0' is used to represent an unknown
+ /// or non-constant trip count. Note that a trip count is simply one more
+ /// than the backedge taken count for the loop.
unsigned getSmallConstantTripCount(const Loop *L);
- /// Returns the maximum trip count of this loop as a normal unsigned
- /// value. Returns 0 if the trip count is unknown or not constant. This
- /// "trip count" assumes that control exits via ExitingBlock. More
- /// precisely, it is the number of times that control may reach ExitingBlock
+ /// Return the exact trip count for this loop if we exit through ExitingBlock.
+ /// '0' is used to represent an unknown or non-constant trip count. Note
+ /// that a trip count is simply one more than the backedge taken count for
+ /// the same exit.
+ /// This "trip count" assumes that control exits via ExitingBlock. More
+ /// precisely, it is the number of times that control will reach ExitingBlock
/// before taking the branch. For loops with multiple exits, it may not be
/// the number times that the loop header executes if the loop exits
/// prematurely via another branch.
@@ -727,12 +762,18 @@ public:
/// Returns 0 if the trip count is unknown or not constant.
unsigned getSmallConstantMaxTripCount(const Loop *L);
+ /// Returns the largest constant divisor of the trip count as a normal
+ /// unsigned value, if possible. This means that the actual trip count is
+ /// always a multiple of the returned value. Returns 1 if the trip count is
+ /// unknown or not guaranteed to be a multiple of a constant. Will also
+ /// return 1 if the trip count is very large (>= 2^32).
+ /// Note that the argument is an exit count for loop L, NOT a trip count.
+ unsigned getSmallConstantTripMultiple(const Loop *L,
+ const SCEV *ExitCount);
+
/// Returns the largest constant divisor of the trip count of the
- /// loop if it is a single-exit loop and we can compute a small maximum for
- /// that loop.
- ///
- /// Implemented in terms of the \c getSmallConstantTripMultiple overload with
- /// the single exiting block passed to it. See that routine for details.
+ /// loop. Will return 1 if no trip count could be computed, or if a
+ /// divisor could not be found.
unsigned getSmallConstantTripMultiple(const Loop *L);
/// Returns the largest constant divisor of the trip count of this loop as a
@@ -938,11 +979,24 @@ public:
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS);
+ /// Check whether the condition described by Pred, LHS, and RHS is true or
+ /// false. If we know it, return the evaluation of this condition. If neither
+ /// is proved, return None.
+ Optional<bool> evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS);
+
/// Test if the given expression is known to satisfy the condition described
/// by Pred, LHS, and RHS in the given Context.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS, const Instruction *Context);
+ /// Check whether the condition described by Pred, LHS, and RHS is true or
+ /// false in the given \p Context. If we know it, return the evaluation of
+ /// this condition. If neither is proved, return None.
+ Optional<bool> evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS,
+ const Instruction *Context);
+
/// Test if the condition described by Pred, LHS, RHS is known to be true on
/// every iteration of the loop of the recurrency LHS.
bool isKnownOnEveryIteration(ICmpInst::Predicate Pred,
@@ -1177,6 +1231,9 @@ public:
/// sharpen it.
void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags);
+ /// Try to apply information from loop guards for \p L to \p Expr.
+ const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L);
+
private:
/// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
/// Value is deleted.
@@ -1225,7 +1282,8 @@ private:
/// The type for ExprValueMap.
using ValueOffsetPair = std::pair<Value *, ConstantInt *>;
- using ExprValueMapType = DenseMap<const SCEV *, SetVector<ValueOffsetPair>>;
+ using ValueOffsetPairSetVector = SmallSetVector<ValueOffsetPair, 4>;
+ using ExprValueMapType = DenseMap<const SCEV *, ValueOffsetPairSetVector>;
/// ExprValueMap -- This map records the original values from which
/// the SCEV expr is generated from.
@@ -1277,7 +1335,7 @@ private:
DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;
/// Return the Value set from which the SCEV expr is generated.
- SetVector<ValueOffsetPair> *getSCEVValues(const SCEV *S);
+ ValueOffsetPairSetVector *getSCEVValues(const SCEV *S);
/// Private helper method for the GetMinTrailingZeros method
uint32_t GetMinTrailingZerosImpl(const SCEV *S);
@@ -1324,8 +1382,6 @@ private:
!isa<SCEVCouldNotCompute>(MaxNotTaken);
}
- bool hasOperand(const SCEV *S) const;
-
/// Test whether this ExitLimit contains all information.
bool hasFullInfo() const {
return !isa<SCEVCouldNotCompute>(ExactNotTaken);
@@ -1376,6 +1432,9 @@ private:
/// True iff the backedge is taken either exactly Max or zero times.
bool MaxOrZero = false;
+ /// SCEV expressions used in any of the ExitNotTakenInfo counts.
+ SmallPtrSet<const SCEV *, 4> Operands;
+
bool isComplete() const { return IsComplete; }
const SCEV *getConstantMax() const { return ConstantMax; }
@@ -1444,10 +1503,7 @@ private:
/// Return true if any backedge taken count expressions refer to the given
/// subexpression.
- bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
-
- /// Invalidate this result and free associated memory.
- void clear();
+ bool hasOperand(const SCEV *S) const;
};
/// Cache the backedge-taken count of the loops for this function as they
@@ -1502,6 +1558,10 @@ private:
return getLoopProperties(L).HasNoAbnormalExits;
}
+ /// Return true if this loop is finite by assumption. That is,
+ /// to be infinite, it must also be undefined.
+ bool loopIsFiniteByAssumption(const Loop *L);
+
/// Compute a LoopDisposition value.
LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
@@ -1558,6 +1618,12 @@ private:
ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop,
const SCEV *MaxBECount, unsigned BitWidth);
+ /// If the unknown expression U corresponds to a simple recurrence, return
+ /// a constant range which represents the entire recurrence. Note that
+ /// *add* recurrences with loop invariant steps aren't represented by
+ /// SCEVUnknowns and thus don't use this mechanism.
+ ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U);
+
/// We know that there is no SCEV for the specified value. Analyze the
/// expression.
const SCEV *createSCEV(Value *V);
@@ -1966,11 +2032,6 @@ private:
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI);
- /// Compute the backedge taken count knowing the interval difference, the
- /// stride and presence of the equality in the comparison.
- const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride,
- bool Equality);
-
/// Compute the maximum backedge count based on the range of values
/// permitted by Start, End, and Stride. This is for loops of the form
/// {Start, +, Stride} LT End.
@@ -1983,15 +2044,13 @@ private:
/// Verify if a linear IV with positive stride can overflow when in a
/// less-than comparison, knowing the invariant term of the comparison,
- /// the stride and the knowledge of NSW/NUW flags on the recurrence.
- bool doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
- bool NoWrap);
+ /// and the stride.
+ bool canIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned);
/// Verify if a linear IV with negative stride can overflow when in a
/// greater-than comparison, knowing the invariant term of the comparison,
- /// the stride and the knowledge of NSW/NUW flags on the recurrence.
- bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
- bool NoWrap);
+ /// and the stride.
+ bool canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned);
/// Get add expr already created or create a new one.
const SCEV *getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
@@ -2021,9 +2080,6 @@ private:
/// Assign A and B to LHS and RHS, respectively.
bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
- /// Try to apply information from loop guards for \p L to \p Expr.
- const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L);
-
/// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
/// `UniqueSCEVs`.
///
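
A scalar illustration (not LLVM code) of the overflow-free ceiling-division form documented for getUDivCeilSCEV(), umin(N, 1) + (N - umin(N, 1)) / D; unlike the textbook (N + D - 1) / D it cannot wrap for large N:

    #include <algorithm>
    #include <cstdint>

    static uint64_t udivCeil(uint64_t N, uint64_t D) {
      uint64_t Bias = std::min<uint64_t>(N, 1); // 0 when N == 0, otherwise 1
      return Bias + (N - Bias) / D;             // == ceil(N / D), for D != 0
    }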
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
index 98d53237d4a0..20acb407ead0 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
@@ -33,6 +33,9 @@ public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
AAQueryInfo &AAQI);
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
+
private:
Value *GetBaseValue(const SCEV *S);
};
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 37e675f08afc..c0da311e4e48 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -210,8 +210,6 @@ class Type;
return make_range(op_begin(), op_end());
}
- Type *getType() const { return getOperand(0)->getType(); }
-
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask = NoWrapMask) const {
return (NoWrapFlags)(SubclassData & Mask);
}
@@ -293,6 +291,8 @@ class Type;
: SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
public:
+ Type *getType() const { return getOperand(0)->getType(); }
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEV *S) {
return S->getSCEVType() == scMulExpr;
@@ -359,6 +359,7 @@ class Type;
: SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {}
public:
+ Type *getType() const { return getStart()->getType(); }
const SCEV *getStart() const { return Operands[0]; }
const Loop *getLoop() const { return L; }
@@ -401,6 +402,11 @@ class Type;
/// iteration number.
const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const;
+ /// Return the value of this chain of recurrences at the specified iteration
+ /// number. Takes an explicit list of operands to represent an AddRec.
+ static const SCEV *evaluateAtIteration(ArrayRef<const SCEV *> Operands,
+ const SCEV *It, ScalarEvolution &SE);
+
/// Return the number of iterations of this loop that produce
/// values in the specified constant range. Another way of
/// looking at this is that it returns the first iteration number
@@ -440,6 +446,8 @@ class Type;
}
public:
+ Type *getType() const { return getOperand(0)->getType(); }
+
static bool classof(const SCEV *S) {
return isMinMaxType(S->getSCEVType());
}
@@ -895,13 +903,10 @@ class Type;
Operands.push_back(visit(Op));
const Loop *L = Expr->getLoop();
- const SCEV *Res = SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags());
-
if (0 == Map.count(L))
- return Res;
+ return SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags());
- const SCEVAddRecExpr *Rec = cast<SCEVAddRecExpr>(Res);
- return Rec->evaluateAtIteration(Map[L], SE);
+ return SCEVAddRecExpr::evaluateAtIteration(Operands, Map[L], SE);
}
private:
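
A scalar illustration (not LLVM code) of what evaluateAtIteration computes in the common affine case {Start,+,Step}: the recurrence's value on iteration It. Longer chains of recurrences use binomial coefficients, which this sketch omits:

    #include <cstdint>

    static int64_t evalAffineAtIteration(int64_t Start, int64_t Step,
                                         int64_t It) {
      return Start + It * Step;
    }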
diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h
index 81a2533152de..27c58c0afa8a 100644
--- a/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -470,8 +470,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::Solve() {
while (!BBWorkList.empty() || !ValueWorkList.empty()) {
// Process the value work list.
while (!ValueWorkList.empty()) {
- Value *V = ValueWorkList.back();
- ValueWorkList.pop_back();
+ Value *V = ValueWorkList.pop_back_val();
LLVM_DEBUG(dbgs() << "\nPopped off V-WL: " << *V << "\n");
diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
index 9838d629e93e..92459ea79ab4 100644
--- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
-#define LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+#ifndef LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
+#define LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -89,4 +89,4 @@ private:
} // namespace llvm
-#endif // LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+#endif // LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
index 358f757314ee..f9bac739cee6 100644
--- a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
+++ b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
-#define LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
+#ifndef LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H
+#define LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CallGraph.h"
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index defc95d0062a..ded53617b304 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -329,6 +329,12 @@ TLI_DEFINE_STRING_INTERNAL("__isoc99_scanf")
/// int __isoc99_sscanf(const char *s, const char *format, ...)
TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_sscanf)
TLI_DEFINE_STRING_INTERNAL("__isoc99_sscanf")
+/// void __kmpc_alloc_shared(size_t nbyte);
+TLI_DEFINE_ENUM_INTERNAL(__kmpc_alloc_shared)
+TLI_DEFINE_STRING_INTERNAL("__kmpc_alloc_shared")
+/// void __kmpc_free_shared(void *ptr, size_t nbyte);
+TLI_DEFINE_ENUM_INTERNAL(__kmpc_free_shared)
+TLI_DEFINE_STRING_INTERNAL("__kmpc_free_shared")
/// double __log10_finite(double x);
TLI_DEFINE_ENUM_INTERNAL(log10_finite)
TLI_DEFINE_STRING_INTERNAL("__log10_finite")
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 34a8a1e3407c..22bfeda0efd0 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -28,7 +28,7 @@ class Triple;
struct VecDesc {
StringRef ScalarFnName;
StringRef VectorFnName;
- unsigned VectorizationFactor;
+ ElementCount VectorizationFactor;
};
enum LibFunc : unsigned {
@@ -52,6 +52,7 @@ class TargetLibraryInfoImpl {
llvm::DenseMap<unsigned, std::string> CustomNames;
static StringLiteral const StandardNames[NumLibFuncs];
bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
+ unsigned SizeOfInt;
enum AvailabilityState {
StandardName = 3, // (memset to all ones)
@@ -86,11 +87,12 @@ public:
/// addVectorizableFunctionsFromVecLib for filling up the tables of
/// vectorizable functions.
enum VectorLibrary {
- NoLibrary, // Don't use any vector library.
- Accelerate, // Use Accelerate framework.
- LIBMVEC_X86,// GLIBC Vector Math library.
- MASSV, // IBM MASS vector library.
- SVML // Intel short vector math library.
+ NoLibrary, // Don't use any vector library.
+ Accelerate, // Use Accelerate framework.
+ DarwinLibSystemM, // Use Darwin's libsystem_m.
+ LIBMVEC_X86, // GLIBC Vector Math library.
+ MASSV, // IBM MASS vector library.
+ SVML // Intel short vector math library.
};
TargetLibraryInfoImpl();
@@ -152,7 +154,7 @@ public:
/// Return true if the function F has a vector equivalent with vectorization
/// factor VF.
- bool isFunctionVectorizable(StringRef F, unsigned VF) const {
+ bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const {
return !getVectorizedFunction(F, VF).empty();
}
@@ -162,19 +164,7 @@ public:
/// Return the name of the equivalent of F, vectorized with factor VF. If no
/// such mapping exists, return the empty string.
- StringRef getVectorizedFunction(StringRef F, unsigned VF) const;
-
- /// Return true if the function F has a scalar equivalent, and set VF to be
- /// the vectorization factor.
- bool isFunctionScalarizable(StringRef F, unsigned &VF) const {
- return !getScalarizedFunction(F, VF).empty();
- }
-
- /// Return the name of the equivalent of F, scalarized. If no such mapping
- /// exists, return the empty string.
- ///
- /// Set VF to the vectorization factor.
- StringRef getScalarizedFunction(StringRef F, unsigned &VF) const;
+ StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const;
/// Set to true iff i32 parameters to library functions should have signext
/// or zeroext attributes if they correspond to C-level int or unsigned int,
@@ -200,9 +190,25 @@ public:
/// This queries the 'wchar_size' metadata.
unsigned getWCharSize(const Module &M) const;
+ /// Get size of a C-level int or unsigned int, in bits.
+ unsigned getIntSize() const {
+ return SizeOfInt;
+ }
+
+ /// Initialize the C-level size of an integer.
+ void setIntSize(unsigned Bits) {
+ SizeOfInt = Bits;
+ }
+
/// Returns the largest vectorization factor used in the list of
/// vector functions.
- unsigned getWidestVF(StringRef ScalarF) const;
+ void getWidestVF(StringRef ScalarF, ElementCount &FixedVF,
+ ElementCount &Scalable) const;
+
+ /// Returns true if call site / callee has cdecl-compatible calling
+ /// conventions.
+ static bool isCallingConvCCompatible(CallBase *CI);
+ static bool isCallingConvCCompatible(Function *Callee);
};
/// Provides information about what library functions are available for
@@ -317,13 +323,13 @@ public:
bool has(LibFunc F) const {
return getState(F) != TargetLibraryInfoImpl::Unavailable;
}
- bool isFunctionVectorizable(StringRef F, unsigned VF) const {
+ bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const {
return Impl->isFunctionVectorizable(F, VF);
}
bool isFunctionVectorizable(StringRef F) const {
return Impl->isFunctionVectorizable(F);
}
- StringRef getVectorizedFunction(StringRef F, unsigned VF) const {
+ StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const {
return Impl->getVectorizedFunction(F, VF);
}
@@ -395,6 +401,11 @@ public:
return Impl->getWCharSize(M);
}
+ /// \copydoc TargetLibraryInfoImpl::getIntSize()
+ unsigned getIntSize() const {
+ return Impl->getIntSize();
+ }
+
/// Handle invalidation from the pass manager.
///
/// If we try to invalidate this info, just return false. It cannot become
@@ -409,8 +420,9 @@ public:
}
/// Returns the largest vectorization factor used in the list of
/// vector functions.
- unsigned getWidestVF(StringRef ScalarF) const {
- return Impl->getWidestVF(ScalarF);
+ void getWidestVF(StringRef ScalarF, ElementCount &FixedVF,
+ ElementCount &ScalableVF) const {
+ Impl->getWidestVF(ScalarF, FixedVF, ScalableVF);
}
/// Check if the function "F" is listed in a library known to LLVM.
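
A hedged usage sketch for the new out-parameter form of getWidestVF(); the wrapper function and its parameters are illustrative only:

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/Support/TypeSize.h"

    static void queryWidestVFs(const llvm::TargetLibraryInfo &TLI,
                               llvm::StringRef ScalarName) {
      llvm::ElementCount FixedVF = llvm::ElementCount::getFixed(0);
      llvm::ElementCount ScalableVF = llvm::ElementCount::getScalable(0);
      // After the call, FixedVF and ScalableVF hold the widest fixed-width and
      // scalable factors for which a vector mapping of ScalarName is known.
      TLI.getWidestVF(ScalarName, FixedVF, ScalableVF);
    }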
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index cdfb04424e56..628058142e48 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -21,11 +21,13 @@
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
@@ -59,6 +61,7 @@ class TargetLibraryInfo;
class Type;
class User;
class Value;
+class VPIntrinsic;
struct KnownBits;
template <typename T> class Optional;
@@ -118,46 +121,34 @@ class IntrinsicCostAttributes {
SmallVector<Type *, 4> ParamTys;
SmallVector<const Value *, 4> Arguments;
FastMathFlags FMF;
- ElementCount VF = ElementCount::getFixed(1);
// If ScalarizationCost is UINT_MAX, the cost of scalarizing the
// arguments and the return value will be computed based on types.
- unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
+ InstructionCost ScalarizationCost = InstructionCost::getInvalid();
public:
- IntrinsicCostAttributes(const IntrinsicInst &I);
+ IntrinsicCostAttributes(
+ Intrinsic::ID Id, const CallBase &CI,
+ InstructionCost ScalarCost = InstructionCost::getInvalid());
- IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
-
- IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
- ElementCount Factor);
-
- IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
- ElementCount Factor, unsigned ScalarCost);
-
- IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
- ArrayRef<Type *> Tys, FastMathFlags Flags);
-
- IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
- ArrayRef<Type *> Tys, FastMathFlags Flags,
- unsigned ScalarCost);
-
- IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
- ArrayRef<Type *> Tys, FastMathFlags Flags,
- unsigned ScalarCost,
- const IntrinsicInst *I);
-
- IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
- ArrayRef<Type *> Tys);
+ IntrinsicCostAttributes(
+ Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
+ FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
+ InstructionCost ScalarCost = InstructionCost::getInvalid());
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args);
+ IntrinsicCostAttributes(
+ Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
+ const IntrinsicInst *I = nullptr,
+ InstructionCost ScalarCost = InstructionCost::getInvalid());
+
Intrinsic::ID getID() const { return IID; }
const IntrinsicInst *getInst() const { return II; }
Type *getReturnType() const { return RetTy; }
- ElementCount getVectorFactor() const { return VF; }
FastMathFlags getFlags() const { return FMF; }
- unsigned getScalarizationCost() const { return ScalarizationCost; }
+ InstructionCost getScalarizationCost() const { return ScalarizationCost; }
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
@@ -165,9 +156,7 @@ public:
return Arguments.empty();
}
- bool skipScalarizationCost() const {
- return ScalarizationCost != std::numeric_limits<unsigned>::max();
- }
+ bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
};
class TargetTransformInfo;
@@ -247,8 +236,6 @@ public:
Cost = getUserCost(I, kind);
break;
}
- if (Cost == -1)
- Cost.setInvalid();
return Cost;
}
@@ -277,9 +264,10 @@ public:
};
/// Estimate the cost of a GEP operation when lowered.
- int getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- TargetCostKind CostKind = TCK_SizeAndLatency) const;
+ InstructionCost
+ getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind = TCK_SizeAndLatency) const;
/// \returns A value by which our inlining threshold should be multiplied.
/// This is primarily used to bump up the inlining threshold wholesale on
@@ -306,7 +294,7 @@ public:
/// \return the expected cost of a memcpy, which could e.g. depend on the
/// source/destination type and alignment and the number of bytes copied.
- int getMemcpyCost(const Instruction *I) const;
+ InstructionCost getMemcpyCost(const Instruction *I) const;
/// \return The estimated number of case clusters when lowering \p 'SI'.
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump
@@ -329,16 +317,20 @@ public:
///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
- int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TargetCostKind CostKind) const;
+ InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind) const;
/// This is a helper function which calls the two-argument getUserCost
/// with \p Operands which are the current operands U has.
- int getUserCost(const User *U, TargetCostKind CostKind) const {
+ InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
SmallVector<const Value *, 4> Operands(U->operand_values());
return getUserCost(U, Operands, CostKind);
}
+ /// If a branch or a select condition is skewed in one direction by more than
+ /// this factor, it is very likely to be predicted correctly.
+ BranchProbability getPredictableBranchThreshold() const;
+
/// Return true if branch divergence exists.
///
/// Branch divergence has a significantly negative impact on GPU performance
@@ -638,13 +630,15 @@ public:
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) const;
- /// \return True is LSR should make efforts to create/preserve post-inc
- /// addressing mode expressions.
- bool shouldFavorPostInc() const;
+ enum AddressingModeKind {
+ AMK_PreIndexed,
+ AMK_PostIndexed,
+ AMK_None
+ };
- /// Return true if LSR should make efforts to generate indexed addressing
- /// modes that operate across loop iterations.
- bool shouldFavorBackedgeIndex(const Loop *L) const;
+ /// Return the preferred addressing mode LSR should make efforts to generate.
+ AddressingModeKind getPreferredAddressingMode(const Loop *L,
+ ScalarEvolution *SE) const;
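
A hedged sketch of how LSR-style code might query the new hook; the wrapper function is illustrative only:

    #include "llvm/Analysis/TargetTransformInfo.h"

    static bool prefersPostIndexed(const llvm::TargetTransformInfo &TTI,
                                   const llvm::Loop *L,
                                   llvm::ScalarEvolution *SE) {
      return TTI.getPreferredAddressingMode(L, SE) ==
             llvm::TargetTransformInfo::AMK_PostIndexed;
    }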
/// Return true if the target supports masked store.
bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
@@ -689,9 +683,10 @@ public:
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
/// TODO: Handle pre/postinc as well.
- int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace = 0) const;
+ InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale,
+ unsigned AddrSpace = 0) const;
/// Return true if the loop strength reduce pass should make
/// Instruction* based TTI queries to isLegalAddressingMode(). This is
@@ -714,7 +709,7 @@ public:
bool isTypeLegal(Type *Ty) const;
/// Returns the estimated number of registers required to represent \p Ty.
- unsigned getRegUsageForType(Type *Ty) const;
+ InstructionCost getRegUsageForType(Type *Ty) const;
/// Return true if switches should be turned into lookup tables for the
/// target.
@@ -724,6 +719,9 @@ public:
/// containing this constant value for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const;
+ /// Return true if lookup tables should be turned into relative lookup tables.
+ bool shouldBuildRelLookupTables() const;
+
/// Return true if the input function which is cold at all call sites,
/// should use coldcc calling convention.
bool useColdCCForColdCall(Function &F) const;
@@ -731,14 +729,15 @@ public:
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
/// extracted from vectors.
- unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
- bool Insert, bool Extract) const;
+ InstructionCost getScalarizationOverhead(VectorType *Ty,
+ const APInt &DemandedElts,
+ bool Insert, bool Extract) const;
/// Estimate the overhead of scalarizing an instructions unique
- /// non-constant operands. The types of the arguments are ordinarily
- /// scalar, in which case the costs are multiplied with VF.
- unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- unsigned VF) const;
+ /// non-constant operands. The (potentially vector) types to use for each
+ /// argument are passed via Tys.
+ InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys) const;
/// If target has efficient vector element load/store instructions, it can
/// return true here so that insertion/extraction costs are not added to
@@ -798,7 +797,7 @@ public:
/// Determine if the target supports unaligned memory accesses.
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace = 0,
- unsigned Alignment = 1,
+ Align Alignment = Align(1),
bool *Fast = nullptr) const;
/// Return hardware support for population count.
@@ -815,20 +814,23 @@ public:
/// Return the expected cost of supporting the floating point operation
/// of the specified type.
- int getFPOpCost(Type *Ty) const;
+ InstructionCost getFPOpCost(Type *Ty) const;
/// Return the expected cost of materializing for the given integer
/// immediate of the specified type.
- int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;
+ InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind) const;
/// Return the expected cost of materialization for the given integer
/// immediate of the specified type for a given instruction. The cost can be
/// zero if the immediate can be folded into the specified instruction.
- int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
- TargetCostKind CostKind,
- Instruction *Inst = nullptr) const;
- int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty, TargetCostKind CostKind) const;
+ InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind,
+ Instruction *Inst = nullptr) const;
+ InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind) const;
/// Return the expected cost for the given integer when optimising
/// for size. This is different than the other integer immediate cost
@@ -837,8 +839,8 @@ public:
/// with another such as Thumb. This return value is used as a penalty when
/// the total costs for a constant is calculated (the bigger the cost, the
/// more beneficial constant hoisting is).
- int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) const;
+ InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
+ const APInt &Imm, Type *Ty) const;
/// @}
/// \name Vector Target Information
@@ -856,44 +858,13 @@ public:
SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
///< with any shuffle mask.
- SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
+ SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
///< shuffle mask.
+ SK_Splice ///< Concatenates elements from the first input vector
+ ///< with elements of the second input vector, returning
+ ///< a vector of the same type as the input vectors.
};
- /// Kind of the reduction data.
- enum ReductionKind {
- RK_None, /// Not a reduction.
- RK_Arithmetic, /// Binary reduction data.
- RK_MinMax, /// Min/max reduction data.
- RK_UnsignedMinMax, /// Unsigned min/max reduction data.
- };
-
- /// Contains opcode + LHS/RHS parts of the reduction operations.
- struct ReductionData {
- ReductionData() = delete;
- ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
- : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
- assert(Kind != RK_None && "expected binary or min/max reduction only.");
- }
- unsigned Opcode = 0;
- Value *LHS = nullptr;
- Value *RHS = nullptr;
- ReductionKind Kind = RK_None;
- bool hasSameData(ReductionData &RD) const {
- return Kind == RD.Kind && Opcode == RD.Opcode;
- }
- };
-
- static ReductionKind matchPairwiseReduction(
- const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
-
- static ReductionKind matchVectorSplittingReduction(
- const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
-
- static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot,
- unsigned &Opcode, VectorType *&Ty,
- bool &IsPairwise);
-
/// Additional information about an operand's possible values.
enum OperandValueKind {
OK_AnyValue, // Operand can have any value.
@@ -924,8 +895,10 @@ public:
/// \return the target-provided register class name
const char *getRegisterClassName(unsigned ClassID) const;
+ enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
+
/// \return The width of the largest scalar or vector register type.
- unsigned getRegisterBitWidth(bool Vector) const;
+ TypeSize getRegisterBitWidth(RegisterKind K) const;
/// \return The width of the smallest vector register type.
unsigned getMinVectorRegisterBitWidth() const;
@@ -940,12 +913,13 @@ public:
/// creating vectors that span multiple vector registers.
/// If false, the vectorization factor will be chosen based on the
/// size of the widest element type.
- bool shouldMaximizeVectorBandwidth(bool OptSize) const;
+ bool shouldMaximizeVectorBandwidth() const;
/// \return The minimum vectorization factor for types of given element
/// bit width, or 0 if there is no minimum VF. The returned value only
/// applies when shouldMaximizeVectorBandwidth returns true.
- unsigned getMinimumVF(unsigned ElemWidth) const;
+ /// If IsScalable is true, the returned ElementCount must be a scalable VF.
+ ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
/// \return The maximum vectorization factor for types of given element
/// bit width and opcode, or 0 if there is no maximum VF.
@@ -1036,7 +1010,7 @@ public:
/// cases or optimizations based on those values.
/// \p CxtI is the optional original context instruction, if one exists, to
/// provide even more information.
- int getArithmeticInstrCost(
+ InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
OperandValueKind Opd1Info = OK_AnyValue,
@@ -1047,12 +1021,14 @@ public:
const Instruction *CxtI = nullptr) const;
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
+ /// The exact mask may be passed as Mask, or else the array will be empty.
/// The index and subtype parameters are used by the subvector insertion and
/// extraction shuffle kinds to show the insert/extract point and the type of
/// the subvector being inserted/extracted.
/// NOTE: For subvector extractions Tp represents the source type.
- int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,
- VectorType *SubTp = nullptr) const;
+ InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
+ ArrayRef<int> Mask = None, int Index = 0,
+ VectorType *SubTp = nullptr) const;
/// Represents a hint about the context in which a cast is used.
///
@@ -1093,44 +1069,50 @@ public:
/// \return The expected cost of cast instructions, such as bitcast, trunc,
/// zext, etc. If there is an existing instruction that holds Opcode, it
/// may be passed in the 'I' parameter.
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::CastContextHint CCH,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- const Instruction *I = nullptr) const;
+ InstructionCost
+ getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ const Instruction *I = nullptr) const;
/// \return The expected cost of a sign- or zero-extended vector extract. Use
/// -1 to indicate that there is no information about the index value.
- int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
- unsigned Index = -1) const;
+ InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+ VectorType *VecTy,
+ unsigned Index = -1) const;
/// \return The expected cost of control-flow related instructions such as
- /// Phi, Ret, Br.
- int getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
+ /// Phi, Ret, Br, Switch.
+ InstructionCost
+ getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ const Instruction *I = nullptr) const;
/// \returns The expected cost of compare and select instructions. If there
/// is an existing instruction that holds Opcode, it may be passed in the
/// 'I' parameter. The \p VecPred parameter can be used to indicate the select
/// is using a compare with the specified predicate as condition. When vector
/// types are passed, \p VecPred must be used for all lanes.
- int getCmpSelInstrCost(
- unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
- CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
+ InstructionCost
+ getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
+ CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ const Instruction *I = nullptr) const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value.
- int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index = -1) const;
/// \return The cost of Load and Store instructions.
- int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;
+ InstructionCost
+ getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
- int getMaskedMemoryOpCost(
+ InstructionCost getMaskedMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
@@ -1143,7 +1125,7 @@ public:
/// \p Alignment - alignment of single element
/// \p I - the optional original context instruction, if one exists, e.g. the
/// load/store to transform or the call to the gather/scatter intrinsic
- int getGatherScatterOpCost(
+ InstructionCost getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
@@ -1158,32 +1140,49 @@ public:
/// \p AddressSpace is address space of the pointer.
/// \p UseMaskForCond indicates if the memory access is predicated.
/// \p UseMaskForGaps indicates if gaps should be masked.
- int getInterleavedMemoryOpCost(
+ InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
- /// Calculate the cost of performing a vector reduction.
+ /// A helper function to determine the type of reduction algorithm used
+ /// for a given set of FastMathFlags \p FMF.
+ static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) {
+ return FMF != None && !(*FMF).allowReassoc();
+ }
+
+ /// Calculate the cost of vector reduction intrinsics.
///
/// This is the cost of reducing the vector value of type \p Ty to a scalar
- /// value using the operation denoted by \p Opcode. The form of the reduction
- /// can either be a pairwise reduction or a reduction that splits the vector
- /// at every reduction level.
+ /// value using the operation denoted by \p Opcode. The FastMathFlags
+ /// parameter \p FMF indicates what type of reduction we are performing:
+ /// 1. Tree-wise. This is the typical 'fast' reduction performed that
+ /// involves successively splitting a vector into half and doing the
+ /// operation on the pair of halves until you have a scalar value. For
+ /// example:
+ /// (v0, v1, v2, v3)
+ /// ((v0+v2), (v1+v3), undef, undef)
+ /// ((v0+v2+v1+v3), undef, undef, undef)
+ /// This is the default behaviour for integer operations, whereas for
+ /// floating point we only do this if \p FMF indicates that
+ /// reassociation is allowed.
+ /// 2. Ordered. For a vector with N elements this involves performing N
+ /// operations in lane order, starting with an initial scalar value, i.e.
+ /// result = InitVal + v0
+ /// result = result + v1
+ /// result = result + v2
+ /// result = result + v3
+ /// This is only the case for FP operations and when reassociation is not
+ /// allowed.
///
- /// Pairwise:
- /// (v0, v1, v2, v3)
- /// ((v0+v1), (v2+v3), undef, undef)
- /// Split:
- /// (v0, v1, v2, v3)
- /// ((v0+v2), (v1+v3), undef, undef)
- int getArithmeticReductionCost(
- unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
-
- int getMinMaxReductionCost(
- VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+ InstructionCost getArithmeticReductionCost(
+ unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
+ InstructionCost getMinMaxReductionCost(
+ VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
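
A scalar illustration (not LLVM code) of the two reduction shapes distinguished in the comment above, for four float lanes:

    #include <array>

    static float treeWiseReduce(std::array<float, 4> V) {
      // Tree-wise: halve the vector at each step, as in the comment's example.
      float A = V[0] + V[2], B = V[1] + V[3];
      return A + B;
    }

    static float orderedReduce(float Init, std::array<float, 4> V) {
      // Ordered: lane-by-lane, required for FP unless reassociation is allowed.
      float R = Init;
      for (float E : V)
        R += E;
      return R;
    }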
/// Calculate the cost of an extended reduction pattern, similar to
/// getArithmeticReductionCost of an Add reduction with an extension and
@@ -1198,12 +1197,13 @@ public:
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
/// Three cases are handled: 1. scalar instruction 2. vector instruction
/// 3. scalar instruction which is to be vectorized.
- int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind) const;
+ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) const;
/// \returns The cost of Call instructions.
- int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
+ InstructionCost getCallInstrCost(
+ Function *F, Type *RetTy, ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
/// \returns The number of pieces into which the provided type must be
/// split during legalization. Zero is returned when the answer is unknown.
@@ -1216,15 +1216,16 @@ public:
/// The 'SE' parameter holds pointer for the scalar evolution object which
/// is used in order to get the Ptr step value in case of constant stride.
/// The 'Ptr' parameter holds SCEV of the access pointer.
- int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
- const SCEV *Ptr = nullptr) const;
+ InstructionCost getAddressComputationCost(Type *Ty,
+ ScalarEvolution *SE = nullptr,
+ const SCEV *Ptr = nullptr) const;
/// \returns The cost, if any, of keeping values of the given types alive
/// over a callsite.
///
/// Some types may require the use of register classes that do not have
/// any callee-saved registers, so would require a spill and fill.
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
+ InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
/// \returns True if the intrinsic is a supported memory intrinsic. Info
/// will contain additional information - whether the intrinsic may write
@@ -1305,6 +1306,13 @@ public:
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
+ /// \returns True if it is legal to vectorize the given reduction kind.
+ bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
+ ElementCount VF) const;
+
+ /// \returns True if the given type is supported for scalable vectors
+ bool isElementTypeLegalForScalableVector(Type *Ty) const;
+
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes loads or has a better vector factor.
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
@@ -1325,11 +1333,6 @@ public:
bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
};
- /// \returns True if the target wants to handle the given reduction idiom in
- /// the intrinsics form instead of the shuffle form.
- bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
- ReductionFlags Flags) const;
-
/// \returns True if the target prefers reductions in loop.
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;
@@ -1366,6 +1369,38 @@ public:
/// Intrinsics") Use of %evl is discouraged when that is not the case.
bool hasActiveVectorLength() const;
+ struct VPLegalization {
+ enum VPTransform {
+ // keep the predicating parameter
+ Legal = 0,
+ // where legal, discard the predicate parameter
+ Discard = 1,
+ // transform into something else that is also predicating
+ Convert = 2
+ };
+
+ // How to transform the EVL parameter.
+ // Legal: keep the EVL parameter as it is.
+ // Discard: Ignore the EVL parameter where it is safe to do so.
+ // Convert: Fold the EVL into the mask parameter.
+ VPTransform EVLParamStrategy;
+
+ // How to transform the operator.
+ // Legal: The target supports this operator.
+ // Convert: Convert this to a non-VP operation.
+ // The 'Discard' strategy is invalid.
+ VPTransform OpStrategy;
+
+ bool shouldDoNothing() const {
+ return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
+ }
+ VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
+ : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
+ };
+
+ /// \returns How the target needs this vector-predicated operation to be
+ /// transformed.
+ VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
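
A hedged sketch of acting on the returned strategy; the wrapper function is illustrative only:

    #include "llvm/Analysis/TargetTransformInfo.h"

    static bool canDropEVLParam(const llvm::TargetTransformInfo &TTI,
                                const llvm::VPIntrinsic &VPI) {
      auto Strategy = TTI.getVPLegalizationStrategy(VPI);
      if (Strategy.shouldDoNothing())
        return false; // Both parts are Legal; keep the VP call unchanged.
      // Discard means the %evl operand may be ignored where that is safe.
      return Strategy.EVLParamStrategy ==
             llvm::TargetTransformInfo::VPLegalization::Discard;
    }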
/// @}
/// @}
@@ -1373,11 +1408,11 @@ public:
private:
/// Estimate the latency of specified instruction.
/// Returns 1 as the default value.
- int getInstructionLatency(const Instruction *I) const;
+ InstructionCost getInstructionLatency(const Instruction *I) const;
/// Returns the expected throughput cost of the instruction.
/// Returns -1 if the cost is unknown.
- int getInstructionThroughput(const Instruction *I) const;
+ InstructionCost getInstructionThroughput(const Instruction *I) const;
/// The abstract base class used to type erase specific TTI
/// implementations.
@@ -1394,19 +1429,21 @@ class TargetTransformInfo::Concept {
public:
virtual ~Concept() = 0;
virtual const DataLayout &getDataLayout() const = 0;
- virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind) = 0;
+ virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
virtual int getInlinerVectorBonusPercent() = 0;
- virtual int getMemcpyCost(const Instruction *I) = 0;
+ virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
virtual unsigned
getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) = 0;
- virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TargetCostKind CostKind) = 0;
+ virtual InstructionCost getUserCost(const User *U,
+ ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind) = 0;
+ virtual BranchProbability getPredictableBranchThreshold() = 0;
virtual bool hasBranchDivergence() = 0;
virtual bool useGPUDivergenceAnalysis() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
@@ -1458,8 +1495,8 @@ public:
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) = 0;
- virtual bool shouldFavorPostInc() const = 0;
- virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
+ virtual AddressingModeKind
+ getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
@@ -1471,24 +1508,27 @@ public:
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
- virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale, unsigned AddrSpace) = 0;
+ virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale,
+ unsigned AddrSpace) = 0;
virtual bool LSRWithInstrQueries() = 0;
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
- virtual unsigned getRegUsageForType(Type *Ty) = 0;
+ virtual InstructionCost getRegUsageForType(Type *Ty) = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
+ virtual bool shouldBuildRelLookupTables() = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
- virtual unsigned getScalarizationOverhead(VectorType *Ty,
- const APInt &DemandedElts,
- bool Insert, bool Extract) = 0;
- virtual unsigned
+ virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
+ const APInt &DemandedElts,
+ bool Insert,
+ bool Extract) = 0;
+ virtual InstructionCost
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- unsigned VF) = 0;
+ ArrayRef<Type *> Tys) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual MemCmpExpansionOptions
@@ -1499,31 +1539,33 @@ public:
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
unsigned AddressSpace,
- unsigned Alignment,
+ Align Alignment,
bool *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
- virtual int getFPOpCost(Type *Ty) = 0;
- virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
- const APInt &Imm, Type *Ty) = 0;
- virtual int getIntImmCost(const APInt &Imm, Type *Ty,
- TargetCostKind CostKind) = 0;
- virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty, TargetCostKind CostKind,
- Instruction *Inst = nullptr) = 0;
- virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty,
- TargetCostKind CostKind) = 0;
+ virtual InstructionCost getFPOpCost(Type *Ty) = 0;
+ virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
+ const APInt &Imm, Type *Ty) = 0;
+ virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind) = 0;
+ virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind,
+ Instruction *Inst = nullptr) = 0;
+ virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind) = 0;
virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
virtual unsigned getRegisterClassForType(bool Vector,
Type *Ty = nullptr) const = 0;
virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
- virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
- virtual unsigned getMinVectorRegisterBitWidth() = 0;
+ virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
+ virtual unsigned getMinVectorRegisterBitWidth() const = 0;
virtual Optional<unsigned> getMaxVScale() const = 0;
- virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
- virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
+ virtual bool shouldMaximizeVectorBandwidth() const = 0;
+ virtual ElementCount getMinimumVF(unsigned ElemWidth,
+ bool IsScalable) const = 0;
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
@@ -1556,64 +1598,71 @@ public:
virtual bool enableWritePrefetching() const = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
- virtual unsigned getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind,
- OperandValueKind Opd1Info,
- OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
- const Instruction *CxtI = nullptr) = 0;
- virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
- VectorType *SubTp) = 0;
- virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- CastContextHint CCH,
- TTI::TargetCostKind CostKind,
- const Instruction *I) = 0;
- virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
- VectorType *VecTy, unsigned Index) = 0;
- virtual int getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind) = 0;
- virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- CmpInst::Predicate VecPred,
- TTI::TargetCostKind CostKind,
- const Instruction *I) = 0;
- virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) = 0;
- virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- const Instruction *I) = 0;
- virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind) = 0;
- virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr) = 0;
+ virtual InstructionCost getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ OperandValueKind Opd1Info, OperandValueKind Opd2Info,
+ OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
+ virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
+ ArrayRef<int> Mask, int Index,
+ VectorType *SubTp) = 0;
+ virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src, CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) = 0;
+ virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+ VectorType *VecTy,
+ unsigned Index) = 0;
+ virtual InstructionCost getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) = 0;
+ virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy,
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) = 0;
+ virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) = 0;
+ virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) = 0;
+ virtual InstructionCost
+ getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) = 0;
+ virtual InstructionCost
+ getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+ bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) = 0;
- virtual int getInterleavedMemoryOpCost(
+ virtual InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
- virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
- bool IsPairwiseForm,
- TTI::TargetCostKind CostKind) = 0;
- virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsPairwiseForm, bool IsUnsigned,
- TTI::TargetCostKind CostKind) = 0;
+ virtual InstructionCost
+ getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind) = 0;
+ virtual InstructionCost
+ getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
+ TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
- virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind) = 0;
- virtual int getCallInstrCost(Function *F, Type *RetTy,
- ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind) = 0;
+ virtual InstructionCost
+ getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) = 0;
+ virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
- virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
- const SCEV *Ptr) = 0;
- virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
+ virtual InstructionCost
+ getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
+ virtual InstructionCost
+ getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) = 0;
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
@@ -1644,14 +1693,15 @@ public:
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
Align Alignment,
unsigned AddrSpace) const = 0;
+ virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
+ ElementCount VF) const = 0;
+ virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
- virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
- ReductionFlags) const = 0;
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
@@ -1660,7 +1710,9 @@ public:
virtual unsigned getGISelRematGlobalCost() const = 0;
virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength() const = 0;
- virtual int getInstructionLatency(const Instruction *I) = 0;
+ virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
+ virtual VPLegalization
+ getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
};
template <typename T>
@@ -1675,9 +1727,10 @@ public:
return Impl.getDataLayout();
}
- int getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- enum TargetTransformInfo::TargetCostKind CostKind) override {
+ InstructionCost
+ getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands,
+ enum TargetTransformInfo::TargetCostKind CostKind) override {
return Impl.getGEPCost(PointeeType, Ptr, Operands);
}
unsigned getInliningThresholdMultiplier() override {
@@ -1689,13 +1742,16 @@ public:
int getInlinerVectorBonusPercent() override {
return Impl.getInlinerVectorBonusPercent();
}
- int getMemcpyCost(const Instruction *I) override {
+ InstructionCost getMemcpyCost(const Instruction *I) override {
return Impl.getMemcpyCost(I);
}
- int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TargetCostKind CostKind) override {
+ InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind) override {
return Impl.getUserCost(U, Operands, CostKind);
}
+ BranchProbability getPredictableBranchThreshold() override {
+ return Impl.getPredictableBranchThreshold();
+ }
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
bool useGPUDivergenceAnalysis() override {
return Impl.useGPUDivergenceAnalysis();
@@ -1801,9 +1857,10 @@ public:
TargetLibraryInfo *LibInfo) override {
return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}
- bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); }
- bool shouldFavorBackedgeIndex(const Loop *L) const override {
- return Impl.shouldFavorBackedgeIndex(L);
+ AddressingModeKind
+ getPreferredAddressingMode(const Loop *L,
+ ScalarEvolution *SE) const override {
+ return Impl.getPreferredAddressingMode(L, SE);
}
bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedStore(DataType, Alignment);
@@ -1838,9 +1895,10 @@ public:
bool prefersVectorizedAddressing() override {
return Impl.prefersVectorizedAddressing();
}
- int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace) override {
+ InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale,
+ unsigned AddrSpace) override {
return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
AddrSpace);
}
@@ -1853,7 +1911,7 @@ public:
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
- unsigned getRegUsageForType(Type *Ty) override {
+ InstructionCost getRegUsageForType(Type *Ty) override {
return Impl.getRegUsageForType(Ty);
}
bool shouldBuildLookupTables() override {
@@ -1862,17 +1920,22 @@ public:
bool shouldBuildLookupTablesForConstant(Constant *C) override {
return Impl.shouldBuildLookupTablesForConstant(C);
}
+ bool shouldBuildRelLookupTables() override {
+ return Impl.shouldBuildRelLookupTables();
+ }
bool useColdCCForColdCall(Function &F) override {
return Impl.useColdCCForColdCall(F);
}
- unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
- bool Insert, bool Extract) override {
+ InstructionCost getScalarizationOverhead(VectorType *Ty,
+ const APInt &DemandedElts,
+ bool Insert, bool Extract) override {
return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
- unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- unsigned VF) override {
- return Impl.getOperandsScalarizationOverhead(Args, VF);
+ InstructionCost
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys) override {
+ return Impl.getOperandsScalarizationOverhead(Args, Tys);
}
bool supportsEfficientVectorElementLoadStore() override {
@@ -1896,7 +1959,7 @@ public:
return Impl.isFPVectorizationPotentiallyUnsafe();
}
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
- unsigned AddressSpace, unsigned Alignment,
+ unsigned AddressSpace, Align Alignment,
bool *Fast) override {
return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
@@ -1910,23 +1973,27 @@ public:
return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
}
- int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
+ InstructionCost getFPOpCost(Type *Ty) override {
+ return Impl.getFPOpCost(Ty);
+ }
- int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) override {
+ InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
+ const APInt &Imm, Type *Ty) override {
return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
}
- int getIntImmCost(const APInt &Imm, Type *Ty,
- TargetCostKind CostKind) override {
+ InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind) override {
return Impl.getIntImmCost(Imm, Ty, CostKind);
}
- int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
- TargetCostKind CostKind,
- Instruction *Inst = nullptr) override {
+ InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind,
+ Instruction *Inst = nullptr) override {
return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
}
- int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty, TargetCostKind CostKind) override {
+ InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind) override {
return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
}
unsigned getNumberOfRegisters(unsigned ClassID) const override {
@@ -1939,20 +2006,21 @@ public:
const char *getRegisterClassName(unsigned ClassID) const override {
return Impl.getRegisterClassName(ClassID);
}
- unsigned getRegisterBitWidth(bool Vector) const override {
- return Impl.getRegisterBitWidth(Vector);
+ TypeSize getRegisterBitWidth(RegisterKind K) const override {
+ return Impl.getRegisterBitWidth(K);
}
- unsigned getMinVectorRegisterBitWidth() override {
+ unsigned getMinVectorRegisterBitWidth() const override {
return Impl.getMinVectorRegisterBitWidth();
}
Optional<unsigned> getMaxVScale() const override {
return Impl.getMaxVScale();
}
- bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
- return Impl.shouldMaximizeVectorBandwidth(OptSize);
+ bool shouldMaximizeVectorBandwidth() const override {
+ return Impl.shouldMaximizeVectorBandwidth();
}
- unsigned getMinimumVF(unsigned ElemWidth) const override {
- return Impl.getMinimumVF(ElemWidth);
+ ElementCount getMinimumVF(unsigned ElemWidth,
+ bool IsScalable) const override {
+ return Impl.getMinimumVF(ElemWidth, IsScalable);
}
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
return Impl.getMaximumVF(ElemWidth, Opcode);
@@ -2008,82 +2076,84 @@ public:
BlockFrequencyInfo *BFI) override {
return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind,
- OperandValueKind Opd1Info,
- OperandValueKind Opd2Info,
- OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args,
- const Instruction *CxtI = nullptr) override {
+ InstructionCost getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ OperandValueKind Opd1Info, OperandValueKind Opd2Info,
+ OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI = nullptr) override {
return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
- int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
- VectorType *SubTp) override {
- return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
+ InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
+ ArrayRef<int> Mask, int Index,
+ VectorType *SubTp) override {
+ return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- CastContextHint CCH, TTI::TargetCostKind CostKind,
- const Instruction *I) override {
+ InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) override {
return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
- int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
- unsigned Index) override {
+ InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+ VectorType *VecTy,
+ unsigned Index) override {
return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
}
- int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
- return Impl.getCFInstrCost(Opcode, CostKind);
+ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) override {
+ return Impl.getCFInstrCost(Opcode, CostKind, I);
}
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- CmpInst::Predicate VecPred,
- TTI::TargetCostKind CostKind,
- const Instruction *I) override {
+ InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) override {
return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
- int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
- int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace, TTI::TargetCostKind CostKind,
- const Instruction *I) override {
+ InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) override {
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind, I);
}
- int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind) override {
+ InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) override {
return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
}
- int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
- bool VariableMask, Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr) override {
+ InstructionCost
+ getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+ bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) override {
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
}
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond,
- bool UseMaskForGaps) override {
+ InstructionCost getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
}
- int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
- bool IsPairwiseForm,
- TTI::TargetCostKind CostKind) override {
- return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
- CostKind);
- }
- int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsPairwiseForm, bool IsUnsigned,
+ InstructionCost
+ getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) override {
- return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
- CostKind);
+ return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
+ }
+ InstructionCost
+ getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
+ TTI::TargetCostKind CostKind) override {
+ return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
}
InstructionCost getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
@@ -2091,23 +2161,23 @@ public:
return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
CostKind);
}
- int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind) override {
+ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) override {
return Impl.getIntrinsicInstrCost(ICA, CostKind);
}
- int getCallInstrCost(Function *F, Type *RetTy,
- ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind) override {
+ InstructionCost getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) override {
return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
}
unsigned getNumberOfParts(Type *Tp) override {
return Impl.getNumberOfParts(Tp);
}
- int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
- const SCEV *Ptr) override {
+ InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) override {
return Impl.getAddressComputationCost(Ty, SE, Ptr);
}
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
+ InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
return Impl.getCostOfKeepingLiveOverCall(Tys);
}
bool getTgtMemIntrinsic(IntrinsicInst *Inst,
@@ -2170,6 +2240,13 @@ public:
return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
+ bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
+ ElementCount VF) const override {
+ return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
+ }
+ bool isElementTypeLegalForScalableVector(Type *Ty) const override {
+ return Impl.isElementTypeLegalForScalableVector(Ty);
+ }
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {
@@ -2180,10 +2257,6 @@ public:
VectorType *VecTy) const override {
return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
- bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
- ReductionFlags Flags) const override {
- return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
- }
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const override {
return Impl.preferInLoopReduction(Opcode, Ty, Flags);
@@ -2208,9 +2281,14 @@ public:
return Impl.hasActiveVectorLength();
}
- int getInstructionLatency(const Instruction *I) override {
+ InstructionCost getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I);
}
+
+ VPLegalization
+ getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
+ return Impl.getVPLegalizationStrategy(PI);
+ }
};
template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 7e31cb365a87..c07a33c9f155 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -22,8 +22,11 @@
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
+using namespace llvm::PatternMatch;
+
namespace llvm {
/// Base class for use as a mix-in that aids implementing
@@ -44,9 +47,10 @@ public:
const DataLayout &getDataLayout() const { return DL; }
- int getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
+ InstructionCost
+ getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
// In the basic model, we just assume that all-constant GEPs will be folded
// into their uses via addressing modes.
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -71,10 +75,20 @@ public:
int getInlinerVectorBonusPercent() const { return 150; }
- unsigned getMemcpyCost(const Instruction *I) const {
+ InstructionCost getMemcpyCost(const Instruction *I) const {
return TTI::TCC_Expensive;
}
+ // Although this default value is arbitrary, it is not random. It is assumed
+ // that a condition that evaluates the same way by a higher percentage than
+ // this is best represented as control flow. Therefore, the default value N
+ // should be set such that the win from N% correct executions is greater than
+ // the loss from (100 - N)% mispredicted executions for the majority of
+ // intended targets.
+ BranchProbability getPredictableBranchThreshold() const {
+ return BranchProbability(99, 100);
+ }
+
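A rough usage sketch of the threshold (not from this patch; TTI, BPI, and BB stand in for a client pass's TargetTransformInfo, BranchProbabilityInfo, and the block whose terminator is being inspected):

  BranchProbability Likely = TTI.getPredictableBranchThreshold();
  BranchProbability Taken = BPI.getEdgeProbability(BB, /*IndexInSuccessors=*/0);
  // Treat the branch as predictable if either outcome is at least as likely
  // as the threshold; otherwise a select may be the better representation.
  bool IsPredictable = Taken >= Likely || Taken <= Likely.getCompl();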
bool hasBranchDivergence() const { return false; }
bool useGPUDivergenceAnalysis() const { return false; }
@@ -209,9 +223,10 @@ public:
return false;
}
- bool shouldFavorPostInc() const { return false; }
-
- bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
+ TTI::AddressingModeKind
+ getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
+ return TTI::AMK_None;
+ }
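For targets migrating off the removed shouldFavorPostInc()/shouldFavorBackedgeIndex() hooks, a sketch of an equivalent override (the class name is hypothetical, and AMK_PostIndexed is assumed to be one of the TTI::AddressingModeKind enumerators alongside the AMK_None default shown above):

  TTI::AddressingModeKind
  MyTargetTTIImpl::getPreferredAddressingMode(const Loop *L,
                                              ScalarEvolution *SE) const {
    // Was: shouldFavorPostInc() returning true.
    return TTI::AMK_PostIndexed;
  }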
bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
return false;
@@ -255,9 +270,10 @@ public:
bool prefersVectorizedAddressing() const { return true; }
- int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace) const {
+ InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale,
+ unsigned AddrSpace) const {
// Guess that all legal addressing mode are free.
if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
AddrSpace))
@@ -275,20 +291,24 @@ public:
bool isTypeLegal(Type *Ty) const { return false; }
- unsigned getRegUsageForType(Type *Ty) const { return 1; }
+ InstructionCost getRegUsageForType(Type *Ty) const { return 1; }
bool shouldBuildLookupTables() const { return true; }
+
bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
+ bool shouldBuildRelLookupTables() const { return false; }
+
bool useColdCCForColdCall(Function &F) const { return false; }
- unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
- bool Insert, bool Extract) const {
+ InstructionCost getScalarizationOverhead(VectorType *Ty,
+ const APInt &DemandedElts,
+ bool Insert, bool Extract) const {
return 0;
}
- unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- unsigned VF) const {
+ InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys) const {
return 0;
}
@@ -310,7 +330,7 @@ public:
bool isFPVectorizationPotentiallyUnsafe() const { return false; }
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
- unsigned AddressSpace, unsigned Alignment,
+ unsigned AddressSpace, Align Alignment,
bool *Fast) const {
return false;
}
@@ -323,29 +343,30 @@ public:
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
- unsigned getFPOpCost(Type *Ty) const {
+ InstructionCost getFPOpCost(Type *Ty) const {
return TargetTransformInfo::TCC_Basic;
}
- int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const {
+ InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
return 0;
}
- unsigned getIntImmCost(const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) const {
+ InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) const {
return TTI::TCC_Basic;
}
- unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind,
- Instruction *Inst = nullptr) const {
+ InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr) const {
return TTI::TCC_Free;
}
- unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) const {
+ InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) const {
return TTI::TCC_Free;
}
@@ -366,15 +387,19 @@ public:
}
}
- unsigned getRegisterBitWidth(bool Vector) const { return 32; }
+ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+ return TypeSize::getFixed(32);
+ }
unsigned getMinVectorRegisterBitWidth() const { return 128; }
Optional<unsigned> getMaxVScale() const { return None; }
- bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
+ bool shouldMaximizeVectorBandwidth() const { return false; }
- unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
+ ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
+ return ElementCount::get(0, IsScalable);
+ }
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
@@ -420,14 +445,12 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind,
- TTI::OperandValueKind Opd1Info,
- TTI::OperandValueKind Opd2Info,
- TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args,
- const Instruction *CxtI = nullptr) const {
+ InstructionCost getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+ const Instruction *CxtI = nullptr) const {
// FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
switch (Opcode) {
@@ -445,15 +468,16 @@ public:
return 1;
}
- unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index,
- VectorType *SubTp) const {
+ InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
+ ArrayRef<int> Mask, int Index,
+ VectorType *SubTp) const {
return 1;
}
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::CastContextHint CCH,
- TTI::TargetCostKind CostKind,
- const Instruction *I) const {
+ InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) const {
switch (Opcode) {
default:
break;
@@ -488,12 +512,14 @@ public:
return 1;
}
- unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
- VectorType *VecTy, unsigned Index) const {
+ InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+ VectorType *VecTy,
+ unsigned Index) const {
return 1;
}
- unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const {
+ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) const {
// A phi would be free, unless we're costing the throughput because it
// will require a register.
if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
@@ -501,34 +527,36 @@ public:
return 1;
}
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- CmpInst::Predicate VecPred,
- TTI::TargetCostKind CostKind,
- const Instruction *I) const {
+ InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) const {
return 1;
}
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
return 1;
}
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace, TTI::TargetCostKind CostKind,
- const Instruction *I) const {
+ InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) const {
return 1;
}
- unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind) const {
+ InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) const {
return 1;
}
- unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr) const {
+ InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) const {
return 1;
}
@@ -539,8 +567,8 @@ public:
return 1;
}
- unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind) const {
+ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) const {
switch (ICA.getID()) {
default:
break;
@@ -548,6 +576,7 @@ public:
case Intrinsic::assume:
case Intrinsic::sideeffect:
case Intrinsic::pseudoprobe:
+ case Intrinsic::arithmetic_fence:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_label:
@@ -579,25 +608,27 @@ public:
return 1;
}
- unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind) const {
+ InstructionCost getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) const {
return 1;
}
unsigned getNumberOfParts(Type *Tp) const { return 0; }
- unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
- const SCEV *) const {
+ InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
+ const SCEV *) const {
return 0;
}
- unsigned getArithmeticReductionCost(unsigned, VectorType *, bool,
- TTI::TargetCostKind) const {
+ InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind) const {
return 1;
}
- unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool,
- TTI::TargetCostKind) const {
+ InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
+ TTI::TargetCostKind) const {
return 1;
}
@@ -607,7 +638,7 @@ public:
return 1;
}
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
+ InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
return 0;
}
@@ -686,6 +717,13 @@ public:
return true;
}
+ bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
+ ElementCount VF) const {
+ return true;
+ }
+
+ bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
+
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {
@@ -698,11 +736,6 @@ public:
return VF;
}
- bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
- TTI::ReductionFlags Flags) const {
- return false;
- }
-
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
return false;
@@ -721,6 +754,13 @@ public:
bool hasActiveVectorLength() const { return false; }
+ TargetTransformInfo::VPLegalization
+ getVPLegalizationStrategy(const VPIntrinsic &PI) const {
+ return TargetTransformInfo::VPLegalization(
+ /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
+ /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
@@ -816,13 +856,13 @@ protected:
public:
using BaseT::getGEPCost;
- int getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
+ InstructionCost
+ getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
- // TODO: will remove this when pointers have an opaque type.
- assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
- PointeeType &&
+ assert(cast<PointerType>(Ptr->getType()->getScalarType())
+ ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
"explicit pointee type doesn't match operand's pointee type");
auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
bool HasBaseReg = (BaseGV == nullptr);
@@ -880,8 +920,8 @@ public:
return TTI::TCC_Basic;
}
- int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind) {
+ InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
auto *TargetTTI = static_cast<T *>(this);
// Handle non-intrinsic calls, invokes, and callbr.
// FIXME: Unlikely to be true for anything but CodeSize.
@@ -914,7 +954,8 @@ public:
case Instruction::Br:
case Instruction::Ret:
case Instruction::PHI:
- return TargetTTI->getCFInstrCost(Opcode, CostKind);
+ case Instruction::Switch:
+ return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
case Instruction::ExtractValue:
case Instruction::Freeze:
return TTI::TCC_Free;
@@ -987,6 +1028,23 @@ public:
CostKind, I);
}
case Instruction::Select: {
+ const Value *Op0, *Op1;
+ if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
+ match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
+ // select x, y, false --> x & y
+ // select x, true, y --> x | y
+ TTI::OperandValueProperties Op1VP = TTI::OP_None;
+ TTI::OperandValueProperties Op2VP = TTI::OP_None;
+ TTI::OperandValueKind Op1VK = TTI::getOperandInfo(Op0, Op1VP);
+ TTI::OperandValueKind Op2VK = TTI::getOperandInfo(Op1, Op2VP);
+ assert(Op0->getType()->getScalarSizeInBits() == 1 &&
+ Op1->getType()->getScalarSizeInBits() == 1);
+
+ SmallVector<const Value *, 2> Operands{Op0, Op1};
+ return TargetTTI->getArithmeticInstrCost(
+ match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
+ CostKind, Op1VK, Op2VK, Op1VP, Op2VP, Operands, I);
+ }
Type *CondTy = U->getOperand(0)->getType();
return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
CmpInst::BAD_ICMP_PREDICATE,
@@ -1020,25 +1078,30 @@ public:
int SubIndex;
if (Shuffle->isExtractSubvectorMask(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
- SubIndex, VecTy);
+ Shuffle->getShuffleMask(), SubIndex,
+ VecTy);
else if (Shuffle->changesLength())
return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
else if (Shuffle->isIdentity())
return 0;
else if (Shuffle->isReverse())
- return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, 0, nullptr);
+ return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
+ Shuffle->getShuffleMask(), 0, nullptr);
else if (Shuffle->isSelect())
- return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, 0, nullptr);
+ return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
+ Shuffle->getShuffleMask(), 0, nullptr);
else if (Shuffle->isTranspose())
- return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, 0, nullptr);
+ return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
+ Shuffle->getShuffleMask(), 0, nullptr);
else if (Shuffle->isZeroEltSplat())
- return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, 0, nullptr);
+ return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
+ Shuffle->getShuffleMask(), 0, nullptr);
else if (Shuffle->isSingleSource())
- return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, 0,
- nullptr);
+ return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
+ Shuffle->getShuffleMask(), 0, nullptr);
- return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, 0,
- nullptr);
+ return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
+ Shuffle->getShuffleMask(), 0, nullptr);
}
case Instruction::ExtractElement: {
unsigned Idx = -1;
@@ -1050,26 +1113,6 @@ public:
if (CI)
Idx = CI->getZExtValue();
- // Try to match a reduction (a series of shufflevector and vector ops
- // followed by an extractelement).
- unsigned RdxOpcode;
- VectorType *RdxType;
- bool IsPairwise;
- switch (TTI::matchVectorReduction(EEI, RdxOpcode, RdxType, IsPairwise)) {
- case TTI::RK_Arithmetic:
- return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType,
- IsPairwise, CostKind);
- case TTI::RK_MinMax:
- return TargetTTI->getMinMaxReductionCost(
- RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
- IsPairwise, /*IsUnsigned=*/false, CostKind);
- case TTI::RK_UnsignedMinMax:
- return TargetTTI->getMinMaxReductionCost(
- RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
- IsPairwise, /*IsUnsigned=*/true, CostKind);
- case TTI::RK_None:
- break;
- }
return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(),
Idx);
}
@@ -1078,7 +1121,7 @@ public:
return TTI::TCC_Basic;
}
- int getInstructionLatency(const Instruction *I) {
+ InstructionCost getInstructionLatency(const Instruction *I) {
SmallVector<const Value *, 4> Operands(I->operand_values());
if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
return 0;
diff --git a/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h b/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h
index d02bcd0e335b..45ef4dbe2155 100644
--- a/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h
+++ b/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h
@@ -9,13 +9,13 @@
// ThinLTO.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
-#define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
+#ifndef LLVM_ANALYSIS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
+#define LLVM_ANALYSIS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include <string>
+#include <memory>
#include <vector>
namespace llvm {
@@ -109,4 +109,4 @@ enum class InlinerFunctionImportStatsOpts {
} // llvm
-#endif // LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
+#endif // LLVM_ANALYSIS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
diff --git a/llvm/include/llvm/Analysis/Utils/Local.h b/llvm/include/llvm/Analysis/Utils/Local.h
index bd82b34165d6..031938c6f9c7 100644
--- a/llvm/include/llvm/Analysis/Utils/Local.h
+++ b/llvm/include/llvm/Analysis/Utils/Local.h
@@ -100,4 +100,4 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
}
-#endif // LLVM_TRANSFORMS_UTILS_LOCAL_H
+#endif // LLVM_ANALYSIS_UTILS_LOCAL_H
diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h
index ea6bc2cf19ee..47ee23e06000 100644
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -12,6 +12,7 @@
#include "llvm/Config/llvm-config.h"
#ifdef LLVM_HAVE_TF_API
+#include "llvm/ADT/StringMap.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/JSON.h"
@@ -120,56 +121,62 @@ loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
/// The assumption is that, for an event to be logged (i.e. a set of feature
/// values and a reward), the user calls the log* API for each feature exactly
/// once, providing the index matching the position in the feature spec list
-/// provided at construction:
+/// provided at construction. The example assumes the first feature's element
+/// type is float, the second is int64, and the reward is float:
+///
/// event 0:
-/// logTensorValue(0, ...)
-/// logTensorValue(1, ...)
+/// logFloatValue(0, ...)
+/// logInt64Value(1, ...)
/// ...
-/// logReward(...)
+/// logFloatReward(...)
/// event 1:
-/// logTensorValue(0, ...)
-/// logTensorValue(1, ...)
+/// logFloatValue(0, ...)
+/// logInt64Value(1, ...)
/// ...
-/// logReward(...)
+/// logFloatReward(...)
///
/// At the end, call print to generate the protobuf.
+/// Alternatively, don't call logReward at the end of each event, just
+/// log{Float|Int32|Int64}FinalReward at the end.
+class LoggerDataImpl;
class Logger final {
public:
- /// Construct a Logger. If IncludeReward is false, then logReward shouldn't
- /// be called, and the reward feature won't be printed out.
+ /// Construct a Logger. If IncludeReward is false, then logReward or
+ /// logFinalReward shouldn't be called, and the reward feature won't be
+ /// printed out.
Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward)
- : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
- RawLogData(FeatureSpecs.size() + IncludeReward),
- IncludeReward(IncludeReward) {}
-
- template <typename T> void logReward(T Value) {
- assert(IncludeReward);
- logTensorValue(RawLogData.size() - 1, &Value);
- }
+ const TensorSpec &RewardSpec, bool IncludeReward);
- template <typename T> void logFinalReward(T Value) {
- assert(RawLogData.back().empty());
- logReward(Value);
- }
+ ~Logger();
- template <typename T>
- void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
- const char *Start = reinterpret_cast<const char *>(Value);
- const char *End = Start + sizeof(T) * Size;
- RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End);
- }
+ void logFloatReward(float Value);
+ void logInt32Reward(int32_t Value);
+ void logInt64Reward(int64_t Value);
+
+ void logFloatFinalReward(float Value);
+ void logInt32FinalReward(int32_t Value);
+ void logInt64FinalReward(int64_t Value);
+
+ void logFloatValue(size_t FeatureID, const float *Value);
+ void logInt32Value(size_t FeatureID, const int32_t *Value);
+ void logInt64Value(size_t FeatureID, const int64_t *Value);
+
+ void logSpecifiedTensorValue(size_t FeatureID, const char *RawData);
+
+ // Warning! For int32_t, the return is set up for int64_t, so the caller needs
+ // to piecemeal cast their int32_t values.
+ // FIXME: let's drop int32_t support. While it's supported by evaluator, it's
+ // not supported by the tensorflow::SequenceExample proto. For small values,
+ // we can consider using bytes.
+ char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID);
void print(raw_ostream &OS);
private:
std::vector<LoggedFeatureSpec> FeatureSpecs;
TensorSpec RewardSpec;
- /// RawData has one entry per feature, plus one more for the reward.
- /// Each feature's values are then stored in a vector, in succession.
- /// This means the ith event is stored at [*][i]
- std::vector<std::vector<char>> RawLogData;
const bool IncludeReward;
+ std::unique_ptr<LoggerDataImpl> LoggerData;
};
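A hedged usage sketch matching the comment above; the feature names, shapes, and reward values are illustrative, and TensorSpec::createSpec / LoggedFeatureSpec are taken from the surrounding TFUtils API:

  std::vector<LoggedFeatureSpec> Specs{
      {TensorSpec::createSpec<float>("feature_a", {1}), None},
      {TensorSpec::createSpec<int64_t>("feature_b", {1}), None}};
  Logger Log(Specs, TensorSpec::createSpec<float>("reward", {1}),
             /*IncludeReward=*/true);
  float A = 0.5f;
  int64_t B = 42;
  Log.logFloatValue(0, &A);  // feature 0: float element type
  Log.logInt64Value(1, &B);  // feature 1: int64 element type
  Log.logFloatReward(1.0f);  // reward for this event
  Log.print(outs());         // emit the serialized log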
class TFModelEvaluator final {
diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h
index 108d08033ac3..1b32fca50697 100644
--- a/llvm/include/llvm/Analysis/ValueLattice.h
+++ b/llvm/include/llvm/Analysis/ValueLattice.h
@@ -17,13 +17,13 @@
// ValueLatticeElement
//===----------------------------------------------------------------------===//
+namespace llvm {
+
/// This class represents lattice values for constants.
///
/// FIXME: This is basically just for bringup, this can be made a lot more rich
/// in the future.
///
-
-namespace llvm {
class ValueLatticeElement {
enum ValueLatticeElementTy {
/// This Value has no known value yet. As a result, this implies the
@@ -474,11 +474,9 @@ public:
const auto &CR = getConstantRange();
const auto &OtherCR = Other.getConstantRange();
- if (ConstantRange::makeSatisfyingICmpRegion(Pred, OtherCR).contains(CR))
+ if (CR.icmp(Pred, OtherCR))
return ConstantInt::getTrue(Ty);
- if (ConstantRange::makeSatisfyingICmpRegion(
- CmpInst::getInversePredicate(Pred), OtherCR)
- .contains(CR))
+ if (CR.icmp(CmpInst::getInversePredicate(Pred), OtherCR))
return ConstantInt::getFalse(Ty);
return nullptr;
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 86c0991451c5..90ec742f18e6 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -370,10 +370,11 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// that the returned value has pointer type if the specified value does. If
/// the MaxLookup value is non-zero, it limits the number of instructions to
/// be stripped off.
- Value *getUnderlyingObject(Value *V, unsigned MaxLookup = 6);
- inline const Value *getUnderlyingObject(const Value *V,
- unsigned MaxLookup = 6) {
- return getUnderlyingObject(const_cast<Value *>(V), MaxLookup);
+ const Value *getUnderlyingObject(const Value *V, unsigned MaxLookup = 6);
+ inline Value *getUnderlyingObject(Value *V, unsigned MaxLookup = 6) {
+ // Force const to avoid infinite recursion.
+ const Value *VConst = V;
+ return const_cast<Value *>(getUnderlyingObject(VConst, MaxLookup));
}
/// This method is similar to getUnderlyingObject except that it can
@@ -460,7 +461,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// for such instructions, moving them may change the resulting value.
bool isSafeToSpeculativelyExecute(const Value *V,
const Instruction *CtxI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ const TargetLibraryInfo *TLI = nullptr);
/// Returns true if the result or effects of the given instructions \p I
/// depend on or influence global memory.
@@ -582,6 +584,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// poison.
/// Formally, given I = `r = op v1 v2 .. vN`, propagatesPoison returns true
/// if, for all i, r is evaluated to poison or op raises UB if vi = poison.
+ /// If vi is a vector or an aggregate and r is a single value, any poison
+ /// element in vi should make r poison or raise UB.
/// To filter out operands that raise UB on poison, you can use
/// getGuaranteedNonPoisonOp.
bool propagatesPoison(const Operator *I);
@@ -590,6 +594,11 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// if I is executed and that operand has a poison value.
void getGuaranteedNonPoisonOps(const Instruction *I,
SmallPtrSetImpl<const Value *> &Ops);
+ /// Insert operands of I into Ops such that I will trigger undefined behavior
+ /// if I is executed and that operand is not a well-defined value
+ /// (i.e. has undef bits or poison).
+ void getGuaranteedWellDefinedOps(const Instruction *I,
+ SmallPtrSetImpl<const Value *> &Ops);
/// Return true if the given instruction must trigger undefined behavior
/// when I is executed with any operands which appear in KnownPoison holding
@@ -729,6 +738,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// For example, signed minimum is the inverse of signed maximum.
SelectPatternFlavor getInverseMinMaxFlavor(SelectPatternFlavor SPF);
+ Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID);
+
/// Return the canonical inverse comparison predicate for the specified
/// minimum/maximum flavor.
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF);
@@ -741,6 +752,37 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
std::pair<Intrinsic::ID, bool>
canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL);
+ /// Attempt to match a simple first order recurrence cycle of the form:
+ /// %iv = phi Ty [%Start, %Entry], [%Inc, %backedge]
+ /// %inc = binop %iv, %step
+ /// OR
+ /// %iv = phi Ty [%Start, %Entry], [%Inc, %backedge]
+ /// %inc = binop %step, %iv
+ ///
+ /// A first order recurrence is a formula with the form: X_n = f(X_(n-1))
+ ///
+ /// A couple of notes on subtleties in that definition:
+ /// * The Step does not have to be loop invariant. In math terms, it can
+ /// be a free variable. We allow recurrences with both constant and
+ /// variable coefficients. Callers may wish to filter cases where Step
+ /// does not dominate P.
+ /// * For non-commutative operators, we will match both forms. This
+ /// results in some odd recurrence structures. Callers may wish to filter
+ /// out recurrences where the phi is not the LHS of the returned operator.
+ /// * Because of the structure matched, the caller can assume as a post
+  ///   condition of the match the presence of a Loop with P's parent as its
+ /// header *except* in unreachable code. (Dominance decays in unreachable
+ /// code.)
+ ///
+  /// NOTE: This is intentionally simple.  If you want the ability to analyze
+  /// non-trivial loop conditions, see ScalarEvolution instead.
+ bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
+ Value *&Start, Value *&Step);
+
+  /// Analogous to the above, but starting from the binary operator.
+ bool matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
+ Value *&Start, Value *&Step);
+
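
A minimal sketch of the phi-rooted form of the matcher, assuming the caller only wants to classify a phi (isSimpleRecurrencePhi is a hypothetical helper):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool isSimpleRecurrencePhi(PHINode &Phi) {
  BinaryOperator *Inc = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  // On success: Inc is the binop feeding the backedge, Start is the value
  // coming from the entry block, and Step is the other operand of Inc, which
  // per the notes above need not be loop invariant.
  return matchSimpleRecurrence(&Phi, Inc, Start, Step);
}
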
/// Return true if RHS is known to be implied true by LHS. Return false if
/// RHS is known to be implied false by LHS. Otherwise, return None if no
/// implication can be made.
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index cfc3d6115866..8a1ebec4c727 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -17,402 +17,454 @@
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) VEC,
#endif
+#define FIXED(NL) ElementCount::getFixed(NL)
+#define SCALABLE(NL) ElementCount::getScalable(NL)
+
#if !(defined(TLI_DEFINE_VECFUNC))
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
-#endif
+#endif
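
With the VF column now an ElementCount, a consumer expands the table roughly as before; this sketch assumes a VecDesc record whose last field is an ElementCount, mirroring how TargetLibraryInfo-style clients include this file:

// In the including .cpp file, select a library; the default TLI_DEFINE_VECFUNC
// above then turns every entry into a {ScalarName, VectorName, VF} initializer.
#define TLI_DEFINE_ACCELERATE_VECFUNCS
static const VecDesc AccelerateVecFuncs[] = {
#include "llvm/Analysis/VecFuncs.def"
};
// Fixed and scalable vectorization factors can now coexist in one table.
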
#if defined(TLI_DEFINE_ACCELERATE_VECFUNCS)
// Accelerate framework's Vector Functions
// Floating-Point Arithmetic and Auxiliary Functions
-TLI_DEFINE_VECFUNC("ceilf", "vceilf", 4)
-TLI_DEFINE_VECFUNC("fabsf", "vfabsf", 4)
-TLI_DEFINE_VECFUNC("llvm.fabs.f32", "vfabsf", 4)
-TLI_DEFINE_VECFUNC("floorf", "vfloorf", 4)
-TLI_DEFINE_VECFUNC("sqrtf", "vsqrtf", 4)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "vsqrtf", 4)
+TLI_DEFINE_VECFUNC("ceilf", "vceilf", FIXED(4))
+TLI_DEFINE_VECFUNC("fabsf", "vfabsf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.fabs.f32", "vfabsf", FIXED(4))
+TLI_DEFINE_VECFUNC("floorf", "vfloorf", FIXED(4))
+TLI_DEFINE_VECFUNC("sqrtf", "vsqrtf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "vsqrtf", FIXED(4))
// Exponential and Logarithmic Functions
-TLI_DEFINE_VECFUNC("expf", "vexpf", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "vexpf", 4)
-TLI_DEFINE_VECFUNC("expm1f", "vexpm1f", 4)
-TLI_DEFINE_VECFUNC("logf", "vlogf", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "vlogf", 4)
-TLI_DEFINE_VECFUNC("log1pf", "vlog1pf", 4)
-TLI_DEFINE_VECFUNC("log10f", "vlog10f", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "vlog10f", 4)
-TLI_DEFINE_VECFUNC("logbf", "vlogbf", 4)
+TLI_DEFINE_VECFUNC("expf", "vexpf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "vexpf", FIXED(4))
+TLI_DEFINE_VECFUNC("expm1f", "vexpm1f", FIXED(4))
+TLI_DEFINE_VECFUNC("logf", "vlogf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "vlogf", FIXED(4))
+TLI_DEFINE_VECFUNC("log1pf", "vlog1pf", FIXED(4))
+TLI_DEFINE_VECFUNC("log10f", "vlog10f", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "vlog10f", FIXED(4))
+TLI_DEFINE_VECFUNC("logbf", "vlogbf", FIXED(4))
// Trigonometric Functions
-TLI_DEFINE_VECFUNC("sinf", "vsinf", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", 4)
-TLI_DEFINE_VECFUNC("cosf", "vcosf", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", 4)
-TLI_DEFINE_VECFUNC("tanf", "vtanf", 4)
-TLI_DEFINE_VECFUNC("asinf", "vasinf", 4)
-TLI_DEFINE_VECFUNC("acosf", "vacosf", 4)
-TLI_DEFINE_VECFUNC("atanf", "vatanf", 4)
+TLI_DEFINE_VECFUNC("sinf", "vsinf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4))
+TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4))
+TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4))
+TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4))
+TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4))
+TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4))
// Hyperbolic Functions
-TLI_DEFINE_VECFUNC("sinhf", "vsinhf", 4)
-TLI_DEFINE_VECFUNC("coshf", "vcoshf", 4)
-TLI_DEFINE_VECFUNC("tanhf", "vtanhf", 4)
-TLI_DEFINE_VECFUNC("asinhf", "vasinhf", 4)
-TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4)
-TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
+TLI_DEFINE_VECFUNC("sinhf", "vsinhf", FIXED(4))
+TLI_DEFINE_VECFUNC("coshf", "vcoshf", FIXED(4))
+TLI_DEFINE_VECFUNC("tanhf", "vtanhf", FIXED(4))
+TLI_DEFINE_VECFUNC("asinhf", "vasinhf", FIXED(4))
+TLI_DEFINE_VECFUNC("acoshf", "vacoshf", FIXED(4))
+TLI_DEFINE_VECFUNC("atanhf", "vatanhf", FIXED(4))
+
+#elif defined(TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS)
+// Darwin libsystem_m vector functions.
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("exp", "_simd_exp_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_simd_exp_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("expf", "_simd_exp_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_simd_exp_f4", FIXED(4))
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("acos", "_simd_acos_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("acosf", "_simd_acos_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("asin", "_simd_asin_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinf", "_simd_asin_f4", FIXED(4))
+
+TLI_DEFINE_VECFUNC("atan", "_simd_atan_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4))
+
+TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("cosf", "_simd_cos_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_simd_cos_f4", FIXED(4))
+
+TLI_DEFINE_VECFUNC("sin", "_simd_sin_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_simd_sin_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinf", "_simd_sin_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_simd_sin_f4", FIXED(4))
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("cbrt", "_simd_cbrt_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("cbrtf", "_simd_cbrt_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("erf", "_simd_erf_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("erff", "_simd_erf_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("pow", "_simd_pow_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_simd_pow_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("powf", "_simd_pow_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_simd_pow_f4", FIXED(4))
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinh", "_simd_sinh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinhf", "_simd_sinh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("cosh", "_simd_cosh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("coshf", "_simd_cosh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("tanh", "_simd_tanh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("tanhf", "_simd_tanh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("asinh", "_simd_asinh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinhf", "_simd_asinh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("acosh", "_simd_acosh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("acoshf", "_simd_acosh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("atanh", "_simd_atanh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanhf", "_simd_atanh_f4", FIXED(4))
#elif defined(TLI_DEFINE_LIBMVEC_X86_VECFUNCS)
// GLIBC Vector math Functions
-TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", 2)
-TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", 4)
+TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", FIXED(2))
+TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", FIXED(4))
-TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", 4)
-TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", 8)
+TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", FIXED(4))
+TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", 2)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", 8)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", FIXED(8))
-TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", 2)
-TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", 4)
+TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", FIXED(2))
+TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", FIXED(4))
-TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", 4)
-TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", 8)
+TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", FIXED(4))
+TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", 2)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", 8)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8))
-TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", 2)
-TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", 4)
+TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2))
+TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4))
-TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", 4)
-TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", 8)
+TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", FIXED(4))
+TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", FIXED(8))
-TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", 2)
-TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", 4)
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", FIXED(2))
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", FIXED(4))
-TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", 4)
-TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", 8)
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", FIXED(4))
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", 2)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", 8)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", FIXED(8))
-TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", 2)
-TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", 4)
+TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", FIXED(2))
+TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", FIXED(4))
-TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", 4)
-TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", 8)
+TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", FIXED(4))
+TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", FIXED(8))
-TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", 2)
-TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", 4)
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", FIXED(2))
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", FIXED(4))
-TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", 4)
-TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", 8)
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", FIXED(4))
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", 2)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", 8)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", FIXED(8))
-TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", 2)
-TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", 4)
+TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", FIXED(2))
+TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", FIXED(4))
-TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", 4)
-TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", 8)
+TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", FIXED(4))
+TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", FIXED(8))
-TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", 2)
-TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", 4)
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", FIXED(2))
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", FIXED(4))
-TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", 4)
-TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", 8)
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", FIXED(4))
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", 2)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", 8)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", FIXED(8))
#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
// IBM MASS library's vector Functions
// Floating-Point Arithmetic and Auxiliary Functions
-TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_massv", 2)
-TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_massv", 4)
-TLI_DEFINE_VECFUNC("pow", "__powd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_massv", 2)
-TLI_DEFINE_VECFUNC("powf", "__powf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_massv", 4)
-TLI_DEFINE_VECFUNC("sqrt", "__sqrtd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__sqrtd2_massv", 2)
-TLI_DEFINE_VECFUNC("sqrtf", "__sqrtf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__sqrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2", FIXED(2))
+TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4", FIXED(4))
+TLI_DEFINE_VECFUNC("pow", "__powd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2", FIXED(2))
+TLI_DEFINE_VECFUNC("powf", "__powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4", FIXED(4))
// Exponential and Logarithmic Functions
-TLI_DEFINE_VECFUNC("exp", "__expd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_massv", 2)
-TLI_DEFINE_VECFUNC("expf", "__expf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_massv", 4)
-TLI_DEFINE_VECFUNC("exp2", "__exp2d2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_massv", 2)
-TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_massv", 4)
-TLI_DEFINE_VECFUNC("expm1", "__expm1d2_massv", 2)
-TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_massv", 4)
-TLI_DEFINE_VECFUNC("log", "__logd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_massv", 2)
-TLI_DEFINE_VECFUNC("logf", "__logf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_massv", 4)
-TLI_DEFINE_VECFUNC("log1p", "__log1pd2_massv", 2)
-TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_massv", 4)
-TLI_DEFINE_VECFUNC("log10", "__log10d2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_massv", 2)
-TLI_DEFINE_VECFUNC("log10f", "__log10f4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_massv", 4)
-TLI_DEFINE_VECFUNC("log2", "__log2d2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_massv", 2)
-TLI_DEFINE_VECFUNC("log2f", "__log2f4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_massv", 4)
+TLI_DEFINE_VECFUNC("exp", "__expd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2", FIXED(2))
+TLI_DEFINE_VECFUNC("expf", "__expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("exp2", "__exp2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("exp2f", "__exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("expm1", "__expm1d2", FIXED(2))
+TLI_DEFINE_VECFUNC("expm1f", "__expm1f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log", "__logd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2", FIXED(2))
+TLI_DEFINE_VECFUNC("logf", "__logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("log1p", "__log1pd2", FIXED(2))
+TLI_DEFINE_VECFUNC("log1pf", "__log1pf4", FIXED(4))
+TLI_DEFINE_VECFUNC("log10", "__log10d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2", FIXED(2))
+TLI_DEFINE_VECFUNC("log10f", "__log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log2", "__log2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("log2f", "__log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4", FIXED(4))
// Trigonometric Functions
-TLI_DEFINE_VECFUNC("sin", "__sind2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_massv", 2)
-TLI_DEFINE_VECFUNC("sinf", "__sinf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_massv", 4)
-TLI_DEFINE_VECFUNC("cos", "__cosd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_massv", 2)
-TLI_DEFINE_VECFUNC("cosf", "__cosf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_massv", 4)
-TLI_DEFINE_VECFUNC("tan", "__tand2_massv", 2)
-TLI_DEFINE_VECFUNC("tanf", "__tanf4_massv", 4)
-TLI_DEFINE_VECFUNC("asin", "__asind2_massv", 2)
-TLI_DEFINE_VECFUNC("asinf", "__asinf4_massv", 4)
-TLI_DEFINE_VECFUNC("acos", "__acosd2_massv", 2)
-TLI_DEFINE_VECFUNC("acosf", "__acosf4_massv", 4)
-TLI_DEFINE_VECFUNC("atan", "__atand2_massv", 2)
-TLI_DEFINE_VECFUNC("atanf", "__atanf4_massv", 4)
-TLI_DEFINE_VECFUNC("atan2", "__atan2d2_massv", 2)
-TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_massv", 4)
+TLI_DEFINE_VECFUNC("sin", "__sind2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinf", "__sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("cos", "__cosd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2", FIXED(2))
+TLI_DEFINE_VECFUNC("cosf", "__cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("tan", "__tand2", FIXED(2))
+TLI_DEFINE_VECFUNC("tanf", "__tanf4", FIXED(4))
+TLI_DEFINE_VECFUNC("asin", "__asind2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinf", "__asinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("acos", "__acosd2", FIXED(2))
+TLI_DEFINE_VECFUNC("acosf", "__acosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("atan", "__atand2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanf", "__atanf4", FIXED(4))
+TLI_DEFINE_VECFUNC("atan2", "__atan2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atan2f", "__atan2f4", FIXED(4))
// Hyperbolic Functions
-TLI_DEFINE_VECFUNC("sinh", "__sinhd2_massv", 2)
-TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_massv", 4)
-TLI_DEFINE_VECFUNC("cosh", "__coshd2_massv", 2)
-TLI_DEFINE_VECFUNC("coshf", "__coshf4_massv", 4)
-TLI_DEFINE_VECFUNC("tanh", "__tanhd2_massv", 2)
-TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_massv", 4)
-TLI_DEFINE_VECFUNC("asinh", "__asinhd2_massv", 2)
-TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_massv", 4)
-TLI_DEFINE_VECFUNC("acosh", "__acoshd2_massv", 2)
-TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_massv", 4)
-TLI_DEFINE_VECFUNC("atanh", "__atanhd2_massv", 2)
-TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_massv", 4)
+TLI_DEFINE_VECFUNC("sinh", "__sinhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinhf", "__sinhf4", FIXED(4))
+TLI_DEFINE_VECFUNC("cosh", "__coshd2", FIXED(2))
+TLI_DEFINE_VECFUNC("coshf", "__coshf4", FIXED(4))
+TLI_DEFINE_VECFUNC("tanh", "__tanhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("tanhf", "__tanhf4", FIXED(4))
+TLI_DEFINE_VECFUNC("asinh", "__asinhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinhf", "__asinhf4", FIXED(4))
+TLI_DEFINE_VECFUNC("acosh", "__acoshd2", FIXED(2))
+TLI_DEFINE_VECFUNC("acoshf", "__acoshf4", FIXED(4))
+TLI_DEFINE_VECFUNC("atanh", "__atanhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanhf", "__atanhf4", FIXED(4))
#elif defined(TLI_DEFINE_SVML_VECFUNCS)
// Intel SVM library's Vector Functions
-TLI_DEFINE_VECFUNC("sin", "__svml_sin2", 2)
-TLI_DEFINE_VECFUNC("sin", "__svml_sin4", 4)
-TLI_DEFINE_VECFUNC("sin", "__svml_sin8", 8)
+TLI_DEFINE_VECFUNC("sin", "__svml_sin2", FIXED(2))
+TLI_DEFINE_VECFUNC("sin", "__svml_sin4", FIXED(4))
+TLI_DEFINE_VECFUNC("sin", "__svml_sin8", FIXED(8))
-TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", 4)
-TLI_DEFINE_VECFUNC("sinf", "__svml_sinf8", 8)
-TLI_DEFINE_VECFUNC("sinf", "__svml_sinf16", 16)
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf8", FIXED(8))
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin2", 2)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin4", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin8", 8)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf4", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf8", 8)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf16", 16)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf16", FIXED(16))
-TLI_DEFINE_VECFUNC("cos", "__svml_cos2", 2)
-TLI_DEFINE_VECFUNC("cos", "__svml_cos4", 4)
-TLI_DEFINE_VECFUNC("cos", "__svml_cos8", 8)
+TLI_DEFINE_VECFUNC("cos", "__svml_cos2", FIXED(2))
+TLI_DEFINE_VECFUNC("cos", "__svml_cos4", FIXED(4))
+TLI_DEFINE_VECFUNC("cos", "__svml_cos8", FIXED(8))
-TLI_DEFINE_VECFUNC("cosf", "__svml_cosf4", 4)
-TLI_DEFINE_VECFUNC("cosf", "__svml_cosf8", 8)
-TLI_DEFINE_VECFUNC("cosf", "__svml_cosf16", 16)
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf8", FIXED(8))
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos2", 2)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos4", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos8", 8)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", 8)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", 16)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", FIXED(16))
-TLI_DEFINE_VECFUNC("pow", "__svml_pow2", 2)
-TLI_DEFINE_VECFUNC("pow", "__svml_pow4", 4)
-TLI_DEFINE_VECFUNC("pow", "__svml_pow8", 8)
+TLI_DEFINE_VECFUNC("pow", "__svml_pow2", FIXED(2))
+TLI_DEFINE_VECFUNC("pow", "__svml_pow4", FIXED(4))
+TLI_DEFINE_VECFUNC("pow", "__svml_pow8", FIXED(8))
-TLI_DEFINE_VECFUNC("powf", "__svml_powf4", 4)
-TLI_DEFINE_VECFUNC("powf", "__svml_powf8", 8)
-TLI_DEFINE_VECFUNC("powf", "__svml_powf16", 16)
+TLI_DEFINE_VECFUNC("powf", "__svml_powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("powf", "__svml_powf8", FIXED(8))
+TLI_DEFINE_VECFUNC("powf", "__svml_powf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow2", 2)
-TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow4", 4)
-TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow8", 8)
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow2", FIXED(2))
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow4", FIXED(4))
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow8", FIXED(8))
-TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf4", 4)
-TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf8", 8)
-TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf16", 16)
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow2", 2)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow4", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow8", 8)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf4", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf8", 8)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf16", 16)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf16", FIXED(16))
-TLI_DEFINE_VECFUNC("exp", "__svml_exp2", 2)
-TLI_DEFINE_VECFUNC("exp", "__svml_exp4", 4)
-TLI_DEFINE_VECFUNC("exp", "__svml_exp8", 8)
+TLI_DEFINE_VECFUNC("exp", "__svml_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC("exp", "__svml_exp4", FIXED(4))
+TLI_DEFINE_VECFUNC("exp", "__svml_exp8", FIXED(8))
-TLI_DEFINE_VECFUNC("expf", "__svml_expf4", 4)
-TLI_DEFINE_VECFUNC("expf", "__svml_expf8", 8)
-TLI_DEFINE_VECFUNC("expf", "__svml_expf16", 16)
+TLI_DEFINE_VECFUNC("expf", "__svml_expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("expf", "__svml_expf8", FIXED(8))
+TLI_DEFINE_VECFUNC("expf", "__svml_expf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp2", 2)
-TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp4", 4)
-TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp8", 8)
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp4", FIXED(4))
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp8", FIXED(8))
-TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf4", 4)
-TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf8", 8)
-TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf16", 16)
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp2", 2)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp4", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp8", 8)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf4", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf8", 8)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf16", 16)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf16", FIXED(16))
-TLI_DEFINE_VECFUNC("log", "__svml_log2", 2)
-TLI_DEFINE_VECFUNC("log", "__svml_log4", 4)
-TLI_DEFINE_VECFUNC("log", "__svml_log8", 8)
+TLI_DEFINE_VECFUNC("log", "__svml_log2", FIXED(2))
+TLI_DEFINE_VECFUNC("log", "__svml_log4", FIXED(4))
+TLI_DEFINE_VECFUNC("log", "__svml_log8", FIXED(8))
-TLI_DEFINE_VECFUNC("logf", "__svml_logf4", 4)
-TLI_DEFINE_VECFUNC("logf", "__svml_logf8", 8)
-TLI_DEFINE_VECFUNC("logf", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("logf", "__svml_logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("logf", "__svml_logf8", FIXED(8))
+TLI_DEFINE_VECFUNC("logf", "__svml_logf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__log_finite", "__svml_log2", 2)
-TLI_DEFINE_VECFUNC("__log_finite", "__svml_log4", 4)
-TLI_DEFINE_VECFUNC("__log_finite", "__svml_log8", 8)
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log2", FIXED(2))
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log4", FIXED(4))
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log8", FIXED(8))
-TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf4", 4)
-TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf8", 8)
-TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log2", 2)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log4", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log8", 8)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", FIXED(16))
-TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2)
-TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4)
-TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8)
+TLI_DEFINE_VECFUNC("log2", "__svml_log22", FIXED(2))
+TLI_DEFINE_VECFUNC("log2", "__svml_log24", FIXED(4))
+TLI_DEFINE_VECFUNC("log2", "__svml_log28", FIXED(8))
-TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4)
-TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8)
-TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16)
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2)
-TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4)
-TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8)
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", FIXED(2))
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", FIXED(4))
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", FIXED(8))
-TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4)
-TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8)
-TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16)
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2)
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4)
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4)
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("log10", "__svml_log102", 2)
-TLI_DEFINE_VECFUNC("log10", "__svml_log104", 4)
-TLI_DEFINE_VECFUNC("log10", "__svml_log108", 8)
+TLI_DEFINE_VECFUNC("log10", "__svml_log102", FIXED(2))
+TLI_DEFINE_VECFUNC("log10", "__svml_log104", FIXED(4))
+TLI_DEFINE_VECFUNC("log10", "__svml_log108", FIXED(8))
-TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", 4)
-TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", 8)
-TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", 16)
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", FIXED(8))
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", FIXED(16))
-TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", 2)
-TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", 4)
-TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", 8)
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", FIXED(2))
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", FIXED(4))
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", FIXED(8))
-TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", 4)
-TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", 8)
-TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", 16)
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", FIXED(8))
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", 2)
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", 8)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", 8)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", 16)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", FIXED(16))
-TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", 2)
-TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", 4)
-TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", 8)
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", FIXED(2))
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", FIXED(4))
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", FIXED(8))
-TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", 4)
-TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", 8)
-TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", 16)
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", FIXED(4))
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", FIXED(8))
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", 2)
-TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", 4)
-TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", 8)
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", FIXED(2))
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", FIXED(4))
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", FIXED(8))
-TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
-TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
-TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", FIXED(16))
-TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
-TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
-TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
+TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", FIXED(2))
+TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", FIXED(4))
+TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", FIXED(8))
-TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f4", 4)
-TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f8", 8)
-TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f16", 16)
+TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp22", 2)
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp24", 4)
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp28", 8)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp22", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp24", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp28", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f4", 4)
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f8", 8)
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f16", 16)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp22", 2)
-TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp24", 4)
-TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp28", 8)
+TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp22", FIXED(2))
+TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp24", FIXED(4))
+TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp28", FIXED(8))
-TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f4", 4)
-TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f8", 8)
-TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
+TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16))
#else
#error "Must choose which vector library functions are to be defined."
@@ -420,6 +472,7 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
#undef TLI_DEFINE_VECFUNC
#undef TLI_DEFINE_ACCELERATE_VECFUNCS
+#undef TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS
#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS
#undef TLI_DEFINE_MASSV_VECFUNCS
#undef TLI_DEFINE_SVML_VECFUNCS
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 26cb0e456ed4..c890216c9e01 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -31,7 +31,7 @@ enum class VFParamKind {
OMP_LinearPos, // declare simd linear(i:c) uniform(c)
OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c)
OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c)
- OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c
+ OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
OMP_Uniform, // declare simd uniform(i)
GlobalPredicate, // Global logical predicate that acts on all lanes
// of the input and output mask concurrently. For
@@ -80,13 +80,11 @@ struct VFParameter {
/// represent vector functions. in particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
- unsigned VF; // Vectorization factor.
- bool IsScalable; // True if the function is a scalable function.
+ ElementCount VF; // Vectorization factor.
SmallVector<VFParameter, 8> Parameters; // List of parameter information.
// Comparison operator.
bool operator==(const VFShape &Other) const {
- return std::tie(VF, IsScalable, Parameters) ==
- std::tie(Other.VF, Other.IsScalable, Other.Parameters);
+ return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
}
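
A short sketch of what the ElementCount field means for shape comparisons, assuming an already-populated parameter list (Params is a placeholder):

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static bool fixedAndScalableShapesDiffer(
    const SmallVector<VFParameter, 8> &Params) {
  VFShape Fixed4 = {ElementCount::getFixed(4), Params};
  VFShape Scalable4 = {ElementCount::getScalable(4), Params};
  // ElementCount carries the scalable bit as well as the minimum lane count,
  // so a fixed VF of 4 is a different shape from "vscale x 4".
  return !(Fixed4 == Scalable4); // true
}
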
/// Update the parameter in position P.ParamPos to P.
@@ -115,7 +113,7 @@ struct VFShape {
Parameters.push_back(
VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));
- return {EC.getKnownMinValue(), EC.isScalable(), Parameters};
+ return {EC, Parameters};
}
/// Sanity check on the Parameters in the VFShape.
bool hasValidParameterList() const;
@@ -127,12 +125,6 @@ struct VFInfo {
std::string ScalarName; /// Scalar Function Name.
std::string VectorName; /// Vector Function Name associated to this VFInfo.
VFISAKind ISA; /// Instruction Set Architecture.
-
- // Comparison operator.
- bool operator==(const VFInfo &Other) const {
- return std::tie(Shape, ScalarName, VectorName, ISA) ==
- std::tie(Shape, Other.ScalarName, Other.VectorName, Other.ISA);
- }
};
namespace VFABI {
@@ -186,12 +178,13 @@ Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);
/// <isa> = "_LLVM_"
/// <mask> = "N". Note: TLI does not support masked interfaces.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
-/// field of the `VecDesc` struct.
+///            field of the `VecDesc` struct. If the number of lanes is scalable,
+/// then 'x' is printed instead.
/// <vparams> = "v", as many as are the numArgs.
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
- unsigned numArgs, unsigned VF);
+ unsigned numArgs, ElementCount VF);
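
A worked illustration of the documented components; the exact resulting string is illustrative, not normative:

// For a 4-lane fixed-width mapping of "sinf" with one vector parameter:
//   <isa> = "_LLVM_", <mask> = "N", <vlen> = 4, <vparams> = "v"
//   mangleTLIVectorName("__svml_sinf4", "sinf", /*numArgs=*/1,
//                       ElementCount::getFixed(4))
//   yields a name of the form "_ZGV_LLVM_N4v_sinf(__svml_sinf4)".
// With a scalable VF, the <vlen> component is printed as 'x' instead of a number.
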
/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);
@@ -322,6 +315,11 @@ bool isTriviallyVectorizable(Intrinsic::ID ID);
/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
+/// Identifies if the vector form of the intrinsic has a scalar operand that has
+/// an overloaded type.
+bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
+ unsigned ScalarOpdIdx);
+
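
A sketch of how a vectorizer-style client might consult both queries when widening an intrinsic call; CI and ID stand for an existing CallInst and its intrinsic ID, and classifyIntrinsicOperands is a hypothetical helper:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void classifyIntrinsicOperands(const CallInst &CI, Intrinsic::ID ID) {
  for (unsigned OpIdx = 0, E = CI.arg_size(); OpIdx != E; ++OpIdx) {
    if (!hasVectorInstrinsicScalarOpd(ID, OpIdx))
      continue; // this operand is widened like any other vector operand
    // The operand stays scalar in the vector form of the intrinsic...
    if (hasVectorInstrinsicOverloadedScalarOpd(ID, OpIdx)) {
      // ...and its type additionally participates in the overloaded signature,
      // so it must be supplied when obtaining the vector intrinsic declaration.
    }
  }
}
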
/// Returns intrinsic ID for call.
/// For the input call instruction it finds mapping intrinsic and returns
/// its intrinsic ID, in case it does not found it return not_intrinsic.
@@ -601,10 +599,6 @@ public:
bool isReverse() const { return Reverse; }
uint32_t getFactor() const { return Factor; }
- LLVM_ATTRIBUTE_DEPRECATED(uint32_t getAlignment() const,
- "Use getAlign instead.") {
- return Alignment.value();
- }
Align getAlign() const { return Alignment; }
uint32_t getNumMembers() const { return Members.size(); }