Diffstat (limited to 'llvm/include/llvm/Analysis')
65 files changed, 2316 insertions, 1267 deletions
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 9f7461243f35..7fec0feb09d5 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -78,21 +78,64 @@ class Value; /// /// See docs/AliasAnalysis.html for more information on the specific meanings /// of these values. -enum AliasResult : uint8_t { - /// The two locations do not alias at all. - /// - /// This value is arranged to convert to false, while all other values - /// convert to true. This allows a boolean context to convert the result to - /// a binary flag indicating whether there is the possibility of aliasing. - NoAlias = 0, - /// The two locations may or may not alias. This is the least precise result. - MayAlias, - /// The two locations alias, but only due to a partial overlap. - PartialAlias, - /// The two locations precisely alias each other. - MustAlias, +class AliasResult { +private: + static const int OffsetBits = 23; + static const int AliasBits = 8; + static_assert(AliasBits + 1 + OffsetBits <= 32, + "AliasResult size is intended to be 4 bytes!"); + + unsigned int Alias : AliasBits; + unsigned int HasOffset : 1; + signed int Offset : OffsetBits; + +public: + enum Kind : uint8_t { + /// The two locations do not alias at all. + /// + /// This value is arranged to convert to false, while all other values + /// convert to true. This allows a boolean context to convert the result to + /// a binary flag indicating whether there is the possibility of aliasing. + NoAlias = 0, + /// The two locations may or may not alias. This is the least precise + /// result. + MayAlias, + /// The two locations alias, but only due to a partial overlap. + PartialAlias, + /// The two locations precisely alias each other. + MustAlias, + }; + static_assert(MustAlias < (1 << AliasBits), + "Not enough bit field size for the enum!"); + + explicit AliasResult() = delete; + constexpr AliasResult(const Kind &Alias) + : Alias(Alias), HasOffset(false), Offset(0) {} + + operator Kind() const { return static_cast<Kind>(Alias); } + + constexpr bool hasOffset() const { return HasOffset; } + constexpr int32_t getOffset() const { + assert(HasOffset && "No offset!"); + return Offset; + } + void setOffset(int32_t NewOffset) { + if (isInt<OffsetBits>(NewOffset)) { + HasOffset = true; + Offset = NewOffset; + } + } + + /// Helper for processing AliasResult for swapped memory location pairs. + void swap(bool DoSwap = true) { + if (DoSwap && hasOffset()) + setOffset(-getOffset()); + } }; +static_assert(sizeof(AliasResult) == 4, + "AliasResult size is intended to be 4 bytes!"); + /// << operator for AliasResult. raw_ostream &operator<<(raw_ostream &OS, AliasResult AR); @@ -335,6 +378,31 @@ createModRefInfo(const FunctionModRefBehavior FMRB) { return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef)); } +/// Reduced version of MemoryLocation that only stores a pointer and size. +/// Used for caching AATags independent BasicAA results. 
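The enum-to-class change above is a space/precision trade: the alias kind now shares one 32-bit word with an optional signed offset (the known distance between the two locations, used with PartialAlias), so richer results cost no extra cache space. A minimal standalone sketch of the same packing idea, with a hypothetical PackedAliasResult name (this mirrors, but is not, the LLVM class):

#include <cassert>
#include <cstdint>

// Hypothetical re-creation of the packing above: 8 bits of alias kind,
// 1 "has offset" flag, and a 23-bit signed offset in one 32-bit word.
class PackedAliasResult {
  static const int OffsetBits = 23;
  static const int AliasBits = 8;
  unsigned Alias : AliasBits;
  unsigned HasOffset : 1;
  int Offset : OffsetBits;

public:
  enum Kind : uint8_t { NoAlias = 0, MayAlias, PartialAlias, MustAlias };
  PackedAliasResult(Kind K) : Alias(K), HasOffset(false), Offset(0) {}

  // Mirrors the isInt<OffsetBits>() guard: out-of-range offsets are dropped,
  // so callers must check hasOffset() before reading the value back.
  void setOffset(int32_t NewOffset) {
    if (NewOffset >= -(1 << (OffsetBits - 1)) &&
        NewOffset < (1 << (OffsetBits - 1))) {
      HasOffset = true;
      Offset = NewOffset;
    }
  }
  bool hasOffset() const { return HasOffset; }
  int32_t getOffset() const {
    assert(HasOffset && "No offset!");
    return Offset;
  }
};
static_assert(sizeof(PackedAliasResult) == 4, "fits the intended 4 bytes");

int main() {
  PackedAliasResult R(PackedAliasResult::PartialAlias);
  R.setOffset(-8); // the first location starts 8 bytes before the second
  assert(R.hasOffset() && R.getOffset() == -8);
}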
+struct AACacheLoc { + const Value *Ptr; + LocationSize Size; +}; + +template <> struct DenseMapInfo<AACacheLoc> { + static inline AACacheLoc getEmptyKey() { + return {DenseMapInfo<const Value *>::getEmptyKey(), + DenseMapInfo<LocationSize>::getEmptyKey()}; + } + static inline AACacheLoc getTombstoneKey() { + return {DenseMapInfo<const Value *>::getTombstoneKey(), + DenseMapInfo<LocationSize>::getTombstoneKey()}; + } + static unsigned getHashValue(const AACacheLoc &Val) { + return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^ + DenseMapInfo<LocationSize>::getHashValue(Val.Size); + } + static bool isEqual(const AACacheLoc &LHS, const AACacheLoc &RHS) { + return LHS.Ptr == RHS.Ptr && LHS.Size == RHS.Size; + } +}; + /// This class stores info we want to provide to or retain within an alias /// query. By default, the root query is stateless and starts with a freshly /// constructed info object. Specific alias analyses can use this query info to @@ -345,7 +413,7 @@ createModRefInfo(const FunctionModRefBehavior FMRB) { /// caches used by BasicAA, but can further be extended to fit other AA needs. class AAQueryInfo { public: - using LocPair = std::pair<MemoryLocation, MemoryLocation>; + using LocPair = std::pair<AACacheLoc, AACacheLoc>; struct CacheEntry { AliasResult Result; /// Number of times a NoAlias assumption has been used. @@ -360,6 +428,9 @@ public: using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>; IsCapturedCacheT IsCapturedCache; + /// Query depth used to distinguish recursive queries. + unsigned Depth = 0; + /// How many active NoAlias assumption uses there are. int NumAssumptionUses = 0; @@ -369,6 +440,15 @@ public: SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults; AAQueryInfo() : AliasCache(), IsCapturedCache() {} + + /// Create a new AAQueryInfo based on this one, but with the cache cleared. + /// This is used for recursive queries across phis, where cache results may + /// not be valid. + AAQueryInfo withEmptyCache() { + AAQueryInfo NewAAQI; + NewAAQI.Depth = Depth; + return NewAAQI; + } }; class BatchAAResults; @@ -428,7 +508,7 @@ public: /// A trivial helper function to check to see if the specified pointers are /// no-alias. bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - return alias(LocA, LocB) == NoAlias; + return alias(LocA, LocB) == AliasResult::NoAlias; } /// A convenience wrapper around the \c isNoAlias helper interface. @@ -446,13 +526,13 @@ public: /// A trivial helper function to check to see if the specified pointers are /// must-alias. bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - return alias(LocA, LocB) == MustAlias; + return alias(LocA, LocB) == AliasResult::MustAlias; } /// A convenience wrapper around the \c isMustAlias helper interface. bool isMustAlias(const Value *V1, const Value *V2) { return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) == - MustAlias; + AliasResult::MustAlias; } /// Checks whether the given location points to constant memory, or if @@ -715,7 +795,11 @@ public: /// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being /// set. ModRefInfo callCapturesBefore(const Instruction *I, - const MemoryLocation &MemLoc, DominatorTree *DT); + const MemoryLocation &MemLoc, + DominatorTree *DT) { + AAQueryInfo AAQIP; + return callCapturesBefore(I, MemLoc, DT, AAQIP); + } /// A convenience wrapper to synthesize a memory location. 
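The DenseMapInfo<AACacheLoc> specialization above supplies the four pieces every DenseMap key needs: an empty key, a tombstone key, a hash, and equality. With it, the alias cache is keyed on (pointer, size) pairs, so one AATags-independent BasicAA result can serve queries that differ only in their metadata tags. A rough standard-library analogue of the same custom-key pattern (hypothetical names; std::unordered_map needs no sentinel keys, so only hash and equality appear):

#include <cstdint>
#include <functional>
#include <unordered_map>
#include <utility>

// Analogue of AACacheLoc: identity is the (pointer, size) pair only.
struct CacheLoc {
  const void *Ptr;
  uint64_t Size;
  bool operator==(const CacheLoc &O) const {
    return Ptr == O.Ptr && Size == O.Size;
  }
};

// Analogue of getHashValue(): combine the hashes of both members.
struct CacheLocHash {
  size_t operator()(const CacheLoc &L) const {
    return std::hash<const void *>()(L.Ptr) ^ std::hash<uint64_t>()(L.Size);
  }
};

// Analogue of AAQueryInfo::LocPair keying the alias cache.
using LocPair = std::pair<CacheLoc, CacheLoc>;
struct LocPairHash {
  size_t operator()(const LocPair &P) const {
    return CacheLocHash()(P.first) * 31 ^ CacheLocHash()(P.second);
  }
};

int main() {
  std::unordered_map<LocPair, int, LocPairHash> Cache; // int stands in for CacheEntry
  CacheLoc A{nullptr, 4}, B{nullptr, 8};
  Cache[{A, B}] = 1; // the same (ptr, size) pair later hits this entry
  return Cache.count({A, B}) == 1 ? 0 : 1;
}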
ModRefInfo callCapturesBefore(const Instruction *I, const Value *P, @@ -784,6 +868,9 @@ private: ModRefInfo getModRefInfo(const Instruction *I, const Optional<MemoryLocation> &OptLoc, AAQueryInfo &AAQIP); + ModRefInfo callCapturesBefore(const Instruction *I, + const MemoryLocation &MemLoc, DominatorTree *DT, + AAQueryInfo &AAQIP); class Concept; @@ -797,9 +884,6 @@ private: std::vector<AnalysisKey *> AADeps; - /// Query depth used to distinguish recursive queries. - unsigned Depth = 0; - friend class BatchAAResults; }; @@ -841,11 +925,17 @@ public: return AA.getModRefBehavior(Call); } bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - return alias(LocA, LocB) == MustAlias; + return alias(LocA, LocB) == AliasResult::MustAlias; } bool isMustAlias(const Value *V1, const Value *V2) { return alias(MemoryLocation(V1, LocationSize::precise(1)), - MemoryLocation(V2, LocationSize::precise(1))) == MustAlias; + MemoryLocation(V2, LocationSize::precise(1))) == + AliasResult::MustAlias; + } + ModRefInfo callCapturesBefore(const Instruction *I, + const MemoryLocation &MemLoc, + DominatorTree *DT) { + return AA.callCapturesBefore(I, MemLoc, DT, AAQI); } }; @@ -1073,7 +1163,7 @@ protected: public: AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI) { - return MayAlias; + return AliasResult::MayAlias; } bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index b27fd5aa92a7..b66ff395454d 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -35,20 +35,17 @@ namespace llvm { class AAResults; +class AliasResult; class AliasSetTracker; -class BasicBlock; -class LoadInst; -class Loop; -class MemorySSA; class AnyMemSetInst; class AnyMemTransferInst; +class BasicBlock; +class LoadInst; class raw_ostream; class StoreInst; class VAArgInst; class Value; -enum AliasResult : uint8_t; - class AliasSet : public ilist_node<AliasSet> { friend class AliasSetTracker; @@ -235,11 +232,16 @@ public: void dump() const; /// Define an iterator for alias sets... this is just a forward iterator. - class iterator : public std::iterator<std::forward_iterator_tag, - PointerRec, ptrdiff_t> { + class iterator { PointerRec *CurNode; public: + using iterator_category = std::forward_iterator_tag; + using value_type = PointerRec; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + explicit iterator(PointerRec *CN = nullptr) : CurNode(CN) {} bool operator==(const iterator& x) const { @@ -343,8 +345,6 @@ class AliasSetTracker { struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {}; AAResults &AA; - MemorySSA *MSSA = nullptr; - Loop *L = nullptr; ilist<AliasSet> AliasSets; using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *, @@ -357,8 +357,6 @@ public: /// Create an empty collection of AliasSets, and use the specified alias /// analysis object to disambiguate load and store addresses. 
explicit AliasSetTracker(AAResults &AA) : AA(AA) {} - explicit AliasSetTracker(AAResults &AA, MemorySSA *MSSA, Loop *L) - : AA(AA), MSSA(MSSA), L(L) {} ~AliasSetTracker() { clear(); } /// These methods are used to add different types of instructions to the alias @@ -383,7 +381,6 @@ public: void add(BasicBlock &BB); // Add all instructions in basic block void add(const AliasSetTracker &AST); // Add alias relations from another AST void addUnknown(Instruction *I); - void addAllInstructionsInLoopUsingMSSA(); void clear(); diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h index 4d2884284d67..49c0cd89a4db 100644 --- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h +++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h @@ -11,11 +11,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEQUERIES_H -#define LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEQUERIES_H +#ifndef LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H +#define LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H #include "llvm/IR/Attributes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/ADT/DenseMap.h" namespace llvm { @@ -39,12 +40,12 @@ enum AssumeBundleArg { /// /// Return true iff the queried attribute was found. /// If ArgVal is set, the argument will be stored to ArgVal. -bool hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, StringRef AttrName, +bool hasAttributeInAssume(AssumeInst &Assume, Value *IsOn, StringRef AttrName, uint64_t *ArgVal = nullptr); -inline bool hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, +inline bool hasAttributeInAssume(AssumeInst &Assume, Value *IsOn, Attribute::AttrKind Kind, uint64_t *ArgVal = nullptr) { - return hasAttributeInAssume(AssumeCI, IsOn, + return hasAttributeInAssume(Assume, IsOn, Attribute::getNameFromAttrKind(Kind), ArgVal); } @@ -87,7 +88,7 @@ using RetainedKnowledgeMap = /// many queries are going to be made on the same llvm.assume. /// String attributes are not inserted in the map. /// If the IR changes the map will be outdated. -void fillMapFromAssume(CallInst &AssumeCI, RetainedKnowledgeMap &Result); +void fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result); /// Represent one information held inside an operand bundle of an llvm.assume. /// AttrKind is the property that holds. @@ -106,19 +107,28 @@ struct RetainedKnowledge { ArgValue == Other.ArgValue; } bool operator!=(RetainedKnowledge Other) const { return !(*this == Other); } + /// This is only intended for use in std::min/std::max between attributes that + /// only differ in ArgValue. + bool operator<(RetainedKnowledge Other) const { + assert(((AttrKind == Other.AttrKind && WasOn == Other.WasOn) || + AttrKind == Attribute::None || Other.AttrKind == Attribute::None) && + "This is only intended for use in min/max to select the best for " + "RetainedKnowledge that is otherwise equal"); + return ArgValue < Other.ArgValue; + } operator bool() const { return AttrKind != Attribute::None; } static RetainedKnowledge none() { return RetainedKnowledge{}; } }; /// Retrieve the information held by Assume on the operand at index Idx. /// Assume should be an llvm.assume and Idx should be in the operand bundle. -RetainedKnowledge getKnowledgeFromOperandInAssume(CallInst &Assume, +RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx); /// Retrieve the information held by the Use U of an llvm.assume. The use should /// be in the operand bundle.
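The operator< added to RetainedKnowledge above exists solely so std::min/std::max can merge two pieces of knowledge that differ only in their argument value, for example keeping the stronger of two alignment assumptions on the same pointer. A small sketch of that intended use, with a mock struct standing in for the real type:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Mock of the RetainedKnowledge fields relevant here; the real struct lives
// in llvm/Analysis/AssumeBundleQueries.h.
struct Knowledge {
  int AttrKind;      // e.g. Attribute::Alignment
  const void *WasOn; // the value the attribute applies to
  uint64_t ArgValue; // e.g. the alignment in bytes
  // As documented above: only meaningful when everything but ArgValue matches.
  bool operator<(const Knowledge &O) const { return ArgValue < O.ArgValue; }
};

int main() {
  const void *Ptr = nullptr;
  Knowledge A{1, Ptr, 8};  // "Ptr is aligned to 8"
  Knowledge B{1, Ptr, 32}; // "Ptr is aligned to 32"
  // Merge by keeping the strongest claim, the documented min/max use case.
  assert(std::max(A, B).ArgValue == 32);
}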
inline RetainedKnowledge getKnowledgeFromUseInAssume(const Use *U) { - return getKnowledgeFromOperandInAssume(*cast<CallInst>(U->getUser()), + return getKnowledgeFromOperandInAssume(*cast<AssumeInst>(U->getUser()), U->getOperandNo()); } @@ -133,7 +143,7 @@ constexpr StringRef IgnoreBundleTag = "ignore"; /// /// the argument to the call of llvm.assume may still be useful even if the /// function returned true. -bool isAssumeWithEmptyBundle(CallInst &Assume); +bool isAssumeWithEmptyBundle(AssumeInst &Assume); /// Return a valid Knowledge associated to the Use U if its Attribute kind is /// in AttrKinds. @@ -159,7 +169,7 @@ RetainedKnowledge getKnowledgeValidInContext( /// This extracts the Knowledge from an element of an operand bundle. /// This is mostly for use in the assume builder. -RetainedKnowledge getKnowledgeFromBundle(CallInst &Assume, +RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI); } // namespace llvm diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h index 0ef63dc68e1c..51d04bd8cf02 100644 --- a/llvm/include/llvm/Analysis/AssumptionCache.h +++ b/llvm/include/llvm/Analysis/AssumptionCache.h @@ -26,7 +26,7 @@ namespace llvm { -class CallInst; +class AssumeInst; class Function; class raw_ostream; class Value; @@ -45,7 +45,7 @@ public: enum : unsigned { ExprResultIdx = std::numeric_limits<unsigned>::max() }; struct ResultElem { - WeakTrackingVH Assume; + WeakVH Assume; /// contains either ExprResultIdx or the index of the operand bundle /// containing the knowledge. @@ -116,15 +116,15 @@ public: /// /// The call passed in must be an instruction within this function and must /// not already be in the cache. - void registerAssumption(CallInst *CI); + void registerAssumption(AssumeInst *CI); /// Remove an \@llvm.assume intrinsic from this function's cache if it has /// been added to the cache earlier. - void unregisterAssumption(CallInst *CI); + void unregisterAssumption(AssumeInst *CI); /// Update the cache of values being affected by this assumption (i.e. /// the values about which this assumption provides information). - void updateAffectedValues(CallInst *CI); + void updateAffectedValues(AssumeInst *CI); /// Clear the cache of \@llvm.assume intrinsics for a function. 
/// diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index 46b8cd1f3a88..991c0cbb642a 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -35,7 +35,6 @@ class DataLayout; class DominatorTree; class Function; class GEPOperator; -class LoopInfo; class PHINode; class SelectInst; class TargetLibraryInfo; @@ -56,23 +55,20 @@ class BasicAAResult : public AAResultBase<BasicAAResult> { const TargetLibraryInfo &TLI; AssumptionCache &AC; DominatorTree *DT; - LoopInfo *LI; PhiValues *PV; public: BasicAAResult(const DataLayout &DL, const Function &F, const TargetLibraryInfo &TLI, AssumptionCache &AC, - DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, - PhiValues *PV = nullptr) - : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), PV(PV) - {} + DominatorTree *DT = nullptr, PhiValues *PV = nullptr) + : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), PV(PV) {} BasicAAResult(const BasicAAResult &Arg) : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), - DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {} + DT(Arg.DT), PV(Arg.PV) {} BasicAAResult(BasicAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), - AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {} + AC(Arg.AC), DT(Arg.DT), PV(Arg.PV) {} /// Handle invalidation events in the new pass manager. bool invalidate(Function &Fn, const PreservedAnalyses &PA, @@ -120,14 +116,8 @@ private: // Context instruction to use when querying information about this index. const Instruction *CxtI; - bool operator==(const VariableGEPIndex &Other) const { - return V == Other.V && ZExtBits == Other.ZExtBits && - SExtBits == Other.SExtBits && Scale == Other.Scale; - } - - bool operator!=(const VariableGEPIndex &Other) const { - return !operator==(Other); - } + /// True if all operations in this expression are NSW. + bool IsNSW; void dump() const { print(dbgs()); @@ -152,6 +142,9 @@ private: SmallVector<VariableGEPIndex, 4> VarIndices; // Is GEP index scale compile-time constant. bool HasCompileTimeConstantScale; + // Are all operations inbounds GEPs or non-indexing operations? + // (None iff expression doesn't involve any geps) + Optional<bool> InBounds; void dump() const { print(dbgs()); @@ -159,15 +152,15 @@ private: } void print(raw_ostream &OS) const { OS << "(DecomposedGEP Base=" << Base->getName() - << ", Offset=" << Offset - << ", VarIndices=["; + << ", Offset=" << Offset + << ", VarIndices=["; for (size_t i = 0; i < VarIndices.size(); i++) { - if (i != 0) - OS << ", "; - VarIndices[i].print(OS); + if (i != 0) + OS << ", "; + VarIndices[i].print(OS); } OS << "], HasCompileTimeConstantScale=" << HasCompileTimeConstantScale - << ")"; + << ")"; } }; @@ -190,12 +183,6 @@ private: /// Tracks instructions visited by pointsToConstantMemory. 
SmallPtrSet<const Value *, 16> Visited; - static const Value * - GetLinearExpression(const Value *V, APInt &Scale, APInt &Offset, - unsigned &ZExtBits, unsigned &SExtBits, - const DataLayout &DL, unsigned Depth, AssumptionCache *AC, - DominatorTree *DT, bool &NSW, bool &NUW); - static DecomposedGEP DecomposeGEPExpression(const Value *V, const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT); @@ -224,29 +211,23 @@ private: const SmallVectorImpl<VariableGEPIndex> &Src); AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size, - const AAMDNodes &V1AAInfo, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *V2, LocationSize V2Size, const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI); AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize, - const AAMDNodes &PNAAInfo, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AAInfo, - AAQueryInfo &AAQI); + const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI); AliasResult aliasSelect(const SelectInst *SI, LocationSize SISize, - const AAMDNodes &SIAAInfo, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI); AliasResult aliasCheck(const Value *V1, LocationSize V1Size, - const AAMDNodes &V1AATag, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AATag, + const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI); AliasResult aliasCheckRecursive(const Value *V1, LocationSize V1Size, - const AAMDNodes &V1AATag, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AATag, + const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI, const Value *O1, const Value *O2); }; diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index c22787531117..f581b18bff17 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -42,15 +42,20 @@ #include <iterator> #include <limits> #include <list> +#include <queue> #include <string> +#include <unordered_set> #include <utility> #include <vector> #define DEBUG_TYPE "block-freq" +namespace llvm { extern llvm::cl::opt<bool> CheckBFIUnknownBlockQueries; -namespace llvm { +extern llvm::cl::opt<bool> UseIterativeBFIInference; +extern llvm::cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock; +extern llvm::cl::opt<double> IterativeBFIPrecision; class BranchProbabilityInfo; class Function; @@ -450,12 +455,6 @@ public: bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight); - LoopData &getLoopPackage(const BlockNode &Head) { - assert(Head.Index < Working.size()); - assert(Working[Head.Index].isLoopHeader()); - return *Working[Head.Index].Loop; - } - /// Analyze irreducible SCCs. /// /// Separate irreducible SCCs from \c G, which is an explicit graph of \c @@ -968,6 +967,45 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { return bfi_detail::getBlockName(getBlock(Node)); } + /// The current implementation for computing relative block frequencies does + /// not correctly handle control-flow graphs containing irreducible loops. To + /// resolve the problem, we apply a post-processing step, which iteratively + /// updates block frequencies based on the frequencies of their predecessors. + /// This corresponds to finding the stationary point of the Markov chain by + /// an iterative method aka "PageRank computation".
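In matrix terms (notation mine, matching the ProbMatrix layout introduced below, where p(J, I) = Pr[J -> I | J]), the post-processing looks for a fixed point of the transition matrix built from branch probabilities, the same stationary condition PageRank solves:

    f(I) = \sum_{J} f(J) \, p(J, I), \qquad \sum_{I} f(I) = 1,

and folding a self-loop probability p(I, I) into a single closed-form step gives the update each iteration applies to one block:

    f_{\mathrm{new}}(I) = \Big( \sum_{J \neq I} f(J) \, p(J, I) \Big) \Big/ \big( 1 - p(I, I) \big).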
+ /// The algorithm takes at most O(|E| * IterativeBFIMaxIterations) steps but + /// typically converges faster. + /// + /// Decide whether we want to apply iterative inference for a given function. + bool needIterativeInference() const; + + /// Apply an iterative post-processing to infer correct counts for + /// irreducible loops. + void applyIterativeInference(); + + using ProbMatrixType = std::vector<std::vector<std::pair<size_t, Scaled64>>>; + + /// Run iterative inference for a probability matrix and initial frequencies. + void iterativeInference(const ProbMatrixType &ProbMatrix, + std::vector<Scaled64> &Freq) const; + + /// Find all blocks to apply inference on, that is, reachable from the entry + /// and backward reachable from exits along edges with positive probability. + void findReachableBlocks(std::vector<const BlockT *> &Blocks) const; + + /// Build a matrix of probabilities with transitions (edges) between the + /// blocks: ProbMatrix[I] holds pairs (J, P), where Pr[J -> I | J] = P. + void initTransitionProbabilities( + const std::vector<const BlockT *> &Blocks, + const DenseMap<const BlockT *, size_t> &BlockIndex, + ProbMatrixType &ProbMatrix) const; + +#ifndef NDEBUG + /// Compute the discrepancy between current block frequencies and the + /// probability matrix. + Scaled64 discrepancy(const ProbMatrixType &ProbMatrix, + const std::vector<Scaled64> &Freq) const; +#endif + public: BlockFrequencyInfoImpl() = default; @@ -1094,6 +1132,10 @@ void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F, computeMassInLoops(); computeMassInFunction(); unwrapLoops(); + // Apply a post-processing step improving computed frequencies for functions + // with irreducible loops. + if (needIterativeInference()) + applyIterativeInference(); finalizeMetrics(); if (CheckBFIUnknownBlockQueries) { @@ -1314,6 +1356,294 @@ template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() { llvm_unreachable("unhandled irreducible control flow"); } +template <class BT> +bool BlockFrequencyInfoImpl<BT>::needIterativeInference() const { + if (!UseIterativeBFIInference) + return false; + if (!F->getFunction().hasProfileData()) + return false; + // Apply iterative inference only if the function contains irreducible loops; + // otherwise, computed block frequencies are reasonably correct. + for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) { + if (L->isIrreducible()) + return true; + } + return false; +} + +template <class BT> void BlockFrequencyInfoImpl<BT>::applyIterativeInference() { + // Extract blocks for processing: a block is considered for inference iff it + // can be reached from the entry by edges with a positive probability.
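A compact, self-contained sketch of the active-set iteration declared above, using double in place of Scaled64 (assumptions: row I of Prob holds pairs (J, p) with p = Pr[J -> I | J], every block starts active where the source seeds only positive-frequency ones, and all names are mine):

#include <cmath>
#include <cstdio>
#include <queue>
#include <utility>
#include <vector>

// Row I holds pairs (J, p) with p = Pr[J -> I | J], as in ProbMatrixType.
using ProbRow = std::vector<std::pair<size_t, double>>;

void iterate(const std::vector<ProbRow> &Prob, std::vector<double> &Freq,
             double Eps = 1e-9, size_t MaxIter = 1000) {
  size_t N = Freq.size();
  // Succ[J] lists the blocks whose frequency depends on block J.
  std::vector<std::vector<size_t>> Succ(N);
  for (size_t I = 0; I < N; ++I)
    for (const auto &In : Prob[I])
      Succ[In.first].push_back(I);

  std::vector<bool> Active(N, true);
  std::queue<size_t> Work;
  for (size_t I = 0; I < N; ++I)
    Work.push(I);

  size_t It = 0;
  while (It++ < MaxIter && !Work.empty()) {
    size_t I = Work.front();
    Work.pop();
    Active[I] = false;

    // Weighted sum over incoming edges, rescaled by (1 - selfProb) to fold
    // any self-loop in closed form.
    double New = 0, OneMinusSelf = 1.0;
    for (const auto &In : Prob[I]) {
      if (In.first == I)
        OneMinusSelf -= In.second;
      else
        New += Freq[In.first] * In.second;
    }
    New /= OneMinusSelf;

    // Re-activate this block and its dependents only on a real change; this
    // is what keeps the pass close to linear in practice.
    if (std::abs(New - Freq[I]) > Eps) {
      if (!Active[I]) { Work.push(I); Active[I] = true; }
      for (size_t S : Succ[I])
        if (!Active[S]) { Work.push(S); Active[S] = true; }
    }
    Freq[I] = New;
  }
}

int main() {
  // Entry block 0 feeds block 1, which self-loops half the time and returns
  // to the entry (mirroring the sink-to-source edges the real matrix adds).
  std::vector<ProbRow> Prob(2);
  Prob[1] = {{0, 1.0}, {1, 0.5}};
  Prob[0] = {{1, 0.5}};
  std::vector<double> Freq = {0.5, 0.5};
  iterate(Prob, Freq);
  std::printf("f0=%g f1=%g\n", Freq[0], Freq[1]); // converges to 0.25, 0.5
}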
+ // Non-processed blocks are assigned zero frequency and are ignored + // in the computation. + std::vector<const BlockT *> ReachableBlocks; + findReachableBlocks(ReachableBlocks); + if (ReachableBlocks.empty()) + return; + + // The map is used to index successors/predecessors of reachable blocks in + // the ReachableBlocks vector. + DenseMap<const BlockT *, size_t> BlockIndex; + // Extract initial frequencies for the reachable blocks. + auto Freq = std::vector<Scaled64>(ReachableBlocks.size()); + Scaled64 SumFreq; + for (size_t I = 0; I < ReachableBlocks.size(); I++) { + const BlockT *BB = ReachableBlocks[I]; + BlockIndex[BB] = I; + Freq[I] = getFloatingBlockFreq(BB); + SumFreq += Freq[I]; + } + assert(!SumFreq.isZero() && "empty initial block frequencies"); + + LLVM_DEBUG(dbgs() << "Applying iterative inference for " << F->getName() + << " with " << ReachableBlocks.size() << " blocks\n"); + + // Normalizing frequencies so they sum up to 1.0 + for (auto &Value : Freq) { + Value /= SumFreq; + } + + // Setting up edge probabilities using sparse matrix representation: + // ProbMatrix[I] holds a vector of pairs (J, P) where Pr[J -> I | J] = P + ProbMatrixType ProbMatrix; + initTransitionProbabilities(ReachableBlocks, BlockIndex, ProbMatrix); + + // Run the propagation + iterativeInference(ProbMatrix, Freq); + + // Assign computed frequency values + for (const BlockT &BB : *F) { + auto Node = getNode(&BB); + if (!Node.isValid()) + continue; + if (BlockIndex.count(&BB)) { + Freqs[Node.Index].Scaled = Freq[BlockIndex[&BB]]; + } else { + Freqs[Node.Index].Scaled = Scaled64::getZero(); + } + } +} + +template <class BT> +void BlockFrequencyInfoImpl<BT>::iterativeInference( + const ProbMatrixType &ProbMatrix, std::vector<Scaled64> &Freq) const { + assert(0.0 < IterativeBFIPrecision && IterativeBFIPrecision < 1.0 && + "incorrectly specified precision"); + // Convert double precision to Scaled64 + const auto Precision = + Scaled64::getInverse(static_cast<uint64_t>(1.0 / IterativeBFIPrecision)); + const size_t MaxIterations = IterativeBFIMaxIterationsPerBlock * Freq.size(); + +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "  Initial discrepancy = " + << discrepancy(ProbMatrix, Freq).toString() << "\n"); +#endif + + // Successors[I] holds unique successors of the I-th block + auto Successors = std::vector<std::vector<size_t>>(Freq.size()); + for (size_t I = 0; I < Freq.size(); I++) { + for (auto &Jump : ProbMatrix[I]) { + Successors[Jump.first].push_back(I); + } + } + + // To speed up computation, we maintain a set of "active" blocks whose + // frequencies need to be updated based on the incoming edges. + // The set is dynamic and changes after every update. Initially all blocks + // with a positive frequency are active. + auto IsActive = std::vector<bool>(Freq.size(), false); + std::queue<size_t> ActiveSet; + for (size_t I = 0; I < Freq.size(); I++) { + if (Freq[I] > 0) { + ActiveSet.push(I); + IsActive[I] = true; + } + } + + // Iterate over the blocks propagating frequencies + size_t It = 0; + while (It++ < MaxIterations && !ActiveSet.empty()) { + size_t I = ActiveSet.front(); + ActiveSet.pop(); + IsActive[I] = false; + + // Compute a new frequency for the block: NewFreq := Freq \times ProbMatrix.
+ // Special care is taken for self-edges, which need to be scaled by + // (1.0 - SelfProb), where SelfProb is the sum of probabilities on the edges. + Scaled64 NewFreq; + Scaled64 OneMinusSelfProb = Scaled64::getOne(); + for (auto &Jump : ProbMatrix[I]) { + if (Jump.first == I) { + OneMinusSelfProb -= Jump.second; + } else { + NewFreq += Freq[Jump.first] * Jump.second; + } + } + if (OneMinusSelfProb != Scaled64::getOne()) + NewFreq /= OneMinusSelfProb; + + // If the block's frequency has changed enough, then + // make sure the block and its successors are in the active set + auto Change = Freq[I] >= NewFreq ? Freq[I] - NewFreq : NewFreq - Freq[I]; + if (Change > Precision) { + ActiveSet.push(I); + IsActive[I] = true; + for (size_t Succ : Successors[I]) { + if (!IsActive[Succ]) { + ActiveSet.push(Succ); + IsActive[Succ] = true; + } + } + } + + // Update the frequency for the block + Freq[I] = NewFreq; + } + + LLVM_DEBUG(dbgs() << "  Completed " << It << " inference iterations" + << format(" (%0.0f per block)", double(It) / Freq.size()) + << "\n"); +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "  Final discrepancy = " + << discrepancy(ProbMatrix, Freq).toString() << "\n"); +#endif +} + +template <class BT> +void BlockFrequencyInfoImpl<BT>::findReachableBlocks( + std::vector<const BlockT *> &Blocks) const { + // Find all blocks to apply inference on, that is, reachable from the entry + // along edges with non-zero probabilities. + std::queue<const BlockT *> Queue; + std::unordered_set<const BlockT *> Reachable; + const BlockT *Entry = &F->front(); + Queue.push(Entry); + Reachable.insert(Entry); + while (!Queue.empty()) { + const BlockT *SrcBB = Queue.front(); + Queue.pop(); + for (const BlockT *DstBB : children<const BlockT *>(SrcBB)) { + auto EP = BPI->getEdgeProbability(SrcBB, DstBB); + if (EP.isZero()) + continue; + if (Reachable.find(DstBB) == Reachable.end()) { + Queue.push(DstBB); + Reachable.insert(DstBB); + } + } + } + + // Find all blocks to apply inference on, that is, backward reachable from + // the exit blocks along (backward) edges with non-zero probabilities. + std::unordered_set<const BlockT *> InverseReachable; + for (const BlockT &BB : *F) { + // An exit block is a block without any successors. + bool HasSucc = GraphTraits<const BlockT *>::child_begin(&BB) != + GraphTraits<const BlockT *>::child_end(&BB); + if (!HasSucc && Reachable.count(&BB)) { + Queue.push(&BB); + InverseReachable.insert(&BB); + } + } + while (!Queue.empty()) { + const BlockT *SrcBB = Queue.front(); + Queue.pop(); + for (const BlockT *DstBB : children<Inverse<const BlockT *>>(SrcBB)) { + auto EP = BPI->getEdgeProbability(DstBB, SrcBB); + if (EP.isZero()) + continue; + if (InverseReachable.find(DstBB) == InverseReachable.end()) { + Queue.push(DstBB); + InverseReachable.insert(DstBB); + } + } + } + + // Collect the result + Blocks.reserve(F->size()); + for (const BlockT &BB : *F) { + if (Reachable.count(&BB) && InverseReachable.count(&BB)) { + Blocks.push_back(&BB); + } + } +} + +template <class BT> +void BlockFrequencyInfoImpl<BT>::initTransitionProbabilities( + const std::vector<const BlockT *> &Blocks, + const DenseMap<const BlockT *, size_t> &BlockIndex, + ProbMatrixType &ProbMatrix) const { + const size_t NumBlocks = Blocks.size(); + auto Succs = std::vector<std::vector<std::pair<size_t, Scaled64>>>(NumBlocks); + auto SumProb = std::vector<Scaled64>(NumBlocks); + + // Find unique successors and corresponding probabilities for every block. + for (size_t Src = 0; Src < NumBlocks; Src++) { + const BlockT *BB = Blocks[Src]; +
std::unordered_set<const BlockT *> UniqueSuccs; + for (const auto SI : children<const BlockT *>(BB)) { + // Ignore cold blocks + if (BlockIndex.find(SI) == BlockIndex.end()) + continue; + // Ignore parallel edges between BB and SI blocks + if (UniqueSuccs.find(SI) != UniqueSuccs.end()) + continue; + UniqueSuccs.insert(SI); + // Ignore jumps with zero probability + auto EP = BPI->getEdgeProbability(BB, SI); + if (EP.isZero()) + continue; + + auto EdgeProb = + Scaled64::getFraction(EP.getNumerator(), EP.getDenominator()); + size_t Dst = BlockIndex.find(SI)->second; + Succs[Src].push_back(std::make_pair(Dst, EdgeProb)); + SumProb[Src] += EdgeProb; + } + } + + // Add transitions for every jump with positive branch probability + ProbMatrix = ProbMatrixType(NumBlocks); + for (size_t Src = 0; Src < NumBlocks; Src++) { + // Ignore blocks w/o successors + if (Succs[Src].empty()) + continue; + + assert(!SumProb[Src].isZero() && "Zero sum probability of non-exit block"); + for (auto &Jump : Succs[Src]) { + size_t Dst = Jump.first; + Scaled64 Prob = Jump.second; + ProbMatrix[Dst].push_back(std::make_pair(Src, Prob / SumProb[Src])); + } + } + + // Add transitions from sinks to the source + size_t EntryIdx = BlockIndex.find(&F->front())->second; + for (size_t Src = 0; Src < NumBlocks; Src++) { + if (Succs[Src].empty()) { + ProbMatrix[EntryIdx].push_back(std::make_pair(Src, Scaled64::getOne())); + } + } +} + +#ifndef NDEBUG +template <class BT> +BlockFrequencyInfoImplBase::Scaled64 BlockFrequencyInfoImpl<BT>::discrepancy( + const ProbMatrixType &ProbMatrix, const std::vector<Scaled64> &Freq) const { + assert(Freq[0] > 0 && "Incorrectly computed frequency of the entry block"); + Scaled64 Discrepancy; + for (size_t I = 0; I < ProbMatrix.size(); I++) { + Scaled64 Sum; + for (const auto &Jump : ProbMatrix[I]) { + Sum += Freq[Jump.first] * Jump.second; + } + Discrepancy += Freq[I] >= Sum ? Freq[I] - Sum : Sum - Freq[I]; + } + // Normalizing by the frequency of the entry block + return Discrepancy / Freq[0]; +} +#endif + /// \note This should be a lambda, but that crashes GCC 4.7. namespace bfi_detail { diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 6a286236a80e..e2099eba0f65 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -168,12 +168,6 @@ public: /// as having a relative probability >= 80%. bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const; - /// Retrieve the hot successor of a block if one exists. - /// - /// Given a basic block, look through its successors and if one exists for - /// which \see isEdgeHot would return true, return that successor block. - const BasicBlock *getHotSucc(const BasicBlock *BB) const; - /// Print an edge's probability. /// /// Retrieves an edge's probability similarly to \see getEdgeProbability, but diff --git a/llvm/include/llvm/Analysis/CFG.h b/llvm/include/llvm/Analysis/CFG.h index a36ceb484f14..b90258f8efff 100644 --- a/llvm/include/llvm/Analysis/CFG.h +++ b/llvm/include/llvm/Analysis/CFG.h @@ -77,21 +77,10 @@ bool isPotentiallyReachable( /// Determine whether there is a path from From to To within a single function. /// Returns false only if we can prove that once 'From' has been reached then /// 'To' can not be executed. Conservatively returns true. 
-bool isPotentiallyReachable(const BasicBlock *From, const BasicBlock *To, - const DominatorTree *DT = nullptr, - const LoopInfo *LI = nullptr); - -/// Determine whether there is at least one path from a block in -/// 'Worklist' to 'StopBB', returning true if uncertain. -/// -/// Determine whether there is a path from at least one block in Worklist to -/// StopBB within a single function. Returns false only if we can prove that -/// once any block in 'Worklist' has been reached then 'StopBB' can not be -/// executed. Conservatively returns true. -bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist, - BasicBlock *StopBB, - const DominatorTree *DT = nullptr, - const LoopInfo *LI = nullptr); +bool isPotentiallyReachable( + const BasicBlock *From, const BasicBlock *To, + const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr, + const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr); /// Determine whether there is at least one path from a block in /// 'Worklist' to 'StopBB' without passing through any blocks in diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index 53700798b6b3..c0cabceb4a54 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -72,15 +72,15 @@ public: RawWeights = !!BFI; // Print RawWeights when BFI is available. } - const BlockFrequencyInfo *getBFI() { return BFI; } + const BlockFrequencyInfo *getBFI() const { return BFI; } - const BranchProbabilityInfo *getBPI() { return BPI; } + const BranchProbabilityInfo *getBPI() const { return BPI; } - const Function *getFunction() { return this->F; } + const Function *getFunction() const { return this->F; } - uint64_t getMaxFreq() { return MaxFreq; } + uint64_t getMaxFreq() const { return MaxFreq; } - uint64_t getFreq(const BasicBlock *BB) { + uint64_t getFreq(const BasicBlock *BB) const { return BFI->getBlockFreq(BB).getFrequency(); } @@ -123,7 +123,7 @@ template <> struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits { // Cache for is hidden property - llvm::DenseMap<const BasicBlock *, bool> isHiddenBasicBlock; + llvm::DenseMap<const BasicBlock *, bool> isOnDeoptOrUnreachablePath; DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} @@ -296,7 +296,7 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits { return Attrs; } bool isNodeHidden(const BasicBlock *Node, const DOTFuncInfo *CFGInfo); - void computeHiddenNodes(const Function *F); + void computeDeoptOrUnreachablePaths(const Function *F); }; } // End llvm namespace diff --git a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h index 135321616b7c..ec05b3706ca3 100644 --- a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -73,7 +73,7 @@ public: AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI) { if (LocA.Ptr == LocB.Ptr) - return MustAlias; + return AliasResult::MustAlias; // Comparisons between global variables and other constants should be // handled by BasicAA. 
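The CFG.h change above folds the exclusion-set variant into the main isPotentiallyReachable overload. A hedged usage sketch against the new signature (the helper name and setup are hypothetical; only the declared API is taken from the hunk):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CFG.h"

using namespace llvm;

// Is To reachable from From when control may never pass through Blocker?
// Conservatively true; a false result is a proof To cannot execute.
static bool reachableAvoiding(const BasicBlock *From, const BasicBlock *To,
                              BasicBlock *Blocker, const DominatorTree *DT,
                              const LoopInfo *LI) {
  SmallPtrSet<BasicBlock *, 1> Exclude;
  Exclude.insert(Blocker);
  return isPotentiallyReachable(From, To, &Exclude, DT, LI);
}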
@@ -84,7 +84,7 @@ public: return AAResultBase::alias(LocA, LocB, AAQI); AliasResult QueryResult = query(LocA, LocB); - if (QueryResult == MayAlias) + if (QueryResult == AliasResult::MayAlias) return AAResultBase::alias(LocA, LocB, AAQI); return QueryResult; diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index 985424a74054..e361cccef960 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -373,12 +373,12 @@ private: /// templated adaptor. template <typename CGSCCPassT> ModuleToPostOrderCGSCCPassAdaptor -createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) { +createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass) { using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT, PreservedAnalyses, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult &>; return ModuleToPostOrderCGSCCPassAdaptor( - std::make_unique<PassModelT>(std::move(Pass))); + std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass))); } /// A proxy from a \c FunctionAnalysisManager to an \c SCC. @@ -491,12 +491,12 @@ private: /// templated adaptor. template <typename FunctionPassT> CGSCCToFunctionPassAdaptor -createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) { +createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass) { using PassModelT = detail::PassModel<Function, FunctionPassT, PreservedAnalyses, FunctionAnalysisManager>; return CGSCCToFunctionPassAdaptor( - std::make_unique<PassModelT>(std::move(Pass))); + std::make_unique<PassModelT>(std::forward<FunctionPassT>(Pass))); } /// A helper that repeats an SCC pass each time an indirect call is refined to @@ -536,13 +536,14 @@ private: /// A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename CGSCCPassT> -DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT Pass, +DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT &&Pass, int MaxIterations) { using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT, PreservedAnalyses, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult &>; - return DevirtSCCRepeatedPass(std::make_unique<PassModelT>(std::move(Pass)), - MaxIterations); + return DevirtSCCRepeatedPass( + std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass)), + MaxIterations); } // Clear out the debug logging macro. diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h index ef6e66b2b88e..62742fdf9a91 100644 --- a/llvm/include/llvm/Analysis/ConstantFolding.h +++ b/llvm/include/llvm/Analysis/ConstantFolding.h @@ -136,7 +136,9 @@ Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout & /// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a /// getelementptr constantexpr, return the constant value being addressed by the /// constant expression, or null if something is funny and we can't decide. 
-Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE); +Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE, + Type *Ty, + const DataLayout &DL); /// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr /// indices (with an *implied* zero pointer index that is not in the list), diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h index 83c1fb4485fd..d5b8f208172b 100644 --- a/llvm/include/llvm/Analysis/ConstraintSystem.h +++ b/llvm/include/llvm/Analysis/ConstraintSystem.h @@ -30,9 +30,6 @@ class ConstraintSystem { // Eliminate constraints from the system using Fourier–Motzkin elimination. bool eliminateUsingFM(); - /// Print the constraints in the system, using \p Names as variable names. - void dump(ArrayRef<std::string> Names) const; - /// Print the constraints in the system, using x0...xn as variable names. void dump() const; @@ -82,6 +79,9 @@ public: /// Returns the number of rows in the constraint system. unsigned size() const { return Constraints.size(); } + + /// Print the constraints in the system, using \p Names as variable names. + void dump(ArrayRef<std::string> Names) const; }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index e3bef33e55c3..51dd4a738f00 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -275,7 +275,7 @@ public: virtual ~DependenceGraphInfo() {} /// Return the label that is used to name this graph. - const StringRef getName() const { return Name; } + StringRef getName() const { return Name; } /// Return the root node of the graph. NodeType &getRoot() const { @@ -293,8 +293,8 @@ public: /// Return a string representing the type of dependence that the dependence /// analysis identified between the two given nodes. This function assumes /// that there is a memory dependence between the given two nodes. - const std::string getDependenceString(const NodeType &Src, - const NodeType &Dst) const; + std::string getDependenceString(const NodeType &Src, + const NodeType &Dst) const; protected: // Name of the graph. 
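The CGSCC adaptor factories in the CGSCCPassManager.h hunk above switch from a by-value pass parameter to a forwarding reference plus std::forward, so an lvalue pass is copied once into the type-erased model and an rvalue is moved. A generic standalone sketch of that type-erasure-with-forwarding pattern (hypothetical names, not the LLVM classes):

#include <memory>
#include <type_traits>
#include <utility>

struct Concept {
  virtual ~Concept() = default;
  virtual void run() = 0;
};

template <typename PassT> struct Model : Concept {
  PassT Pass;
  explicit Model(PassT P) : Pass(std::move(P)) {}
  void run() override { Pass.run(); }
};

struct Adaptor {
  std::unique_ptr<Concept> Impl;
};

// Forwarding factory: lvalues are copied into the model, rvalues are moved,
// matching the std::forward change in the hunk above.
template <typename PassT> Adaptor makeAdaptor(PassT &&Pass) {
  using ModelT = Model<std::decay_t<PassT>>;
  return Adaptor{std::make_unique<ModelT>(std::forward<PassT>(Pass))};
}

struct MyPass {
  void run() {}
};

int main() {
  MyPass P;
  Adaptor A = makeAdaptor(P);        // copy; P remains usable
  Adaptor B = makeAdaptor(MyPass()); // move from the temporary
  A.Impl->run();
  B.Impl->run();
}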
@@ -470,7 +470,7 @@ bool DependenceGraphInfo<NodeType>::getDependencies( } template <typename NodeType> -const std::string +std::string DependenceGraphInfo<NodeType>::getDependenceString(const NodeType &Src, const NodeType &Dst) const { std::string Str; diff --git a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h index ecf54cd8a680..59737744f576 100644 --- a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -97,7 +97,7 @@ public: errs() << "Writing '" << Filename << "'..."; - raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF); std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph); std::string Title = GraphName + " for '" + F.getName().str() + "' function"; @@ -160,7 +160,7 @@ public: errs() << "Writing '" << Filename << "'..."; - raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF); std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph); if (!EC) diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h index 7a8618a27ce7..5a68fcbebfea 100644 --- a/llvm/include/llvm/Analysis/DemandedBits.h +++ b/llvm/include/llvm/Analysis/DemandedBits.h @@ -18,8 +18,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_DEMANDED_BITS_H -#define LLVM_ANALYSIS_DEMANDED_BITS_H +#ifndef LLVM_ANALYSIS_DEMANDEDBITS_H +#define LLVM_ANALYSIS_DEMANDEDBITS_H #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" @@ -53,6 +53,9 @@ public: /// accepted, but will always produce a mask with all bits set. APInt getDemandedBits(Instruction *I); + /// Return the bits demanded from use U. + APInt getDemandedBits(Use *U); + /// Return true if, during analysis, I could not be reached. 
bool isInstructionDead(Instruction *I); @@ -146,4 +149,4 @@ FunctionPass *createDemandedBitsWrapperPass(); } // end namespace llvm -#endif // LLVM_ANALYSIS_DEMANDED_BITS_H +#endif // LLVM_ANALYSIS_DEMANDEDBITS_H diff --git a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h index 6f4e1be94164..332829cbc8a9 100644 --- a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h +++ b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H -#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H +#ifndef LLVM_ANALYSIS_DEPENDENCEGRAPHBUILDER_H +#define LLVM_ANALYSIS_DEPENDENCEGRAPHBUILDER_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" @@ -200,4 +200,4 @@ protected: } // namespace llvm -#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H +#endif // LLVM_ANALYSIS_DEPENDENCEGRAPHBUILDER_H diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h index 2e4ae65d0981..6f759a81fdef 100644 --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H -#define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H +#ifndef LLVM_ANALYSIS_DIVERGENCEANALYSIS_H +#define LLVM_ANALYSIS_DIVERGENCEANALYSIS_H #include "llvm/ADT/DenseSet.h" #include "llvm/Analysis/SyncDependenceAnalysis.h" @@ -34,7 +34,7 @@ class TargetTransformInfo; /// This analysis propagates divergence in a data-parallel context from sources /// of divergence to all users. It requires reducible CFGs. All assignments /// should be in SSA form. -class DivergenceAnalysis { +class DivergenceAnalysisImpl { public: /// \brief This instance will analyze the whole function \p F or the loop \p /// RegionLoop. /// /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop. /// Otherwise the whole function is analyzed. /// \param IsLCSSAForm whether the analysis may assume that the IR in the /// region is in LCSSA form. - DivergenceAnalysis(const Function &F, const Loop *RegionLoop, - const DominatorTree &DT, const LoopInfo &LI, - SyncDependenceAnalysis &SDA, bool IsLCSSAForm); + DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop, + const DominatorTree &DT, const LoopInfo &LI, + SyncDependenceAnalysis &SDA, bool IsLCSSAForm); /// \brief The loop that defines the analyzed region (if any). const Loop *getRegionLoop() const { return RegionLoop; } @@ -82,8 +82,6 @@ public: /// divergent. bool isDivergentUse(const Use &U) const; - void print(raw_ostream &OS, const Module *) const; - private: /// \brief Mark \p Term as divergent and push all Instructions that become /// divergent as a result on the worklist. @@ -114,13 +112,6 @@ private: bool isTemporalDivergent(const BasicBlock &ObservingBlock, const Value &Val) const; - /// \brief Whether \p Block is join divergent - /// - /// (see markBlockJoinDivergent). - bool isJoinDivergent(const BasicBlock &Block) const { - return DivergentJoinBlocks.contains(&Block); - } - private: const Function &F; // If regionLoop != nullptr, analysis is only performed within \p RegionLoop. @@ -142,9 +133,6 @@ private: // Set of known-uniform values. DenseSet<const Value *> UniformOverrides; - // Blocks with joining divergent control from different predecessors.
- DenseSet<const BasicBlock *> DivergentJoinBlocks; // FIXME Deprecated - // Detected/marked divergent values. DenseSet<const Value *> DivergentValues; @@ -152,28 +140,39 @@ private: std::vector<const Instruction *> Worklist; }; -/// \brief Divergence analysis frontend for GPU kernels. -class GPUDivergenceAnalysis { - SyncDependenceAnalysis SDA; - DivergenceAnalysis DA; +class DivergenceInfo { + Function &F; + + // If the function contains an irreducible region the divergence + // analysis can run indefinitely. We set ContainsIrreducible and no + // analysis is actually performed on the function. All values in + // this function are conservatively reported as divergent instead. + bool ContainsIrreducible; + std::unique_ptr<SyncDependenceAnalysis> SDA; + std::unique_ptr<DivergenceAnalysisImpl> DA; public: - /// Runs the divergence analysis on @F, a GPU kernel - GPUDivergenceAnalysis(Function &F, const DominatorTree &DT, - const PostDominatorTree &PDT, const LoopInfo &LI, - const TargetTransformInfo &TTI); + DivergenceInfo(Function &F, const DominatorTree &DT, + const PostDominatorTree &PDT, const LoopInfo &LI, + const TargetTransformInfo &TTI, bool KnownReducible); /// Whether any divergence was detected. - bool hasDivergence() const { return DA.hasDetectedDivergence(); } + bool hasDivergence() const { + return ContainsIrreducible || DA->hasDetectedDivergence(); + } /// The GPU kernel this analysis result is for - const Function &getFunction() const { return DA.getFunction(); } + const Function &getFunction() const { return F; } /// Whether \p V is divergent at its definition. - bool isDivergent(const Value &V) const; + bool isDivergent(const Value &V) const { + return ContainsIrreducible || DA->isDivergent(V); + } /// Whether \p U is divergent. Uses of a uniform value can be divergent. - bool isDivergentUse(const Use &U) const; + bool isDivergentUse(const Use &U) const { + return ContainsIrreducible || DA->isDivergentUse(U); + } /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } @@ -181,11 +180,32 @@ public: /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be /// divergent. bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } +}; + +/// \brief Divergence analysis frontend for GPU kernels. +class DivergenceAnalysis : public AnalysisInfoMixin<DivergenceAnalysis> { + friend AnalysisInfoMixin<DivergenceAnalysis>; + + static AnalysisKey Key; + +public: + using Result = DivergenceInfo; - /// Print all divergent values in the kernel. - void print(raw_ostream &OS, const Module *) const; + /// Runs the divergence analysis on @F, a GPU kernel + Result run(Function &F, FunctionAnalysisManager &AM); }; +/// Printer pass to dump divergence analysis results. 
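With the restructuring above, divergence is exposed as an ordinary new-pass-manager analysis whose Result is DivergenceInfo, and irreducible CFGs degrade to "everything divergent" rather than non-termination. A hedged sketch of querying it from a function pass (the pass name is hypothetical; the analysis API is as declared above):

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical consumer pass; only the analysis API is taken from above.
struct UniformityGatedPass : PassInfoMixin<UniformityGatedPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    DivergenceInfo &DI = FAM.getResult<DivergenceAnalysis>(F);
    // On irreducible CFGs DivergenceInfo reports everything divergent, so
    // this stays conservatively correct without special casing.
    bool AllArgsUniform = true;
    for (const auto &Arg : F.args())
      AllArgsUniform &= DI.isUniform(Arg);
    (void)AllArgsUniform; // gate a uniformity-dependent rewrite here
    return PreservedAnalyses::all();
  }
};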
+struct DivergenceAnalysisPrinterPass + : public PassInfoMixin<DivergenceAnalysisPrinterPass> { + DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + +private: + raw_ostream &OS; +}; // class DivergenceAnalysisPrinterPass + } // namespace llvm -#endif // LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H +#endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h index a5f96e72ce97..cf07c873b17c 100644 --- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h +++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_FUNCTIONPROPERTIESANALYSIS_H_ -#define LLVM_FUNCTIONPROPERTIESANALYSIS_H_ +#ifndef LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H +#define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/PassManager.h" @@ -83,4 +83,4 @@ public: }; } // namespace llvm -#endif // LLVM_FUNCTIONPROPERTIESANALYSIS_H_ +#endif // LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 9e97541e542b..b623b9ca58d8 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -654,12 +654,6 @@ public: IRSimilarityIdentifier() : Mapper(&InstDataAllocator, &InstDataListAllocator) {} - /// \param M the module to find similarity in. - explicit IRSimilarityIdentifier(Module &M) - : Mapper(&InstDataAllocator, &InstDataListAllocator) { - findSimilarity(M); - } - private: /// Map the instructions in the module to unsigned integers, using mapping /// already present in the Mapper if possible. diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 28546110ba04..82e1b14960bd 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -14,6 +14,7 @@ #define LLVM_ANALYSIS_IVDESCRIPTORS_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -68,29 +69,31 @@ public: RecurrenceDescriptor() = default; RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K, - FastMathFlags FMF, Instruction *UAI, Type *RT, - bool Signed, SmallPtrSetImpl<Instruction *> &CI) + FastMathFlags FMF, Instruction *ExactFP, Type *RT, + bool Signed, bool Ordered, + SmallPtrSetImpl<Instruction *> &CI) : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), - UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { + ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed), + IsOrdered(Ordered) { CastInsts.insert(CI.begin(), CI.end()); } /// This POD struct holds information about a potential recurrence operation. 
class InstDesc { public: - InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) + InstDesc(bool IsRecur, Instruction *I, Instruction *ExactFP = nullptr) : IsRecurrence(IsRecur), PatternLastInst(I), - RecKind(RecurKind::None), UnsafeAlgebraInst(UAI) {} + RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {} - InstDesc(Instruction *I, RecurKind K, Instruction *UAI = nullptr) + InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr) : IsRecurrence(true), PatternLastInst(I), RecKind(K), - UnsafeAlgebraInst(UAI) {} + ExactFPMathInst(ExactFP) {} bool isRecurrence() const { return IsRecurrence; } - bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; } + bool needsExactFPMath() const { return ExactFPMathInst != nullptr; } - Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; } + Instruction *getExactFPMathInst() const { return ExactFPMathInst; } RecurKind getRecKind() const { return RecKind; } @@ -104,8 +107,8 @@ public: Instruction *PatternLastInst; // If this is a min/max pattern. RecurKind RecKind; - // Recurrence has unsafe algebra. - Instruction *UnsafeAlgebraInst; + // Recurrence does not allow floating-point reassociation. + Instruction *ExactFPMathInst; }; /// Returns a struct describing if the instruction 'I' can be a recurrence @@ -114,7 +117,7 @@ public: /// compare instruction to the select instruction and stores this pointer in /// 'PatternLastInst' member of the returned struct. static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind, - InstDesc &Prev, bool HasFunNoNaNAttr); + InstDesc &Prev, FastMathFlags FMF); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, @@ -136,7 +139,8 @@ public: static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I); /// Returns identity corresponding to the RecurrenceKind. - static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp); + static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp, + FastMathFlags FMF); /// Returns the opcode corresponding to the RecurrenceKind. static unsigned getOpcode(RecurKind Kind); @@ -146,7 +150,7 @@ public: /// non-null, the minimal bit width needed to compute the reduction will be /// computed. static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, - bool HasFunNoNaNAttr, + FastMathFlags FMF, RecurrenceDescriptor &RedDes, DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr, @@ -171,7 +175,7 @@ public: /// to handle Phi as a first-order recurrence. static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, - DenseMap<Instruction *, Instruction *> &SinkAfter, + MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT); RecurKind getRecurrenceKind() const { return Kind; } @@ -184,12 +188,12 @@ public: Instruction *getLoopExitInstr() const { return LoopExitInstr; } - /// Returns true if the recurrence has unsafe algebra which requires a relaxed - /// floating-point model. - bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; } + /// Returns true if the recurrence has floating-point math that requires + /// precise (ordered) operations. + bool hasExactFPMath() const { return ExactFPMathInst != nullptr; } - /// Returns first unsafe algebra instruction in the PHI node's use-chain. - Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; } + /// Returns 1st non-reassociative FP instruction in the PHI node's use-chain. 
+ Instruction *getExactFPMathInst() const { return ExactFPMathInst; } /// Returns true if the recurrence kind is an integer kind. static bool isIntegerRecurrenceKind(RecurKind Kind); @@ -227,6 +231,9 @@ public: /// Returns true if all source operands of the recurrence are SExtInsts. bool isSigned() const { return IsSigned; } + /// Expose an ordered FP reduction to the instance users. + bool isOrdered() const { return IsOrdered; } + /// Attempts to find a chain of operations from Phi to LoopExitInst that can /// be treated as a set of reductions instructions for in-loop reductions. SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi, @@ -243,12 +250,16 @@ private: // The fast-math flags on the recurrent instructions. We propagate these // fast-math flags into the vectorized FP instructions we generate. FastMathFlags FMF; - // First occurrence of unasfe algebra in the PHI's use-chain. - Instruction *UnsafeAlgebraInst = nullptr; + // First instance of non-reassociative floating-point in the PHI's use-chain. + Instruction *ExactFPMathInst = nullptr; // The type of the recurrence. Type *RecurrenceType = nullptr; // True if all source operands of the recurrence are SExtInsts. bool IsSigned = false; + // True if this recurrence can be treated as an in-order reduction. + // Currently only a non-reassociative FAdd can be considered in-order, + // if it is also the only FAdd in the PHI's use chain. + bool IsOrdered = false; // Instructions used for type-promoting the recurrence. SmallPtrSet<Instruction *, 8> CastInsts; }; @@ -302,23 +313,14 @@ public: PredicatedScalarEvolution &PSE, InductionDescriptor &D, bool Assume = false); - /// Returns true if the induction type is FP and the binary operator does - /// not have the "fast-math" property. Such operation requires a relaxed FP - /// mode. - bool hasUnsafeAlgebra() { - return (IK == IK_FpInduction) && InductionBinOp && - !cast<FPMathOperator>(InductionBinOp)->isFast(); - } - - /// Returns induction operator that does not have "fast-math" property - /// and requires FP unsafe mode. - Instruction *getUnsafeAlgebraInst() { - if (IK != IK_FpInduction) - return nullptr; - - if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast()) - return nullptr; - return InductionBinOp; + /// Returns floating-point induction operator that does not allow + /// reassociation (transforming the induction requires an override of normal + /// floating-point rules). + Instruction *getExactFPMathInst() { + if (IK == IK_FpInduction && InductionBinOp && + !InductionBinOp->hasAllowReassoc()) + return InductionBinOp; + return nullptr; } /// Returns binary opcode of the induction operator. 
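With hasUnsafeAlgebra/getUnsafeAlgebraInst renamed to hasExactFPMath/getExactFPMathInst and the new isOrdered() bit, a client can distinguish FP reductions that merely forbid reassociation from ones it must give up on. A minimal sketch of a consumer, assuming only the RecurrenceDescriptor API shown above (the helper itself is hypothetical):

  // Hypothetical helper: can this reduction be vectorized at all?
  static bool canVectorizeReduction(const RecurrenceDescriptor &RdxDesc) {
    // No non-reassociative FP instruction in the PHI's use-chain, so
    // nothing constrains the transform (e.g. an integer reduction).
    if (!RdxDesc.hasExactFPMath())
      return true;
    // Exact FP math is required; only an in-order FAdd chain, advertised
    // via isOrdered(), can still be vectorized (as a serialized reduction).
    return RdxDesc.isOrdered();
  }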
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index c39fae13d3b8..c27aaf0db8f2 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -6,13 +6,13 @@ // //===----------------------------------------------------------------------===// // -#ifndef LLVM_INLINEADVISOR_H_ -#define LLVM_INLINEADVISOR_H_ +#ifndef LLVM_ANALYSIS_INLINEADVISOR_H +#define LLVM_ANALYSIS_INLINEADVISOR_H #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/PassManager.h" -#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include <memory> #include <unordered_set> @@ -36,11 +36,7 @@ class OptimizationRemarkEmitter; /// requires the full C Tensorflow API library, and evaluates models /// dynamically. This mode also permits generating training logs, for offline /// training. -enum class InliningAdvisorMode : int { - Default, - Release, - Development -}; +enum class InliningAdvisorMode : int { Default, Release, Development }; class InlineAdvisor; /// Capture state between an inlining decision having had been made, and @@ -283,4 +279,4 @@ void setInlineRemark(CallBase &CB, StringRef Message); /// Utility for extracting the inline cost message to a string. std::string inlineCostStr(const InlineCost &IC); } // namespace llvm -#endif // LLVM_INLINEADVISOR_H_ +#endif // LLVM_ANALYSIS_INLINEADVISOR_H diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 7f04a8ce8f5f..4e1b28d4633f 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -15,6 +15,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/InlineModelFeatureMaps.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include <cassert> #include <climits> @@ -43,7 +44,7 @@ const int OptAggressiveThreshold = 250; // Various magic constants used to adjust heuristics. const int InstrCost = 5; const int IndirectCallThreshold = 100; -const int CallPenalty = 25; +const int LoopPenalty = 25; const int LastCallToStaticBonus = 15000; const int ColdccPenalty = 2000; /// Do not inline functions which allocate this many bytes on the stack @@ -54,6 +55,20 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024; const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536; } // namespace InlineConstants +// The cost-benefit pair computed by cost-benefit analysis. +class CostBenefitPair { +public: + CostBenefitPair(APInt Cost, APInt Benefit) : Cost(Cost), Benefit(Benefit) {} + + const APInt &getCost() const { return Cost; } + + const APInt &getBenefit() const { return Benefit; } + +private: + APInt Cost; + APInt Benefit; +}; + /// Represents the cost of inlining a function. /// /// This supports special values for functions which should "always" or @@ -76,9 +91,14 @@ class InlineCost { /// Must be set for Always and Never instances. const char *Reason = nullptr; + /// The cost-benefit pair computed by cost-benefit analysis. + Optional<CostBenefitPair> CostBenefit = None; + // Trivial constructor, interesting logic in the factory functions below. 
- InlineCost(int Cost, int Threshold, const char *Reason = nullptr) - : Cost(Cost), Threshold(Threshold), Reason(Reason) { + InlineCost(int Cost, int Threshold, const char *Reason = nullptr, + Optional<CostBenefitPair> CostBenefit = None) + : Cost(Cost), Threshold(Threshold), Reason(Reason), + CostBenefit(CostBenefit) { assert((isVariable() || Reason) && "Reason must be provided for Never or Always"); } @@ -89,11 +109,13 @@ public: assert(Cost < NeverInlineCost && "Cost crosses sentinel value"); return InlineCost(Cost, Threshold); } - static InlineCost getAlways(const char *Reason) { - return InlineCost(AlwaysInlineCost, 0, Reason); + static InlineCost getAlways(const char *Reason, + Optional<CostBenefitPair> CostBenefit = None) { + return InlineCost(AlwaysInlineCost, 0, Reason, CostBenefit); } - static InlineCost getNever(const char *Reason) { - return InlineCost(NeverInlineCost, 0, Reason); + static InlineCost getNever(const char *Reason, + Optional<CostBenefitPair> CostBenefit = None) { + return InlineCost(NeverInlineCost, 0, Reason, CostBenefit); } /// Test whether the inline cost is low enough for inlining. @@ -116,6 +138,9 @@ public: return Threshold; } + /// Get the cost-benefit pair which was computed by cost-benefit analysis + Optional<CostBenefitPair> getCostBenefit() const { return CostBenefit; } + /// Get the reason of Always or Never. const char *getReason() const { assert((Reason || isVariable()) && @@ -270,6 +295,15 @@ Optional<int> getInliningCostEstimate( ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr); +/// Get the expanded cost features. The features are returned unconditionally, +/// even if inlining is impossible. +Optional<InlineCostFeatures> getInliningCostFeatures( + CallBase &Call, TargetTransformInfo &CalleeTTI, + function_ref<AssumptionCache &(Function &)> GetAssumptionCache, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, + ProfileSummaryInfo *PSI = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); + /// Minimal filter to detect invalid constructs for inlining. InlineResult isInlineViable(Function &Callee); diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h index 8da442cc4a53..1afa8a825f15 100644 --- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h +++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h @@ -16,6 +16,61 @@ namespace llvm { +// List of cost features. A "cost" feature is a summand of the heuristic-based +// inline cost, and we define them separately to preserve the original heuristic +// behavior. 
+#define INLINE_COST_FEATURE_ITERATOR(M) \ + M(SROASavings, "sroa_savings") \ + M(SROALosses, "sroa_losses") \ + M(LoadElimination, "load_elimination") \ + M(CallPenalty, "call_penalty") \ + M(CallArgumentSetup, "call_argument_setup") \ + M(LoadRelativeIntrinsic, "load_relative_intrinsic") \ + M(LoweredCallArgSetup, "lowered_call_arg_setup") \ + M(IndirectCallPenalty, "indirect_call_penalty") \ + M(JumpTablePenalty, "jump_table_penalty") \ + M(CaseClusterPenalty, "case_cluster_penalty") \ + M(SwitchPenalty, "switch_penalty") \ + M(UnsimplifiedCommonInstructions, "unsimplified_common_instructions") \ + M(NumLoops, "num_loops") \ + M(DeadBlocks, "dead_blocks") \ + M(SimplifiedInstructions, "simplified_instructions") \ + M(ConstantArgs, "constant_args") \ + M(ConstantOffsetPtrArgs, "constant_offset_ptr_args") \ + M(CallSiteCost, "callsite_cost") \ + M(ColdCcPenalty, "cold_cc_penalty") \ + M(LastCallToStaticBonus, "last_call_to_static_bonus") \ + M(IsMultipleBlocks, "is_multiple_blocks") \ + M(NestedInlines, "nested_inlines") \ + M(NestedInlineCostEstimate, "nested_inline_cost_estimate") \ + M(Threshold, "threshold") + +// clang-format off +enum class InlineCostFeatureIndex : size_t { +#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) +#undef POPULATE_INDICES + + NumberOfFeatures +}; +// clang-format on + +using InlineCostFeatures = + std::array<int, + static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures)>; + +constexpr bool isHeuristicInlineCostFeature(InlineCostFeatureIndex Feature) { + return Feature != InlineCostFeatureIndex::SROASavings && + Feature != InlineCostFeatureIndex::IsMultipleBlocks && + Feature != InlineCostFeatureIndex::DeadBlocks && + Feature != InlineCostFeatureIndex::SimplifiedInstructions && + Feature != InlineCostFeatureIndex::ConstantArgs && + Feature != InlineCostFeatureIndex::ConstantOffsetPtrArgs && + Feature != InlineCostFeatureIndex::NestedInlines && + Feature != InlineCostFeatureIndex::NestedInlineCostEstimate && + Feature != InlineCostFeatureIndex::Threshold; +} + // List of features. 
Each feature is defined through a triple: // - the name of an enum member, which will be the feature index // - a textual name, used for Tensorflow model binding (so it needs to match the @@ -34,11 +89,10 @@ namespace llvm { M(NrCtantParams, "nr_ctant_params", \ "number of parameters in the call site that are constants") \ M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)") \ - M(EdgeCount, "edge_count", \ + M(EdgeCount, "edge_count", "total number of calls in the module") \ + M(CallerUsers, "caller_users", \ "number of module-internal users of the caller, +1 if the caller is " \ "exposed externally") \ - M(CallerUsers, "caller_users", \ - "number of blocks reached from a conditional instruction, in the caller") \ M(CallerConditionallyExecutedBlocks, "caller_conditionally_executed_blocks", \ "number of blocks reached from a conditional instruction, in the caller") \ M(CallerBasicBlockCount, "caller_basic_block_count", \ @@ -46,14 +100,29 @@ namespace llvm { M(CalleeConditionallyExecutedBlocks, "callee_conditionally_executed_blocks", \ "number of blocks reached from a conditional instruction, in the callee") \ M(CalleeUsers, "callee_users", \ - "number of blocks reached from a conditional instruction, in the callee") + "number of module-internal users of the callee, +1 if the callee is " \ + "exposed externally") +// clang-format off enum class FeatureIndex : size_t { +// InlineCost features - these must come first +#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) +#undef POPULATE_INDICES + +// Non-cost features #define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME, INLINE_FEATURE_ITERATOR(POPULATE_INDICES) #undef POPULATE_INDICES - NumberOfFeatures + + NumberOfFeatures }; +// clang-format on + +constexpr FeatureIndex +inlineCostFeatureToMlFeature(InlineCostFeatureIndex Feature) { + return static_cast<FeatureIndex>(static_cast<size_t>(Feature)); +} constexpr size_t NumberOfFeatures = static_cast<size_t>(FeatureIndex::NumberOfFeatures); diff --git a/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h b/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h index 46bc974c4a7f..192630e62a54 100644 --- a/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h +++ b/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h @@ -80,6 +80,11 @@ public: /// It makes all necessary updates to internal caches to keep them consistent. void removeInstruction(const Instruction *Inst); + /// Notifies this tracking that we are going to replace all uses of \p Inst. + /// It makes all necessary updates to internal caches to keep them consistent. + /// Should typically be called before a RAUW. + void removeUsersOf(const Instruction *Inst); + /// Invalidates all information from this tracking. void clear(); }; diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 17d6f30a35cb..efaf1847276b 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -37,6 +37,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" namespace llvm { @@ -133,7 +134,9 @@ struct SimplifyQuery { bool isUndefValue(Value *V) const { if (!CanUseUndef) return false; - return isa<UndefValue>(V); + + using namespace PatternMatch; + return match(V, m_Undef()); } }; @@ -142,8 +145,7 @@ struct SimplifyQuery { // Please use the SimplifyQuery versions in new code. 
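The comment above asks new code to go through the SimplifyQuery overloads. A sketch of what that looks like for a caller that already has the standard analyses on hand; the helper name and the plumbing are illustrative, not part of this patch:

  // Illustrative only: try to fold `A + B` at context instruction CxtI.
  static Value *tryFoldAdd(Value *A, Value *B, const DataLayout &DL,
                           const TargetLibraryInfo *TLI,
                           const DominatorTree *DT, AssumptionCache *AC,
                           const Instruction *CxtI) {
    const SimplifyQuery Q(DL, TLI, DT, AC, CxtI);
    // Returns the simplified value (often a constant) or null if no fold
    // applies. The FP entry points below additionally take an exception
    // behavior and rounding mode, defaulted so that existing callers keep
    // the default FP environment.
    return SimplifyAddInst(A, B, /*isNSW=*/false, /*isNUW=*/false, Q);
  }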
/// Given operand for an FNeg, fold the result or return null. -Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF, - const SimplifyQuery &Q); +Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q); /// Given operands for an Add, fold the result or return null. Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, @@ -154,23 +156,34 @@ Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const SimplifyQuery &Q); /// Given operands for an FAdd, fold the result or return null. -Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for an FSub, fold the result or return null. -Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for an FMul, fold the result or return null. -Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for the multiplication of a FMA, fold the result or return /// null. In contrast to SimplifyFMulInst, this function will not perform /// simplifications whose unrounded results differ when rounded to the argument /// type. Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for a Mul, fold the result or return null. Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); @@ -182,8 +195,11 @@ Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an FDiv, fold the result or return null. -Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for an SRem, fold the result or return null. Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); @@ -192,8 +208,11 @@ Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); /// Given operands for an FRem, fold the result or return null. -Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +Value * +SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q, + fp::ExceptionBehavior ExBehavior = fp::ebIgnore, + RoundingMode Rounding = RoundingMode::NearestTiesToEven); /// Given operands for a Shl, fold the result or return null. 
Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, @@ -277,8 +296,8 @@ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// Given operands for a BinaryOperator, fold the result or return null. /// Try to use FastMathFlags when folding the result. -Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q); +Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); /// Given a callsite, fold the result or return null. Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q); @@ -292,11 +311,18 @@ Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q); Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE = nullptr); +/// Like \p SimplifyInstruction but the operands of \p I are replaced with +/// \p NewOps. Returns a simplified value, or null if none was found. +Value * +SimplifyInstructionWithOperands(Instruction *I, ArrayRef<Value *> NewOps, + const SimplifyQuery &Q, + OptimizationRemarkEmitter *ORE = nullptr); + /// See if V simplifies when its operand Op is replaced with RepOp. If not, /// return null. -/// AllowRefinement specifies whether the simplification can be a refinement, -/// or whether it needs to be strictly identical. -Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, +/// AllowRefinement specifies whether the simplification can be a refinement +/// (e.g. 0 instead of poison), or whether it needs to be strictly identical. +Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, const SimplifyQuery &Q, bool AllowRefinement); /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. @@ -325,4 +351,3 @@ const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &, } // end namespace llvm #endif - diff --git a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h index 8166b52aa226..542a741ee07e 100644 --- a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h +++ b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_IDF_H -#define LLVM_ANALYSIS_IDF_H +#ifndef LLVM_ANALYSIS_ITERATEDDOMINANCEFRONTIER_H +#define LLVM_ANALYSIS_ITERATEDDOMINANCEFRONTIER_H #include "llvm/Support/CFGDiff.h" #include "llvm/Support/GenericIteratedDominanceFrontier.h" diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index f7a5adac2b43..ca276d2f3cf8 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -60,6 +60,7 @@ namespace llvm { +template <class GraphType> struct GraphTraits; class Module; class Value; @@ -115,8 +116,6 @@ public: class EdgeSequence; class SCC; class RefSCC; - class edge_iterator; - class call_edge_iterator; /// A class used to represent edges in the call graph. /// @@ -464,7 +463,7 @@ public: /// Dump a short description of this SCC to stderr. void dump() const; -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS) /// Verify invariants about the SCC. /// /// This will attempt to validate all of the basic invariants within an @@ -585,7 +584,7 @@ public: /// Dump a short description of this RefSCC to stderr. void dump() const; -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS) /// Verify invariants about the RefSCC and all its SCCs. 
/// /// This will attempt to validate all of the invariants *within* the diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h index 363cb49af382..57f732cc854b 100644 --- a/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -75,7 +75,15 @@ public: /// \p Pred is a CmpInst predicate. If \p UseBlockValue is true, the block /// value is also taken into account. Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, - Instruction *CxtI, bool UseBlockValue = false); + Instruction *CxtI, bool UseBlockValue); + + /// Determine whether the specified value comparison is known to be true + /// or false at the specified instruction. While this takes two Value's, + /// it still requires that one of them is a constant. + /// \p Pred is a CmpInst predicate. + /// If \p UseBlockValue is true, the block value is also taken into account. + Tristate getPredicateAt(unsigned Pred, Value *LHS, Value *RHS, + Instruction *CxtI, bool UseBlockValue); /// Determine whether the specified value is known to be a constant at the /// specified instruction. Return null if not. diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h index 15400f5e07ff..0132c88077d2 100644 --- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -12,16 +12,16 @@ // better decisions. // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H -#define LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H +#ifndef LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H +#define LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H #include "llvm/ADT/DenseSet.h" #include "llvm/Pass.h" #include <memory> namespace llvm { +class DivergenceInfo; class Function; -class GPUDivergenceAnalysis; class Module; class raw_ostream; class TargetTransformInfo; @@ -63,7 +63,7 @@ private: const TargetTransformInfo &TTI) const; // (optional) handle to new DivergenceAnalysis - std::unique_ptr<GPUDivergenceAnalysis> gpuDA; + std::unique_ptr<DivergenceInfo> gpuDA; // Stores all divergent values. DenseSet<const Value *> DivergentValues; @@ -73,4 +73,4 @@ private: }; } // End llvm namespace -#endif //LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H +#endif // LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 24a05610e68d..ced1943b81d9 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -25,7 +25,9 @@ class Instruction; class LoadInst; class Loop; class MDNode; +class MemoryLocation; class ScalarEvolution; +class TargetLibraryInfo; /// Return true if this is always a dereferenceable pointer. If the context /// instruction is specified perform context-sensitive analysis and return true @@ -33,7 +35,8 @@ class ScalarEvolution; bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Returns true if V is always a dereferenceable pointer with alignment /// greater or equal than requested. 
If the context instruction is specified @@ -43,7 +46,8 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Returns true if V is always dereferenceable for Size byte with alignment /// greater or equal than requested. If the context instruction is specified @@ -52,7 +56,8 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Return true if we know that executing a load from this value cannot trap. /// @@ -65,7 +70,8 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Return true if we can prove that the given load (which is assumed to be /// within the specified loop) would access only dereferenceable memory, and @@ -89,7 +95,8 @@ bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, bool isSafeToLoadUnconditionally(Value *V, Type *Ty, Align Alignment, const DataLayout &DL, Instruction *ScanFrom = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// The default number of maximum instructions to scan in the block, used by /// FindAvailableLoadedValue(). @@ -127,6 +134,13 @@ Value *FindAvailableLoadedValue(LoadInst *Load, bool *IsLoadCSE = nullptr, unsigned *NumScanedInst = nullptr); +/// This overload provides a more efficient implementation of +/// FindAvailableLoadedValue() for the case where we are not interested in +/// finding the closest clobbering instruction if no available load is found. +/// This overload cannot be used to scan across multiple blocks. +Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, + unsigned MaxInstsToScan = DefMaxInstsToScan); + /// Scan backwards to see if we have the value of the given pointer available /// locally within a small number of instructions. /// @@ -134,7 +148,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, /// this function, if ScanFrom points at the beginning of the block, it's safe /// to continue scanning the predecessors. /// -/// \param Ptr The pointer we want the load and store to originate from. +/// \param Loc The location we want the load and store to originate from. /// \param AccessTy The access type of the pointer. /// \param AtLeastAtomic Are we looking for at-least an atomic load/store ? In /// case it is false, we can return an atomic or non-atomic load or store. In @@ -150,8 +164,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load, /// location in memory, as opposed to the value operand of a store. /// /// \returns The found value, or nullptr if no value is found. 
-Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, - BasicBlock *ScanBB, +Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, + bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst); diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 13fbe884eddf..0a0ef1536caf 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -341,17 +341,21 @@ struct RuntimeCheckingPtrGroup { /// pointer, with index \p Index in RtCheck. RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck); + RuntimeCheckingPtrGroup(unsigned Index, const SCEV *Start, const SCEV *End, + unsigned AS) + : High(End), Low(Start), AddressSpace(AS) { + Members.push_back(Index); + } + /// Tries to add the pointer recorded in RtCheck at index /// \p Index to this pointer checking group. We can only add a pointer /// to a checking group if we will still be able to get /// the upper and lower bounds of the check. Returns true in case /// of success, false otherwise. - bool addPointer(unsigned Index); + bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck); + bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End, + unsigned AS, ScalarEvolution &SE); - /// Constitutes the context of this pointer checking group. For each - /// pointer that is a member of this group we will retain the index - /// at which it appears in RtCheck. - RuntimePointerChecking &RtCheck; /// The SCEV expression which represents the upper bound of all the /// pointers in this group. const SCEV *High; @@ -360,6 +364,8 @@ struct RuntimeCheckingPtrGroup { const SCEV *Low; /// Indices of all the pointers that constitute this grouping. SmallVector<unsigned, 2> Members; + /// Address space of the involved pointers. + unsigned AddressSpace; }; /// A memcheck which made up of a pair of grouped pointers. @@ -679,6 +685,16 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap = ValueToValueMap(), bool Assume = false, bool ShouldCheckWrap = true); +/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are +/// compatible and it is possible to calculate the distance between them. This +/// is a simple API that does not depend on the analysis pass. +/// \param StrictCheck Ensure that the calculated distance matches the +/// type-based one after all the bitcasts removal in the provided pointers. +Optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, + Value *PtrB, const DataLayout &DL, + ScalarEvolution &SE, bool StrictCheck = false, + bool CheckType = true); + /// Attempt to sort the pointers in \p VL and return the sorted indices /// in \p SortedIndices, if reordering is required. 
/// @@ -689,7 +705,7 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, /// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and /// saves the mask for actual memory accesses in program order in /// \p SortedIndices as <1,2,0,3> -bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, +bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl<unsigned> &SortedIndices); diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h index 11dbd15c8678..92db1d67fc4e 100644 --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -47,7 +47,7 @@ class TargetTransformInfo; /// The adaptor from a function pass to a loop pass computes these analyses and /// makes them available to the loop passes "for free". Each loop pass is -/// expected expected to update these analyses if necessary to ensure they're +/// expected to update these analyses if necessary to ensure they're /// valid after it runs. struct LoopStandardAnalysisResults { AAResults &AA; diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h index e8f2205545eb..21882ebd0087 100644 --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -205,7 +205,7 @@ public: } /// Return the estimated ordered loop costs. - const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; } + ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; } private: /// Calculate the cache footprint of each loop in the nest (when it is diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index a5717bae12c3..164ec50e47bc 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -479,7 +479,8 @@ public: bool isAnnotatedParallel() const { return false; } /// Print loop with all the BBs inside it. - void print(raw_ostream &OS, unsigned Depth = 0, bool Verbose = false) const; + void print(raw_ostream &OS, bool Verbose = false, bool PrintNested = true, + unsigned Depth = 0) const; protected: friend class LoopInfoBase<BlockT, LoopT>; @@ -588,6 +589,9 @@ public: /// PHINode *getCanonicalInductionVariable() const; + /// Get the latch condition instruction. + ICmpInst *getLatchCmpInst() const; + /// Obtain the unique incoming and back edge. Return false if they are /// non-unique or the loop is dead; otherwise, return true. bool getIncomingAndBackEdge(BasicBlock *&Incoming, @@ -1199,6 +1203,14 @@ public: return true; } + + // Return true if a new use of V added in ExitBB would require an LCSSA PHI + // to be inserted at the beginning of the block. Note that V is assumed to + // dominate ExitBB, and ExitBB must be the exit block of some loop. The + // IR is assumed to be in LCSSA form before the planned insertion. + bool wouldBeOutOfLoopUseRequiringLCSSA(const Value *V, + const BasicBlock *ExitBB) const; + }; // Allow clients to walk the list of nested loops... @@ -1283,6 +1295,33 @@ MDNode *findOptionMDForLoopID(MDNode *LoopID, StringRef Name); /// found, return nullptr. MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name); +Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop, + StringRef Name); + +/// Returns true if Name is applied to TheLoop and enabled.
+bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name); + +/// Find named metadata for a loop with an integer value. +llvm::Optional<int> +getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name); + +/// Find string metadata for loop. +/// +/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an +/// operand or null otherwise. If the string metadata is not found return +/// Optional's not-a-value. +Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop, + StringRef Name); + +/// Look for the loop attribute that requires progress within the loop. +/// Note: Most consumers probably want "isMustProgress" which checks +/// the containing function attribute too. +bool hasMustProgress(const Loop *L); + +/// Return true if this loop can be assumed to make progress. (i.e. can't +/// be infinite without side effects without also being undefined) +bool isMustProgress(const Loop *L); + /// Return whether an MDNode might represent an access group. /// /// Access group metadata nodes have to be distinct and empty. Being diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h index 426b349c6b8a..2cc9afb7c2cd 100644 --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" @@ -380,8 +381,8 @@ void LoopBase<BlockT, LoopT>::verifyLoopNest( } template <class BlockT, class LoopT> -void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth, - bool Verbose) const { +void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, bool Verbose, + bool PrintNested, unsigned Depth) const { OS.indent(Depth * 2); if (static_cast<const LoopT *>(this)->isAnnotatedParallel()) OS << "Parallel "; @@ -406,10 +407,13 @@ void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth, if (Verbose) BB->print(OS); } - OS << "\n"; - for (iterator I = begin(), E = end(); I != E; ++I) - (*I)->print(OS, Depth + 2); + if (PrintNested) { + OS << "\n"; + + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->print(OS, /*Verbose*/ false, PrintNested, Depth + 2); + } } //===----------------------------------------------------------------------===// @@ -676,10 +680,7 @@ static void compareLoops(const LoopT *L, const LoopT *OtherL, const SmallPtrSetImpl<const BlockT *> &OtherBlocksSet = OtherL->getBlocksSet(); assert(BlocksSet.size() == OtherBlocksSet.size() && - llvm::all_of(BlocksSet, - [&OtherBlocksSet](const BlockT *BB) { - return OtherBlocksSet.count(BB); - }) && + llvm::set_is_subset(BlocksSet, OtherBlocksSet) && "Mismatched basic blocks in BlocksSets!"); } #endif diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 9c4fb4dbc29b..9a749a1c8eae 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -30,7 +30,6 @@ public: LoopNest(Loop &Root, ScalarEvolution &SE); LoopNest() = delete; - LoopNest &operator=(const LoopNest &) = delete; /// Construct a LoopNest object.
static std::unique_ptr<LoopNest> getLoopNest(Loop &Root, ScalarEvolution &SE); @@ -61,10 +60,12 @@ public: static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE); /// Recursivelly traverse all empty 'single successor' basic blocks of \p From - /// (if there are any). Return the last basic block found or \p End if it was - /// reached during the search. + /// (if there are any). When \p CheckUniquePred is set to true, check if + /// each of the empty single successors has a unique predecessor. Return + /// the last basic block found or \p End if it was reached during the search. static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From, - const BasicBlock *End); + const BasicBlock *End, + bool CheckUniquePred = false); /// Return the outermost loop in the loop nest. Loop &getOutermostLoop() const { return *Loops.front(); } @@ -139,6 +140,11 @@ public: return all_of(Loops, [](const Loop *L) { return L->isRotatedForm(); }); } + /// Return the function to which the loop-nest belongs. + Function *getParent() const { + return Loops.front()->getHeader()->getParent(); + } + StringRef getName() const { return Loops.front()->getName(); } protected: diff --git a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h index 5f332e3cac16..7cf8a081f9a2 100644 --- a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h +++ b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h @@ -46,7 +46,7 @@ class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> { public: UnrolledInstAnalyzer(unsigned Iteration, - DenseMap<Value *, Constant *> &SimplifiedValues, + DenseMap<Value *, Value *> &SimplifiedValues, ScalarEvolution &SE, const Loop *L) : SimplifiedValues(SimplifiedValues), SE(SE), L(L) { IterationNumber = SE.getConstant(APInt(64, Iteration)); @@ -68,22 +68,19 @@ private: /// iteration. const SCEV *IterationNumber; - /// A Value->Constant map for keeping values that we managed to - /// constant-fold on the given iteration. - /// /// While we walk the loop instructions, we build up and maintain a mapping /// of simplified values specific to this iteration. The idea is to propagate /// any special information we have about loads that can be replaced with /// constants after complete unrolling, and account for likely simplifications /// post-unrolling. - DenseMap<Value *, Constant *> &SimplifiedValues; + DenseMap<Value *, Value *> &SimplifiedValues; ScalarEvolution &SE; const Loop *L; bool simplifyInstWithSCEV(Instruction *I); - bool visitInstruction(Instruction &I) { return simplifyInstWithSCEV(&I); } + bool visitInstruction(Instruction &I); bool visitBinaryOperator(BinaryOperator &I); bool visitLoad(LoadInst &I); bool visitCastInst(CastInst &I); diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h index c5428726995e..39ade20df53f 100644 --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -212,6 +212,10 @@ struct ObjectSizeOpts { /// object size in Size if successful, and false otherwise. In this context, by /// object we mean the region of memory starting at Ptr to the end of the /// underlying object pointed to by Ptr. +/// +/// WARNING: The object size returned is the allocation size. This does not +/// imply dereferenceability at site of use since the object may be freed in +/// between.
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {}); diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index efde00f82d57..cb522cf731d3 100644 --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -20,30 +20,18 @@ #include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Pass.h" -#include "llvm/Support/ErrorHandling.h" -#include <cassert> -#include <cstdint> -#include <utility> -#include <vector> namespace llvm { class AAResults; class AssumptionCache; +class BatchAAResults; class DominatorTree; -class Function; -class Instruction; -class LoadInst; class PHITransAddr; -class TargetLibraryInfo; class PhiValues; -class Value; /// A memory dependence query can return one of three different answers. class MemDepResult { @@ -343,7 +331,7 @@ private: // A map from instructions to their non-local dependencies. using NonLocalDepMapType = DenseMap<Instruction *, PerInstNLInfo>; - NonLocalDepMapType NonLocalDeps; + NonLocalDepMapType NonLocalDepsMap; // A reverse mapping from dependencies to the dependees. This is // used when removing instructions to keep the cache coherent. @@ -364,6 +352,10 @@ private: unsigned DefaultBlockScanLimit; + /// Offsets to dependent clobber loads. + using ClobberOffsetsMapType = DenseMap<LoadInst *, int32_t>; + ClobberOffsetsMapType ClobberOffsets; + public: MemoryDependenceResults(AAResults &AA, AssumptionCache &AC, const TargetLibraryInfo &TLI, DominatorTree &DT, @@ -452,10 +444,18 @@ public: Instruction *QueryInst = nullptr, unsigned *Limit = nullptr); + MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB, + Instruction *QueryInst, + unsigned *Limit, + BatchAAResults &BatchAA); + MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, - Instruction *QueryInst, unsigned *Limit); + Instruction *QueryInst, unsigned *Limit, + BatchAAResults &BatchAA); /// This analysis looks for other loads and stores with invariant.group /// metadata and the same pointer operand. Returns Unknown if it does not @@ -468,6 +468,14 @@ public: /// Release memory in caches. void releaseMemory(); + /// Return the clobber offset to the dependent instruction.
+ Optional<int32_t> getClobberOffset(LoadInst *DepInst) const { + const auto Off = ClobberOffsets.find(DepInst); + if (Off != ClobberOffsets.end()) + return Off->getSecond(); + return None; + } + private: MemDepResult getCallDependencyFrom(CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt, @@ -480,12 +488,13 @@ private: DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock = false, bool IsIncomplete = false); - MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst, + MemDepResult getNonLocalInfoForBlock(Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, - unsigned NumSortedEntries); + unsigned NumSortedEntries, + BatchAAResults &BatchAA); - void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P); + void removeCachedNonLocalPointerDependencies(ValueIsLoadPair P); void verifyRemoved(Instruction *Inst) const; }; diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 63c031b1921f..f40b99968fd3 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -288,7 +288,7 @@ protected: DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB, unsigned NumOperands) : MemoryAccess(C, Vty, DeleteValue, BB, NumOperands), - MemoryInstruction(MI), OptimizedAccessAlias(MayAlias) { + MemoryInstruction(MI), OptimizedAccessAlias(AliasResult::MayAlias) { setDefiningAccess(DMA); } @@ -299,8 +299,9 @@ protected: OptimizedAccessAlias = AR; } - void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false, - Optional<AliasResult> AR = MayAlias) { + void setDefiningAccess( + MemoryAccess *DMA, bool Optimized = false, + Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias)) { if (!Optimized) { setOperand(0, DMA); return; @@ -328,7 +329,8 @@ public: /*NumOperands=*/1) {} // allocate space for exactly one operand - void *operator new(size_t s) { return User::operator new(s, 1); } + void *operator new(size_t S) { return User::operator new(S, 1); } + void operator delete(void *Ptr) { User::operator delete(Ptr); } static bool classof(const Value *MA) { return MA->getValueID() == MemoryUseVal; @@ -388,7 +390,8 @@ public: ID(Ver) {} // allocate space for exactly two operands - void *operator new(size_t s) { return User::operator new(s, 2); } + void *operator new(size_t S) { return User::operator new(S, 2); } + void operator delete(void *Ptr) { User::operator delete(Ptr); } static bool classof(const Value *MA) { return MA->getValueID() == MemoryDefVal; @@ -483,9 +486,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess) /// issue. class MemoryPhi final : public MemoryAccess { // allocate space for exactly zero operands - void *operator new(size_t s) { return User::operator new(s); } + void *operator new(size_t S) { return User::operator new(S); } public: + void operator delete(void *Ptr) { User::operator delete(Ptr); } + /// Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); @@ -1099,7 +1104,7 @@ public: return MP->getIncomingBlock(ArgNo); } - typename BaseT::iterator::pointer operator*() const { + typename std::iterator_traits<BaseT>::pointer operator*() const { assert(Access && "Tried to access past the end of our iterator"); // Go to the first argument for phis, and the defining access for everything // else. 
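Returning to the clobber-offset cache that MemoryDependenceResults gains above: when a load is clobbered by a wider load at a known distance, getClobberOffset() exposes that distance so a client such as GVN can salvage the earlier value instead of giving up. A hedged sketch, assuming `MD` is a MemoryDependenceResults and `Dep` came from an ordinary dependence query for `Load`:

  if (Dep.isClobber()) {
    if (auto *DepLoad = dyn_cast<LoadInst>(Dep.getInst())) {
      if (Optional<int32_t> Off = MD.getClobberOffset(DepLoad)) {
        // *Off is the byte offset of Load's location within DepLoad's, so
        // the value could be carved out of DepLoad instead of re-loaded.
      }
    }
  }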
@@ -1195,7 +1200,7 @@ public: return DefIterator == Other.DefIterator; } - BaseT::iterator::reference operator*() const { + typename std::iterator_traits<BaseT>::reference operator*() const { assert(DefIterator != OriginalAccess->defs_end() && "Tried to access past the end of our iterator"); return CurrentPair; diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index b0bf2e5ead62..659e6aff6e28 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -240,11 +240,6 @@ public: /// successors. void changeToUnreachable(const Instruction *I); - /// Conditional branch BI is changed or replaced with an unconditional branch - /// to `To`. Update Phis in BI's successors to remove BI's BB. - void changeCondBranchToUnconditionalTo(const BranchInst *BI, - const BasicBlock *To); - /// Get handle on MemorySSA. MemorySSA* getMemorySSA() const { return MSSA; } diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h index 16c5f6701da0..62bdade95d96 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H -#define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H +#ifndef LLVM_ANALYSIS_OBJCARCANALYSISUTILS_H +#define LLVM_ANALYSIS_OBJCARCANALYSISUTILS_H #include "llvm/ADT/Optional.h" #include "llvm/Analysis/ObjCARCInstKind.h" diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h new file mode 100644 index 000000000000..2566bfbcf61c --- /dev/null +++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h @@ -0,0 +1,59 @@ +//===- ObjCARCUtil.h - ObjC ARC Utility Functions ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines ARC utility functions which are used by various parts of +/// the compiler. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_OBJCARCUTIL_H +#define LLVM_IR_OBJCARCUTIL_H + +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { +namespace objcarc { + +inline const char *getRVMarkerModuleFlagStr() { + return "clang.arc.retainAutoreleasedReturnValueMarker"; +} + +enum AttachedCallOperandBundle : unsigned { RVOB_Retain, RVOB_Claim }; + +inline AttachedCallOperandBundle +getAttachedCallOperandBundleEnum(bool IsRetain) { + return IsRetain ? RVOB_Retain : RVOB_Claim; +} + +inline bool hasAttachedCallOpBundle(const CallBase *CB) { + // Ignore the bundle if the return type is void. Global optimization passes + // can turn the called function's return type to void. That should happen only + // if the call doesn't return and the call to @llvm.objc.clang.arc.noop.use + // no longer consumes the function return or is deleted. In that case, it's + // not necessary to emit the marker instruction or calls to the ARC runtime + // functions. 
+ return !CB->getFunctionType()->getReturnType()->isVoidTy() && + CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall) + .hasValue(); +} + +inline bool hasAttachedCallOpBundle(const CallBase *CB, bool IsRetain) { + assert(hasAttachedCallOpBundle(CB) && + "call doesn't have operand bundle clang_arc_attachedcall"); + auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall); + if (!B.hasValue()) + return false; + return cast<ConstantInt>(B->Inputs[0])->getZExtValue() == + getAttachedCallOperandBundleEnum(IsRetain); +} + +} // end namespace objcarc +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h index 9815dd05cd1c..ff706e91f3c4 100644 --- a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h +++ b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h @@ -11,8 +11,8 @@ // used to compute the "hotness" of the diagnostic message. //===----------------------------------------------------------------------===// -#ifndef LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H -#define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H +#ifndef LLVM_ANALYSIS_OPTIMIZATIONREMARKEMITTER_H +#define LLVM_ANALYSIS_OPTIMIZATIONREMARKEMITTER_H #include "llvm/ADT/Optional.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -61,6 +61,12 @@ public: bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv); + /// Return true iff at least *some* remarks are enabled. + bool enabled() const { + return F->getContext().getLLVMRemarkStreamer() || + F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(); + } + /// Output the remark via the diagnostic handler and to the /// optimization record file. void emit(DiagnosticInfoOptimizationBase &OptDiag); @@ -73,9 +79,11 @@ public: // remarks enabled. We can't currently check whether remarks are requested // for the calling pass since that requires actually building the remark. - if (F->getContext().getLLVMRemarkStreamer() || - F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) { + if (enabled()) { auto R = RemarkBuilder(); + static_assert( + std::is_base_of<DiagnosticInfoOptimizationBase, decltype(R)>::value, + "the lambda passed to emit() must return a remark"); emit((DiagnosticInfoOptimizationBase &)R); } } @@ -166,4 +174,4 @@ public: Result run(Function &F, FunctionAnalysisManager &AM); }; } -#endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H +#endif // LLVM_ANALYSIS_OPTIMIZATIONREMARKEMITTER_H diff --git a/llvm/include/llvm/Analysis/OverflowInstAnalysis.h b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h new file mode 100644 index 000000000000..7523fb9392cd --- /dev/null +++ b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h @@ -0,0 +1,45 @@ +//===-- OverflowInstAnalysis.h - Utils to fold overflow insts ----*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file holds routines to help analyse overflow instructions +// and fold them into constants or other overflow instructions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H +#define LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H + +#include "llvm/IR/InstrTypes.h" + +namespace llvm { +class Value; +class Use; + +/// Match one of the patterns up to the select/logic op: +/// %Op0 = icmp ne i4 %X, 0 +/// %Agg = call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %Y) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %ret = select i1 %Op0, i1 %Op1, i1 false / %ret = and i1 %Op0, %Op1 +/// +/// %Op0 = icmp eq i4 %X, 0 +/// %Agg = call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %Y) +/// %NotOp1 = extractvalue { i4, i1 } %Agg, 1 +/// %Op1 = xor i1 %NotOp1, true +/// %ret = select i1 %Op0, i1 true, i1 %Op1 / %ret = or i1 %Op0, %Op1 +/// +/// Callers are expected to align that with the operands of the select/logic. +/// IsAnd is set to true if the Op0 and Op1 are used as the first pattern. +/// If Op0 and Op1 match one of the patterns above, return true and fill Y's +/// use. + +bool isCheckForZeroAndMulWithOverflow(Value *Op0, Value *Op1, bool IsAnd, + Use *&Y); +bool isCheckForZeroAndMulWithOverflow(Value *Op0, Value *Op1, bool IsAnd); +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index a4e6ffc3dd58..c95404d96f4e 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H -#define LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H +#ifndef LLVM_ANALYSIS_PROFILESUMMARYINFO_H +#define LLVM_ANALYSIS_PROFILESUMMARYINFO_H #include "llvm/ADT/DenseMap.h" #include "llvm/IR/PassManager.h" @@ -38,7 +38,7 @@ class Function; // units. This would require making this depend on BFI. class ProfileSummaryInfo { private: - const Module &M; + const Module *M; std::unique_ptr<ProfileSummary> Summary; void computeThresholds(); // Count thresholds to answer isHotCount and isColdCount queries. @@ -58,8 +58,7 @@ private: mutable DenseMap<int, uint64_t> ThresholdCache; public: - ProfileSummaryInfo(const Module &M) : M(M) { refresh(); } - + ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); } ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default; /// If no summary is present, attempt to refresh. diff --git a/llvm/include/llvm/Analysis/RegionIterator.h b/llvm/include/llvm/Analysis/RegionIterator.h index 72bc5bbcb506..fecb28725dcc 100644 --- a/llvm/include/llvm/Analysis/RegionIterator.h +++ b/llvm/include/llvm/Analysis/RegionIterator.h @@ -35,10 +35,15 @@ class BasicBlock; /// /// For a subregion RegionNode there is just one successor. The RegionNode /// representing the exit of the subregion. 
-template <class NodeRef, class BlockT, class RegionT> -class RNSuccIterator - : public std::iterator<std::forward_iterator_tag, NodeRef> { - using super = std::iterator<std::forward_iterator_tag, NodeRef>; +template <class NodeRef, class BlockT, class RegionT> class RNSuccIterator { +public: + using iterator_category = std::forward_iterator_tag; + using value_type = NodeRef; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + +private: using BlockTraits = GraphTraits<BlockT *>; using SuccIterTy = typename BlockTraits::ChildIteratorType; @@ -99,7 +104,6 @@ class RNSuccIterator public: using Self = RNSuccIterator<NodeRef, BlockT, RegionT>; - using value_type = typename super::value_type; /// Create begin iterator of a RegionNode. inline RNSuccIterator(NodeRef node) @@ -163,9 +167,7 @@ public: /// are contained in the Region and its subregions. This is close to a virtual /// control flow graph of the Region. template <class NodeRef, class BlockT, class RegionT> -class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> - : public std::iterator<std::forward_iterator_tag, NodeRef> { - using super = std::iterator<std::forward_iterator_tag, NodeRef>; +class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> { using BlockTraits = GraphTraits<BlockT *>; using SuccIterTy = typename BlockTraits::ChildIteratorType; @@ -173,8 +175,13 @@ class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> SuccIterTy Itor; public: + using iterator_category = std::forward_iterator_tag; + using value_type = NodeRef; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + using Self = RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>; - using value_type = typename super::value_type; /// Create the iterator from a RegionNode. /// diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index b3f199de2cfa..ae9c73fede96 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -505,6 +505,17 @@ public: /// Erase Value from ValueExprMap and ExprValueMap. void eraseValueFromMap(Value *V); + /// Does operation \p BinOp between \p LHS and \p RHS provably not have + /// a signed/unsigned overflow (\p Signed)? + bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, + const SCEV *LHS, const SCEV *RHS); + + /// Parse NSW/NUW flags from add/sub/mul IR binary operation \p Op into + /// SCEV no-wrap flags, and deduce flag[s] that aren't known yet. + /// Does not mutate the original instruction. + std::pair<SCEV::NoWrapFlags, bool /*Deduced*/> + getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO); + /// Return a SCEV expression for the full generality of the specified /// expression.
   /// Return a SCEV expression for the full generality of the specified
   /// expression.
   const SCEV *getSCEV(Value *V);
 
@@ -512,7 +523,8 @@ public:
   const SCEV *getConstant(ConstantInt *V);
   const SCEV *getConstant(const APInt &Val);
   const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
-  const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
+  const SCEV *getLosslessPtrToIntExpr(const SCEV *Op, unsigned Depth = 0);
+  const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty);
   const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
   const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
   const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
@@ -575,7 +587,6 @@ public:
   const SCEV *getGEPExpr(GEPOperator *GEP,
                          const SmallVectorImpl<const SCEV *> &IndexExprs);
   const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW);
-  const SCEV *getSignumExpr(const SCEV *Op);
   const SCEV *getMinMaxExpr(SCEVTypes Kind,
                             SmallVectorImpl<const SCEV *> &Operands);
   const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
@@ -622,10 +633,26 @@ public:
   const SCEV *getNotSCEV(const SCEV *V);
 
   /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+  ///
+  /// If the LHS and RHS are pointers which don't share a common base
+  /// (according to getPointerBase()), this returns a SCEVCouldNotCompute.
+  /// To compute the difference between two unrelated pointers, you can
+  /// explicitly convert the arguments using getPtrToIntExpr(), for pointer
+  /// types that support it.
   const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
                            SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
                            unsigned Depth = 0);
 
+  /// Compute ceil(N / D). N and D are treated as unsigned values.
+  ///
+  /// Since SCEV doesn't have native ceiling division, this generates a
+  /// SCEV expression of the following form:
+  ///
+  /// umin(N, 1) + floor((N - umin(N, 1)) / D)
+  ///
+  /// A denominator of zero or poison is handled the same way as getUDivExpr().
+  const SCEV *getUDivCeilSCEV(const SCEV *N, const SCEV *D);
+
   /// Return a SCEV corresponding to a conversion of the input value to the
   /// specified type. If the type must be extended, it is zero extended.
   const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
@@ -705,17 +732,25 @@ public:
   bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
                                    const SCEV *LHS, const SCEV *RHS);
 
-  /// Returns the maximum trip count of the loop if it is a single-exit
-  /// loop and we can compute a small maximum for that loop.
-  ///
-  /// Implemented in terms of the \c getSmallConstantTripCount overload with
-  /// the single exiting block passed to it. See that routine for details.
+  /// Convert from an "exit count" (i.e. "backedge taken count") to a "trip
+  /// count". A "trip count" is the number of times the header of the loop
+  /// will execute if an exit is taken after the specified number of backedges
+  /// have been taken (e.g. TripCount = ExitCount + 1). A zero result
+  /// must be interpreted as a loop having an unknown trip count.
+  const SCEV *getTripCountFromExitCount(const SCEV *ExitCount);
+
+  /// Returns the exact trip count of the loop if we can compute it, and
+  /// the result is a small constant. '0' is used to represent an unknown
+  /// or non-constant trip count. Note that a trip count is simply one more
+  /// than the backedge taken count for the loop.
   unsigned getSmallConstantTripCount(const Loop *L);
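A worked instance of the umin-based identity documented above, with SE, N and D assumed to be in scope (N and D being unsigned SCEVs of the same type):

#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

// N = 7, D = 2: umin(7,1) = 1; 1 + floor((7-1)/2) = 1 + 3 = 4 = ceil(7/2).
// N = 0, D = 2: umin(0,1) = 0; 0 + floor((0-0)/2) = 0 = ceil(0/2).
// The umin(N, 1) term is what lets N = 0 avoid the unsigned wrap that a
// naive 1 + floor((N - 1) / D) rewrite would produce.
static const SCEV *ceilOfUDiv(ScalarEvolution &SE, const SCEV *N,
                              const SCEV *D) {
  return SE.getUDivCeilSCEV(N, D);
}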
-  /// Returns the maximum trip count of this loop as a normal unsigned
-  /// value. Returns 0 if the trip count is unknown or not constant. This
-  /// "trip count" assumes that control exits via ExitingBlock. More
-  /// precisely, it is the number of times that control may reach ExitingBlock
+  /// Return the exact trip count for this loop if we exit through ExitingBlock.
+  /// '0' is used to represent an unknown or non-constant trip count. Note
+  /// that a trip count is simply one more than the backedge taken count for
+  /// the same exit.
+  /// This "trip count" assumes that control exits via ExitingBlock. More
+  /// precisely, it is the number of times that control will reach ExitingBlock
   /// before taking the branch. For loops with multiple exits, it may not be
   /// the number of times that the loop header executes if the loop exits
   /// prematurely via another branch.
@@ -727,12 +762,18 @@ public:
   /// Returns 0 if the trip count is unknown or not constant.
   unsigned getSmallConstantMaxTripCount(const Loop *L);
 
+  /// Returns the largest constant divisor of the trip count as a normal
+  /// unsigned value, if possible. This means that the actual trip count is
+  /// always a multiple of the returned value. Returns 1 if the trip count is
+  /// unknown or not guaranteed to be the multiple of a constant. Will also
+  /// return 1 if the trip count is very large (>= 2^32).
+  /// Note that the argument is an exit count for loop L, NOT a trip count.
+  unsigned getSmallConstantTripMultiple(const Loop *L,
+                                        const SCEV *ExitCount);
+
   /// Returns the largest constant divisor of the trip count of the
-  /// loop if it is a single-exit loop and we can compute a small maximum for
-  /// that loop.
-  ///
-  /// Implemented in terms of the \c getSmallConstantTripMultiple overload with
-  /// the single exiting block passed to it. See that routine for details.
+  /// loop. Will return 1 if no trip count could be computed, or if a
+  /// divisor could not be found.
   unsigned getSmallConstantTripMultiple(const Loop *L);
 
   /// Returns the largest constant divisor of the trip count of this loop as a
@@ -938,11 +979,24 @@ public:
   bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
                         const SCEV *RHS);
 
+  /// Check whether the condition described by Pred, LHS, and RHS is true or
+  /// false. If we know it, return the evaluation of this condition. If neither
+  /// is proved, return None.
+  Optional<bool> evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                   const SCEV *RHS);
+
   /// Test if the given expression is known to satisfy the condition described
   /// by Pred, LHS, and RHS in the given Context.
   bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
                           const SCEV *RHS, const Instruction *Context);
 
+  /// Check whether the condition described by Pred, LHS, and RHS is true or
+  /// false in the given \p Context. If we know it, return the evaluation of
+  /// this condition. If neither is proved, return None.
+  Optional<bool> evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                     const SCEV *RHS,
+                                     const Instruction *Context);
+
   /// Test if the condition described by Pred, LHS, RHS is known to be true on
   /// every iteration of the loop of the recurrence LHS.
   bool isKnownOnEveryIteration(ICmpInst::Predicate Pred,
@@ -1177,6 +1231,9 @@ public:
   /// sharpen it.
   void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags);
 
+  /// Try to apply information from loop guards for \p L to \p Expr.
+  const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L);
+
 private:
   /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
   /// Value is deleted.
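The two predicate helpers added above compose naturally with applyLoopGuards(). A minimal sketch, assuming the caller provides SE, the loop L and the two operands; the wrapper name is illustrative:

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Tighten LHS with facts from the loop's guarding conditions, then ask for a
// definitive three-way answer instead of the one-sided isKnownPredicate().
static bool provablyULT(ScalarEvolution &SE, const Loop *L, const SCEV *LHS,
                        const SCEV *RHS) {
  const SCEV *Guarded = SE.applyLoopGuards(LHS, L);
  if (Optional<bool> Res =
          SE.evaluatePredicate(ICmpInst::ICMP_ULT, Guarded, RHS))
    return *Res; // proven true or proven false
  return false;  // neither side proved; stay conservative
}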
@@ -1225,7 +1282,8 @@ private:
   /// The type for ExprValueMap.
   using ValueOffsetPair = std::pair<Value *, ConstantInt *>;
-  using ExprValueMapType = DenseMap<const SCEV *, SetVector<ValueOffsetPair>>;
+  using ValueOffsetPairSetVector = SmallSetVector<ValueOffsetPair, 4>;
+  using ExprValueMapType = DenseMap<const SCEV *, ValueOffsetPairSetVector>;
 
   /// ExprValueMap -- This map records the original values from which
   /// the SCEV expr is generated.
@@ -1277,7 +1335,7 @@ private:
   DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;
 
   /// Return the Value set from which the SCEV expr is generated.
-  SetVector<ValueOffsetPair> *getSCEVValues(const SCEV *S);
+  ValueOffsetPairSetVector *getSCEVValues(const SCEV *S);
 
   /// Private helper method for the GetMinTrailingZeros method
   uint32_t GetMinTrailingZerosImpl(const SCEV *S);
@@ -1324,8 +1382,6 @@ private:
              !isa<SCEVCouldNotCompute>(MaxNotTaken);
     }
 
-    bool hasOperand(const SCEV *S) const;
-
     /// Test whether this ExitLimit contains all information.
     bool hasFullInfo() const {
       return !isa<SCEVCouldNotCompute>(ExactNotTaken);
     }
@@ -1376,6 +1432,9 @@ private:
     /// True iff the backedge is taken either exactly Max or zero times.
     bool MaxOrZero = false;
 
+    /// SCEV expressions used in any of the ExitNotTakenInfo counts.
+    SmallPtrSet<const SCEV *, 4> Operands;
+
     bool isComplete() const { return IsComplete; }
     const SCEV *getConstantMax() const { return ConstantMax; }
 
@@ -1444,10 +1503,7 @@ private:
     /// Return true if any backedge taken count expressions refer to the given
     /// subexpression.
-    bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
-
-    /// Invalidate this result and free associated memory.
-    void clear();
+    bool hasOperand(const SCEV *S) const;
   };
 
   /// Cache the backedge-taken count of the loops for this function as they
@@ -1502,6 +1558,10 @@ private:
     return getLoopProperties(L).HasNoAbnormalExits;
   }
 
+  /// Return true if this loop is finite by assumption. That is, for the loop
+  /// to be infinite, it must also have undefined behavior.
+  bool loopIsFiniteByAssumption(const Loop *L);
+
   /// Compute a LoopDisposition value.
   LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
 
@@ -1558,6 +1618,12 @@ private:
   ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop,
                                      const SCEV *MaxBECount, unsigned BitWidth);
 
+  /// If the unknown expression U corresponds to a simple recurrence, return
+  /// a constant range which represents the entire recurrence. Note that
+  /// *add* recurrences with loop invariant steps aren't represented by
+  /// SCEVUnknowns and thus don't use this mechanism.
+  ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U);
+
   /// We know that there is no SCEV for the specified value. Analyze the
   /// expression.
   const SCEV *createSCEV(Value *V);
@@ -1966,11 +2032,6 @@ private:
   Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
   createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI);
 
-  /// Compute the backedge taken count knowing the interval difference, the
-  /// stride and presence of the equality in the comparison.
-  const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride,
-                             bool Equality);
-
   /// Compute the maximum backedge count based on the range of values
   /// permitted by Start, End, and Stride. This is for loops of the form
   /// {Start, +, Stride} LT End.
@@ -1983,15 +2044,13 @@ private:
   /// Verify if a linear IV with positive stride can overflow when in a
   /// less-than comparison, knowing the invariant term of the comparison,
-  /// the stride and the knowledge of NSW/NUW flags on the recurrence.
-  bool doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
-                          bool NoWrap);
+  /// the stride.
+  bool canIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned);
 
   /// Verify if a linear IV with negative stride can overflow when in a
   /// greater-than comparison, knowing the invariant term of the comparison,
-  /// the stride and the knowledge of NSW/NUW flags on the recurrence.
-  bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
-                          bool NoWrap);
+  /// the stride.
+  bool canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned);
 
   /// Get add expr already created or create a new one.
   const SCEV *getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
@@ -2021,9 +2080,6 @@ private:
   /// Assign A and B to LHS and RHS, respectively.
   bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
 
-  /// Try to apply information from loop guards for \p L to \p Expr.
-  const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L);
-
   /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
   /// `UniqueSCEVs`.
   ///
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
index 98d53237d4a0..20acb407ead0 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
@@ -33,6 +33,9 @@ public:
   AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
                     AAQueryInfo &AAQI);
 
+  bool invalidate(Function &F, const PreservedAnalyses &PA,
+                  FunctionAnalysisManager::Invalidator &Inv);
+
 private:
   Value *GetBaseValue(const SCEV *S);
 };
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 37e675f08afc..c0da311e4e48 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -210,8 +208,6 @@ class Type;
       return make_range(op_begin(), op_end());
     }
 
-    Type *getType() const { return getOperand(0)->getType(); }
-
     NoWrapFlags getNoWrapFlags(NoWrapFlags Mask = NoWrapMask) const {
       return (NoWrapFlags)(SubclassData & Mask);
     }
@@ -293,6 +291,8 @@ class Type;
       : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
 
   public:
+    Type *getType() const { return getOperand(0)->getType(); }
+
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
       return S->getSCEVType() == scMulExpr;
@@ -359,6 +359,7 @@ class Type;
       : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {}
 
   public:
+    Type *getType() const { return getStart()->getType(); }
    const SCEV *getStart() const { return Operands[0]; }
    const Loop *getLoop() const { return L; }
 
@@ -401,6 +402,11 @@ class Type;
     /// iteration number.
     const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const;
 
+    /// Return the value of this chain of recurrences at the specified iteration
+    /// number. Takes an explicit list of operands to represent an AddRec.
+    static const SCEV *evaluateAtIteration(ArrayRef<const SCEV *> Operands,
+                                           const SCEV *It, ScalarEvolution &SE);
+
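The static overload is what lets SCEVLoopAddRecRewriter (changed below) evaluate a recurrence whose operands were just rewritten, without first materializing a new SCEVAddRecExpr. A sketch for the affine case, with SE and It assumed to come from a real caller:

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

// For the AddRec {A,+,B}, the value at iteration It is A + It * B (higher
// order recurrences add binomial-coefficient terms).
static const SCEV *affineAtIteration(ScalarEvolution &SE, const SCEV *A,
                                     const SCEV *B, const SCEV *It) {
  SmallVector<const SCEV *, 2> Ops = {A, B};
  return SCEVAddRecExpr::evaluateAtIteration(Ops, It, SE);
}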
    /// Return the number of iterations of this loop that produce
    /// values in the specified constant range. Another way of
    /// looking at this is that it returns the first iteration number
@@ -440,6 +446,8 @@ class Type;
     }
 
   public:
+    Type *getType() const { return getOperand(0)->getType(); }
+
     static bool classof(const SCEV *S) {
       return isMinMaxType(S->getSCEVType());
     }
@@ -895,13 +903,10 @@ class Type;
         Operands.push_back(visit(Op));
 
       const Loop *L = Expr->getLoop();
-      const SCEV *Res = SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags());
       if (0 == Map.count(L))
-        return Res;
+        return SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags());
 
-      const SCEVAddRecExpr *Rec = cast<SCEVAddRecExpr>(Res);
-      return Rec->evaluateAtIteration(Map[L], SE);
+      return SCEVAddRecExpr::evaluateAtIteration(Operands, Map[L], SE);
     }
 
   private:
diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h
index 81a2533152de..27c58c0afa8a 100644
--- a/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -470,8 +470,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::Solve() {
   while (!BBWorkList.empty() || !ValueWorkList.empty()) {
     // Process the value work list.
     while (!ValueWorkList.empty()) {
-      Value *V = ValueWorkList.back();
-      ValueWorkList.pop_back();
+      Value *V = ValueWorkList.pop_back_val();
 
       LLVM_DEBUG(dbgs() << "\nPopped off V-WL: " << *V << "\n");
diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
index 9838d629e93e..92459ea79ab4 100644
--- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
@@ -13,8 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
-#define LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+#ifndef LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
+#define LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
@@ -89,4 +89,4 @@ private:
 } // namespace llvm
 
-#endif // LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+#endif // LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
index 358f757314ee..f9bac739cee6 100644
--- a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
+++ b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
-#define LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
+#ifndef LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H
+#define LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/CallGraph.h"
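The SparsePropagation change above is a small idiom swap: SmallVector::pop_back_val() combines back() and pop_back() into one call. A self-contained illustration:

#include "llvm/ADT/SmallVector.h"

using namespace llvm;

static int drainWorklist(SmallVector<int, 8> &Worklist) {
  int Sum = 0;
  while (!Worklist.empty())
    Sum += Worklist.pop_back_val(); // fetch and remove the tail element
  return Sum;
}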
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index defc95d0062a..ded53617b304 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -329,6 +329,12 @@ TLI_DEFINE_STRING_INTERNAL("__isoc99_scanf")
 /// int __isoc99_sscanf(const char *s, const char *format, ...)
 TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_sscanf)
 TLI_DEFINE_STRING_INTERNAL("__isoc99_sscanf")
+/// void *__kmpc_alloc_shared(size_t nbyte);
+TLI_DEFINE_ENUM_INTERNAL(__kmpc_alloc_shared)
+TLI_DEFINE_STRING_INTERNAL("__kmpc_alloc_shared")
+/// void __kmpc_free_shared(void *ptr, size_t nbyte);
+TLI_DEFINE_ENUM_INTERNAL(__kmpc_free_shared)
+TLI_DEFINE_STRING_INTERNAL("__kmpc_free_shared")
 /// double __log10_finite(double x);
 TLI_DEFINE_ENUM_INTERNAL(log10_finite)
 TLI_DEFINE_STRING_INTERNAL("__log10_finite")
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 34a8a1e3407c..22bfeda0efd0 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -28,7 +28,7 @@ class Triple;
 struct VecDesc {
   StringRef ScalarFnName;
   StringRef VectorFnName;
-  unsigned VectorizationFactor;
+  ElementCount VectorizationFactor;
 };
 
 enum LibFunc : unsigned {
@@ -52,6 +52,7 @@ class TargetLibraryInfoImpl {
   llvm::DenseMap<unsigned, std::string> CustomNames;
   static StringLiteral const StandardNames[NumLibFuncs];
   bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
+  unsigned SizeOfInt;
 
   enum AvailabilityState {
     StandardName = 3, // (memset to all ones)
@@ -86,11 +87,12 @@ public:
   /// addVectorizableFunctionsFromVecLib for filling up the tables of
   /// vectorizable functions.
   enum VectorLibrary {
-    NoLibrary,  // Don't use any vector library.
-    Accelerate, // Use Accelerate framework.
-    LIBMVEC_X86,// GLIBC Vector Math library.
-    MASSV,      // IBM MASS vector library.
-    SVML        // Intel short vector math library.
+    NoLibrary,        // Don't use any vector library.
+    Accelerate,       // Use Accelerate framework.
+    DarwinLibSystemM, // Use Darwin's libsystem_m.
+    LIBMVEC_X86,      // GLIBC Vector Math library.
+    MASSV,            // IBM MASS vector library.
+    SVML              // Intel short vector math library.
   };
 
   TargetLibraryInfoImpl();
@@ -152,7 +154,7 @@ public:
   /// Return true if the function F has a vector equivalent with vectorization
   /// factor VF.
-  bool isFunctionVectorizable(StringRef F, unsigned VF) const {
+  bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const {
     return !getVectorizedFunction(F, VF).empty();
   }
 
@@ -162,19 +164,7 @@ public:
   /// Return the name of the equivalent of F, vectorized with factor VF. If no
   /// such mapping exists, return the empty string.
-  StringRef getVectorizedFunction(StringRef F, unsigned VF) const;
-
-  /// Return true if the function F has a scalar equivalent, and set VF to be
-  /// the vectorization factor.
-  bool isFunctionScalarizable(StringRef F, unsigned &VF) const {
-    return !getScalarizedFunction(F, VF).empty();
-  }
-
-  /// Return the name of the equivalent of F, scalarized. If no such mapping
-  /// exists, return the empty string.
-  ///
-  /// Set VF to the vectorization factor.
-  StringRef getScalarizedFunction(StringRef F, unsigned &VF) const;
+  StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const;
 
   /// Set to true iff i32 parameters to library functions should have signext
   /// or zeroext attributes if they correspond to C-level int or unsigned int,
@@ -200,9 +190,25 @@ public:
   /// This queries the 'wchar_size' metadata.
   unsigned getWCharSize(const Module &M) const;
 
+  /// Get size of a C-level int or unsigned int, in bits.
+  unsigned getIntSize() const {
+    return SizeOfInt;
+  }
+
+  /// Initialize the C-level size of an integer.
+ void setIntSize(unsigned Bits) { + SizeOfInt = Bits; + } + /// Returns the largest vectorization factor used in the list of /// vector functions. - unsigned getWidestVF(StringRef ScalarF) const; + void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, + ElementCount &Scalable) const; + + /// Returns true if call site / callee has cdecl-compatible calling + /// conventions. + static bool isCallingConvCCompatible(CallBase *CI); + static bool isCallingConvCCompatible(Function *Callee); }; /// Provides information about what library functions are available for @@ -317,13 +323,13 @@ public: bool has(LibFunc F) const { return getState(F) != TargetLibraryInfoImpl::Unavailable; } - bool isFunctionVectorizable(StringRef F, unsigned VF) const { + bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const { return Impl->isFunctionVectorizable(F, VF); } bool isFunctionVectorizable(StringRef F) const { return Impl->isFunctionVectorizable(F); } - StringRef getVectorizedFunction(StringRef F, unsigned VF) const { + StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const { return Impl->getVectorizedFunction(F, VF); } @@ -395,6 +401,11 @@ public: return Impl->getWCharSize(M); } + /// \copydoc TargetLibraryInfoImpl::getIntSize() + unsigned getIntSize() const { + return Impl->getIntSize(); + } + /// Handle invalidation from the pass manager. /// /// If we try to invalidate this info, just return false. It cannot become @@ -409,8 +420,9 @@ public: } /// Returns the largest vectorization factor used in the list of /// vector functions. - unsigned getWidestVF(StringRef ScalarF) const { - return Impl->getWidestVF(ScalarF); + void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, + ElementCount &ScalableVF) const { + Impl->getWidestVF(ScalarF, FixedVF, ScalableVF); } /// Check if the function "F" is listed in a library known to LLVM. diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index cdfb04424e56..628058142e48 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -21,11 +21,13 @@ #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H +#include "llvm/Analysis/IVDescriptors.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/InstructionCost.h" #include <functional> @@ -59,6 +61,7 @@ class TargetLibraryInfo; class Type; class User; class Value; +class VPIntrinsic; struct KnownBits; template <typename T> class Optional; @@ -118,46 +121,34 @@ class IntrinsicCostAttributes { SmallVector<Type *, 4> ParamTys; SmallVector<const Value *, 4> Arguments; FastMathFlags FMF; - ElementCount VF = ElementCount::getFixed(1); // If ScalarizationCost is UINT_MAX, the cost of scalarizing the // arguments and the return value will be computed based on types. 
- unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); + InstructionCost ScalarizationCost = InstructionCost::getInvalid(); public: - IntrinsicCostAttributes(const IntrinsicInst &I); + IntrinsicCostAttributes( + Intrinsic::ID Id, const CallBase &CI, + InstructionCost ScalarCost = InstructionCost::getInvalid()); - IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI); - - IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, - ElementCount Factor); - - IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, - ElementCount Factor, unsigned ScalarCost); - - IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, - ArrayRef<Type *> Tys, FastMathFlags Flags); - - IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, - ArrayRef<Type *> Tys, FastMathFlags Flags, - unsigned ScalarCost); - - IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, - ArrayRef<Type *> Tys, FastMathFlags Flags, - unsigned ScalarCost, - const IntrinsicInst *I); - - IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, - ArrayRef<Type *> Tys); + IntrinsicCostAttributes( + Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, + FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, + InstructionCost ScalarCost = InstructionCost::getInvalid()); IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args); + IntrinsicCostAttributes( + Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args, + ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(), + const IntrinsicInst *I = nullptr, + InstructionCost ScalarCost = InstructionCost::getInvalid()); + Intrinsic::ID getID() const { return IID; } const IntrinsicInst *getInst() const { return II; } Type *getReturnType() const { return RetTy; } - ElementCount getVectorFactor() const { return VF; } FastMathFlags getFlags() const { return FMF; } - unsigned getScalarizationCost() const { return ScalarizationCost; } + InstructionCost getScalarizationCost() const { return ScalarizationCost; } const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } @@ -165,9 +156,7 @@ public: return Arguments.empty(); } - bool skipScalarizationCost() const { - return ScalarizationCost != std::numeric_limits<unsigned>::max(); - } + bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } }; class TargetTransformInfo; @@ -247,8 +236,6 @@ public: Cost = getUserCost(I, kind); break; } - if (Cost == -1) - Cost.setInvalid(); return Cost; } @@ -277,9 +264,10 @@ public: }; /// Estimate the cost of a GEP operation when lowered. - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TargetCostKind CostKind = TCK_SizeAndLatency) const; + InstructionCost + getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TargetCostKind CostKind = TCK_SizeAndLatency) const; /// \returns A value by which our inlining threshold should be multiplied. /// This is primarily used to bump up the inlining threshold wholesale on @@ -306,7 +294,7 @@ public: /// \return the expected cost of a memcpy, which could e.g. depend on the /// source/destination type and alignment and the number of bytes copied. - int getMemcpyCost(const Instruction *I) const; + InstructionCost getMemcpyCost(const Instruction *I) const; /// \return The estimated number of case clusters when lowering \p 'SI'. 
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump @@ -329,16 +317,20 @@ public: /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - int getUserCost(const User *U, ArrayRef<const Value *> Operands, - TargetCostKind CostKind) const; + InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, + TargetCostKind CostKind) const; /// This is a helper function which calls the two-argument getUserCost /// with \p Operands which are the current operands U has. - int getUserCost(const User *U, TargetCostKind CostKind) const { + InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const { SmallVector<const Value *, 4> Operands(U->operand_values()); return getUserCost(U, Operands, CostKind); } + /// If a branch or a select condition is skewed in one direction by more than + /// this factor, it is very likely to be predicted correctly. + BranchProbability getPredictableBranchThreshold() const; + /// Return true if branch divergence exists. /// /// Branch divergence has a significantly negative impact on GPU performance @@ -638,13 +630,15 @@ public: DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const; - /// \return True is LSR should make efforts to create/preserve post-inc - /// addressing mode expressions. - bool shouldFavorPostInc() const; + enum AddressingModeKind { + AMK_PreIndexed, + AMK_PostIndexed, + AMK_None + }; - /// Return true if LSR should make efforts to generate indexed addressing - /// modes that operate across loop iterations. - bool shouldFavorBackedgeIndex(const Loop *L) const; + /// Return the preferred addressing mode LSR should make efforts to generate. + AddressingModeKind getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const; /// Return true if the target supports masked store. bool isLegalMaskedStore(Type *DataType, Align Alignment) const; @@ -689,9 +683,10 @@ public: /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. /// TODO: Handle pre/postinc as well. - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale, - unsigned AddrSpace = 0) const; + InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale, + unsigned AddrSpace = 0) const; /// Return true if the loop strength reduce pass should make /// Instruction* based TTI queries to isLegalAddressingMode(). This is @@ -714,7 +709,7 @@ public: bool isTypeLegal(Type *Ty) const; /// Returns the estimated number of registers required to represent \p Ty. - unsigned getRegUsageForType(Type *Ty) const; + InstructionCost getRegUsageForType(Type *Ty) const; /// Return true if switches should be turned into lookup tables for the /// target. @@ -724,6 +719,9 @@ public: /// containing this constant value for the target. bool shouldBuildLookupTablesForConstant(Constant *C) const; + /// Return true if lookup tables should be turned into relative lookup tables. + bool shouldBuildRelLookupTables() const; + /// Return true if the input function which is cold at all call sites, /// should use coldcc calling convention. bool useColdCCForColdCall(Function &F) const; @@ -731,14 +729,15 @@ public: /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or /// extracted from vectors. 
-  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
-                                    bool Insert, bool Extract) const;
+  InstructionCost getScalarizationOverhead(VectorType *Ty,
+                                           const APInt &DemandedElts,
+                                           bool Insert, bool Extract) const;
 
   /// Estimate the overhead of scalarizing an instruction's unique
-  /// non-constant operands. The types of the arguments are ordinarily
-  /// scalar, in which case the costs are multiplied with VF.
-  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
-                                            unsigned VF) const;
+  /// non-constant operands. The (potentially vector) types to use for each
+  /// argument are passed via Tys.
+  InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                                   ArrayRef<Type *> Tys) const;
 
   /// If target has efficient vector element load/store instructions, it can
   /// return true here so that insertion/extraction costs are not added to
@@ -798,7 +797,7 @@ public:
   /// Determine if the target supports unaligned memory accesses.
   bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                       unsigned AddressSpace = 0,
-                                      unsigned Alignment = 1,
+                                      Align Alignment = Align(1),
                                       bool *Fast = nullptr) const;
 
   /// Return hardware support for population count.
@@ -815,20 +814,23 @@ public:
   /// Return the expected cost of supporting the floating point operation
   /// of the specified type.
-  int getFPOpCost(Type *Ty) const;
+  InstructionCost getFPOpCost(Type *Ty) const;
 
   /// Return the expected cost of materializing for the given integer
   /// immediate of the specified type.
-  int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;
+  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
+                                TargetCostKind CostKind) const;
 
   /// Return the expected cost of materialization for the given integer
   /// immediate of the specified type for a given instruction. The cost can be
   /// zero if the immediate can be folded into the specified instruction.
-  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
-                        TargetCostKind CostKind,
-                        Instruction *Inst = nullptr) const;
-  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty, TargetCostKind CostKind) const;
+  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
+                                    const APInt &Imm, Type *Ty,
+                                    TargetCostKind CostKind,
+                                    Instruction *Inst = nullptr) const;
+  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+                                      const APInt &Imm, Type *Ty,
+                                      TargetCostKind CostKind) const;
 
   /// Return the expected cost for the given integer when optimising
   /// for size. This is different than the other integer immediate cost
@@ -837,8 +839,8 @@ public:
   /// with another such as Thumb. This return value is used as a penalty when
   /// the total cost for a constant is calculated (the bigger the cost, the
   /// more beneficial constant hoisting is).
-  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
-                            Type *Ty) const;
+  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
+                                        const APInt &Imm, Type *Ty) const;
   /// @}
 
   /// \name Vector Target Information
   /// @{
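Many interfaces in this file switch from plain int (with -1 or UINT_MAX sentinels) to InstructionCost, which carries an explicit invalid state. A short, self-contained sketch of its semantics:

#include "llvm/Support/InstructionCost.h"
#include <cassert>

using namespace llvm;

static void instructionCostBasics() {
  InstructionCost C = InstructionCost::getInvalid();
  assert(!C.isValid() && "callers test isValid() instead of comparing to -1");
  C = 4;  // implicit construction from an integer cost
  C += 2; // arithmetic is defined on valid costs
  assert(C.isValid() && *C.getValue() == 6);
}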
@@ -856,44 +858,13 @@ public:
     SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
     SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                          ///< with any shuffle mask.
-    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
+    SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
                          ///< shuffle mask.
+    SK_Splice            ///< Concatenates elements from the first input vector
+                         ///< with elements of the second input vector, returning
+                         ///< a vector of the same type as the input vectors.
   };
 
-  /// Kind of the reduction data.
-  enum ReductionKind {
-    RK_None,           /// Not a reduction.
-    RK_Arithmetic,     /// Binary reduction data.
-    RK_MinMax,         /// Min/max reduction data.
-    RK_UnsignedMinMax, /// Unsigned min/max reduction data.
-  };
-
-  /// Contains opcode + LHS/RHS parts of the reduction operations.
-  struct ReductionData {
-    ReductionData() = delete;
-    ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
-        : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
-      assert(Kind != RK_None && "expected binary or min/max reduction only.");
-    }
-    unsigned Opcode = 0;
-    Value *LHS = nullptr;
-    Value *RHS = nullptr;
-    ReductionKind Kind = RK_None;
-    bool hasSameData(ReductionData &RD) const {
-      return Kind == RD.Kind && Opcode == RD.Opcode;
-    }
-  };
-
-  static ReductionKind matchPairwiseReduction(
-      const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
-
-  static ReductionKind matchVectorSplittingReduction(
-      const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
-
-  static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot,
-                                            unsigned &Opcode, VectorType *&Ty,
-                                            bool &IsPairwise);
-
   /// Additional information about an operand's possible values.
   enum OperandValueKind {
     OK_AnyValue, // Operand can have any value.
@@ -924,8 +895,10 @@ public:
   /// \return the target-provided register class name
   const char *getRegisterClassName(unsigned ClassID) const;
 
+  enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
+
   /// \return The width of the largest scalar or vector register type.
-  unsigned getRegisterBitWidth(bool Vector) const;
+  TypeSize getRegisterBitWidth(RegisterKind K) const;
 
   /// \return The width of the smallest vector register type.
   unsigned getMinVectorRegisterBitWidth() const;
@@ -940,12 +913,13 @@ public:
   /// creating vectors that span multiple vector registers.
   /// If false, the vectorization factor will be chosen based on the
   /// size of the widest element type.
-  bool shouldMaximizeVectorBandwidth(bool OptSize) const;
+  bool shouldMaximizeVectorBandwidth() const;
 
   /// \return The minimum vectorization factor for types of given element
   /// bit width, or 0 if there is no minimum VF. The returned value only
   /// applies when shouldMaximizeVectorBandwidth returns true.
-  unsigned getMinimumVF(unsigned ElemWidth) const;
+  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
+  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
 
   /// \return The maximum vectorization factor for types of given element
   /// bit width and opcode, or 0 if there is no maximum VF.
@@ -1036,7 +1010,7 @@ public:
   /// cases or optimizations based on those values.
   /// \p CxtI is the optional original context instruction, if one exists, to
   /// provide even more information.
-  int getArithmeticInstrCost(
+  InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       OperandValueKind Opd1Info = OK_AnyValue,
@@ -1047,12 +1021,14 @@ public:
       const Instruction *CxtI = nullptr) const;
 
   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
+  /// The exact mask may be passed as Mask, or else the array will be empty.
/// The index and subtype parameters are used by the subvector insertion and /// extraction shuffle kinds to show the insert/extract point and the type of /// the subvector being inserted/extracted. /// NOTE: For subvector extractions Tp represents the source type. - int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0, - VectorType *SubTp = nullptr) const; + InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, + ArrayRef<int> Mask = None, int Index = 0, + VectorType *SubTp = nullptr) const; /// Represents a hint about the context in which a cast is used. /// @@ -1093,44 +1069,50 @@ public: /// \return The expected cost of cast instructions, such as bitcast, trunc, /// zext, etc. If there is an existing instruction that holds Opcode, it /// may be passed in the 'I' parameter. - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, - const Instruction *I = nullptr) const; + InstructionCost + getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, + const Instruction *I = nullptr) const; /// \return The expected cost of a sign- or zero-extended vector extract. Use /// -1 to indicate that there is no information about the index value. - int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, - unsigned Index = -1) const; + InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, + unsigned Index = -1) const; /// \return The expected cost of control-flow related instructions such as - /// Phi, Ret, Br. - int getCFInstrCost(unsigned Opcode, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; + /// Phi, Ret, Br, Switch. + InstructionCost + getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, + const Instruction *I = nullptr) const; /// \returns The expected cost of compare and select instructions. If there /// is an existing instruction that holds Opcode, it may be passed in the /// 'I' parameter. The \p VecPred parameter can be used to indicate the select /// is using a compare with the specified predicate as condition. When vector /// types are passed, \p VecPred must be used for all lanes. - int getCmpSelInstrCost( - unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, - CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, - const Instruction *I = nullptr) const; + InstructionCost + getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, + CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. - int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index = -1) const; /// \return The cost of Load and Store instructions. 
- int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, - const Instruction *I = nullptr) const; + InstructionCost + getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \return The cost of masked Load and Store instructions. - int getMaskedMemoryOpCost( + InstructionCost getMaskedMemoryOpCost( unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; @@ -1143,7 +1125,7 @@ public: /// \p Alignment - alignment of single element /// \p I - the optional original context instruction, if one exists, e.g. the /// load/store to transform or the call to the gather/scatter intrinsic - int getGatherScatterOpCost( + InstructionCost getGatherScatterOpCost( unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr) const; @@ -1158,32 +1140,49 @@ public: /// \p AddressSpace is address space of the pointer. /// \p UseMaskForCond indicates if the memory access is predicated. /// \p UseMaskForGaps indicates if gaps should be masked. - int getInterleavedMemoryOpCost( + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, bool UseMaskForCond = false, bool UseMaskForGaps = false) const; - /// Calculate the cost of performing a vector reduction. + /// A helper function to determine the type of reduction algorithm used + /// for a given \p Opcode and set of FastMathFlags \p FMF. + static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) { + return FMF != None && !(*FMF).allowReassoc(); + } + + /// Calculate the cost of vector reduction intrinsics. /// /// This is the cost of reducing the vector value of type \p Ty to a scalar - /// value using the operation denoted by \p Opcode. The form of the reduction - /// can either be a pairwise reduction or a reduction that splits the vector - /// at every reduction level. + /// value using the operation denoted by \p Opcode. The FastMathFlags + /// parameter \p FMF indicates what type of reduction we are performing: + /// 1. Tree-wise. This is the typical 'fast' reduction performed that + /// involves successively splitting a vector into half and doing the + /// operation on the pair of halves until you have a scalar value. For + /// example: + /// (v0, v1, v2, v3) + /// ((v0+v2), (v1+v3), undef, undef) + /// ((v0+v2+v1+v3), undef, undef, undef) + /// This is the default behaviour for integer operations, whereas for + /// floating point we only do this if \p FMF indicates that + /// reassociation is allowed. + /// 2. Ordered. For a vector with N elements this involves performing N + /// operations in lane order, starting with an initial scalar value, i.e. + /// result = InitVal + v0 + /// result = result + v1 + /// result = result + v2 + /// result = result + v3 + /// This is only the case for FP operations and when reassociation is not + /// allowed. 
/// - /// Pairwise: - /// (v0, v1, v2, v3) - /// ((v0+v1), (v2+v3), undef, undef) - /// Split: - /// (v0, v1, v2, v3) - /// ((v0+v2), (v1+v3), undef, undef) - int getArithmeticReductionCost( - unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; - - int getMinMaxReductionCost( - VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; + InstructionCost getArithmeticReductionCost( + unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; + + InstructionCost getMinMaxReductionCost( + VectorType *Ty, VectorType *CondTy, bool IsUnsigned, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; /// Calculate the cost of an extended reduction pattern, similar to /// getArithmeticReductionCost of an Add reduction with an extension and @@ -1198,12 +1197,13 @@ public: /// \returns The cost of Intrinsic instructions. Analyses the real arguments. /// Three cases are handled: 1. scalar instruction 2. vector instruction /// 3. scalar instruction which is to be vectorized. - int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) const; + InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) const; /// \returns The cost of Call instructions. - int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; + InstructionCost getCallInstrCost( + Function *F, Type *RetTy, ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; /// \returns The number of pieces into which the provided type must be /// split during legalization. Zero is returned when the answer is unknown. @@ -1216,15 +1216,16 @@ public: /// The 'SE' parameter holds pointer for the scalar evolution object which /// is used in order to get the Ptr step value in case of constant stride. /// The 'Ptr' parameter holds SCEV of the access pointer. - int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr, - const SCEV *Ptr = nullptr) const; + InstructionCost getAddressComputationCost(Type *Ty, + ScalarEvolution *SE = nullptr, + const SCEV *Ptr = nullptr) const; /// \returns The cost, if any, of keeping values of the given types alive /// over a callsite. /// /// Some types may require the use of register classes that do not have /// any callee-saved registers, so would require a spill and fill. - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; + InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; /// \returns True if the intrinsic is a supported memory intrinsic. Info /// will contain additional information - whether the intrinsic may write @@ -1305,6 +1306,13 @@ public: bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; + /// \returns True if it is legal to vectorize the given reduction kind. + bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, + ElementCount VF) const; + + /// \returns True if the given type is supported for scalable vectors + bool isElementTypeLegalForScalableVector(Type *Ty) const; + /// \returns The new vector factor value if the target doesn't support \p /// SizeInBytes loads or has a better vector factor. 
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, @@ -1325,11 +1333,6 @@ public: bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present. }; - /// \returns True if the target wants to handle the given reduction idiom in - /// the intrinsics form instead of the shuffle form. - bool useReductionIntrinsic(unsigned Opcode, Type *Ty, - ReductionFlags Flags) const; - /// \returns True if the target prefers reductions in loop. bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const; @@ -1366,6 +1369,38 @@ public: /// Intrinsics") Use of %evl is discouraged when that is not the case. bool hasActiveVectorLength() const; + struct VPLegalization { + enum VPTransform { + // keep the predicating parameter + Legal = 0, + // where legal, discard the predicate parameter + Discard = 1, + // transform into something else that is also predicating + Convert = 2 + }; + + // How to transform the EVL parameter. + // Legal: keep the EVL parameter as it is. + // Discard: Ignore the EVL parameter where it is safe to do so. + // Convert: Fold the EVL into the mask parameter. + VPTransform EVLParamStrategy; + + // How to transform the operator. + // Legal: The target supports this operator. + // Convert: Convert this to a non-VP operation. + // The 'Discard' strategy is invalid. + VPTransform OpStrategy; + + bool shouldDoNothing() const { + return (EVLParamStrategy == Legal) && (OpStrategy == Legal); + } + VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy) + : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {} + }; + + /// \returns How the target needs this vector-predicated operation to be + /// transformed. + VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; /// @} /// @} @@ -1373,11 +1408,11 @@ public: private: /// Estimate the latency of specified instruction. /// Returns 1 as the default value. - int getInstructionLatency(const Instruction *I) const; + InstructionCost getInstructionLatency(const Instruction *I) const; /// Returns the expected throughput cost of the instruction. /// Returns -1 if the cost is unknown. - int getInstructionThroughput(const Instruction *I) const; + InstructionCost getInstructionThroughput(const Instruction *I) const; /// The abstract base class used to type erase specific TTI /// implementations. 
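A sketch of consuming the VPLegalization query above, assuming TTI and a vector-predicated intrinsic call VPI are supplied by the caller:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

static bool needsVPExpansion(const TargetTransformInfo &TTI,
                             const VPIntrinsic &VPI) {
  TargetTransformInfo::VPLegalization VPLegal =
      TTI.getVPLegalizationStrategy(VPI);
  if (VPLegal.shouldDoNothing())
    return false; // both the %evl parameter and the opcode are legal as-is
  // Otherwise %evl must be discarded or folded into the mask, and/or the
  // operation rewritten to a non-VP equivalent.
  return true;
}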
@@ -1394,19 +1429,21 @@ class TargetTransformInfo::Concept { public: virtual ~Concept() = 0; virtual const DataLayout &getDataLayout() const = 0; - virtual int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind) = 0; virtual unsigned getInliningThresholdMultiplier() = 0; virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; virtual int getInlinerVectorBonusPercent() = 0; - virtual int getMemcpyCost(const Instruction *I) = 0; + virtual InstructionCost getMemcpyCost(const Instruction *I) = 0; virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) = 0; - virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands, - TargetCostKind CostKind) = 0; + virtual InstructionCost getUserCost(const User *U, + ArrayRef<const Value *> Operands, + TargetCostKind CostKind) = 0; + virtual BranchProbability getPredictableBranchThreshold() = 0; virtual bool hasBranchDivergence() = 0; virtual bool useGPUDivergenceAnalysis() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; @@ -1458,8 +1495,8 @@ public: virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) = 0; - virtual bool shouldFavorPostInc() const = 0; - virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; + virtual AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; @@ -1471,24 +1508,27 @@ public: virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; virtual bool prefersVectorizedAddressing() = 0; - virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale, unsigned AddrSpace) = 0; + virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, + bool HasBaseReg, int64_t Scale, + unsigned AddrSpace) = 0; virtual bool LSRWithInstrQueries() = 0; virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool useAA() = 0; virtual bool isTypeLegal(Type *Ty) = 0; - virtual unsigned getRegUsageForType(Type *Ty) = 0; + virtual InstructionCost getRegUsageForType(Type *Ty) = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; + virtual bool shouldBuildRelLookupTables() = 0; virtual bool useColdCCForColdCall(Function &F) = 0; - virtual unsigned getScalarizationOverhead(VectorType *Ty, - const APInt &DemandedElts, - bool Insert, bool Extract) = 0; - virtual unsigned + virtual InstructionCost getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, + bool Extract) = 0; + virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, - unsigned VF) = 0; + ArrayRef<Type *> Tys) = 0; virtual bool supportsEfficientVectorElementLoadStore() = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; virtual MemCmpExpansionOptions @@ -1499,31 
+1539,33 @@ public: virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, - unsigned Alignment, + Align Alignment, bool *Fast) = 0; virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; - virtual int getFPOpCost(Type *Ty) = 0; - virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, - const APInt &Imm, Type *Ty) = 0; - virtual int getIntImmCost(const APInt &Imm, Type *Ty, - TargetCostKind CostKind) = 0; - virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind, - Instruction *Inst = nullptr) = 0; - virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty, - TargetCostKind CostKind) = 0; + virtual InstructionCost getFPOpCost(Type *Ty) = 0; + virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty) = 0; + virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, + TargetCostKind CostKind) = 0; + virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty, + TargetCostKind CostKind, + Instruction *Inst = nullptr) = 0; + virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TargetCostKind CostKind) = 0; virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0; virtual const char *getRegisterClassName(unsigned ClassID) const = 0; - virtual unsigned getRegisterBitWidth(bool Vector) const = 0; - virtual unsigned getMinVectorRegisterBitWidth() = 0; + virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; + virtual unsigned getMinVectorRegisterBitWidth() const = 0; virtual Optional<unsigned> getMaxVScale() const = 0; - virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; - virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; + virtual bool shouldMaximizeVectorBandwidth() const = 0; + virtual ElementCount getMinimumVF(unsigned ElemWidth, + bool IsScalable) const = 0; virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; @@ -1556,64 +1598,71 @@ public: virtual bool enableWritePrefetching() const = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; - virtual unsigned getArithmeticInstrCost( - unsigned Opcode, Type *Ty, - TTI::TargetCostKind CostKind, - OperandValueKind Opd1Info, - OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, - const Instruction *CxtI = nullptr) = 0; - virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index, - VectorType *SubTp) = 0; - virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - CastContextHint CCH, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; - virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst, - VectorType *VecTy, unsigned Index) = 0; - virtual int getCFInstrCost(unsigned Opcode, - TTI::TargetCostKind CostKind) = 0; - virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; - virtual int getVectorInstrCost(unsigned Opcode, Type *Val, - 
unsigned Index) = 0; - virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; - virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind) = 0; - virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - const Value *Ptr, bool VariableMask, - Align Alignment, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) = 0; + virtual InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + OperandValueKind Opd1Info, OperandValueKind Opd2Info, + OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0; + virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, + ArrayRef<int> Mask, int Index, + VectorType *SubTp) = 0; + virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src, CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I) = 0; + virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, + unsigned Index) = 0; + virtual InstructionCost getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) = 0; + virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) = 0; + virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) = 0; + virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) = 0; + virtual InstructionCost + getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, + bool VariableMask, Align Alignment, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) = 0; - virtual int getInterleavedMemoryOpCost( + virtual InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; - virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) = 0; - virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, bool IsUnsigned, - TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, + Optional<FastMathFlags> FMF, + TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, + TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getExtendedAddReductionCost( bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; - virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) = 0; - virtual int getCallInstrCost(Function *F, Type *RetTy, - ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) = 0; virtual unsigned getNumberOfParts(Type *Tp) = 0; - virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, - const SCEV *Ptr) = 0; - virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; + virtual InstructionCost + getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0; + virtual InstructionCost + getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) = 0; virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; @@ -1644,14 +1693,15 @@ public: virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const = 0; + virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, + ElementCount VF) const = 0; + virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0; virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const = 0; virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const = 0; - virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, - ReductionFlags) const = 0; virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const = 0; virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, @@ -1660,7 +1710,9 @@ public: virtual unsigned getGISelRematGlobalCost() const = 0; virtual bool supportsScalableVectors() const = 0; virtual bool hasActiveVectorLength() const = 0; - virtual int getInstructionLatency(const Instruction *I) = 0; + virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; + virtual VPLegalization + getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; }; template <typename T> @@ -1675,9 +1727,10 @@ public: return Impl.getDataLayout(); } - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - enum TargetTransformInfo::TargetCostKind CostKind) override { + InstructionCost + getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + enum TargetTransformInfo::TargetCostKind CostKind) override { return Impl.getGEPCost(PointeeType, Ptr, Operands); } unsigned getInliningThresholdMultiplier() override { @@ -1689,13 +1742,16 @@ public: int getInlinerVectorBonusPercent() override { return Impl.getInlinerVectorBonusPercent(); } - int getMemcpyCost(const Instruction *I) override { + InstructionCost getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } - int getUserCost(const User *U, ArrayRef<const Value *> Operands, - TargetCostKind CostKind) override { + InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, + TargetCostKind CostKind) override { return Impl.getUserCost(U, Operands, CostKind); } + BranchProbability getPredictableBranchThreshold() override { + return Impl.getPredictableBranchThreshold(); + } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } bool useGPUDivergenceAnalysis() override { return Impl.useGPUDivergenceAnalysis(); @@ -1801,9 +1857,10 @@ public: TargetLibraryInfo *LibInfo) override { return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } - bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); } - bool 
shouldFavorBackedgeIndex(const Loop *L) const override { - return Impl.shouldFavorBackedgeIndex(L); + AddressingModeKind + getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const override { + return Impl.getPreferredAddressingMode(L, SE); } bool isLegalMaskedStore(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedStore(DataType, Alignment); @@ -1838,9 +1895,10 @@ public: bool prefersVectorizedAddressing() override { return Impl.prefersVectorizedAddressing(); } - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) override { + InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale, + unsigned AddrSpace) override { return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); } @@ -1853,7 +1911,7 @@ public: } bool useAA() override { return Impl.useAA(); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } - unsigned getRegUsageForType(Type *Ty) override { + InstructionCost getRegUsageForType(Type *Ty) override { return Impl.getRegUsageForType(Ty); } bool shouldBuildLookupTables() override { @@ -1862,17 +1920,22 @@ public: bool shouldBuildLookupTablesForConstant(Constant *C) override { return Impl.shouldBuildLookupTablesForConstant(C); } + bool shouldBuildRelLookupTables() override { + return Impl.shouldBuildRelLookupTables(); + } bool useColdCCForColdCall(Function &F) override { return Impl.useColdCCForColdCall(F); } - unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, - bool Insert, bool Extract) override { + InstructionCost getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, bool Extract) override { return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } - unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, - unsigned VF) override { - return Impl.getOperandsScalarizationOverhead(Args, VF); + InstructionCost + getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + ArrayRef<Type *> Tys) override { + return Impl.getOperandsScalarizationOverhead(Args, Tys); } bool supportsEfficientVectorElementLoadStore() override { @@ -1896,7 +1959,7 @@ public: return Impl.isFPVectorizationPotentiallyUnsafe(); } bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, - unsigned AddressSpace, unsigned Alignment, + unsigned AddressSpace, Align Alignment, bool *Fast) override { return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, Alignment, Fast); @@ -1910,23 +1973,27 @@ public: return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); } - int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } + InstructionCost getFPOpCost(Type *Ty) override { + return Impl.getFPOpCost(Ty); + } - int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) override { + InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty) override { return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); } - int getIntImmCost(const APInt &Imm, Type *Ty, - TargetCostKind CostKind) override { + InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, + TargetCostKind CostKind) override { return Impl.getIntImmCost(Imm, Ty, CostKind); } - int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, - TargetCostKind CostKind, - Instruction *Inst = nullptr) override { + InstructionCost 
getIntImmCostInst(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty, + TargetCostKind CostKind, + Instruction *Inst = nullptr) override { return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst); } - int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind) override { + InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TargetCostKind CostKind) override { return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); } unsigned getNumberOfRegisters(unsigned ClassID) const override { @@ -1939,20 +2006,21 @@ public: const char *getRegisterClassName(unsigned ClassID) const override { return Impl.getRegisterClassName(ClassID); } - unsigned getRegisterBitWidth(bool Vector) const override { - return Impl.getRegisterBitWidth(Vector); + TypeSize getRegisterBitWidth(RegisterKind K) const override { + return Impl.getRegisterBitWidth(K); } - unsigned getMinVectorRegisterBitWidth() override { + unsigned getMinVectorRegisterBitWidth() const override { return Impl.getMinVectorRegisterBitWidth(); } Optional<unsigned> getMaxVScale() const override { return Impl.getMaxVScale(); } - bool shouldMaximizeVectorBandwidth(bool OptSize) const override { - return Impl.shouldMaximizeVectorBandwidth(OptSize); + bool shouldMaximizeVectorBandwidth() const override { + return Impl.shouldMaximizeVectorBandwidth(); } - unsigned getMinimumVF(unsigned ElemWidth) const override { - return Impl.getMinimumVF(ElemWidth); + ElementCount getMinimumVF(unsigned ElemWidth, + bool IsScalable) const override { + return Impl.getMinimumVF(ElemWidth, IsScalable); } unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { return Impl.getMaximumVF(ElemWidth, Opcode); @@ -2008,82 +2076,84 @@ public: BlockFrequencyInfo *BFI) override { return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - TTI::TargetCostKind CostKind, - OperandValueKind Opd1Info, - OperandValueKind Opd2Info, - OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args, - const Instruction *CxtI = nullptr) override { + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + OperandValueKind Opd1Info, OperandValueKind Opd2Info, + OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args, + const Instruction *CxtI = nullptr) override { return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); } - int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index, - VectorType *SubTp) override { - return Impl.getShuffleCost(Kind, Tp, Index, SubTp); + InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, + ArrayRef<int> Mask, int Index, + VectorType *SubTp) override { + return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp); } - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - CastContextHint CCH, TTI::TargetCostKind CostKind, - const Instruction *I) override { + InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I) override { return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); } - int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, - unsigned Index) override { + InstructionCost getExtractWithExtendCost(unsigned 
Opcode, Type *Dst, + VectorType *VecTy, + unsigned Index) override { return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); } - int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override { - return Impl.getCFInstrCost(Opcode, CostKind); + InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) override { + return Impl.getCFInstrCost(Opcode, CostKind, I); } - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) override { + InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) override { return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } - int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } - int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, TTI::TargetCostKind CostKind, - const Instruction *I) override { + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) override { return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, I); } - int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind) override { + InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind) override { return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); } - int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, - bool VariableMask, Align Alignment, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) override { + InstructionCost + getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, + bool VariableMask, Align Alignment, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) override { return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I); } - int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, - ArrayRef<unsigned> Indices, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - bool UseMaskForCond, - bool UseMaskForGaps) override { + InstructionCost getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, + bool UseMaskForCond, bool UseMaskForGaps) override { return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } - int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) override { - return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, - CostKind); - } - int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, bool IsUnsigned, + InstructionCost + getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, + Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) override { - return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, 
IsUnsigned, - CostKind); + return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); + } + InstructionCost + getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, + TTI::TargetCostKind CostKind) override { + return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind); } InstructionCost getExtendedAddReductionCost( bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, @@ -2091,23 +2161,23 @@ public: return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty, CostKind); } - int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) override { + InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) override { return Impl.getIntrinsicInstrCost(ICA, CostKind); } - int getCallInstrCost(Function *F, Type *RetTy, - ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind) override { + InstructionCost getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) override { return Impl.getCallInstrCost(F, RetTy, Tys, CostKind); } unsigned getNumberOfParts(Type *Tp) override { return Impl.getNumberOfParts(Tp); } - int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, - const SCEV *Ptr) override { + InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, + const SCEV *Ptr) override { return Impl.getAddressComputationCost(Ty, SE, Ptr); } - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { + InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { return Impl.getCostOfKeepingLiveOverCall(Tys); } bool getTgtMemIntrinsic(IntrinsicInst *Inst, @@ -2170,6 +2240,13 @@ public: return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, AddrSpace); } + bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, + ElementCount VF) const override { + return Impl.isLegalToVectorizeReduction(RdxDesc, VF); + } + bool isElementTypeLegalForScalableVector(Type *Ty) const override { + return Impl.isElementTypeLegalForScalableVector(Ty); + } unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override { @@ -2180,10 +2257,6 @@ public: VectorType *VecTy) const override { return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } - bool useReductionIntrinsic(unsigned Opcode, Type *Ty, - ReductionFlags Flags) const override { - return Impl.useReductionIntrinsic(Opcode, Ty, Flags); - } bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const override { return Impl.preferInLoopReduction(Opcode, Ty, Flags); @@ -2208,9 +2281,14 @@ public: return Impl.hasActiveVectorLength(); } - int getInstructionLatency(const Instruction *I) override { + InstructionCost getInstructionLatency(const Instruction *I) override { return Impl.getInstructionLatency(I); } + + VPLegalization + getVPLegalizationStrategy(const VPIntrinsic &PI) const override { + return Impl.getVPLegalizationStrategy(PI); + } }; template <typename T> diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 7e31cb365a87..c07a33c9f155 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -22,8 +22,11 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" 
+using namespace llvm::PatternMatch; + namespace llvm { /// Base class for use as a mix-in that aids implementing @@ -44,9 +47,10 @@ public: const DataLayout &getDataLayout() const { return DL; } - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const { + InstructionCost + getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -71,10 +75,20 @@ public: int getInlinerVectorBonusPercent() const { return 150; } - unsigned getMemcpyCost(const Instruction *I) const { + InstructionCost getMemcpyCost(const Instruction *I) const { return TTI::TCC_Expensive; } + // Although this default value is arbitrary, it is not random. It is assumed + // that a condition that evaluates the same way by a higher percentage than + // this is best represented as control flow. Therefore, the default value N + // should be set such that the win from N% correct executions is greater than + // the loss from (100 - N)% mispredicted executions for the majority of + // intended targets. + BranchProbability getPredictableBranchThreshold() const { + return BranchProbability(99, 100); + } + bool hasBranchDivergence() const { return false; } bool useGPUDivergenceAnalysis() const { return false; } @@ -209,9 +223,10 @@ public: return false; } - bool shouldFavorPostInc() const { return false; } - - bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } + TTI::AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { + return TTI::AMK_None; + } bool isLegalMaskedStore(Type *DataType, Align Alignment) const { return false; @@ -255,9 +270,10 @@ public: bool prefersVectorizedAddressing() const { return true; } - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) const { + InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale, + unsigned AddrSpace) const { // Guess that all legal addressing modes are free.
if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace)) @@ -275,20 +291,24 @@ public: bool isTypeLegal(Type *Ty) const { return false; } - unsigned getRegUsageForType(Type *Ty) const { return 1; } + InstructionCost getRegUsageForType(Type *Ty) const { return 1; } bool shouldBuildLookupTables() const { return true; } + bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; } + bool shouldBuildRelLookupTables() const { return false; } + bool useColdCCForColdCall(Function &F) const { return false; } - unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, - bool Insert, bool Extract) const { + InstructionCost getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, bool Extract) const { return 0; } - unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, - unsigned VF) const { + InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + ArrayRef<Type *> Tys) const { return 0; } @@ -310,7 +330,7 @@ public: bool isFPVectorizationPotentiallyUnsafe() const { return false; } bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, - unsigned AddressSpace, unsigned Alignment, + unsigned AddressSpace, Align Alignment, bool *Fast) const { return false; } @@ -323,29 +343,30 @@ public: bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; } - unsigned getFPOpCost(Type *Ty) const { + InstructionCost getFPOpCost(Type *Ty) const { return TargetTransformInfo::TCC_Basic; } - int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) const { + InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) const { return 0; } - unsigned getIntImmCost(const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) const { + InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { return TTI::TCC_Basic; } - unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind, - Instruction *Inst = nullptr) const { + InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr) const { return TTI::TCC_Free; } - unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) const { + InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { return TTI::TCC_Free; } @@ -366,15 +387,19 @@ public: } } - unsigned getRegisterBitWidth(bool Vector) const { return 32; } + TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { + return TypeSize::getFixed(32); + } unsigned getMinVectorRegisterBitWidth() const { return 128; } Optional<unsigned> getMaxVScale() const { return None; } - bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } + bool shouldMaximizeVectorBandwidth() const { return false; } - unsigned getMinimumVF(unsigned ElemWidth) const { return 0; } + ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const { + return ElementCount::get(0, IsScalable); + } unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; } @@ -420,14 +445,12 @@ public: unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; } - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - TTI::TargetCostKind CostKind, - 
TTI::OperandValueKind Opd1Info, - TTI::OperandValueKind Opd2Info, - TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args, - const Instruction *CxtI = nullptr) const { + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, + const Instruction *CxtI = nullptr) const { // FIXME: A number of transformation tests seem to require these values // which seems a little odd for how arbitrary they are. switch (Opcode) { default: break; @@ -445,15 +468,16 @@ public: return 1; } - unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index, - VectorType *SubTp) const { + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, + ArrayRef<int> Mask, int Index, + VectorType *SubTp) const { return 1; } - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, - TTI::TargetCostKind CostKind, - const Instruction *I) const { + InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I) const { switch (Opcode) { default: break; @@ -488,12 +512,14 @@ public: return 1; } - unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, - VectorType *VecTy, unsigned Index) const { + InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, + unsigned Index) const { return 1; } - unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const { + InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) const { // A phi would be free, unless we're costing the throughput because it // will require a register.
if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput) @@ -501,34 +527,36 @@ public: return 1; } - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) const { + InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) const { return 1; } - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { return 1; } - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, TTI::TargetCostKind CostKind, - const Instruction *I) const { + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) const { return 1; } - unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind) const { + InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind) const { return 1; } - unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - const Value *Ptr, bool VariableMask, - Align Alignment, TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) const { + InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + const Value *Ptr, bool VariableMask, + Align Alignment, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) const { return 1; } @@ -539,8 +567,8 @@ public: return 1; } - unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) const { + InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) const { switch (ICA.getID()) { default: break; @@ -548,6 +576,7 @@ public: case Intrinsic::assume: case Intrinsic::sideeffect: case Intrinsic::pseudoprobe: + case Intrinsic::arithmetic_fence: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: @@ -579,25 +608,27 @@ public: return 1; } - unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind) const { + InstructionCost getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) const { return 1; } unsigned getNumberOfParts(Type *Tp) const { return 0; } - unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, - const SCEV *) const { + InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, + const SCEV *) const { return 0; } - unsigned getArithmeticReductionCost(unsigned, VectorType *, bool, - TTI::TargetCostKind) const { + InstructionCost getArithmeticReductionCost(unsigned, VectorType *, + Optional<FastMathFlags> FMF, + TTI::TargetCostKind) const { return 1; } - unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool, - TTI::TargetCostKind) const { + InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool, + TTI::TargetCostKind) const { return 1; } @@ -607,7 +638,7 @@ public: return 1; } - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { + InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { return 0; } @@ -686,6 +717,13 @@ public: return true; } + bool isLegalToVectorizeReduction(const 
RecurrenceDescriptor &RdxDesc, + ElementCount VF) const { + return true; + } + + bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; } + unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const { @@ -698,11 +736,6 @@ public: return VF; } - bool useReductionIntrinsic(unsigned Opcode, Type *Ty, - TTI::ReductionFlags Flags) const { - return false; - } - bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { return false; @@ -721,6 +754,13 @@ public: bool hasActiveVectorLength() const { return false; } + TargetTransformInfo::VPLegalization + getVPLegalizationStrategy(const VPIntrinsic &PI) const { + return TargetTransformInfo::VPLegalization( + /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard, + /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert); + } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. @@ -816,13 +856,13 @@ protected: public: using BaseT::getGEPCost; - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { + InstructionCost + getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { assert(PointeeType && Ptr && "can't get GEPCost of nullptr"); - // TODO: will remove this when pointers have an opaque type. - assert(Ptr->getType()->getScalarType()->getPointerElementType() == - PointeeType && + assert(cast<PointerType>(Ptr->getType()->getScalarType()) + ->isOpaqueOrPointeeTypeMatches(PointeeType) && "explicit pointee type doesn't match operand's pointee type"); auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); bool HasBaseReg = (BaseGV == nullptr); @@ -880,8 +920,8 @@ public: return TTI::TCC_Basic; } - int getUserCost(const User *U, ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind) { + InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind) { auto *TargetTTI = static_cast<T *>(this); // Handle non-intrinsic calls, invokes, and callbr. // FIXME: Unlikely to be true for anything but CodeSize. @@ -914,7 +954,8 @@ public: case Instruction::Br: case Instruction::Ret: case Instruction::PHI: - return TargetTTI->getCFInstrCost(Opcode, CostKind); + case Instruction::Switch: + return TargetTTI->getCFInstrCost(Opcode, CostKind, I); case Instruction::ExtractValue: case Instruction::Freeze: return TTI::TCC_Free; @@ -987,6 +1028,23 @@ public: CostKind, I); } case Instruction::Select: { + const Value *Op0, *Op1; + if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) || + match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { + // select x, y, false --> x & y + // select x, true, y --> x | y + TTI::OperandValueProperties Op1VP = TTI::OP_None; + TTI::OperandValueProperties Op2VP = TTI::OP_None; + TTI::OperandValueKind Op1VK = TTI::getOperandInfo(Op0, Op1VP); + TTI::OperandValueKind Op2VK = TTI::getOperandInfo(Op1, Op2VP); + assert(Op0->getType()->getScalarSizeInBits() == 1 && + Op1->getType()->getScalarSizeInBits() == 1); + + SmallVector<const Value *, 2> Operands{Op0, Op1}; + return TargetTTI->getArithmeticInstrCost( + match(U, m_LogicalOr()) ? 
Instruction::Or : Instruction::And, Ty, + CostKind, Op1VK, Op2VK, Op1VP, Op2VP, Operands, I); + } Type *CondTy = U->getOperand(0)->getType(); return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy, CmpInst::BAD_ICMP_PREDICATE, @@ -1020,25 +1078,30 @@ public: int SubIndex; if (Shuffle->isExtractSubvectorMask(SubIndex)) return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, - SubIndex, VecTy); + Shuffle->getShuffleMask(), SubIndex, + VecTy); else if (Shuffle->changesLength()) return CostKind == TTI::TCK_RecipThroughput ? -1 : 1; else if (Shuffle->isIdentity()) return 0; else if (Shuffle->isReverse()) - return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, 0, nullptr); + return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); else if (Shuffle->isSelect()) - return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, 0, nullptr); + return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); else if (Shuffle->isTranspose()) - return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, 0, nullptr); + return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); else if (Shuffle->isZeroEltSplat()) - return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, 0, nullptr); + return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); else if (Shuffle->isSingleSource()) - return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, 0, - nullptr); + return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); - return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, 0, - nullptr); + return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, + Shuffle->getShuffleMask(), 0, nullptr); } case Instruction::ExtractElement: { unsigned Idx = -1; @@ -1050,26 +1113,6 @@ public: if (CI) Idx = CI->getZExtValue(); - // Try to match a reduction (a series of shufflevector and vector ops - // followed by an extractelement). - unsigned RdxOpcode; - VectorType *RdxType; - bool IsPairwise; - switch (TTI::matchVectorReduction(EEI, RdxOpcode, RdxType, IsPairwise)) { - case TTI::RK_Arithmetic: - return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType, - IsPairwise, CostKind); - case TTI::RK_MinMax: - return TargetTTI->getMinMaxReductionCost( - RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)), - IsPairwise, /*IsUnsigned=*/false, CostKind); - case TTI::RK_UnsignedMinMax: - return TargetTTI->getMinMaxReductionCost( - RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)), - IsPairwise, /*IsUnsigned=*/true, CostKind); - case TTI::RK_None: - break; - } return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(), Idx); } @@ -1078,7 +1121,7 @@ public: return TTI::TCC_Basic; } - int getInstructionLatency(const Instruction *I) { + InstructionCost getInstructionLatency(const Instruction *I) { SmallVector<const Value *, 4> Operands(I->operand_values()); if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free) return 0; diff --git a/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h b/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h index d02bcd0e335b..45ef4dbe2155 100644 --- a/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h +++ b/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h @@ -9,13 +9,13 @@ // ThinLTO. 
//===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H -#define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H +#ifndef LLVM_ANALYSIS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H +#define LLVM_ANALYSIS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include <string> +#include <memory> #include <vector> namespace llvm { @@ -109,4 +109,4 @@ enum class InlinerFunctionImportStatsOpts { } // llvm -#endif // LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H +#endif // LLVM_ANALYSIS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H diff --git a/llvm/include/llvm/Analysis/Utils/Local.h b/llvm/include/llvm/Analysis/Utils/Local.h index bd82b34165d6..031938c6f9c7 100644 --- a/llvm/include/llvm/Analysis/Utils/Local.h +++ b/llvm/include/llvm/Analysis/Utils/Local.h @@ -100,4 +100,4 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, } -#endif // LLVM_TRANSFORMS_UTILS_LOCAL_H +#endif // LLVM_ANALYSIS_UTILS_LOCAL_H diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h index ea6bc2cf19ee..47ee23e06000 100644 --- a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -12,6 +12,7 @@ #include "llvm/Config/llvm-config.h" #ifdef LLVM_HAVE_TF_API +#include "llvm/ADT/StringMap.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/JSON.h" @@ -120,56 +121,62 @@ loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName, /// The assumption is that, for an event to be logged (i.e. a set of feature /// values and a reward), the user calls the log* API for each feature exactly /// once, providing the index matching the position in the feature spec list -/// provided at construction: +/// provided at construction. The example assumes the first feature's element +/// type is float, the second is int64, and the reward is float: +/// /// event 0: -/// logTensorValue(0, ...) -/// logTensorValue(1, ...) +/// logFloatValue(0, ...) +/// logInt64Value(1, ...) /// ... -/// logReward(...) +/// logFloatReward(...) /// event 1: -/// logTensorValue(0, ...) -/// logTensorValue(1, ...) +/// logFloatValue(0, ...) +/// logInt64Value(1, ...) /// ... -/// logReward(...) +/// logFloatReward(...) /// /// At the end, call print to generate the protobuf. +/// Alternatively, don't call logReward at the end of each event, just +/// log{Float|Int32|Int64}FinalReward at the end. +class LoggerDataImpl; class Logger final { public: - /// Construct a Logger. If IncludeReward is false, then logReward shouldn't - /// be called, and the reward feature won't be printed out. + /// Construct a Logger. If IncludeReward is false, then logReward or + /// logFinalReward shouldn't be called, and the reward feature won't be + /// printed out. 
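+ /// For illustration only (a hedged sketch, not a full program: a Logger `L`,
+ /// an output stream `OS`, and the concrete values are assumed), one event
+ /// with the two features above could be logged with the typed API as:
+ /// \code
+ ///   float F = 0.5f;
+ ///   int64_t N = 42;
+ ///   L.logFloatValue(0, &F);  // feature 0 (float element type)
+ ///   L.logInt64Value(1, &N);  // feature 1 (int64 element type)
+ ///   L.logFloatReward(1.0f);  // reward for this event
+ ///   L.print(OS);             // after all events, emit the protobuf
+ /// \endcode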
Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs, - const TensorSpec &RewardSpec, bool IncludeReward) - : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec), - RawLogData(FeatureSpecs.size() + IncludeReward), - IncludeReward(IncludeReward) {} - - template <typename T> void logReward(T Value) { - assert(IncludeReward); - logTensorValue(RawLogData.size() - 1, &Value); - } + const TensorSpec &RewardSpec, bool IncludeReward); - template <typename T> void logFinalReward(T Value) { - assert(RawLogData.back().empty()); - logReward(Value); - } + ~Logger(); - template <typename T> - void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) { - const char *Start = reinterpret_cast<const char *>(Value); - const char *End = Start + sizeof(T) * Size; - RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End); - } + void logFloatReward(float Value); + void logInt32Reward(int32_t Value); + void logInt64Reward(int64_t Value); + + void logFloatFinalReward(float Value); + void logInt32FinalReward(int32_t Value); + void logInt64FinalReward(int64_t Value); + + void logFloatValue(size_t FeatureID, const float *Value); + void logInt32Value(size_t FeatureID, const int32_t *Value); + void logInt64Value(size_t FeatureID, const int64_t *Value); + + void logSpecifiedTensorValue(size_t FeatureID, const char *RawData); + + // Warning! For int32_t, the returned buffer is set up for int64_t, so the + // caller needs to cast their int32_t values piecemeal. + // FIXME: let's drop int32_t support. While it's supported by the evaluator, + // it's not supported by the tensorflow::SequenceExample proto. For small + // values, we can consider using bytes. + char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID); void print(raw_ostream &OS); private: std::vector<LoggedFeatureSpec> FeatureSpecs; TensorSpec RewardSpec; - /// RawData has one entry per feature, plus one more for the reward. - /// Each feature's values are then stored in a vector, in succession. - /// This means the ith event is stored at [*][i] - std::vector<std::vector<char>> RawLogData; const bool IncludeReward; + std::unique_ptr<LoggerDataImpl> LoggerData; }; class TFModelEvaluator final { diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index 108d08033ac3..1b32fca50697 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -17,13 +17,13 @@ // ValueLatticeElement //===----------------------------------------------------------------------===// +namespace llvm { + /// This class represents lattice values for constants. /// /// FIXME: This is basically just for bringup, this can be made a lot more rich /// in the future. /// - -namespace llvm { class ValueLatticeElement { enum ValueLatticeElementTy { /// This Value has no known value yet.
As a result, this implies the @@ -474,11 +474,9 @@ public: const auto &CR = getConstantRange(); const auto &OtherCR = Other.getConstantRange(); - if (ConstantRange::makeSatisfyingICmpRegion(Pred, OtherCR).contains(CR)) + if (CR.icmp(Pred, OtherCR)) return ConstantInt::getTrue(Ty); - if (ConstantRange::makeSatisfyingICmpRegion( - CmpInst::getInversePredicate(Pred), OtherCR) - .contains(CR)) + if (CR.icmp(CmpInst::getInversePredicate(Pred), OtherCR)) return ConstantInt::getFalse(Ty); return nullptr; diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 86c0991451c5..90ec742f18e6 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -370,10 +370,11 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// that the returned value has pointer type if the specified value does. If /// the MaxLookup value is non-zero, it limits the number of instructions to /// be stripped off. - Value *getUnderlyingObject(Value *V, unsigned MaxLookup = 6); - inline const Value *getUnderlyingObject(const Value *V, - unsigned MaxLookup = 6) { - return getUnderlyingObject(const_cast<Value *>(V), MaxLookup); + const Value *getUnderlyingObject(const Value *V, unsigned MaxLookup = 6); + inline Value *getUnderlyingObject(Value *V, unsigned MaxLookup = 6) { + // Force const to avoid infinite recursion. + const Value *VConst = V; + return const_cast<Value *>(getUnderlyingObject(VConst, MaxLookup)); } /// This method is similar to getUnderlyingObject except that it can @@ -460,7 +461,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// for such instructions, moving them may change the resulting value. bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Returns true if the result or effects of the given instructions \p I /// depend on or influence global memory. @@ -582,6 +584,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// poison. /// Formally, given I = `r = op v1 v2 .. vN`, propagatesPoison returns true /// if, for all i, r is evaluated to poison or op raises UB if vi = poison. + /// If vi is a vector or an aggregate and r is a single value, any poison + /// element in vi should make r poison or raise UB. /// To filter out operands that raise UB on poison, you can use /// getGuaranteedNonPoisonOp. bool propagatesPoison(const Operator *I); @@ -590,6 +594,11 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// if I is executed and that operand has a poison value. void getGuaranteedNonPoisonOps(const Instruction *I, SmallPtrSetImpl<const Value *> &Ops); + /// Insert operands of I into Ops such that I will trigger undefined behavior + /// if I is executed and that operand is not a well-defined value + /// (i.e. has undef bits or poison). + void getGuaranteedWellDefinedOps(const Instruction *I, + SmallPtrSetImpl<const Value *> &Ops); /// Return true if the given instruction must trigger undefined behavior /// when I is executed with any operands which appear in KnownPoison holding @@ -729,6 +738,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// For example, signed minimum is the inverse of signed maximum. SelectPatternFlavor getInverseMinMaxFlavor(SelectPatternFlavor SPF); + Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID); + /// Return the canonical inverse comparison predicate for the specified /// minimum/maximum flavor. 
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF); @@ -741,6 +752,37 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; std::pair<Intrinsic::ID, bool> canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL); + /// Attempt to match a simple first order recurrence cycle of the form: + /// %iv = phi Ty [%Start, %Entry], [%Inc, %backedge] + /// %inc = binop %iv, %step + /// OR + /// %iv = phi Ty [%Start, %Entry], [%Inc, %backedge] + /// %inc = binop %step, %iv + /// + /// A first order recurrence is a formula of the form: X_n = f(X_(n-1)) + /// + /// A couple of notes on subtleties in that definition: + /// * The Step does not have to be loop invariant. In math terms, it can + /// be a free variable. We allow recurrences with both constant and + /// variable coefficients. Callers may wish to filter cases where Step + /// does not dominate P. + /// * For non-commutative operators, we will match both forms. This + /// results in some odd recurrence structures. Callers may wish to filter + /// out recurrences where the phi is not the LHS of the returned operator. + /// * Because of the structure matched, the caller can assume as a post + /// condition of the match the presence of a Loop with P's parent as its + /// header *except* in unreachable code. (Dominance decays in unreachable + /// code.) + /// + /// NOTE: This is intentionally simple. If you want the ability to analyze + /// non-trivial loop conditions, see ScalarEvolution instead. + bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, + Value *&Start, Value *&Step); + + /// Analogous to the above, but starting from the binary operator. + bool matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P, + Value *&Start, Value *&Step); + /// Return true if RHS is known to be implied true by LHS. Return false if /// RHS is known to be implied false by LHS. Otherwise, return None if no /// implication can be made.
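As a quick illustration of the matchSimpleRecurrence helper added above, a minimal caller could look like the following sketch (the includes and the enclosing pass are assumed; only the helper's signature comes from this header):

  // Returns true when P heads a simple add recurrence, i.e. an induction
  // variable of the shape:
  //   %iv = phi i64 [ %start, %entry ], [ %iv.next, %latch ]
  //   %iv.next = add i64 %iv, %step
  static bool isAddRecurrencePhi(const llvm::PHINode *P) {
    llvm::BinaryOperator *BO = nullptr;
    llvm::Value *Start = nullptr, *Step = nullptr;
    if (!llvm::matchSimpleRecurrence(P, BO, Start, Step))
      return false;
    return BO->getOpcode() == llvm::Instruction::Add;
  }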
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index cfc3d6115866..8a1ebec4c727 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -17,402 +17,454 @@ #define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) VEC, #endif +#define FIXED(NL) ElementCount::getFixed(NL) +#define SCALABLE(NL) ElementCount::getScalable(NL) + #if !(defined(TLI_DEFINE_VECFUNC)) #define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF}, -#endif +#endif #if defined(TLI_DEFINE_ACCELERATE_VECFUNCS) // Accelerate framework's Vector Functions // Floating-Point Arithmetic and Auxiliary Functions -TLI_DEFINE_VECFUNC("ceilf", "vceilf", 4) -TLI_DEFINE_VECFUNC("fabsf", "vfabsf", 4) -TLI_DEFINE_VECFUNC("llvm.fabs.f32", "vfabsf", 4) -TLI_DEFINE_VECFUNC("floorf", "vfloorf", 4) -TLI_DEFINE_VECFUNC("sqrtf", "vsqrtf", 4) -TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "vsqrtf", 4) +TLI_DEFINE_VECFUNC("ceilf", "vceilf", FIXED(4)) +TLI_DEFINE_VECFUNC("fabsf", "vfabsf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.fabs.f32", "vfabsf", FIXED(4)) +TLI_DEFINE_VECFUNC("floorf", "vfloorf", FIXED(4)) +TLI_DEFINE_VECFUNC("sqrtf", "vsqrtf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "vsqrtf", FIXED(4)) // Exponential and Logarithmic Functions -TLI_DEFINE_VECFUNC("expf", "vexpf", 4) -TLI_DEFINE_VECFUNC("llvm.exp.f32", "vexpf", 4) -TLI_DEFINE_VECFUNC("expm1f", "vexpm1f", 4) -TLI_DEFINE_VECFUNC("logf", "vlogf", 4) -TLI_DEFINE_VECFUNC("llvm.log.f32", "vlogf", 4) -TLI_DEFINE_VECFUNC("log1pf", "vlog1pf", 4) -TLI_DEFINE_VECFUNC("log10f", "vlog10f", 4) -TLI_DEFINE_VECFUNC("llvm.log10.f32", "vlog10f", 4) -TLI_DEFINE_VECFUNC("logbf", "vlogbf", 4) +TLI_DEFINE_VECFUNC("expf", "vexpf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.exp.f32", "vexpf", FIXED(4)) +TLI_DEFINE_VECFUNC("expm1f", "vexpm1f", FIXED(4)) +TLI_DEFINE_VECFUNC("logf", "vlogf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.log.f32", "vlogf", FIXED(4)) +TLI_DEFINE_VECFUNC("log1pf", "vlog1pf", FIXED(4)) +TLI_DEFINE_VECFUNC("log10f", "vlog10f", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.log10.f32", "vlog10f", FIXED(4)) +TLI_DEFINE_VECFUNC("logbf", "vlogbf", FIXED(4)) // Trigonometric Functions -TLI_DEFINE_VECFUNC("sinf", "vsinf", 4) -TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", 4) -TLI_DEFINE_VECFUNC("cosf", "vcosf", 4) -TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", 4) -TLI_DEFINE_VECFUNC("tanf", "vtanf", 4) -TLI_DEFINE_VECFUNC("asinf", "vasinf", 4) -TLI_DEFINE_VECFUNC("acosf", "vacosf", 4) -TLI_DEFINE_VECFUNC("atanf", "vatanf", 4) +TLI_DEFINE_VECFUNC("sinf", "vsinf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4)) +TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4)) +TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4)) +TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4)) +TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4)) +TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4)) // Hyperbolic Functions -TLI_DEFINE_VECFUNC("sinhf", "vsinhf", 4) -TLI_DEFINE_VECFUNC("coshf", "vcoshf", 4) -TLI_DEFINE_VECFUNC("tanhf", "vtanhf", 4) -TLI_DEFINE_VECFUNC("asinhf", "vasinhf", 4) -TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4) -TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4) +TLI_DEFINE_VECFUNC("sinhf", "vsinhf", FIXED(4)) +TLI_DEFINE_VECFUNC("coshf", "vcoshf", FIXED(4)) +TLI_DEFINE_VECFUNC("tanhf", "vtanhf", FIXED(4)) +TLI_DEFINE_VECFUNC("asinhf", "vasinhf", FIXED(4)) +TLI_DEFINE_VECFUNC("acoshf", "vacoshf", FIXED(4)) +TLI_DEFINE_VECFUNC("atanhf", "vatanhf", FIXED(4)) + +#elif defined(TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS) +// 
+// Darwin libsystem_m vector functions.
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("exp", "_simd_exp_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_simd_exp_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("expf", "_simd_exp_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_simd_exp_f4", FIXED(4))
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("acos", "_simd_acos_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("acosf", "_simd_acos_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("asin", "_simd_asin_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinf", "_simd_asin_f4", FIXED(4))
+
+TLI_DEFINE_VECFUNC("atan", "_simd_atan_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4))
+
+TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("cosf", "_simd_cos_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_simd_cos_f4", FIXED(4))
+
+TLI_DEFINE_VECFUNC("sin", "_simd_sin_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_simd_sin_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinf", "_simd_sin_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_simd_sin_f4", FIXED(4))
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("cbrt", "_simd_cbrt_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("cbrtf", "_simd_cbrt_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("erf", "_simd_erf_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("erff", "_simd_erf_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("pow", "_simd_pow_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_simd_pow_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("powf", "_simd_pow_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_simd_pow_f4", FIXED(4))
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinh", "_simd_sinh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinhf", "_simd_sinh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("cosh", "_simd_cosh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("coshf", "_simd_cosh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("tanh", "_simd_tanh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("tanhf", "_simd_tanh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("asinh", "_simd_asinh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinhf", "_simd_asinh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("acosh", "_simd_acosh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("acoshf", "_simd_acosh_f4", FIXED(4))
+TLI_DEFINE_VECFUNC("atanh", "_simd_atanh_d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanhf", "_simd_atanh_f4", FIXED(4))
 #elif defined(TLI_DEFINE_LIBMVEC_X86_VECFUNCS)
 // GLIBC Vector math Functions
-TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", 2)
-TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", 4)
+TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", FIXED(2))
+TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", FIXED(4))
-TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", 4)
-TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", 8)
+TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", FIXED(4))
+TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", 2)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", 8)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", FIXED(8))
-TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", 2)
-TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", 4)
+TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", FIXED(2))
+TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", FIXED(4))
-TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", 4)
-TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", 8)
+TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", FIXED(4))
+TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", 2)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", 8)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8))
-TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", 2)
-TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", 4)
+TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2))
+TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4))
-TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", 4)
-TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", 8)
+TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", FIXED(4))
+TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", FIXED(8))
-TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", 2)
-TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", 4)
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", FIXED(2))
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", FIXED(4))
-TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", 4)
-TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", 8)
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", FIXED(4))
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", 2)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", 8)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", FIXED(8))
-TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", 2)
-TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", 4)
+TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", FIXED(2))
+TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", FIXED(4))
-TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", 4)
-TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", 8)
+TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", FIXED(4))
+TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", FIXED(8))
-TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", 2)
-TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", 4)
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", FIXED(2))
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", FIXED(4))
-TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", 4)
-TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", 8)
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", FIXED(4))
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", 2)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", 8)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", FIXED(8))
-TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", 2)
-TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", 4)
+TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", FIXED(2))
+TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", FIXED(4))
-TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", 4)
-TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", 8)
+TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", FIXED(4))
+TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", FIXED(8))
-TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", 2)
-TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", 4)
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", FIXED(2))
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", FIXED(4))
-TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", 4)
-TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", 8)
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", FIXED(4))
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", 2)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", FIXED(4))
-TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", 8)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", FIXED(8))
 #elif defined(TLI_DEFINE_MASSV_VECFUNCS)
 // IBM MASS library's vector Functions
 // Floating-Point Arithmetic and Auxiliary Functions
-TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_massv", 2)
-TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_massv", 4)
-TLI_DEFINE_VECFUNC("pow", "__powd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_massv", 2)
-TLI_DEFINE_VECFUNC("powf", "__powf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_massv", 4)
-TLI_DEFINE_VECFUNC("sqrt", "__sqrtd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__sqrtd2_massv", 2)
-TLI_DEFINE_VECFUNC("sqrtf", "__sqrtf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__sqrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2", FIXED(2))
+TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4", FIXED(4))
+TLI_DEFINE_VECFUNC("pow", "__powd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2", FIXED(2))
+TLI_DEFINE_VECFUNC("powf", "__powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4", FIXED(4))
 // Exponential and Logarithmic Functions
-TLI_DEFINE_VECFUNC("exp", "__expd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_massv", 2)
-TLI_DEFINE_VECFUNC("expf", "__expf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_massv", 4)
-TLI_DEFINE_VECFUNC("exp2", "__exp2d2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_massv", 2)
-TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_massv", 4)
-TLI_DEFINE_VECFUNC("expm1", "__expm1d2_massv", 2)
-TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_massv", 4)
-TLI_DEFINE_VECFUNC("log", "__logd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_massv", 2)
-TLI_DEFINE_VECFUNC("logf", "__logf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_massv", 4)
-TLI_DEFINE_VECFUNC("log1p", "__log1pd2_massv", 2)
-TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_massv", 4)
-TLI_DEFINE_VECFUNC("log10", "__log10d2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_massv", 2)
-TLI_DEFINE_VECFUNC("log10f", "__log10f4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_massv", 4)
-TLI_DEFINE_VECFUNC("log2", "__log2d2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_massv", 2)
-TLI_DEFINE_VECFUNC("log2f", "__log2f4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_massv", 4)
+TLI_DEFINE_VECFUNC("exp", "__expd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2", FIXED(2))
+TLI_DEFINE_VECFUNC("expf", "__expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("exp2", "__exp2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("exp2f", "__exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("expm1", "__expm1d2", FIXED(2))
+TLI_DEFINE_VECFUNC("expm1f", "__expm1f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log", "__logd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2", FIXED(2))
+TLI_DEFINE_VECFUNC("logf", "__logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("log1p", "__log1pd2", FIXED(2))
+TLI_DEFINE_VECFUNC("log1pf", "__log1pf4", FIXED(4))
+TLI_DEFINE_VECFUNC("log10", "__log10d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2", FIXED(2))
+TLI_DEFINE_VECFUNC("log10f", "__log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log2", "__log2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("log2f", "__log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4", FIXED(4))
 // Trigonometric Functions
-TLI_DEFINE_VECFUNC("sin", "__sind2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_massv", 2)
-TLI_DEFINE_VECFUNC("sinf", "__sinf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_massv", 4)
-TLI_DEFINE_VECFUNC("cos", "__cosd2_massv", 2)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_massv", 2)
-TLI_DEFINE_VECFUNC("cosf", "__cosf4_massv", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_massv", 4)
-TLI_DEFINE_VECFUNC("tan", "__tand2_massv", 2)
-TLI_DEFINE_VECFUNC("tanf", "__tanf4_massv", 4)
-TLI_DEFINE_VECFUNC("asin", "__asind2_massv", 2)
-TLI_DEFINE_VECFUNC("asinf", "__asinf4_massv", 4)
-TLI_DEFINE_VECFUNC("acos", "__acosd2_massv", 2)
-TLI_DEFINE_VECFUNC("acosf", "__acosf4_massv", 4)
-TLI_DEFINE_VECFUNC("atan", "__atand2_massv", 2)
-TLI_DEFINE_VECFUNC("atanf", "__atanf4_massv", 4)
-TLI_DEFINE_VECFUNC("atan2", "__atan2d2_massv", 2)
-TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_massv", 4)
+TLI_DEFINE_VECFUNC("sin", "__sind2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinf", "__sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("cos", "__cosd2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2", FIXED(2))
+TLI_DEFINE_VECFUNC("cosf", "__cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("tan", "__tand2", FIXED(2))
+TLI_DEFINE_VECFUNC("tanf", "__tanf4", FIXED(4))
+TLI_DEFINE_VECFUNC("asin", "__asind2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinf", "__asinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("acos", "__acosd2", FIXED(2))
+TLI_DEFINE_VECFUNC("acosf", "__acosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("atan", "__atand2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanf", "__atanf4", FIXED(4))
+TLI_DEFINE_VECFUNC("atan2", "__atan2d2", FIXED(2))
+TLI_DEFINE_VECFUNC("atan2f", "__atan2f4", FIXED(4))
 // Hyperbolic Functions
-TLI_DEFINE_VECFUNC("sinh", "__sinhd2_massv", 2)
-TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_massv", 4)
-TLI_DEFINE_VECFUNC("cosh", "__coshd2_massv", 2)
-TLI_DEFINE_VECFUNC("coshf", "__coshf4_massv", 4)
-TLI_DEFINE_VECFUNC("tanh", "__tanhd2_massv", 2)
-TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_massv", 4)
-TLI_DEFINE_VECFUNC("asinh", "__asinhd2_massv", 2)
-TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_massv", 4)
-TLI_DEFINE_VECFUNC("acosh", "__acoshd2_massv", 2)
-TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_massv", 4)
-TLI_DEFINE_VECFUNC("atanh", "__atanhd2_massv", 2)
-TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_massv", 4)
+TLI_DEFINE_VECFUNC("sinh", "__sinhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("sinhf", "__sinhf4", FIXED(4))
+TLI_DEFINE_VECFUNC("cosh", "__coshd2", FIXED(2))
+TLI_DEFINE_VECFUNC("coshf", "__coshf4", FIXED(4))
+TLI_DEFINE_VECFUNC("tanh", "__tanhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("tanhf", "__tanhf4", FIXED(4))
+TLI_DEFINE_VECFUNC("asinh", "__asinhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("asinhf", "__asinhf4", FIXED(4))
+TLI_DEFINE_VECFUNC("acosh", "__acoshd2", FIXED(2))
+TLI_DEFINE_VECFUNC("acoshf", "__acoshf4", FIXED(4))
+TLI_DEFINE_VECFUNC("atanh", "__atanhd2", FIXED(2))
+TLI_DEFINE_VECFUNC("atanhf", "__atanhf4", FIXED(4))
 #elif defined(TLI_DEFINE_SVML_VECFUNCS)
 // Intel SVM library's Vector Functions
-TLI_DEFINE_VECFUNC("sin", "__svml_sin2", 2)
-TLI_DEFINE_VECFUNC("sin", "__svml_sin4", 4)
-TLI_DEFINE_VECFUNC("sin", "__svml_sin8", 8)
+TLI_DEFINE_VECFUNC("sin", "__svml_sin2", FIXED(2))
+TLI_DEFINE_VECFUNC("sin", "__svml_sin4", FIXED(4))
+TLI_DEFINE_VECFUNC("sin", "__svml_sin8", FIXED(8))
-TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", 4)
-TLI_DEFINE_VECFUNC("sinf", "__svml_sinf8", 8)
-TLI_DEFINE_VECFUNC("sinf", "__svml_sinf16", 16)
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf8", FIXED(8))
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin2", 2)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin4", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin8", 8)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf4", 4)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf8", 8)
-TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf16", 16)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf16", FIXED(16))
-TLI_DEFINE_VECFUNC("cos", "__svml_cos2", 2)
-TLI_DEFINE_VECFUNC("cos", "__svml_cos4", 4)
-TLI_DEFINE_VECFUNC("cos", "__svml_cos8", 8)
+TLI_DEFINE_VECFUNC("cos", "__svml_cos2", FIXED(2))
+TLI_DEFINE_VECFUNC("cos", "__svml_cos4", FIXED(4))
+TLI_DEFINE_VECFUNC("cos", "__svml_cos8", FIXED(8))
-TLI_DEFINE_VECFUNC("cosf", "__svml_cosf4", 4)
-TLI_DEFINE_VECFUNC("cosf", "__svml_cosf8", 8)
-TLI_DEFINE_VECFUNC("cosf", "__svml_cosf16", 16)
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf8", FIXED(8))
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos2", 2)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos4", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos8", 8)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", 4)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", 8)
-TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", 16)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", FIXED(16))
-TLI_DEFINE_VECFUNC("pow", "__svml_pow2", 2)
-TLI_DEFINE_VECFUNC("pow", "__svml_pow4", 4)
-TLI_DEFINE_VECFUNC("pow", "__svml_pow8", 8)
+TLI_DEFINE_VECFUNC("pow", "__svml_pow2", FIXED(2))
+TLI_DEFINE_VECFUNC("pow", "__svml_pow4", FIXED(4))
+TLI_DEFINE_VECFUNC("pow", "__svml_pow8", FIXED(8))
-TLI_DEFINE_VECFUNC("powf", "__svml_powf4", 4)
-TLI_DEFINE_VECFUNC("powf", "__svml_powf8", 8)
-TLI_DEFINE_VECFUNC("powf", "__svml_powf16", 16)
+TLI_DEFINE_VECFUNC("powf", "__svml_powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("powf", "__svml_powf8", FIXED(8))
+TLI_DEFINE_VECFUNC("powf", "__svml_powf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow2", 2)
-TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow4", 4)
-TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow8", 8)
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow2", FIXED(2))
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow4", FIXED(4))
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow8", FIXED(8))
-TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf4", 4)
-TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf8", 8)
-TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf16", 16)
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow2", 2)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow4", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow8", 8)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf4", 4)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf8", 8)
-TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf16", 16)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf16", FIXED(16))
-TLI_DEFINE_VECFUNC("exp", "__svml_exp2", 2)
-TLI_DEFINE_VECFUNC("exp", "__svml_exp4", 4)
-TLI_DEFINE_VECFUNC("exp", "__svml_exp8", 8)
+TLI_DEFINE_VECFUNC("exp", "__svml_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC("exp", "__svml_exp4", FIXED(4))
+TLI_DEFINE_VECFUNC("exp", "__svml_exp8", FIXED(8))
-TLI_DEFINE_VECFUNC("expf", "__svml_expf4", 4)
-TLI_DEFINE_VECFUNC("expf", "__svml_expf8", 8)
-TLI_DEFINE_VECFUNC("expf", "__svml_expf16", 16)
+TLI_DEFINE_VECFUNC("expf", "__svml_expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("expf", "__svml_expf8", FIXED(8))
+TLI_DEFINE_VECFUNC("expf", "__svml_expf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp2", 2)
-TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp4", 4)
-TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp8", 8)
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp4", FIXED(4))
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp8", FIXED(8))
-TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf4", 4)
-TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf8", 8)
-TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf16", 16)
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp2", 2)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp4", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp8", 8)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf4", 4)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf8", 8)
-TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf16", 16)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf16", FIXED(16))
-TLI_DEFINE_VECFUNC("log", "__svml_log2", 2)
-TLI_DEFINE_VECFUNC("log", "__svml_log4", 4)
-TLI_DEFINE_VECFUNC("log", "__svml_log8", 8)
+TLI_DEFINE_VECFUNC("log", "__svml_log2", FIXED(2))
+TLI_DEFINE_VECFUNC("log", "__svml_log4", FIXED(4))
+TLI_DEFINE_VECFUNC("log", "__svml_log8", FIXED(8))
-TLI_DEFINE_VECFUNC("logf", "__svml_logf4", 4)
-TLI_DEFINE_VECFUNC("logf", "__svml_logf8", 8)
-TLI_DEFINE_VECFUNC("logf", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("logf", "__svml_logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("logf", "__svml_logf8", FIXED(8))
+TLI_DEFINE_VECFUNC("logf", "__svml_logf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__log_finite", "__svml_log2", 2)
-TLI_DEFINE_VECFUNC("__log_finite", "__svml_log4", 4)
-TLI_DEFINE_VECFUNC("__log_finite", "__svml_log8", 8)
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log2", FIXED(2))
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log4", FIXED(4))
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log8", FIXED(8))
-TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf4", 4)
-TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf8", 8)
-TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log2", 2)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log4", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log8", 8)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log2", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log8", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
-TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", FIXED(16))
-TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2)
-TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4)
-TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8)
+TLI_DEFINE_VECFUNC("log2", "__svml_log22", FIXED(2))
+TLI_DEFINE_VECFUNC("log2", "__svml_log24", FIXED(4))
+TLI_DEFINE_VECFUNC("log2", "__svml_log28", FIXED(8))
-TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4)
-TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8)
-TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16)
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2)
-TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4)
-TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8)
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", FIXED(2))
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", FIXED(4))
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", FIXED(8))
-TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4)
-TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8)
-TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16)
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2)
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4)
-TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4)
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
-TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("log10", "__svml_log102", 2)
-TLI_DEFINE_VECFUNC("log10", "__svml_log104", 4)
-TLI_DEFINE_VECFUNC("log10", "__svml_log108", 8)
+TLI_DEFINE_VECFUNC("log10", "__svml_log102", FIXED(2))
+TLI_DEFINE_VECFUNC("log10", "__svml_log104", FIXED(4))
+TLI_DEFINE_VECFUNC("log10", "__svml_log108", FIXED(8))
-TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", 4)
-TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", 8)
-TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", 16)
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", FIXED(8))
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", FIXED(16))
-TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", 2)
-TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", 4)
-TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", 8)
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", FIXED(2))
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", FIXED(4))
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", FIXED(8))
-TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", 4)
-TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", 8)
-TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", 16)
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", FIXED(8))
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", 2)
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", 8)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", 4)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", 8)
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", 16)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", FIXED(16))
-TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", 2)
-TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", 4)
-TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", 8)
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", FIXED(2))
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", FIXED(4))
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", FIXED(8))
-TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", 4)
-TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", 8)
-TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", 16)
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", FIXED(4))
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", FIXED(8))
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", FIXED(16))
-TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", 2)
-TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", 4)
-TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", 8)
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", FIXED(2))
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", FIXED(4))
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", FIXED(8))
-TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
-TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
-TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", FIXED(4))
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", FIXED(8))
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", FIXED(16))
-TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
-TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
-TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
+TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", FIXED(2))
+TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", FIXED(4))
+TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", FIXED(8))
-TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f4", 4)
-TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f8", 8)
-TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f16", 16)
+TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("exp2f", "__svml_exp2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp22", 2)
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp24", 4)
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp28", 8)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp22", FIXED(2))
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp24", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__svml_exp28", FIXED(8))
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f4", 4)
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f8", 8)
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f16", 16)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__svml_exp2f16", FIXED(16))
-TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp22", 2)
-TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp24", 4)
-TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp28", 8)
+TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp22", FIXED(2))
+TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp24", FIXED(4))
+TLI_DEFINE_VECFUNC("__exp2_finite", "__svml_exp28", FIXED(8))
-TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f4", 4)
-TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f8", 8)
-TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
+TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f4", FIXED(4))
+TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f8", FIXED(8))
+TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16))
 #else
 #error "Must choose which vector library functions are to be defined."
@@ -420,6 +472,7 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
 #undef TLI_DEFINE_VECFUNC
 #undef TLI_DEFINE_ACCELERATE_VECFUNCS
+#undef TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS
 #undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS
 #undef TLI_DEFINE_MASSV_VECFUNCS
 #undef TLI_DEFINE_SVML_VECFUNCS
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 26cb0e456ed4..c890216c9e01 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -31,7 +31,7 @@ enum class VFParamKind {
   OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
   OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
   OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
-  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c
+  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
   OMP_Uniform,       // declare simd uniform(i)
   GlobalPredicate,   // Global logical predicate that acts on all lanes
                      // of the input and output mask concurrently. For
@@ -80,13 +80,11 @@ struct VFParameter {
 /// represent vector functions. in particular, it is not attached to
 /// any target-specific ABI.
 struct VFShape {
-  unsigned VF;     // Vectorization factor.
-  bool IsScalable; // True if the function is a scalable function.
+  ElementCount VF;                        // Vectorization factor.
   SmallVector<VFParameter, 8> Parameters; // List of parameter information.
   // Comparison operator.
   bool operator==(const VFShape &Other) const {
-    return std::tie(VF, IsScalable, Parameters) ==
-           std::tie(Other.VF, Other.IsScalable, Other.Parameters);
+    return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
   }
 
   /// Update the parameter in position P.ParamPos to P.
@@ -115,7 +113,7 @@ struct VFShape {
     Parameters.push_back(
         VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));
 
-    return {EC.getKnownMinValue(), EC.isScalable(), Parameters};
+    return {EC, Parameters};
   }
   /// Sanity check on the Parameters in the VFShape.
   bool hasValidParameterList() const;
@@ -127,12 +125,6 @@ struct VFInfo {
   std::string ScalarName; /// Scalar Function Name.
   std::string VectorName; /// Vector Function Name associated to this VFInfo.
   VFISAKind ISA;          /// Instruction Set Architecture.
-
-  // Comparison operator.
-  bool operator==(const VFInfo &Other) const {
-    return std::tie(Shape, ScalarName, VectorName, ISA) ==
-           std::tie(Shape, Other.ScalarName, Other.VectorName, Other.ISA);
-  }
 };
 
 namespace VFABI {
@@ -186,12 +178,13 @@ Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);
 /// <isa> = "_LLVM_"
 /// <mask> = "N". Note: TLI does not support masked interfaces.
 /// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
-/// field of the `VecDesc` struct.
+/// field of the `VecDesc` struct. If the number of lanes is scalable
+/// then 'x' is printed instead.
 /// <vparams> = "v", as many as are the numArgs.
 /// <scalarname> = the name of the scalar function.
 /// <vectorname> = the name of the vector function.
 std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
-                                unsigned numArgs, unsigned VF);
+                                unsigned numArgs, ElementCount VF);
 
 /// Retrieve the `VFParamKind` from a string token.
 VFParamKind getVFParamKindFromString(const StringRef Token);
@@ -322,6 +315,11 @@ bool isTriviallyVectorizable(Intrinsic::ID ID);
 
 /// Identifies if the vector form of the intrinsic has a scalar operand.
 bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
 
+/// Identifies if the vector form of the intrinsic has a scalar operand that has
+/// an overloaded type.
+bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
+                                            unsigned ScalarOpdIdx);
+
 /// Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
 /// its intrinsic ID, in case it does not found it return not_intrinsic.
@@ -601,10 +599,6 @@ public:
   bool isReverse() const { return Reverse; }
   uint32_t getFactor() const { return Factor; }
-  LLVM_ATTRIBUTE_DEPRECATED(uint32_t getAlignment() const,
-                            "Use getAlign instead.") {
-    return Alignment.value();
-  }
   Align getAlign() const { return Alignment; }
   uint32_t getNumMembers() const { return Members.size(); }
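Tying the mangleTLIVectorName change above to its documented scheme: the name is composed as _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>), and with an ElementCount VF the <vlen> token becomes 'x' for scalable factors. A minimal sketch that re-derives the string locally from that comment block follows; mangleTLISketch and its parameters are illustrative, not the LLVM API.

#include <string>

// Local re-derivation of the documented TLI mangling, for illustration only.
std::string mangleTLISketch(const std::string &VectorName,
                            const std::string &ScalarName, unsigned NumArgs,
                            unsigned MinLanes, bool IsScalable) {
  std::string Out = "_ZGV_LLVM_N";                    // <isa> "_LLVM_", <mask> "N"
  Out += IsScalable ? "x" : std::to_string(MinLanes); // <vlen>, 'x' if scalable
  Out += std::string(NumArgs, 'v');                   // <vparams>, one "v" per arg
  Out += "_" + ScalarName + "(" + VectorName + ")";
  return Out;
}

// mangleTLISketch("__svml_sin4", "sin", 1, 4, false) == "_ZGV_LLVM_N4v_sin(__svml_sin4)"
// mangleTLISketch("custom_vsin", "sin", 1, 2, true)  == "_ZGV_LLVM_Nxv_sin(custom_vsin)"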