72 files changed, 6278 insertions, 4456 deletions
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index a05a01073049..5aaaaf3c396b 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -6,11 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
-/// \brief This file contains the simple types necessary to represent the
+/// This file contains the simple types necessary to represent the
 /// attributes associated with functions and their calls.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_IR_ATTRIBUTES_H
@@ -22,6 +22,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
 #include <bitset>
 #include <cassert>
@@ -35,7 +36,6 @@ namespace llvm {
 class AttrBuilder;
 class AttributeImpl;
 class AttributeListImpl;
-class AttributeList;
 class AttributeSetNode;
 template<typename T> struct DenseMapInfo;
 class Function;
@@ -44,7 +44,7 @@ class Type;
 
 //===----------------------------------------------------------------------===//
 /// \class
-/// \brief Functions, function parameters, and return types can have attributes
+/// Functions, function parameters, and return types can have attributes
 /// to indicate how they should be treated by optimizations and code
 /// generation. This class represents one of those attributes. It's light-weight
 /// and should be passed around by-value.
@@ -71,7 +71,7 @@ public:
     // IR-Level Attributes
     None,                  ///< No attributes have been set
     #define GET_ATTR_ENUM
-    #include "llvm/IR/Attributes.gen"
+    #include "llvm/IR/Attributes.inc"
     EndAttrKinds           ///< Sentinal value useful for loops
   };
 
@@ -87,12 +87,12 @@ public:
   // Attribute Construction
   //===--------------------------------------------------------------------===//
 
-  /// \brief Return a uniquified Attribute object.
+  /// Return a uniquified Attribute object.
   static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
   static Attribute get(LLVMContext &Context, StringRef Kind,
                        StringRef Val = StringRef());
 
-  /// \brief Return a uniquified Attribute object that has the specific
+  /// Return a uniquified Attribute object that has the specific
   /// alignment set.
   static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align);
   static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align);
@@ -108,51 +108,51 @@ public:
   // Attribute Accessors
   //===--------------------------------------------------------------------===//
 
-  /// \brief Return true if the attribute is an Attribute::AttrKind type.
+  /// Return true if the attribute is an Attribute::AttrKind type.
   bool isEnumAttribute() const;
 
-  /// \brief Return true if the attribute is an integer attribute.
+  /// Return true if the attribute is an integer attribute.
   bool isIntAttribute() const;
 
-  /// \brief Return true if the attribute is a string (target-dependent)
+  /// Return true if the attribute is a string (target-dependent)
   /// attribute.
   bool isStringAttribute() const;
 
-  /// \brief Return true if the attribute is present.
+  /// Return true if the attribute is present.
   bool hasAttribute(AttrKind Val) const;
 
-  /// \brief Return true if the target-dependent attribute is present.
+  /// Return true if the target-dependent attribute is present.
   bool hasAttribute(StringRef Val) const;
 
-  /// \brief Return the attribute's kind as an enum (Attribute::AttrKind). This
+  /// Return the attribute's kind as an enum (Attribute::AttrKind). This
   /// requires the attribute to be an enum or integer attribute.
   Attribute::AttrKind getKindAsEnum() const;
 
-  /// \brief Return the attribute's value as an integer. This requires that the
+  /// Return the attribute's value as an integer. This requires that the
   /// attribute be an integer attribute.
   uint64_t getValueAsInt() const;
 
-  /// \brief Return the attribute's kind as a string. This requires the
+  /// Return the attribute's kind as a string. This requires the
   /// attribute to be a string attribute.
   StringRef getKindAsString() const;
 
-  /// \brief Return the attribute's value as a string. This requires the
+  /// Return the attribute's value as a string. This requires the
   /// attribute to be a string attribute.
   StringRef getValueAsString() const;
 
-  /// \brief Returns the alignment field of an attribute as a byte alignment
+  /// Returns the alignment field of an attribute as a byte alignment
   /// value.
   unsigned getAlignment() const;
 
-  /// \brief Returns the stack alignment field of an attribute as a byte
+  /// Returns the stack alignment field of an attribute as a byte
   /// alignment value.
   unsigned getStackAlignment() const;
 
-  /// \brief Returns the number of dereferenceable bytes from the
+  /// Returns the number of dereferenceable bytes from the
   /// dereferenceable attribute.
   uint64_t getDereferenceableBytes() const;
 
-  /// \brief Returns the number of dereferenceable_or_null bytes from the
+  /// Returns the number of dereferenceable_or_null bytes from the
   /// dereferenceable_or_null attribute.
   uint64_t getDereferenceableOrNullBytes() const;
 
@@ -160,27 +160,27 @@ public:
   /// if not known).
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
 
-  /// \brief The Attribute is converted to a string of equivalent mnemonic. This
+  /// The Attribute is converted to a string of equivalent mnemonic. This
   /// is, presumably, for writing out the mnemonics for the assembly writer.
   std::string getAsString(bool InAttrGrp = false) const;
 
-  /// \brief Equality and non-equality operators.
+  /// Equality and non-equality operators.
   bool operator==(Attribute A) const { return pImpl == A.pImpl; }
   bool operator!=(Attribute A) const { return pImpl != A.pImpl; }
 
-  /// \brief Less-than operator. Useful for sorting the attributes list.
+  /// Less-than operator. Useful for sorting the attributes list.
   bool operator<(Attribute A) const;
 
   void Profile(FoldingSetNodeID &ID) const {
     ID.AddPointer(pImpl);
   }
 
-  /// \brief Return a raw pointer that uniquely identifies this attribute.
+  /// Return a raw pointer that uniquely identifies this attribute.
   void *getRawPointer() const {
     return pImpl;
   }
 
-  /// \brief Get an attribute from a raw pointer created by getRawPointer.
+  /// Get an attribute from a raw pointer created by getRawPointer.
   static Attribute fromRawPointer(void *RawPtr) {
     return Attribute(reinterpret_cast<AttributeImpl*>(RawPtr));
   }
@@ -203,6 +203,9 @@ inline Attribute unwrap(LLVMAttributeRef Attr) {
 /// copy. Adding and removing enum attributes is intended to be fast, but adding
 /// and removing string or integer attributes involves a FoldingSet lookup.
 class AttributeSet {
+  friend AttributeListImpl;
+  template <typename Ty> friend struct DenseMapInfo;
+
   // TODO: Extract AvailableAttrs from AttributeSetNode and store them here.
   // This will allow an efficient implementation of addAttribute and
   // removeAttribute for enum attrs.
@@ -210,9 +213,6 @@ class AttributeSet {
   /// Private implementation pointer.
   AttributeSetNode *SetNode = nullptr;
 
-  friend AttributeListImpl;
-  template <typename Ty> friend struct DenseMapInfo;
-
 private:
   explicit AttributeSet(AttributeSetNode *ASN) : SetNode(ASN) {}
 
@@ -290,16 +290,16 @@ public:
 
 //===----------------------------------------------------------------------===//
 /// \class
-/// \brief Provide DenseMapInfo for AttributeSet.
+/// Provide DenseMapInfo for AttributeSet.
 template <> struct DenseMapInfo<AttributeSet> {
-  static inline AttributeSet getEmptyKey() {
-    uintptr_t Val = static_cast<uintptr_t>(-1);
+  static AttributeSet getEmptyKey() {
+    auto Val = static_cast<uintptr_t>(-1);
     Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
     return AttributeSet(reinterpret_cast<AttributeSetNode *>(Val));
   }
 
-  static inline AttributeSet getTombstoneKey() {
-    uintptr_t Val = static_cast<uintptr_t>(-2);
+  static AttributeSet getTombstoneKey() {
+    auto Val = static_cast<uintptr_t>(-2);
     Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
     return AttributeSet(reinterpret_cast<AttributeSetNode *>(Val));
   }
@@ -314,7 +314,7 @@ template <> struct DenseMapInfo<AttributeSet> {
 
 //===----------------------------------------------------------------------===//
 /// \class
-/// \brief This class holds the attributes for a function, its return value, and
+/// This class holds the attributes for a function, its return value, and
 /// its parameters. You access the attributes for each of them via an index into
 /// the AttributeList object. The function attributes are at index
 /// `AttributeList::FunctionIndex', the return value is at index
@@ -333,21 +333,20 @@ private:
   friend class AttributeListImpl;
   friend class AttributeSet;
   friend class AttributeSetNode;
-
   template <typename Ty> friend struct DenseMapInfo;
 
-  /// \brief The attributes that we are managing. This can be null to represent
+  /// The attributes that we are managing. This can be null to represent
   /// the empty attributes list.
   AttributeListImpl *pImpl = nullptr;
 
 public:
-  /// \brief Create an AttributeList with the specified parameters in it.
+  /// Create an AttributeList with the specified parameters in it.
   static AttributeList get(LLVMContext &C,
                            ArrayRef<std::pair<unsigned, Attribute>> Attrs);
   static AttributeList get(LLVMContext &C,
                            ArrayRef<std::pair<unsigned, AttributeSet>> Attrs);
 
-  /// \brief Create an AttributeList from attribute sets for a function, its
+  /// Create an AttributeList from attribute sets for a function, its
   /// return value, and all of its arguments.
   static AttributeList get(LLVMContext &C, AttributeSet FnAttrs,
                            AttributeSet RetAttrs,
@@ -365,7 +364,7 @@ public:
   // AttributeList Construction and Mutation
   //===--------------------------------------------------------------------===//
 
-  /// \brief Return an AttributeList with the specified parameters in it.
+  /// Return an AttributeList with the specified parameters in it.
   static AttributeList get(LLVMContext &C, ArrayRef<AttributeList> Attrs);
   static AttributeList get(LLVMContext &C, unsigned Index,
                            ArrayRef<Attribute::AttrKind> Kinds);
@@ -374,12 +373,12 @@ public:
   static AttributeList get(LLVMContext &C, unsigned Index,
                            const AttrBuilder &B);
 
-  /// \brief Add an attribute to the attribute set at the given index.
+  /// Add an attribute to the attribute set at the given index.
   /// Returns a new list because attribute lists are immutable.
   AttributeList addAttribute(LLVMContext &C, unsigned Index,
                              Attribute::AttrKind Kind) const;
 
-  /// \brief Add an attribute to the attribute set at the given index.
+  /// Add an attribute to the attribute set at the given index.
   /// Returns a new list because attribute lists are immutable.
   AttributeList addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
                              StringRef Value = StringRef()) const;
@@ -388,7 +387,7 @@ public:
   /// Returns a new list because attribute lists are immutable.
   AttributeList addAttribute(LLVMContext &C, unsigned Index, Attribute A) const;
 
-  /// \brief Add attributes to the attribute set at the given index.
+  /// Add attributes to the attribute set at the given index.
   /// Returns a new list because attribute lists are immutable.
   AttributeList addAttributes(LLVMContext &C, unsigned Index,
                               const AttrBuilder &B) const;
@@ -420,70 +419,70 @@ public:
     return addAttributes(C, ArgNo + FirstArgIndex, B);
   }
 
-  /// \brief Remove the specified attribute at the specified index from this
+  /// Remove the specified attribute at the specified index from this
   /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeAttribute(LLVMContext &C, unsigned Index,
                                 Attribute::AttrKind Kind) const;
 
-  /// \brief Remove the specified attribute at the specified index from this
+  /// Remove the specified attribute at the specified index from this
   /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeAttribute(LLVMContext &C, unsigned Index,
                                 StringRef Kind) const;
 
-  /// \brief Remove the specified attributes at the specified index from this
+  /// Remove the specified attributes at the specified index from this
   /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeAttributes(LLVMContext &C, unsigned Index,
                                  const AttrBuilder &AttrsToRemove) const;
 
-  /// \brief Remove all attributes at the specified index from this
+  /// Remove all attributes at the specified index from this
   /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeAttributes(LLVMContext &C, unsigned Index) const;
 
-  /// \brief Remove the specified attribute at the specified arg index from this
+  /// Remove the specified attribute at the specified arg index from this
   /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo,
                                      Attribute::AttrKind Kind) const {
     return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
   }
 
-  /// \brief Remove the specified attribute at the specified arg index from this
+  /// Remove the specified attribute at the specified arg index from this
   /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo,
                                      StringRef Kind) const {
     return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
   }
 
-  /// \brief Remove the specified attribute at the specified arg index from this
+  /// Remove the specified attribute at the specified arg index from this
   /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo,
                                       const AttrBuilder &AttrsToRemove) const {
     return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove);
   }
 
-  /// \brief Remove all attributes at the specified arg index from this
+  /// Remove all attributes at the specified arg index from this
   /// attribute list. Returns a new list because attribute lists are immutable.
   AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo) const {
     return removeAttributes(C, ArgNo + FirstArgIndex);
   }
 
-  /// \Brief Add the dereferenceable attribute to the attribute set at the given
+  /// \brief Add the dereferenceable attribute to the attribute set at the given
   /// index. Returns a new list because attribute lists are immutable.
   AttributeList addDereferenceableAttr(LLVMContext &C, unsigned Index,
                                        uint64_t Bytes) const;
 
-  /// \Brief Add the dereferenceable attribute to the attribute set at the given
+  /// \brief Add the dereferenceable attribute to the attribute set at the given
   /// arg index. Returns a new list because attribute lists are immutable.
   AttributeList addDereferenceableParamAttr(LLVMContext &C, unsigned ArgNo,
                                             uint64_t Bytes) const {
     return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes);
   }
 
-  /// \brief Add the dereferenceable_or_null attribute to the attribute set at
+  /// Add the dereferenceable_or_null attribute to the attribute set at
   /// the given index. Returns a new list because attribute lists are immutable.
   AttributeList addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index,
                                              uint64_t Bytes) const;
 
-  /// \brief Add the dereferenceable_or_null attribute to the attribute set at
+  /// Add the dereferenceable_or_null attribute to the attribute set at
   /// the given arg index. Returns a new list because attribute lists are
   /// immutable.
   AttributeList addDereferenceableOrNullParamAttr(LLVMContext &C,
@@ -510,102 +509,102 @@ public:
   // AttributeList Accessors
   //===--------------------------------------------------------------------===//
 
-  /// \brief Retrieve the LLVM context.
+  /// Retrieve the LLVM context.
   LLVMContext &getContext() const;
 
-  /// \brief The attributes for the specified index are returned.
+  /// The attributes for the specified index are returned.
   AttributeSet getAttributes(unsigned Index) const;
 
-  /// \brief The attributes for the argument or parameter at the given index are
+  /// The attributes for the argument or parameter at the given index are
   /// returned.
   AttributeSet getParamAttributes(unsigned ArgNo) const;
 
-  /// \brief The attributes for the ret value are returned.
+  /// The attributes for the ret value are returned.
   AttributeSet getRetAttributes() const;
 
-  /// \brief The function attributes are returned.
+  /// The function attributes are returned.
   AttributeSet getFnAttributes() const;
 
-  /// \brief Return true if the attribute exists at the given index.
+  /// Return true if the attribute exists at the given index.
   bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const;
 
-  /// \brief Return true if the attribute exists at the given index.
+  /// Return true if the attribute exists at the given index.
   bool hasAttribute(unsigned Index, StringRef Kind) const;
 
-  /// \brief Return true if attribute exists at the given index.
+  /// Return true if attribute exists at the given index.
   bool hasAttributes(unsigned Index) const;
 
-  /// \brief Return true if the attribute exists for the given argument
+  /// Return true if the attribute exists for the given argument
   bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
     return hasAttribute(ArgNo + FirstArgIndex, Kind);
   }
 
-  /// \brief Return true if the attribute exists for the given argument
+  /// Return true if the attribute exists for the given argument
   bool hasParamAttr(unsigned ArgNo, StringRef Kind) const {
     return hasAttribute(ArgNo + FirstArgIndex, Kind);
   }
 
-  /// \brief Return true if attributes exists for the given argument
+  /// Return true if attributes exists for the given argument
   bool hasParamAttrs(unsigned ArgNo) const {
     return hasAttributes(ArgNo + FirstArgIndex);
   }
 
-  /// \brief Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
+  /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
   /// may be faster.
   bool hasFnAttribute(Attribute::AttrKind Kind) const;
 
-  /// \brief Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
+  /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
   /// may be faster.
   bool hasFnAttribute(StringRef Kind) const;
 
-  /// \brief Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
+  /// Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
   bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
 
-  /// \brief Return true if the specified attribute is set for at least one
+  /// Return true if the specified attribute is set for at least one
   /// parameter or for the return value. If Index is not nullptr, the index
   /// of a parameter with the specified attribute is provided.
   bool hasAttrSomewhere(Attribute::AttrKind Kind,
                         unsigned *Index = nullptr) const;
 
-  /// \brief Return the attribute object that exists at the given index.
+  /// Return the attribute object that exists at the given index.
   Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const;
 
-  /// \brief Return the attribute object that exists at the given index.
+  /// Return the attribute object that exists at the given index.
   Attribute getAttribute(unsigned Index, StringRef Kind) const;
 
-  /// \brief Return the attribute object that exists at the arg index.
+  /// Return the attribute object that exists at the arg index.
   Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
     return getAttribute(ArgNo + FirstArgIndex, Kind);
   }
 
-  /// \brief Return the attribute object that exists at the given index.
+  /// Return the attribute object that exists at the given index.
   Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
     return getAttribute(ArgNo + FirstArgIndex, Kind);
   }
 
-  /// \brief Return the alignment of the return value.
+  /// Return the alignment of the return value.
   unsigned getRetAlignment() const;
 
-  /// \brief Return the alignment for the specified function parameter.
+  /// Return the alignment for the specified function parameter.
   unsigned getParamAlignment(unsigned ArgNo) const;
 
-  /// \brief Get the stack alignment.
+  /// Get the stack alignment.
   unsigned getStackAlignment(unsigned Index) const;
 
-  /// \brief Get the number of dereferenceable bytes (or zero if unknown).
+  /// Get the number of dereferenceable bytes (or zero if unknown).
   uint64_t getDereferenceableBytes(unsigned Index) const;
 
-  /// \brief Get the number of dereferenceable bytes (or zero if unknown) of an
+  /// Get the number of dereferenceable bytes (or zero if unknown) of an
   /// arg.
   uint64_t getParamDereferenceableBytes(unsigned ArgNo) const {
     return getDereferenceableBytes(ArgNo + FirstArgIndex);
   }
 
-  /// \brief Get the number of dereferenceable_or_null bytes (or zero if
+  /// Get the number of dereferenceable_or_null bytes (or zero if
   /// unknown).
   uint64_t getDereferenceableOrNullBytes(unsigned Index) const;
 
-  /// \brief Get the number of dereferenceable_or_null bytes (or zero if
+  /// Get the number of dereferenceable_or_null bytes (or zero if
   /// unknown) of an arg.
   uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const {
     return getDereferenceableOrNullBytes(ArgNo + FirstArgIndex);
@@ -615,7 +614,7 @@ public:
   std::pair<unsigned, Optional<unsigned>>
   getAllocSizeArgs(unsigned Index) const;
 
-  /// \brief Return the attributes at the index as a string.
+  /// Return the attributes at the index as a string.
   std::string getAsString(unsigned Index, bool InAttrGrp = false) const;
 
   //===--------------------------------------------------------------------===//
@@ -637,12 +636,12 @@ public:
   bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; }
   bool operator!=(const AttributeList &RHS) const { return pImpl != RHS.pImpl; }
 
-  /// \brief Return a raw pointer that uniquely identifies this attribute list.
+  /// Return a raw pointer that uniquely identifies this attribute list.
   void *getRawPointer() const {
     return pImpl;
   }
 
-  /// \brief Return true if there are no attributes.
+  /// Return true if there are no attributes.
   bool isEmpty() const { return pImpl == nullptr; }
 
   void dump() const;
@@ -650,16 +649,16 @@ public:
 
 //===----------------------------------------------------------------------===//
 /// \class
-/// \brief Provide DenseMapInfo for AttributeList.
+/// Provide DenseMapInfo for AttributeList.
 template <> struct DenseMapInfo<AttributeList> {
-  static inline AttributeList getEmptyKey() {
-    uintptr_t Val = static_cast<uintptr_t>(-1);
+  static AttributeList getEmptyKey() {
+    auto Val = static_cast<uintptr_t>(-1);
     Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
     return AttributeList(reinterpret_cast<AttributeListImpl *>(Val));
   }
 
-  static inline AttributeList getTombstoneKey() {
-    uintptr_t Val = static_cast<uintptr_t>(-2);
+  static AttributeList getTombstoneKey() {
+    auto Val = static_cast<uintptr_t>(-2);
     Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
     return AttributeList(reinterpret_cast<AttributeListImpl *>(Val));
   }
@@ -676,7 +675,7 @@ template <> struct DenseMapInfo<AttributeList> {
 
 //===----------------------------------------------------------------------===//
 /// \class
-/// \brief This class is used in conjunction with the Attribute::get method to
+/// This class is used in conjunction with the Attribute::get method to
 /// create an Attribute object. The object itself is uniquified. The Builder's
 /// value, however, is not. So this can be used as a quick way to test for
 /// equality, presence of attributes, etc.
@@ -691,73 +690,75 @@ class AttrBuilder {
 
 public:
   AttrBuilder() = default;
+
   AttrBuilder(const Attribute &A) {
     addAttribute(A);
   }
+
   AttrBuilder(AttributeList AS, unsigned Idx);
   AttrBuilder(AttributeSet AS);
 
   void clear();
 
-  /// \brief Add an attribute to the builder.
+  /// Add an attribute to the builder.
   AttrBuilder &addAttribute(Attribute::AttrKind Val);
 
-  /// \brief Add the Attribute object to the builder.
+  /// Add the Attribute object to the builder.
   AttrBuilder &addAttribute(Attribute A);
 
-  /// \brief Add the target-dependent attribute to the builder.
+  /// Add the target-dependent attribute to the builder.
   AttrBuilder &addAttribute(StringRef A, StringRef V = StringRef());
 
-  /// \brief Remove an attribute from the builder.
+  /// Remove an attribute from the builder.
   AttrBuilder &removeAttribute(Attribute::AttrKind Val);
 
-  /// \brief Remove the attributes from the builder.
+  /// Remove the attributes from the builder.
   AttrBuilder &removeAttributes(AttributeList A, uint64_t WithoutIndex);
 
-  /// \brief Remove the target-dependent attribute to the builder.
+  /// Remove the target-dependent attribute to the builder.
   AttrBuilder &removeAttribute(StringRef A);
 
-  /// \brief Add the attributes from the builder.
+  /// Add the attributes from the builder.
   AttrBuilder &merge(const AttrBuilder &B);
 
-  /// \brief Remove the attributes from the builder.
+  /// Remove the attributes from the builder.
   AttrBuilder &remove(const AttrBuilder &B);
 
-  /// \brief Return true if the builder has any attribute that's in the
+  /// Return true if the builder has any attribute that's in the
   /// specified builder.
   bool overlaps(const AttrBuilder &B) const;
 
-  /// \brief Return true if the builder has the specified attribute.
+  /// Return true if the builder has the specified attribute.
   bool contains(Attribute::AttrKind A) const {
     assert((unsigned)A < Attribute::EndAttrKinds && "Attribute out of range!");
     return Attrs[A];
   }
 
-  /// \brief Return true if the builder has the specified target-dependent
+  /// Return true if the builder has the specified target-dependent
   /// attribute.
   bool contains(StringRef A) const;
 
-  /// \brief Return true if the builder has IR-level attributes.
+  /// Return true if the builder has IR-level attributes.
   bool hasAttributes() const;
 
-  /// \brief Return true if the builder has any attribute that's in the
+  /// Return true if the builder has any attribute that's in the
   /// specified attribute.
   bool hasAttributes(AttributeList A, uint64_t Index) const;
 
-  /// \brief Return true if the builder has an alignment attribute.
+  /// Return true if the builder has an alignment attribute.
   bool hasAlignmentAttr() const;
 
-  /// \brief Retrieve the alignment attribute, if it exists.
+  /// Retrieve the alignment attribute, if it exists.
   uint64_t getAlignment() const { return Alignment; }
 
-  /// \brief Retrieve the stack alignment attribute, if it exists.
+  /// Retrieve the stack alignment attribute, if it exists.
   uint64_t getStackAlignment() const { return StackAlignment; }
 
-  /// \brief Retrieve the number of dereferenceable bytes, if the
+  /// Retrieve the number of dereferenceable bytes, if the
   /// dereferenceable attribute exists (zero is returned otherwise).
   uint64_t getDereferenceableBytes() const { return DerefBytes; }
 
-  /// \brief Retrieve the number of dereferenceable_or_null bytes, if the
+  /// Retrieve the number of dereferenceable_or_null bytes, if the
   /// dereferenceable_or_null attribute exists (zero is returned otherwise).
   uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
 
@@ -765,19 +766,19 @@ public:
   /// doesn't exist, pair(0, 0) is returned.
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
 
-  /// \brief This turns an int alignment (which must be a power of 2) into the
+  /// This turns an int alignment (which must be a power of 2) into the
   /// form used internally in Attribute.
   AttrBuilder &addAlignmentAttr(unsigned Align);
 
-  /// \brief This turns an int stack alignment (which must be a power of 2) into
+  /// This turns an int stack alignment (which must be a power of 2) into
   /// the form used internally in Attribute.
   AttrBuilder &addStackAlignmentAttr(unsigned Align);
 
-  /// \brief This turns the number of dereferenceable bytes into the form used
+  /// This turns the number of dereferenceable bytes into the form used
   /// internally in Attribute.
   AttrBuilder &addDereferenceableAttr(uint64_t Bytes);
 
-  /// \brief This turns the number of dereferenceable_or_null bytes into the
+  /// This turns the number of dereferenceable_or_null bytes into the
   /// form used internally in Attribute.
   AttrBuilder &addDereferenceableOrNullAttr(uint64_t Bytes);
 
@@ -789,7 +790,7 @@ public:
   /// Attribute.getIntValue().
   AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr);
 
-  /// \brief Return true if the builder contains no target-independent
+  /// Return true if the builder contains no target-independent
   /// attributes.
   bool empty() const { return Attrs.none(); }
 
@@ -800,18 +801,19 @@ public:
   using td_range = iterator_range<td_iterator>;
   using td_const_range = iterator_range<td_const_iterator>;
 
-  td_iterator td_begin()             { return TargetDepAttrs.begin(); }
-  td_iterator td_end()               { return TargetDepAttrs.end(); }
+  td_iterator td_begin() { return TargetDepAttrs.begin(); }
+  td_iterator td_end() { return TargetDepAttrs.end(); }
 
   td_const_iterator td_begin() const { return TargetDepAttrs.begin(); }
-  td_const_iterator td_end() const   { return TargetDepAttrs.end(); }
+  td_const_iterator td_end() const { return TargetDepAttrs.end(); }
 
   td_range td_attrs() { return td_range(td_begin(), td_end()); }
+
   td_const_range td_attrs() const {
     return td_const_range(td_begin(), td_end());
   }
 
-  bool td_empty() const              { return TargetDepAttrs.empty(); }
+  bool td_empty() const { return TargetDepAttrs.empty(); }
 
   bool operator==(const AttrBuilder &B);
   bool operator!=(const AttrBuilder &B) {
@@ -821,14 +823,14 @@ public:
 
 namespace AttributeFuncs {
 
-/// \brief Which attributes cannot be applied to a type.
+/// Which attributes cannot be applied to a type.
 AttrBuilder typeIncompatible(Type *Ty);
 
 /// \returns Return true if the two functions have compatible target-independent
 /// attributes for inlining purposes.
 bool areInlineCompatible(const Function &Caller, const Function &Callee);
 
-/// \brief Merge caller's and callee's attributes.
+/// Merge caller's and callee's attributes.
 void mergeAttributesForInlining(Function &Caller, const Function &Callee);
 
 } // end namespace AttributeFuncs
diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td
index ebe5c1985875..1019f867aab0 100644
--- a/include/llvm/IR/Attributes.td
+++ b/include/llvm/IR/Attributes.td
@@ -106,9 +106,15 @@ def NoRedZone : EnumAttr<"noredzone">;
 /// Mark the function as not returning.
 def NoReturn : EnumAttr<"noreturn">;
 
+/// Disable Indirect Branch Tracking.
+def NoCfCheck : EnumAttr<"nocf_check">;
+
 /// Function doesn't unwind stack.
 def NoUnwind : EnumAttr<"nounwind">;
 
+/// Select optimizations for best fuzzing signal.
+def OptForFuzzing : EnumAttr<"optforfuzzing">;
+
 /// opt_size.
 def OptimizeForSize : EnumAttr<"optsize">;
 
@@ -130,6 +136,9 @@ def ReturnsTwice : EnumAttr<"returns_twice">;
 /// Safe Stack protection.
 def SafeStack : EnumAttr<"safestack">;
 
+/// Shadow Call Stack protection.
+def ShadowCallStack : EnumAttr<"shadowcallstack">;
+
 /// Sign extended before/after call.
 def SExt : EnumAttr<"signext">;
 
@@ -205,6 +214,7 @@ def : CompatRule<"isEqual<SanitizeThreadAttr>">;
 def : CompatRule<"isEqual<SanitizeMemoryAttr>">;
 def : CompatRule<"isEqual<SanitizeHWAddressAttr>">;
 def : CompatRule<"isEqual<SafeStackAttr>">;
+def : CompatRule<"isEqual<ShadowCallStackAttr>">;
 
 class MergeRule<string F> {
   // The name of the function called to merge the attributes of the caller and
@@ -225,3 +235,4 @@ def : MergeRule<"setOR<ProfileSampleAccurateAttr>">;
 def : MergeRule<"adjustCallerSSPLevel">;
 def : MergeRule<"adjustCallerStackProbes">;
 def : MergeRule<"adjustCallerStackProbeSize">;
+def : MergeRule<"adjustMinLegalVectorWidth">;
diff --git a/include/llvm/IR/AutoUpgrade.h b/include/llvm/IR/AutoUpgrade.h
index 3f406f0cf196..8cf574c6a138 100644
--- a/include/llvm/IR/AutoUpgrade.h
+++ b/include/llvm/IR/AutoUpgrade.h
@@ -37,6 +37,10 @@ namespace llvm {
   /// intrinsic function with a call to the specified new function.
   void UpgradeIntrinsicCall(CallInst *CI, Function *NewFn);
 
+  // This upgrades the comment for objc retain release markers in inline asm
+  // calls
+  void UpgradeInlineAsmString(std::string *AsmStr);
+
   /// This is an auto-upgrade hook for any old intrinsic function syntaxes
   /// which need to have both the function updated as well as all calls updated
   /// to the new function. This should only be run in a post-processing fashion
@@ -51,6 +55,10 @@ namespace llvm {
   /// module is modified.
   bool UpgradeModuleFlags(Module &M);
 
+  /// This checks for objc retain release marker which should be upgraded. It
+  /// returns true if module is modified.
+  bool UpgradeRetainReleaseMarker(Module &M);
+
   void UpgradeSectionAttributes(Module &M);
 
   /// If the given TBAA tag uses the scalar TBAA format, create a new node
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
index 77cfc9776df0..1ee19975af75 100644
--- a/include/llvm/IR/BasicBlock.h
+++ b/include/llvm/IR/BasicBlock.h
@@ -41,7 +41,7 @@ class PHINode;
 class TerminatorInst;
 class ValueSymbolTable;
 
-/// \brief LLVM Basic Block Representation
+/// LLVM Basic Block Representation
 ///
 /// This represents a single basic block in LLVM. A basic block is simply a
 /// container of instructions that execute sequentially. Basic blocks are Values
@@ -70,7 +70,7 @@ private:
 
   void setParent(Function *parent);
 
-  /// \brief Constructor.
+  /// Constructor.
   ///
   /// If the function parameter is specified, the basic block is automatically
   /// inserted at either the end of the function (if InsertBefore is null), or
@@ -84,7 +84,7 @@ public:
   BasicBlock &operator=(const BasicBlock &) = delete;
   ~BasicBlock();
 
-  /// \brief Get the context in which this basic block lives.
+  /// Get the context in which this basic block lives.
   LLVMContext &getContext() const;
 
   /// Instruction iterators...
@@ -93,7 +93,7 @@ public:
   using reverse_iterator = InstListType::reverse_iterator;
   using const_reverse_iterator = InstListType::const_reverse_iterator;
 
-  /// \brief Creates a new BasicBlock.
+  /// Creates a new BasicBlock.
   ///
   /// If the Parent parameter is specified, the basic block is automatically
   /// inserted at either the end of the function (if InsertBefore is 0), or
@@ -104,12 +104,12 @@ public:
     return new BasicBlock(Context, Name, Parent, InsertBefore);
   }
 
-  /// \brief Return the enclosing method, or null if none.
+  /// Return the enclosing method, or null if none.
   const Function *getParent() const { return Parent; }
         Function *getParent()       { return Parent; }
 
-  /// \brief Return the module owning the function this basic block belongs to,
-  /// or nullptr it the function does not have a module.
+  /// Return the module owning the function this basic block belongs to, or
+  /// nullptr if the function does not have a module.
   ///
   /// Note: this is undefined behavior if the block does not have a parent.
   const Module *getModule() const;
@@ -118,34 +118,34 @@ public:
                             static_cast<const BasicBlock *>(this)->getModule());
   }
 
-  /// \brief Returns the terminator instruction if the block is well formed or
-  /// null if the block is not well formed.
+  /// Returns the terminator instruction if the block is well formed or null
+  /// if the block is not well formed.
   const TerminatorInst *getTerminator() const LLVM_READONLY;
   TerminatorInst *getTerminator() {
     return const_cast<TerminatorInst *>(
                         static_cast<const BasicBlock *>(this)->getTerminator());
   }
 
-  /// \brief Returns the call instruction calling @llvm.experimental.deoptimize
-  /// prior to the terminating return instruction of this basic block, if such a
-  /// call is present.  Otherwise, returns null.
+  /// Returns the call instruction calling \@llvm.experimental.deoptimize
+  /// prior to the terminating return instruction of this basic block, if such
+  /// a call is present.  Otherwise, returns null.
   const CallInst *getTerminatingDeoptimizeCall() const;
   CallInst *getTerminatingDeoptimizeCall() {
     return const_cast<CallInst *>(
          static_cast<const BasicBlock *>(this)->getTerminatingDeoptimizeCall());
   }
 
-  /// \brief Returns the call instruction marked 'musttail' prior to the
-  /// terminating return instruction of this basic block, if such a call is
-  /// present.  Otherwise, returns null.
+  /// Returns the call instruction marked 'musttail' prior to the terminating
+  /// return instruction of this basic block, if such a call is present.
+  /// Otherwise, returns null.
   const CallInst *getTerminatingMustTailCall() const;
   CallInst *getTerminatingMustTailCall() {
     return const_cast<CallInst *>(
            static_cast<const BasicBlock *>(this)->getTerminatingMustTailCall());
   }
 
-  /// \brief Returns a pointer to the first instruction in this block that is
-  /// not a PHINode instruction.
+  /// Returns a pointer to the first instruction in this block that is not a
+  /// PHINode instruction.
   ///
   /// When adding instructions to the beginning of the basic block, they should
   /// be added before the returned value, not before the first instruction,
@@ -156,23 +156,23 @@ public:
                        static_cast<const BasicBlock *>(this)->getFirstNonPHI());
   }
 
-  /// \brief Returns a pointer to the first instruction in this block that is not
-  /// a PHINode or a debug intrinsic.
+  /// Returns a pointer to the first instruction in this block that is not a
+  /// PHINode or a debug intrinsic.
   const Instruction* getFirstNonPHIOrDbg() const;
   Instruction* getFirstNonPHIOrDbg() {
     return const_cast<Instruction *>(
                   static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbg());
   }
 
-  /// \brief Returns a pointer to the first instruction in this block that is not
-  /// a PHINode, a debug intrinsic, or a lifetime intrinsic.
+  /// Returns a pointer to the first instruction in this block that is not a
+  /// PHINode, a debug intrinsic, or a lifetime intrinsic.
   const Instruction* getFirstNonPHIOrDbgOrLifetime() const;
   Instruction* getFirstNonPHIOrDbgOrLifetime() {
     return const_cast<Instruction *>(
         static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbgOrLifetime());
   }
 
-  /// \brief Returns an iterator to the first instruction in this block that is
+  /// Returns an iterator to the first instruction in this block that is
   /// suitable for inserting a non-PHI instruction.
   ///
   /// In particular, it skips all PHIs and LandingPad instructions.
@@ -182,23 +182,35 @@ public:
                                           ->getFirstInsertionPt().getNonConst();
   }
 
-  /// \brief Unlink 'this' from the containing function, but do not delete it.
+  /// Return a const iterator range over the instructions in the block, skipping
+  /// any debug instructions.
+  iterator_range<filter_iterator<BasicBlock::const_iterator,
+                                 std::function<bool(const Instruction &)>>>
+  instructionsWithoutDebug() const;
+
+  /// Return an iterator range over the instructions in the block, skipping any
+  /// debug instructions.
+  iterator_range<filter_iterator<BasicBlock::iterator,
+                                 std::function<bool(Instruction &)>>>
+  instructionsWithoutDebug();
+
+  /// Unlink 'this' from the containing function, but do not delete it.
   void removeFromParent();
 
-  /// \brief Unlink 'this' from the containing function and delete it.
+  /// Unlink 'this' from the containing function and delete it.
   ///
   // \returns an iterator pointing to the element after the erased one.
   SymbolTableList<BasicBlock>::iterator eraseFromParent();
 
-  /// \brief Unlink this basic block from its current function and insert it
-  /// into the function that \p MovePos lives in, right before \p MovePos.
+  /// Unlink this basic block from its current function and insert it into
+  /// the function that \p MovePos lives in, right before \p MovePos.
   void moveBefore(BasicBlock *MovePos);
 
-  /// \brief Unlink this basic block from its current function and insert it
+  /// Unlink this basic block from its current function and insert it
   /// right after \p MovePos in the function \p MovePos lives in.
   void moveAfter(BasicBlock *MovePos);
 
-  /// \brief Insert unlinked basic block into a function.
+  /// Insert unlinked basic block into a function.
   ///
   /// Inserts an unlinked basic block into \c Parent.  If \c InsertBefore is
   /// provided, inserts before that basic block, otherwise inserts at the end.
@@ -206,7 +218,7 @@ public:
   /// \pre \a getParent() is \c nullptr.
   void insertInto(Function *Parent, BasicBlock *InsertBefore = nullptr);
 
-  /// \brief Return the predecessor of this block if it has a single predecessor
+  /// Return the predecessor of this block if it has a single predecessor
   /// block. Otherwise return a null pointer.
   const BasicBlock *getSinglePredecessor() const;
   BasicBlock *getSinglePredecessor() {
@@ -214,7 +226,7 @@ public:
                  static_cast<const BasicBlock *>(this)->getSinglePredecessor());
   }
 
-  /// \brief Return the predecessor of this block if it has a unique predecessor
+  /// Return the predecessor of this block if it has a unique predecessor
   /// block. Otherwise return a null pointer.
   ///
   /// Note that unique predecessor doesn't mean single edge, there can be
@@ -226,7 +238,7 @@ public:
                  static_cast<const BasicBlock *>(this)->getUniquePredecessor());
   }
 
-  /// \brief Return the successor of this block if it has a single successor.
+  /// Return the successor of this block if it has a single successor.
   /// Otherwise return a null pointer.
   ///
   /// This method is analogous to getSinglePredecessor above.
@@ -236,7 +248,7 @@ public:
                    static_cast<const BasicBlock *>(this)->getSingleSuccessor());
   }
 
-  /// \brief Return the successor of this block if it has a unique successor.
+  /// Return the successor of this block if it has a unique successor.
   /// Otherwise return a null pointer.
   ///
   /// This method is analogous to getUniquePredecessor above.
@@ -310,28 +322,28 @@ public:
   }
   iterator_range<phi_iterator> phis();
 
-  /// \brief Return the underlying instruction list container.
+  /// Return the underlying instruction list container.
   ///
   /// Currently you need to access the underlying instruction list container
   /// directly if you want to modify it.
   const InstListType &getInstList() const { return InstList; }
         InstListType &getInstList()       { return InstList; }
 
-  /// \brief Returns a pointer to a member of the instruction list.
+  /// Returns a pointer to a member of the instruction list.
   static InstListType BasicBlock::*getSublistAccess(Instruction*) {
     return &BasicBlock::InstList;
   }
 
-  /// \brief Returns a pointer to the symbol table if one exists.
+  /// Returns a pointer to the symbol table if one exists.
   ValueSymbolTable *getValueSymbolTable();
 
-  /// \brief Methods for support type inquiry through isa, cast, and dyn_cast.
+  /// Methods for support type inquiry through isa, cast, and dyn_cast.
   static bool classof(const Value *V) {
     return V->getValueID() == Value::BasicBlockVal;
   }
 
-  /// \brief Cause all subinstructions to "let go" of all the references that
-  /// said subinstructions are maintaining.
+  /// Cause all subinstructions to "let go" of all the references that said
+  /// subinstructions are maintaining.
   ///
   /// This allows one to 'delete' a whole class at a time, even though there may
   /// be circular references... first all references are dropped, and all use
@@ -340,8 +352,8 @@ public:
   /// except operator delete.
   void dropAllReferences();
 
-  /// \brief Notify the BasicBlock that the predecessor \p Pred is no longer
-  /// able to reach it.
+  /// Notify the BasicBlock that the predecessor \p Pred is no longer able to
+  /// reach it.
   ///
   /// This is actually not used to update the Predecessor list, but is actually
   /// used to update the PHI nodes that reside in the block.  Note that this
@@ -350,8 +362,7 @@ public:
 
   bool canSplitPredecessors() const;
 
-  /// \brief Split the basic block into two basic blocks at the specified
-  /// instruction.
+  /// Split the basic block into two basic blocks at the specified instruction.
   ///
   /// Note that all instructions BEFORE the specified iterator stay as part of
   /// the original basic block, an unconditional branch is added to the original
@@ -371,37 +382,37 @@ public:
     return splitBasicBlock(I->getIterator(), BBName);
   }
 
-  /// \brief Returns true if there are any uses of this basic block other than
+  /// Returns true if there are any uses of this basic block other than
   /// direct branches, switches, etc. to it.
   bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; }
 
-  /// \brief Update all phi nodes in this basic block's successors to refer to
-  /// basic block \p New instead of to it.
+  /// Update all phi nodes in this basic block's successors to refer to basic
+  /// block \p New instead of to it.
   void replaceSuccessorsPhiUsesWith(BasicBlock *New);
 
-  /// \brief Return true if this basic block is an exception handling block.
+  /// Return true if this basic block is an exception handling block.
   bool isEHPad() const { return getFirstNonPHI()->isEHPad(); }
 
-  /// \brief Return true if this basic block is a landing pad.
+  /// Return true if this basic block is a landing pad.
   ///
   /// Being a ``landing pad'' means that the basic block is the destination of
   /// the 'unwind' edge of an invoke instruction.
   bool isLandingPad() const;
 
-  /// \brief Return the landingpad instruction associated with the landing pad.
+  /// Return the landingpad instruction associated with the landing pad.
   const LandingPadInst *getLandingPadInst() const;
   LandingPadInst *getLandingPadInst() {
     return const_cast<LandingPadInst *>(
                     static_cast<const BasicBlock *>(this)->getLandingPadInst());
   }
 
-  /// \brief Return true if it is legal to hoist instructions into this block.
+  /// Return true if it is legal to hoist instructions into this block.
   bool isLegalToHoistInto() const;
 
   Optional<uint64_t> getIrrLoopHeaderWeight() const;
 
 private:
-  /// \brief Increment the internal refcount of the number of BlockAddresses
+  /// Increment the internal refcount of the number of BlockAddresses
   /// referencing this BasicBlock by \p Amt.
   ///
   /// This is almost always 0, sometimes one possibly, but almost never 2, and
@@ -412,8 +423,8 @@ private:
            "Refcount wrap-around");
   }
 
-  /// \brief Shadow Value::setValueSubclassData with a private forwarding method
-  /// so that any future subclasses cannot accidentally use it.
+  /// Shadow Value::setValueSubclassData with a private forwarding method so
+  /// that any future subclasses cannot accidentally use it.
   void setValueSubclassData(unsigned short D) {
     Value::setValueSubclassData(D);
   }
@@ -422,6 +433,10 @@ private:
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef)
 
+/// Advance \p It while it points to a debug instruction and return the result.
+/// This assumes that \p It is not at the end of a block.
+BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It);
+
 } // end namespace llvm
 
 #endif // LLVM_IR_BASICBLOCK_H
diff --git a/include/llvm/IR/CFG.h b/include/llvm/IR/CFG.h
index e259e42e1ce4..f4988e7f1fec 100644
--- a/include/llvm/IR/CFG.h
+++ b/include/llvm/IR/CFG.h
@@ -107,6 +107,9 @@ inline const_pred_iterator pred_end(const BasicBlock *BB) {
 inline bool pred_empty(const BasicBlock *BB) {
   return pred_begin(BB) == pred_end(BB);
 }
+inline unsigned pred_size(const BasicBlock *BB) {
+  return std::distance(pred_begin(BB), pred_end(BB));
+}
 inline pred_range predecessors(BasicBlock *BB) {
   return pred_range(pred_begin(BB), pred_end(BB));
 }
@@ -140,6 +143,9 @@ inline succ_const_iterator succ_end(const BasicBlock *BB) {
 inline bool succ_empty(const BasicBlock *BB) {
   return succ_begin(BB) == succ_end(BB);
 }
+inline unsigned succ_size(const BasicBlock *BB) {
+  return std::distance(succ_begin(BB), succ_end(BB));
+}
 inline succ_range successors(BasicBlock *BB) {
   return succ_range(succ_begin(BB), succ_end(BB));
 }
diff --git a/include/llvm/IR/CMakeLists.txt b/include/llvm/IR/CMakeLists.txt
index cf75d5800b74..830f3750c185 100644
--- a/include/llvm/IR/CMakeLists.txt
+++ b/include/llvm/IR/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(LLVM_TARGET_DEFINITIONS Attributes.td)
-tablegen(LLVM Attributes.gen -gen-attrs)
+tablegen(LLVM Attributes.inc -gen-attrs)
 
 set(LLVM_TARGET_DEFINITIONS Intrinsics.td)
-tablegen(LLVM Intrinsics.gen -gen-intrinsic)
+tablegen(LLVM IntrinsicEnums.inc -gen-intrinsic-enums)
+tablegen(LLVM IntrinsicImpl.inc -gen-intrinsic-impl)
 add_public_tablegen_target(intrinsics_gen)
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h
index 5b10da8f2aee..2162ccb982b0 100644
--- a/include/llvm/IR/CallSite.h
+++ b/include/llvm/IR/CallSite.h
@@ -637,7 +637,8 @@ public:
     if (hasRetAttr(Attribute::NonNull))
       return true;
     else if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 &&
-             getType()->getPointerAddressSpace() == 0)
+             !NullPointerIsDefined(getCaller(),
+                                   getType()->getPointerAddressSpace()))
       return true;
 
     return false;
diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h
index 84fe836adc35..b9c02d7ed424 100644
--- a/include/llvm/IR/CallingConv.h
+++ b/include/llvm/IR/CallingConv.h
@@ -26,7 +26,7 @@ namespace CallingConv {
 
   /// A set of enums which specify the assigned numeric values for known llvm
   /// calling conventions.
-  /// @brief LLVM Calling Convention Representation
+  /// LLVM Calling Convention Representation
   enum {
     /// C - The default llvm calling convention, compatible with C.  This
     /// convention is the only calling convention that supports varargs calls.
@@ -139,11 +139,11 @@ namespace CallingConv {
     /// Intel_OCL_BI - Calling conventions for Intel OpenCL built-ins
     Intel_OCL_BI = 77,
 
-    /// \brief The C convention as specified in the x86-64 supplement to the
+    /// The C convention as specified in the x86-64 supplement to the
     /// System V ABI, used on most non-Windows systems.
     X86_64_SysV = 78,
 
-    /// \brief The C convention as implemented on Windows/x86-64 and
+    /// The C convention as implemented on Windows/x86-64 and
     /// AArch64. This convention differs from the more common
     /// \c X86_64_SysV convention in a number of ways, most notably in
     /// that XMM registers used to pass arguments are shadowed by GPRs,
@@ -153,17 +153,17 @@ namespace CallingConv {
     /// registers to variadic functions.
     Win64 = 79,
 
-    /// \brief MSVC calling convention that passes vectors and vector aggregates
+    /// MSVC calling convention that passes vectors and vector aggregates
     /// in SSE registers.
     X86_VectorCall = 80,
 
-    /// \brief Calling convention used by HipHop Virtual Machine (HHVM) to
+    /// Calling convention used by HipHop Virtual Machine (HHVM) to
     /// perform calls to and from translation cache, and for calling PHP
     /// functions.
     /// HHVM calling convention supports tail/sibling call elimination.
     HHVM = 81,
 
-    /// \brief HHVM calling convention for invoking C/C++ helpers.
+    /// HHVM calling convention for invoking C/C++ helpers.
     HHVM_C = 82,
 
     /// X86_INTR - x86 hardware interrupt context. Callee may take one or two
diff --git a/include/llvm/IR/Comdat.h b/include/llvm/IR/Comdat.h
index fa87093ca50a..555121e928f7 100644
--- a/include/llvm/IR/Comdat.h
+++ b/include/llvm/IR/Comdat.h
@@ -16,6 +16,9 @@
 #ifndef LLVM_IR_COMDAT_H
 #define LLVM_IR_COMDAT_H
 
+#include "llvm-c/Types.h"
+#include "llvm/Support/CBindingWrapping.h"
+
 namespace llvm {
 
 class raw_ostream;
@@ -55,6 +58,9 @@ private:
   SelectionKind SK = Any;
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Comdat, LLVMComdatRef)
+
 inline raw_ostream &operator<<(raw_ostream &OS, const Comdat &C) {
   C.print(OS);
   return OS;
diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h
index 0c94b58a3112..5fdf0ea00f00 100644
--- a/include/llvm/IR/Constant.h
+++ b/include/llvm/IR/Constant.h
@@ -38,7 +38,7 @@ class APInt;
 /// structurally equivalent constants will always have the same address.
 /// Constants are created on demand as needed and never deleted: thus clients
 /// don't have to worry about the lifetime of the objects.
-/// @brief LLVM Constant Representation
+/// LLVM Constant Representation
 class Constant : public User {
 protected:
   Constant(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps)
@@ -71,6 +71,26 @@ public:
   /// Return true if the value is the smallest signed value.
   bool isMinSignedValue() const;
 
+  /// Return true if this is a finite and non-zero floating-point scalar
+  /// constant or a vector constant with all finite and non-zero elements.
+  bool isFiniteNonZeroFP() const;
+
+  /// Return true if this is a normal (as opposed to denormal) floating-point
+  /// scalar constant or a vector constant with all normal elements.
+  bool isNormalFP() const;
+
+  /// Return true if this scalar has an exact multiplicative inverse or this
+  /// vector has an exact multiplicative inverse for each element in the vector.
+  bool hasExactInverseFP() const;
+
+  /// Return true if this is a floating-point NaN constant or a vector
+  /// floating-point constant with all NaN elements.
+  bool isNaN() const;
+
+  /// Return true if this is a vector constant that includes any undefined
+  /// elements.
+  bool containsUndefElement() const;
+
   /// Return true if evaluation of this constant could trap. This is true for
   /// things like constant expressions that could divide by zero.
   bool canTrap() const;
@@ -137,7 +157,7 @@ public:
 
   /// @returns the value for an integer or vector of integer constant of the
   /// given type that has all its bits set to true.
-  /// @brief Get the all ones value
+  /// Get the all ones value
   static Constant *getAllOnesValue(Type* Ty);
 
   /// Return the value for an integer or pointer constant, or a vector thereof,
diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h
index 6889e2658244..1adda3269abc 100644
--- a/include/llvm/IR/ConstantRange.h
+++ b/include/llvm/IR/ConstantRange.h
@@ -54,7 +54,7 @@ public:
   /// Initialize a range to hold the single specified value.
   ConstantRange(APInt Value);
 
-  /// @brief Initialize a range of values explicitly. This will assert out if
+  /// Initialize a range of values explicitly. This will assert out if
   /// Lower==Upper and Lower != Min or Max value for its type. It will also
   /// assert out if the two APInt's are not the same bit width.
   ConstantRange(APInt Lower, APInt Upper);
diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h
index 0094fd54992a..f9d5ebc560c7 100644
--- a/include/llvm/IR/Constants.h
+++ b/include/llvm/IR/Constants.h
@@ -80,7 +80,7 @@ public:
 //===----------------------------------------------------------------------===//
 /// This is the shared class of boolean and integer constants. This class
 /// represents both boolean and integral constants.
-/// @brief Class for constant integers.
+/// Class for constant integers.
 class ConstantInt final : public ConstantData {
   friend class Constant;
 
@@ -107,7 +107,7 @@ public:
   /// to fit the type, unless isSigned is true, in which case the value will
   /// be interpreted as a 64-bit signed integer and sign-extended to fit
   /// the type.
-  /// @brief Get a ConstantInt for a specific value.
+  /// Get a ConstantInt for a specific value.
   static ConstantInt *get(IntegerType *Ty, uint64_t V,
                           bool isSigned = false);
 
@@ -115,7 +115,7 @@ public:
   /// value V will be canonicalized to a an unsigned APInt. Accessing it with
   /// either getSExtValue() or getZExtValue() will yield a correctly sized and
   /// signed value for the type Ty.
-  /// @brief Get a ConstantInt for a specific signed value.
+  /// Get a ConstantInt for a specific signed value.
   static ConstantInt *getSigned(IntegerType *Ty, int64_t V);
   static Constant *getSigned(Type *Ty, int64_t V);
 
@@ -134,7 +134,7 @@ public:
 
   /// Return the constant as an APInt value reference. This allows clients to
   /// obtain a full-precision copy of the value.
-  /// @brief Return the constant's value.
+  /// Return the constant's value.
   inline const APInt &getValue() const {
     return Val;
   }
@@ -145,7 +145,7 @@ public:
   /// Return the constant as a 64-bit unsigned integer value after it
   /// has been zero extended as appropriate for the type of this constant. Note
   /// that this method can assert if the value does not fit in 64 bits.
-  /// @brief Return the zero extended value.
+  /// Return the zero extended value.
   inline uint64_t getZExtValue() const {
     return Val.getZExtValue();
   }
@@ -153,7 +153,7 @@ public:
   /// Return the constant as a 64-bit integer value after it has been sign
   /// extended as appropriate for the type of this constant. Note that
   /// this method can assert if the value does not fit in 64 bits.
-  /// @brief Return the sign extended value.
+  /// Return the sign extended value.
   inline int64_t getSExtValue() const {
     return Val.getSExtValue();
   }
@@ -161,7 +161,7 @@ public:
   /// A helper method that can be used to determine if the constant contained
   /// within is equal to a constant.  This only works for very small values,
   /// because this is all that can be represented with all types.
-  /// @brief Determine if this constant's value is same as an unsigned char.
+  /// Determine if this constant's value is same as an unsigned char.
   bool equalsInt(uint64_t V) const {
     return Val == V;
   }
@@ -181,7 +181,7 @@ public:
   /// the signed version avoids callers having to convert a signed quantity
   /// to the appropriate unsigned type before calling the method.
   /// @returns true if V is a valid value for type Ty
-  /// @brief Determine if the value is in range for the given type.
+  /// Determine if the value is in range for the given type.
   static bool isValueValidForType(Type *Ty, uint64_t V);
   static bool isValueValidForType(Type *Ty, int64_t V);
 
@@ -197,7 +197,7 @@ public:
   /// This is just a convenience method to make client code smaller for a
   /// common case. It also correctly performs the comparison without the
   /// potential for an assertion from getZExtValue().
-  /// @brief Determine if the value is one.
+  /// Determine if the value is one.
   bool isOne() const {
     return Val.isOneValue();
   }
@@ -205,7 +205,7 @@ public:
   /// This function will return true iff every bit in this constant is set
   /// to true.
   /// @returns true iff this constant's bits are all set to true.
-  /// @brief Determine if the value is all ones.
+  /// Determine if the value is all ones.
   bool isMinusOne() const {
     return Val.isAllOnesValue();
   }
@@ -214,7 +214,7 @@ public:
   /// value that may be represented by the constant's type.
   /// @returns true iff this is the largest value that may be represented
   /// by this type.
-  /// @brief Determine if the value is maximal.
+  /// Determine if the value is maximal.
   bool isMaxValue(bool isSigned) const {
     if (isSigned)
       return Val.isMaxSignedValue();
@@ -226,7 +226,7 @@ public:
   /// value that may be represented by this constant's type.
   /// @returns true if this is the smallest value that may be represented by
   /// this type.
-  /// @brief Determine if the value is minimal.
+  /// Determine if the value is minimal.
   bool isMinValue(bool isSigned) const {
     if (isSigned)
       return Val.isMinSignedValue();
@@ -238,7 +238,7 @@ public:
   /// active bits bigger than 64 bits or a value greater than the given uint64_t
   /// value.
   /// @returns true iff this constant is greater or equal to the given number.
-  /// @brief Determine if the value is greater or equal to the given number.
+  /// Determine if the value is greater or equal to the given number.
   bool uge(uint64_t Num) const {
     return Val.uge(Num);
   }
@@ -247,12 +247,12 @@ public:
   /// return it, otherwise return the limit value.  This causes the value
   /// to saturate to the limit.
   /// @returns the min of the value of the constant and the specified value
-  /// @brief Get the constant's value with a saturation limit
+  /// Get the constant's value with a saturation limit
   uint64_t getLimitedValue(uint64_t Limit = ~0ULL) const {
     return Val.getLimitedValue(Limit);
   }
 
-  /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
+  /// Methods to support type inquiry through isa, cast, and dyn_cast.
   static bool classof(const Value *V) {
     return V->getValueID() == ConstantIntVal;
   }
@@ -283,6 +283,11 @@ public:
   /// for simple constant values like 2.0/1.0 etc, that are known-valid both as
   /// host double and as the target format.
   static Constant *get(Type* Ty, double V);
+
+  /// If Ty is a vector type, return a Constant with a splat of the given
+  /// value. Otherwise return a ConstantFP for the given value.
+  static Constant *get(Type *Ty, const APFloat &V);
+
   static Constant *get(Type* Ty, StringRef Str);
   static ConstantFP *get(LLVMContext &Context, const APFloat &V);
   static Constant *getNaN(Type *Ty, bool Negative = false, unsigned type = 0);
@@ -687,15 +692,33 @@ class ConstantDataArray final : public ConstantDataSequential {
 public:
   ConstantDataArray(const ConstantDataArray &) = delete;
 
-  /// get() constructors - Return a constant with array type with an element
+  /// get() constructor - Return a constant with array type with an element
   /// count and element type matching the ArrayRef passed in.  Note that this
   /// can return a ConstantAggregateZero object.
-  static Constant *get(LLVMContext &Context, ArrayRef<uint8_t> Elts);
-  static Constant *get(LLVMContext &Context, ArrayRef<uint16_t> Elts);
-  static Constant *get(LLVMContext &Context, ArrayRef<uint32_t> Elts);
-  static Constant *get(LLVMContext &Context, ArrayRef<uint64_t> Elts);
-  static Constant *get(LLVMContext &Context, ArrayRef<float> Elts);
-  static Constant *get(LLVMContext &Context, ArrayRef<double> Elts);
+  template <typename ElementTy>
+  static Constant *get(LLVMContext &Context, ArrayRef<ElementTy> Elts) {
+    const char *Data = reinterpret_cast<const char *>(Elts.data());
+    return getRaw(StringRef(Data, Elts.size() * sizeof(ElementTy)), Elts.size(),
+                  Type::getScalarTy<ElementTy>(Context));
+  }
+
+  /// get() constructor - ArrayTy needs to be compatible with
+  /// ArrayRef<ElementTy>. Calls get(LLVMContext, ArrayRef<ElementTy>).
+  template <typename ArrayTy>
+  static Constant *get(LLVMContext &Context, ArrayTy &Elts) {
+    return ConstantDataArray::get(Context, makeArrayRef(Elts));
+  }
+
+  /// get() constructor - Return a constant with array type with an element
+  /// count and element type matching the NumElements and ElementTy parameters
+  /// passed in. Note that this can return a ConstantAggregateZero object.
+  /// ElementTy needs to be one of i8/i16/i32/i64/float/double. Data is the
+  /// buffer containing the elements. Be careful to make sure Data uses the
+  /// right endianness, the buffer will be used as-is.
+  static Constant *getRaw(StringRef Data, uint64_t NumElements, Type *ElementTy) {
+    Type *Ty = ArrayType::get(ElementTy, NumElements);
+    return getImpl(Data, Ty);
+  }
 
   /// getFP() constructors - Return a constant with array type with an element
   /// count and element type of float with precision matching the number of
@@ -802,7 +825,7 @@ public:
   /// Return the ConstantTokenNone.
   static ConstantTokenNone *get(LLVMContext &Context);
 
-  /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
+  /// Methods to support type inquiry through isa, cast, and dyn_cast.
   static bool classof(const Value *V) {
     return V->getValueID() == ConstantTokenNoneVal;
   }
@@ -995,10 +1018,15 @@ public:
     return getLShr(C1, C2, true);
   }
 
-  /// Return the identity for the given binary operation,
-  /// i.e. a constant C such that X op C = X and C op X = X for every X.  It
-  /// returns null if the operator doesn't have an identity.
-  static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty);
+  /// Return the identity constant for a binary opcode.
+  /// The identity constant C is defined as X op C = X and C op X = X for every
+  /// X when the binary operation is commutative. If the binop is not
+  /// commutative, callers can acquire the operand 1 identity constant by
+  /// setting AllowRHSConstant to true. For example, any shift has a zero
+  /// identity constant for operand 1: X shift 0 = X.
+  /// Return nullptr if the operator does not have an identity constant.
+  static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty,
+                                    bool AllowRHSConstant = false);
 
   /// Return the absorbing element for the given binary
   /// operation, i.e. a constant C such that X op C = C and C op X = C for
@@ -1009,7 +1037,7 @@ public:
   /// Transparently provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
 
-  /// \brief Convenience function for getting a Cast operation.
+  /// Convenience function for getting a Cast operation.
   ///
   /// \param ops The opcode for the conversion
   /// \param C  The constant to be converted
@@ -1018,62 +1046,62 @@ public:
   static Constant *getCast(unsigned ops, Constant *C, Type *Ty,
                            bool OnlyIfReduced = false);
 
-  // @brief Create a ZExt or BitCast cast constant expression
+  // Create a ZExt or BitCast cast constant expression
   static Constant *getZExtOrBitCast(
     Constant *C,   ///< The constant to zext or bitcast
     Type *Ty ///< The type to zext or bitcast C to
   );
 
-  // @brief Create a SExt or BitCast cast constant expression
+  // Create a SExt or BitCast cast constant expression
   static Constant *getSExtOrBitCast(
     Constant *C,   ///< The constant to sext or bitcast
     Type *Ty ///< The type to sext or bitcast C to
   );
 
-  // @brief Create a Trunc or BitCast cast constant expression
+  // Create a Trunc or BitCast cast constant expression
   static Constant *getTruncOrBitCast(
     Constant *C,   ///< The constant to trunc or bitcast
     Type *Ty ///< The type to trunc or bitcast C to
   );
 
-  /// @brief Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant
+  /// Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant
   /// expression.
   static Constant *getPointerCast(
     Constant *C,   ///< The pointer value to be casted (operand 0)
     Type *Ty ///< The type to which cast should be made
   );
 
-  /// @brief Create a BitCast or AddrSpaceCast for a pointer type depending on
+  /// Create a BitCast or AddrSpaceCast for a pointer type depending on
   /// the address space.
   static Constant *getPointerBitCastOrAddrSpaceCast(
     Constant *C,   ///< The constant to addrspacecast or bitcast
     Type *Ty ///< The type to bitcast or addrspacecast C to
   );
 
-  /// @brief Create a ZExt, Bitcast or Trunc for integer -> integer casts
+  /// Create a ZExt, Bitcast or Trunc for integer -> integer casts
   static Constant *getIntegerCast(
     Constant *C,    ///< The integer constant to be casted
     Type *Ty, ///< The integer type to cast to
     bool isSigned   ///< Whether C should be treated as signed or not
   );
 
-  /// @brief Create a FPExt, Bitcast or FPTrunc for fp -> fp casts
+  /// Create a FPExt, Bitcast or FPTrunc for fp -> fp casts
   static Constant *getFPCast(
     Constant *C,    ///< The integer constant to be casted
     Type *Ty ///< The integer type to cast to
   );
 
-  /// @brief Return true if this is a convert constant expression
+  /// Return true if this is a convert constant expression
   bool isCast() const;
 
-  /// @brief Return true if this is a compare constant expression
+  /// Return true if this is a compare constant expression
   bool isCompare() const;
 
-  /// @brief Return true if this is an insertvalue or extractvalue expression,
+  /// Return true if this is an insertvalue or extractvalue expression,
   /// and the getIndices() method may be used.
   bool hasIndices() const;
 
-  /// @brief Return true if this is a getelementptr expression and all
+  /// Return true if this is a getelementptr expression and all
   /// the index operands are compile-time known integers within the
   /// corresponding notional static array extents. Note that this is
   /// not equivalant to, a subset of, or a superset of the "inbounds"
@@ -1093,7 +1121,7 @@ public:
   static Constant *get(unsigned Opcode, Constant *C1, Constant *C2,
                        unsigned Flags = 0, Type *OnlyIfReducedTy = nullptr);
 
-  /// \brief Return an ICmp or FCmp comparison operator constant expression.
+  /// Return an ICmp or FCmp comparison operator constant expression.
   ///
   /// \param OnlyIfReduced see \a getWithOperands() docs.
   static Constant *getCompare(unsigned short pred, Constant *C1, Constant *C2,
diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h
index 3c2074dfe788..06c9421ec1d6 100644
--- a/include/llvm/IR/DIBuilder.h
+++ b/include/llvm/IR/DIBuilder.h
@@ -18,7 +18,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -46,6 +46,7 @@ namespace llvm {
     DICompileUnit *CUNode;   ///< The one compile unit created by this DIBuiler.
     Function *DeclareFn;     ///< llvm.dbg.declare
     Function *ValueFn;       ///< llvm.dbg.value
+    Function *LabelFn;       ///< llvm.dbg.label
 
     SmallVector<Metadata *, 4> AllEnumTypes;
     /// Track the RetainTypes, since they can be updated later on.
@@ -69,6 +70,9 @@ namespace llvm {
     /// copy.
     DenseMap<MDNode *, SmallVector<TrackingMDNodeRef, 1>> PreservedVariables;
 
+    /// Each subprogram's preserved labels.
+    DenseMap<MDNode *, SmallVector<TrackingMDNodeRef, 1>> PreservedLabels;
+
     /// Create a temporary.
     ///
     /// Create an \a temporary node and track it in \a UnresolvedNodes.
@@ -79,6 +83,10 @@ namespace llvm {
                                DIExpression *Expr, const DILocation *DL,
                                BasicBlock *InsertBB, Instruction *InsertBefore);
 
+    /// Internal helper for insertLabel.
+    Instruction *insertLabel(DILabel *LabelInfo, const DILocation *DL,
+                             BasicBlock *InsertBB, Instruction *InsertBefore);
+
     /// Internal helper for insertDbgValueIntrinsic.
     Instruction *
     insertDbgValueIntrinsic(llvm::Value *Val, DILocalVariable *VarInfo,
@@ -90,7 +98,10 @@ namespace llvm {
     ///
     /// If \c AllowUnresolved, collect unresolved nodes attached to the module
     /// in order to resolve cycles during \a finalize().
-    explicit DIBuilder(Module &M, bool AllowUnresolved = true);
+    ///
+    /// If \p CU is given a value other than nullptr, then set \p CUNode to CU.
+    explicit DIBuilder(Module &M, bool AllowUnresolved = true,
+                       DICompileUnit *CU = nullptr);
     DIBuilder(const DIBuilder &) = delete;
     DIBuilder &operator=(const DIBuilder &) = delete;
 
@@ -138,11 +149,13 @@ namespace llvm {
     /// Create a file descriptor to hold debugging information for a file.
     /// \param Filename  File name.
     /// \param Directory Directory.
-    /// \param CSKind    Checksum kind (e.g. CSK_None, CSK_MD5, CSK_SHA1, etc.).
-    /// \param Checksum  Checksum data.
-    DIFile *createFile(StringRef Filename, StringRef Directory,
-                       DIFile::ChecksumKind CSKind = DIFile::CSK_None,
-                       StringRef Checksum = StringRef());
+    /// \param Checksum  Optional checksum kind (e.g. CSK_MD5, CSK_SHA1, etc.)
+    ///                  and value.
+    /// \param Source    Optional source text.
+    DIFile *
+    createFile(StringRef Filename, StringRef Directory,
+               Optional<DIFile::ChecksumInfo<StringRef>> Checksum = None,
+               Optional<StringRef> Source = None);
 
     /// Create debugging information entry for a macro.
     /// \param Parent     Macro parent (could be nullptr).
@@ -163,7 +176,7 @@ namespace llvm {
                                      DIFile *File);
 
     /// Create a single enumerator value.
-    DIEnumerator *createEnumerator(StringRef Name, int64_t Val);
+    DIEnumerator *createEnumerator(StringRef Name, int64_t Val, bool IsUnsigned = false);
 
     /// Create a DWARF unspecified type.
     DIBasicType *createUnspecifiedType(StringRef Name);
@@ -232,10 +245,11 @@ namespace llvm {
     /// \param Ty           Original type.
     /// \param BaseTy       Base type. Ty is inherits from base.
     /// \param BaseOffset   Base offset.
+    /// \param VBPtrOffset  Virtual base pointer offset.
     /// \param Flags        Flags to describe inheritance attribute,
     ///                     e.g. private
     DIDerivedType *createInheritance(DIType *Ty, DIType *BaseTy,
-                                     uint64_t BaseOffset,
+                                     uint64_t BaseOffset, uint32_t VBPtrOffset,
                                      DINode::DIFlags Flags);
 
     /// Create debugging information entry for a member.
@@ -255,6 +269,27 @@ namespace llvm {
                                     uint64_t OffsetInBits,
                                     DINode::DIFlags Flags, DIType *Ty);
 
+    /// Create debugging information entry for a variant.  A variant
+    /// normally should be a member of a variant part.
+    /// \param Scope        Member scope.
+    /// \param Name         Member name.
+    /// \param File         File where this member is defined.
+    /// \param LineNo       Line number.
+    /// \param SizeInBits   Member size.
+    /// \param AlignInBits  Member alignment.
+    /// \param OffsetInBits Member offset.
+    /// \param Flags        Flags to encode member attribute, e.g. private
+    /// \param Discriminant The discriminant for this branch; null for
+    ///                     the default branch
+    /// \param Ty           Parent type.
+    DIDerivedType *createVariantMemberType(DIScope *Scope, StringRef Name,
+					   DIFile *File, unsigned LineNo,
+					   uint64_t SizeInBits,
+					   uint32_t AlignInBits,
+					   uint64_t OffsetInBits,
+					   Constant *Discriminant,
+					   DINode::DIFlags Flags, DIType *Ty);
+
     /// Create debugging information entry for a bit field member.
     /// \param Scope               Member scope.
     /// \param Name                Member name.
@@ -376,6 +411,27 @@ namespace llvm {
                                      unsigned RunTimeLang = 0,
                                      StringRef UniqueIdentifier = "");
 
+    /// Create debugging information entry for a variant part.  A
+    /// variant part normally has a discriminator (though this is not
+    /// required) and a number of variant children.
+    /// \param Scope        Scope in which this union is defined.
+    /// \param Name         Union name.
+    /// \param File         File where this member is defined.
+    /// \param LineNumber   Line number.
+    /// \param SizeInBits   Member size.
+    /// \param AlignInBits  Member alignment.
+    /// \param Flags        Flags to encode member attribute, e.g. private
+    /// \param Discriminator Discriminant member
+    /// \param Elements     Variant elements.
+    /// \param UniqueIdentifier A unique identifier for the union.
+    DICompositeType *createVariantPart(DIScope *Scope, StringRef Name,
+				       DIFile *File, unsigned LineNumber,
+				       uint64_t SizeInBits, uint32_t AlignInBits,
+				       DINode::DIFlags Flags,
+				       DIDerivedType *Discriminator,
+				       DINodeArray Elements,
+				       StringRef UniqueIdentifier = "");
+
     /// Create debugging information for template
     /// type parameter.
     /// \param Scope        Scope in which this type is defined.
@@ -442,10 +498,11 @@ namespace llvm {
     /// \param Elements       Enumeration elements.
     /// \param UnderlyingType Underlying type of a C++11/ObjC fixed enum.
     /// \param UniqueIdentifier A unique identifier for the enum.
+    /// \param IsFixed Boolean flag indicate if this is C++11/ObjC fixed enum.
     DICompositeType *createEnumerationType(
         DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
         uint64_t SizeInBits, uint32_t AlignInBits, DINodeArray Elements,
-        DIType *UnderlyingType, StringRef UniqueIdentifier = "");
+        DIType *UnderlyingType, StringRef UniqueIdentifier = "", bool IsFixed = false);
 
     /// Create subroutine type.
     /// \param ParameterTypes  An array of subroutine parameter types. This
@@ -458,12 +515,15 @@ namespace llvm {
                          DINode::DIFlags Flags = DINode::FlagZero,
                          unsigned CC = 0);
 
-    /// Create a new DIType* with "artificial" flag set.
-    DIType *createArtificialType(DIType *Ty);
+    /// Create a distinct clone of \p SP with FlagArtificial set.
+    static DISubprogram *createArtificialSubprogram(DISubprogram *SP);
+
+    /// Create a uniqued clone of \p Ty with FlagArtificial set.
+    static DIType *createArtificialType(DIType *Ty);
 
-    /// Create a new DIType* with the "object pointer"
-    /// flag set.
-    DIType *createObjectPointerType(DIType *Ty);
+    /// Create a uniqued clone of \p Ty with FlagObjectPointer and
+    /// FlagArtificial set.
+    static DIType *createObjectPointerType(DIType *Ty);
 
     /// Create a permanent forward-declared type.
     DICompositeType *createForwardDecl(unsigned Tag, StringRef Name,
@@ -500,6 +560,7 @@ namespace llvm {
     /// Create a descriptor for a value range.  This
     /// implicitly uniques the values returned.
     DISubrange *getOrCreateSubrange(int64_t Lo, int64_t Count);
+    DISubrange *getOrCreateSubrange(int64_t Lo, Metadata *CountNode);
 
     /// Create a new descriptor for the specified variable.
     /// \param Context     Variable scope.
@@ -542,6 +603,14 @@ namespace llvm {
                        DINode::DIFlags Flags = DINode::FlagZero,
                        uint32_t AlignInBits = 0);
 
+    /// Create a new descriptor for an label.
+    ///
+    /// \c Scope must be a \a DILocalScope, and thus its scope chain eventually
+    /// leads to a \a DISubprogram.
+    DILabel *
+    createLabel(DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo,
+                bool AlwaysPreserve = false);
+
     /// Create a new descriptor for a parameter variable.
     ///
     /// \c Scope must be a \a DILocalScope, and thus its scope chain eventually
@@ -733,6 +802,20 @@ namespace llvm {
                                DIExpression *Expr, const DILocation *DL,
                                Instruction *InsertBefore);
 
+    /// Insert a new llvm.dbg.label intrinsic call.
+    /// \param LabelInfo    Label's debug info descriptor.
+    /// \param DL           Debug info location.
+    /// \param InsertBefore Location for the new intrinsic.
+    Instruction *insertLabel(DILabel *LabelInfo, const DILocation *DL,
+                             Instruction *InsertBefore);
+
+    /// Insert a new llvm.dbg.label intrinsic call.
+    /// \param LabelInfo    Label's debug info descriptor.
+    /// \param DL           Debug info location.
+    /// \param InsertAtEnd Location for the new intrinsic.
+    Instruction *insertLabel(DILabel *LabelInfo, const DILocation *DL,
+                             BasicBlock *InsertAtEnd);
+
     /// Insert a new llvm.dbg.value intrinsic call.
     /// \param Val          llvm::Value of the variable
     /// \param VarInfo      Variable's debug info descriptor.
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h
index a6c71a5a2c3e..d796a65e6129 100644
--- a/include/llvm/IR/DataLayout.h
+++ b/include/llvm/IR/DataLayout.h
@@ -61,7 +61,7 @@ enum AlignTypeEnum {
 // sunk down to an FTTI element that is queried rather than a global
 // preference.
 
-/// \brief Layout alignment element.
+/// Layout alignment element.
 ///
 /// Stores the alignment data associated with a given alignment type (integer,
 /// vector, float) and type bit width.
@@ -69,7 +69,7 @@ enum AlignTypeEnum {
 /// \note The unusual order of elements in the structure attempts to reduce
 /// padding and make the structure slightly more cache friendly.
 struct LayoutAlignElem {
-  /// \brief Alignment type from \c AlignTypeEnum
+  /// Alignment type from \c AlignTypeEnum
   unsigned AlignType : 8;
   unsigned TypeBitWidth : 24;
   unsigned ABIAlign : 16;
@@ -81,7 +81,7 @@ struct LayoutAlignElem {
   bool operator==(const LayoutAlignElem &rhs) const;
 };
 
-/// \brief Layout pointer alignment element.
+/// Layout pointer alignment element.
 ///
 /// Stores the alignment data associated with a given pointer and address space.
 ///
@@ -92,15 +92,17 @@ struct PointerAlignElem {
   unsigned PrefAlign;
   uint32_t TypeByteWidth;
   uint32_t AddressSpace;
+  uint32_t IndexWidth;
 
   /// Initializer
   static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign,
-                              unsigned PrefAlign, uint32_t TypeByteWidth);
+                              unsigned PrefAlign, uint32_t TypeByteWidth,
+                              uint32_t IndexWidth);
 
   bool operator==(const PointerAlignElem &rhs) const;
 };
 
-/// \brief A parsed version of the target data layout string in and methods for
+/// A parsed version of the target data layout string in and methods for
 /// querying it.
 ///
 /// The target data layout string is specified *by the target* - a frontend
@@ -113,6 +115,7 @@ private:
 
   unsigned AllocaAddrSpace;
   unsigned StackNaturalAlign;
+  unsigned ProgramAddrSpace;
 
   enum ManglingModeT {
     MM_None,
@@ -126,7 +129,7 @@ private:
 
   SmallVector<unsigned char, 8> LegalIntWidths;
 
-  /// \brief Primitive type alignment data. This is sorted by type and bit
+  /// Primitive type alignment data. This is sorted by type and bit
   /// width during construction.
   using AlignmentsTy = SmallVector<LayoutAlignElem, 16>;
   AlignmentsTy Alignments;
@@ -140,7 +143,7 @@ private:
   AlignmentsTy::iterator
   findAlignmentLowerBound(AlignTypeEnum AlignType, uint32_t BitWidth);
 
-  /// \brief The string representation used to create this DataLayout
+  /// The string representation used to create this DataLayout
   std::string StringRepresentation;
 
   using PointersTy = SmallVector<PointerAlignElem, 8>;
@@ -165,7 +168,8 @@ private:
   unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
                             bool ABIAlign, Type *Ty) const;
   void setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
-                           unsigned PrefAlign, uint32_t TypeByteWidth);
+                           unsigned PrefAlign, uint32_t TypeByteWidth,
+                           uint32_t IndexWidth);
 
   /// Internal helper method that returns requested alignment for type.
   unsigned getAlignment(Type *Ty, bool abi_or_pref) const;
@@ -196,6 +200,7 @@ public:
     BigEndian = DL.isBigEndian();
     AllocaAddrSpace = DL.AllocaAddrSpace;
     StackNaturalAlign = DL.StackNaturalAlign;
+    ProgramAddrSpace = DL.ProgramAddrSpace;
     ManglingMode = DL.ManglingMode;
     LegalIntWidths = DL.LegalIntWidths;
     Alignments = DL.Alignments;
@@ -216,7 +221,7 @@ public:
   bool isLittleEndian() const { return !BigEndian; }
   bool isBigEndian() const { return BigEndian; }
 
-  /// \brief Returns the string representation of the DataLayout.
+  /// Returns the string representation of the DataLayout.
   ///
   /// This representation is in the same format accepted by the string
   /// constructor above. This should not be used to compare two DataLayout as
@@ -225,10 +230,10 @@ public:
     return StringRepresentation;
   }
 
-  /// \brief Test if the DataLayout was constructed from an empty string.
+  /// Test if the DataLayout was constructed from an empty string.
   bool isDefault() const { return StringRepresentation.empty(); }
 
-  /// \brief Returns true if the specified type is known to be a native integer
+  /// Returns true if the specified type is known to be a native integer
   /// type supported by the CPU.
   ///
   /// For example, i64 is not native on most 32-bit CPUs and i37 is not native
@@ -252,10 +257,18 @@ public:
   unsigned getStackAlignment() const { return StackNaturalAlign; }
   unsigned getAllocaAddrSpace() const { return AllocaAddrSpace; }
 
+  unsigned getProgramAddressSpace() const { return ProgramAddrSpace; }
+
   bool hasMicrosoftFastStdCallMangling() const {
     return ManglingMode == MM_WinCOFFX86;
   }
 
+  /// Returns true if symbols with leading question marks should not receive IR
+  /// mangling. True for Windows mangling modes.
+  bool doNotMangleLeadingQuestionMark() const {
+    return ManglingMode == MM_WinCOFF || ManglingMode == MM_WinCOFFX86;
+  }
+
   bool hasLinkerPrivateGlobalPrefix() const { return ManglingMode == MM_MachO; }
 
   StringRef getLinkerPrivateGlobalPrefix() const {
@@ -296,7 +309,7 @@ public:
 
   static const char *getManglingComponent(const Triple &T);
 
-  /// \brief Returns true if the specified type fits in a native integer type
+  /// Returns true if the specified type fits in a native integer type
   /// supported by the CPU.
   ///
   /// For example, if the CPU only supports i32 as a native integer type, then
@@ -321,6 +334,9 @@ public:
   /// the backends/clients are updated.
   unsigned getPointerSize(unsigned AS = 0) const;
 
+  // Index size used for address calculation.
+  unsigned getIndexSize(unsigned AS) const;
+
   /// Return the address spaces containing non-integral pointers.  Pointers in
   /// this address space don't have a well-defined bitwise representation.
   ArrayRef<unsigned> getNonIntegralAddressSpaces() const {
@@ -345,6 +361,11 @@ public:
     return getPointerSize(AS) * 8;
   }
 
+  /// Size in bits of index used for address calculation in getelementptr.
+  unsigned getIndexSizeInBits(unsigned AS) const {
+    return getIndexSize(AS) * 8;
+  }
+
   /// Layout pointer size, in bits, based on the type.  If this function is
   /// called with a pointer type, then the type size of the pointer is returned.
   /// If this function is called with a vector of pointers, then the type size
@@ -352,6 +373,10 @@ public:
   /// vector of pointers.
   unsigned getPointerTypeSizeInBits(Type *) const;
 
+  /// Layout size of the index used in GEP calculation.
+  /// The function should be called with pointer or vector of pointers type.
+  unsigned getIndexTypeSizeInBits(Type *Ty) const;
+
   unsigned getPointerTypeSize(Type *Ty) const {
     return getPointerTypeSizeInBits(Ty) / 8;
   }
@@ -373,13 +398,13 @@ public:
   /// [*] The alloc size depends on the alignment, and thus on the target.
   ///     These values are for x86-32 linux.
 
-  /// \brief Returns the number of bits necessary to hold the specified type.
+  /// Returns the number of bits necessary to hold the specified type.
   ///
   /// For example, returns 36 for i36 and 80 for x86_fp80. The type passed must
   /// have a size (Type::isSized() must return true).
   uint64_t getTypeSizeInBits(Type *Ty) const;
 
-  /// \brief Returns the maximum number of bytes that may be overwritten by
+  /// Returns the maximum number of bytes that may be overwritten by
   /// storing the specified type.
   ///
   /// For example, returns 5 for i36 and 10 for x86_fp80.
@@ -387,7 +412,7 @@ public:
     return (getTypeSizeInBits(Ty) + 7) / 8;
   }
 
-  /// \brief Returns the maximum number of bits that may be overwritten by
+  /// Returns the maximum number of bits that may be overwritten by
   /// storing the specified type; always a multiple of 8.
   ///
   /// For example, returns 40 for i36 and 80 for x86_fp80.
@@ -395,7 +420,7 @@ public:
     return 8 * getTypeStoreSize(Ty);
   }
 
-  /// \brief Returns the offset in bytes between successive objects of the
+  /// Returns the offset in bytes between successive objects of the
   /// specified type, including alignment padding.
   ///
   /// This is the amount that alloca reserves for this type. For example,
@@ -405,7 +430,7 @@ public:
     return alignTo(getTypeStoreSize(Ty), getABITypeAlignment(Ty));
   }
 
-  /// \brief Returns the offset in bits between successive objects of the
+  /// Returns the offset in bits between successive objects of the
   /// specified type, including alignment padding; always a multiple of 8.
   ///
   /// This is the amount that alloca reserves for this type. For example,
@@ -414,64 +439,69 @@ public:
     return 8 * getTypeAllocSize(Ty);
   }
 
-  /// \brief Returns the minimum ABI-required alignment for the specified type.
+  /// Returns the minimum ABI-required alignment for the specified type.
   unsigned getABITypeAlignment(Type *Ty) const;
 
-  /// \brief Returns the minimum ABI-required alignment for an integer type of
+  /// Returns the minimum ABI-required alignment for an integer type of
   /// the specified bitwidth.
   unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const;
 
-  /// \brief Returns the preferred stack/global alignment for the specified
+  /// Returns the preferred stack/global alignment for the specified
   /// type.
   ///
   /// This is always at least as good as the ABI alignment.
   unsigned getPrefTypeAlignment(Type *Ty) const;
 
-  /// \brief Returns the preferred alignment for the specified type, returned as
+  /// Returns the preferred alignment for the specified type, returned as
   /// log2 of the value (a shift amount).
   unsigned getPreferredTypeAlignmentShift(Type *Ty) const;
 
-  /// \brief Returns an integer type with size at least as big as that of a
+  /// Returns an integer type with size at least as big as that of a
   /// pointer in the given address space.
   IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const;
 
-  /// \brief Returns an integer (vector of integer) type with size at least as
+  /// Returns an integer (vector of integer) type with size at least as
   /// big as that of a pointer of the given pointer (vector of pointer) type.
   Type *getIntPtrType(Type *) const;
 
-  /// \brief Returns the smallest integer type with size at least as big as
+  /// Returns the smallest integer type with size at least as big as
   /// Width bits.
   Type *getSmallestLegalIntType(LLVMContext &C, unsigned Width = 0) const;
 
-  /// \brief Returns the largest legal integer type, or null if none are set.
+  /// Returns the largest legal integer type, or null if none are set.
   Type *getLargestLegalIntType(LLVMContext &C) const {
     unsigned LargestSize = getLargestLegalIntTypeSizeInBits();
     return (LargestSize == 0) ? nullptr : Type::getIntNTy(C, LargestSize);
   }
 
-  /// \brief Returns the size of largest legal integer type size, or 0 if none
+  /// Returns the size of largest legal integer type size, or 0 if none
   /// are set.
   unsigned getLargestLegalIntTypeSizeInBits() const;
 
-  /// \brief Returns the offset from the beginning of the type for the specified
+  /// Returns the type of a GEP index.
+  /// If it was not specified explicitly, it will be the integer type of the
+  /// pointer width - IntPtrType.
+  Type *getIndexType(Type *PtrTy) const;
+
+  /// Returns the offset from the beginning of the type for the specified
   /// indices.
   ///
   /// Note that this takes the element type, not the pointer type.
   /// This is used to implement getelementptr.
   int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef<Value *> Indices) const;
 
-  /// \brief Returns a StructLayout object, indicating the alignment of the
+  /// Returns a StructLayout object, indicating the alignment of the
   /// struct, its size, and the offsets of its fields.
   ///
   /// Note that this information is lazily cached.
   const StructLayout *getStructLayout(StructType *Ty) const;
 
-  /// \brief Returns the preferred alignment of the specified global.
+  /// Returns the preferred alignment of the specified global.
   ///
   /// This includes an explicitly requested alignment (if the global has one).
   unsigned getPreferredAlignment(const GlobalVariable *GV) const;
 
-  /// \brief Returns the preferred alignment of the specified global, returned
+  /// Returns the preferred alignment of the specified global, returned
   /// in log form.
   ///
   /// This includes an explicitly requested alignment (if the global has one).
@@ -506,7 +536,7 @@ public:
   /// NB: Padding in nested element is not taken into account.
   bool hasPadding() const { return IsPadded; }
 
-  /// \brief Given a valid byte offset into the structure, returns the structure
+  /// Given a valid byte offset into the structure, returns the structure
   /// index that contains it.
   unsigned getElementContainingOffset(uint64_t Offset) const;
 
diff --git a/include/llvm/IR/DebugInfo.h b/include/llvm/IR/DebugInfo.h
index 1d8e7e2855fd..01178af3c9ff 100644
--- a/include/llvm/IR/DebugInfo.h
+++ b/include/llvm/IR/DebugInfo.h
@@ -28,10 +28,10 @@ class DbgDeclareInst;
 class DbgValueInst;
 class Module;
 
-/// \brief Find subprogram that is enclosing this scope.
+/// Find subprogram that is enclosing this scope.
 DISubprogram *getDISubprogram(const MDNode *Scope);
 
-/// \brief Strip debug info in the module if it exists.
+/// Strip debug info in the module if it exists.
 ///
 /// To do this, we remove all calls to the debugger intrinsics and any named
 /// metadata for debugging. We also remove debug locations for instructions.
@@ -51,10 +51,10 @@ bool stripDebugInfo(Function &F);
 ///   All debug type metadata nodes are unreachable and garbage collected.
 bool stripNonLineTableDebugInfo(Module &M);
 
-/// \brief Return Debug Info Metadata Version by checking module flags.
+/// Return Debug Info Metadata Version by checking module flags.
 unsigned getDebugMetadataVersionFromModule(const Module &M);
 
-/// \brief Utility to find all debug info in a module.
+/// Utility to find all debug info in a module.
 ///
 /// DebugInfoFinder tries to list all debug info MDNodes used in a module. To
 /// list debug info MDNodes used by an instruction, DebugInfoFinder uses
@@ -64,30 +64,33 @@ unsigned getDebugMetadataVersionFromModule(const Module &M);
 /// used by the CUs.
 class DebugInfoFinder {
 public:
-  /// \brief Process entire module and collect debug info anchors.
+  /// Process entire module and collect debug info anchors.
   void processModule(const Module &M);
+  /// Process a single instruction and collect debug info anchors.
+  void processInstruction(const Module &M, const Instruction &I);
 
-  /// \brief Process DbgDeclareInst.
+  /// Process DbgDeclareInst.
   void processDeclare(const Module &M, const DbgDeclareInst *DDI);
-  /// \brief Process DbgValueInst.
+  /// Process DbgValueInst.
   void processValue(const Module &M, const DbgValueInst *DVI);
-  /// \brief Process debug info location.
+  /// Process debug info location.
   void processLocation(const Module &M, const DILocation *Loc);
 
-  /// \brief Clear all lists.
+  /// Clear all lists.
   void reset();
 
 private:
   void InitializeTypeMap(const Module &M);
 
-  void processType(DIType *DT);
-  void processSubprogram(DISubprogram *SP);
+  void processCompileUnit(DICompileUnit *CU);
   void processScope(DIScope *Scope);
+  void processSubprogram(DISubprogram *SP);
+  void processType(DIType *DT);
   bool addCompileUnit(DICompileUnit *CU);
   bool addGlobalVariable(DIGlobalVariableExpression *DIG);
+  bool addScope(DIScope *Scope);
   bool addSubprogram(DISubprogram *SP);
   bool addType(DIType *DT);
-  bool addScope(DIScope *Scope);
 
 public:
   using compile_unit_iterator =
diff --git a/include/llvm/IR/DebugInfoFlags.def b/include/llvm/IR/DebugInfoFlags.def
index 7ea6346998fe..b1f5fac64232 100644
--- a/include/llvm/IR/DebugInfoFlags.def
+++ b/include/llvm/IR/DebugInfoFlags.def
@@ -43,6 +43,11 @@ HANDLE_DI_FLAG((1 << 18), IntroducedVirtual)
 HANDLE_DI_FLAG((1 << 19), BitField)
 HANDLE_DI_FLAG((1 << 20), NoReturn)
 HANDLE_DI_FLAG((1 << 21), MainSubprogram)
+HANDLE_DI_FLAG((1 << 22), TypePassByValue)
+HANDLE_DI_FLAG((1 << 23), TypePassByReference)
+HANDLE_DI_FLAG((1 << 24), FixedEnum)
+HANDLE_DI_FLAG((1 << 25), Thunk)
+HANDLE_DI_FLAG((1 << 26), Trivial)
 
 // To avoid needing a dedicated value for IndirectVirtualBase, we use
 // the bitwise or of Virtual and FwdDecl, which does not otherwise
@@ -52,7 +57,7 @@ HANDLE_DI_FLAG((1 << 2) | (1 << 5), IndirectVirtualBase)
 #ifdef DI_FLAG_LARGEST_NEEDED
 // intended to be used with ADT/BitmaskEnum.h
 // NOTE: always must be equal to largest flag, check this when adding new flag
-HANDLE_DI_FLAG((1 << 21), Largest)
+HANDLE_DI_FLAG((1 << 26), Largest)
 #undef DI_FLAG_LARGEST_NEEDED
 #endif
 
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index 75b0c43b6512..820746851104 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -18,11 +18,13 @@
 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/Casting.h"
 #include <cassert>
@@ -230,6 +232,7 @@ public:
     case DITemplateValueParameterKind:
     case DIGlobalVariableKind:
     case DILocalVariableKind:
+    case DILabelKind:
     case DIObjCPropertyKind:
     case DIImportedEntityKind:
     case DIModuleKind:
@@ -332,31 +335,53 @@ class DISubrange : public DINode {
   friend class LLVMContextImpl;
   friend class MDNode;
 
-  int64_t Count;
   int64_t LowerBound;
 
-  DISubrange(LLVMContext &C, StorageType Storage, int64_t Count,
-             int64_t LowerBound)
-      : DINode(C, DISubrangeKind, Storage, dwarf::DW_TAG_subrange_type, None),
-        Count(Count), LowerBound(LowerBound) {}
+  DISubrange(LLVMContext &C, StorageType Storage, Metadata *Node,
+             int64_t LowerBound, ArrayRef<Metadata *> Ops)
+      : DINode(C, DISubrangeKind, Storage, dwarf::DW_TAG_subrange_type, Ops),
+        LowerBound(LowerBound) {}
+
   ~DISubrange() = default;
 
   static DISubrange *getImpl(LLVMContext &Context, int64_t Count,
                              int64_t LowerBound, StorageType Storage,
                              bool ShouldCreate = true);
 
+  static DISubrange *getImpl(LLVMContext &Context, Metadata *CountNode,
+                             int64_t LowerBound, StorageType Storage,
+                             bool ShouldCreate = true);
+
   TempDISubrange cloneImpl() const {
-    return getTemporary(getContext(), getCount(), getLowerBound());
+    return getTemporary(getContext(), getRawCountNode(), getLowerBound());
   }
 
 public:
   DEFINE_MDNODE_GET(DISubrange, (int64_t Count, int64_t LowerBound = 0),
                     (Count, LowerBound))
 
+  DEFINE_MDNODE_GET(DISubrange, (Metadata *CountNode, int64_t LowerBound = 0),
+                    (CountNode, LowerBound))
+
   TempDISubrange clone() const { return cloneImpl(); }
 
   int64_t getLowerBound() const { return LowerBound; }
-  int64_t getCount() const { return Count; }
+
+  Metadata *getRawCountNode() const {
+    return getOperand(0).get();
+  }
+
+  typedef PointerUnion<ConstantInt*, DIVariable*> CountType;
+
+  CountType getCount() const {
+    if (auto *MD = dyn_cast<ConstantAsMetadata>(getRawCountNode()))
+      return CountType(cast<ConstantInt>(MD->getValue()));
+
+    if (auto *DV = dyn_cast<DIVariable>(getRawCountNode()))
+      return CountType(DV);
+
+    return CountType();
+  }
 
   static bool classof(const Metadata *MD) {
     return MD->getMetadataID() == DISubrangeKind;
@@ -372,36 +397,38 @@ class DIEnumerator : public DINode {
   friend class MDNode;
 
   int64_t Value;
-
   DIEnumerator(LLVMContext &C, StorageType Storage, int64_t Value,
-               ArrayRef<Metadata *> Ops)
+               bool IsUnsigned, ArrayRef<Metadata *> Ops)
       : DINode(C, DIEnumeratorKind, Storage, dwarf::DW_TAG_enumerator, Ops),
-        Value(Value) {}
+        Value(Value) {
+    SubclassData32 = IsUnsigned;
+  }
   ~DIEnumerator() = default;
 
   static DIEnumerator *getImpl(LLVMContext &Context, int64_t Value,
-                               StringRef Name, StorageType Storage,
-                               bool ShouldCreate = true) {
-    return getImpl(Context, Value, getCanonicalMDString(Context, Name), Storage,
-                   ShouldCreate);
+                               bool IsUnsigned, StringRef Name,
+                               StorageType Storage, bool ShouldCreate = true) {
+    return getImpl(Context, Value, IsUnsigned,
+                   getCanonicalMDString(Context, Name), Storage, ShouldCreate);
   }
   static DIEnumerator *getImpl(LLVMContext &Context, int64_t Value,
-                               MDString *Name, StorageType Storage,
-                               bool ShouldCreate = true);
+                               bool IsUnsigned, MDString *Name,
+                               StorageType Storage, bool ShouldCreate = true);
 
   TempDIEnumerator cloneImpl() const {
-    return getTemporary(getContext(), getValue(), getName());
+    return getTemporary(getContext(), getValue(), isUnsigned(), getName());
   }
 
 public:
-  DEFINE_MDNODE_GET(DIEnumerator, (int64_t Value, StringRef Name),
-                    (Value, Name))
-  DEFINE_MDNODE_GET(DIEnumerator, (int64_t Value, MDString *Name),
-                    (Value, Name))
+  DEFINE_MDNODE_GET(DIEnumerator, (int64_t Value, bool IsUnsigned, StringRef Name),
+                    (Value, IsUnsigned, Name))
+  DEFINE_MDNODE_GET(DIEnumerator, (int64_t Value, bool IsUnsigned, MDString *Name),
+                    (Value, IsUnsigned, Name))
 
   TempDIEnumerator clone() const { return cloneImpl(); }
 
   int64_t getValue() const { return Value; }
+  bool isUnsigned() const { return SubclassData32; }
   StringRef getName() const { return getStringOperand(0); }
 
   MDString *getRawName() const { return getOperandAs<MDString>(0); }
@@ -429,6 +456,7 @@ public:
 
   inline StringRef getFilename() const;
   inline StringRef getDirectory() const;
+  inline Optional<StringRef> getSource() const;
 
   StringRef getName() const;
   DIScopeRef getScope() const;
@@ -473,63 +501,103 @@ class DIFile : public DIScope {
   friend class MDNode;
 
 public:
-  // These values must be explictly set, as they end up in the final object
-  // file.
+  /// Which algorithm (e.g. MD5) a checksum was generated with.
+  ///
+  /// The encoding is explicit because it is used directly in Bitcode. The
+  /// value 0 is reserved to indicate the absence of a checksum in Bitcode.
   enum ChecksumKind {
-    CSK_None = 0,
+    // The first variant was originally CSK_None, encoded as 0. The new
+    // internal representation removes the need for this by wrapping the
+    // ChecksumInfo in an Optional, but to preserve Bitcode compatibility the 0
+    // encoding is reserved.
     CSK_MD5 = 1,
     CSK_SHA1 = 2,
     CSK_Last = CSK_SHA1 // Should be last enumeration.
   };
 
+  /// A single checksum, represented by a \a Kind and a \a Value (a string).
+  template <typename T>
+  struct ChecksumInfo {
+    /// The kind of checksum which \a Value encodes.
+    ChecksumKind Kind;
+    /// The string value of the checksum.
+    T Value;
+
+    ChecksumInfo(ChecksumKind Kind, T Value) : Kind(Kind), Value(Value) { }
+    ~ChecksumInfo() = default;
+    bool operator==(const ChecksumInfo<T> &X) const {
+      return Kind == X.Kind && Value == X.Value;
+    }
+    bool operator!=(const ChecksumInfo<T> &X) const { return !(*this == X); }
+    StringRef getKindAsString() const { return getChecksumKindAsString(Kind); }
+  };
+
 private:
-  ChecksumKind CSKind;
+  Optional<ChecksumInfo<MDString *>> Checksum;
+  Optional<MDString *> Source;
 
-  DIFile(LLVMContext &C, StorageType Storage, ChecksumKind CSK,
+  DIFile(LLVMContext &C, StorageType Storage,
+         Optional<ChecksumInfo<MDString *>> CS, Optional<MDString *> Src,
          ArrayRef<Metadata *> Ops)
       : DIScope(C, DIFileKind, Storage, dwarf::DW_TAG_file_type, Ops),
-        CSKind(CSK) {}
+        Checksum(CS), Source(Src) {}
   ~DIFile() = default;
 
   static DIFile *getImpl(LLVMContext &Context, StringRef Filename,
-                         StringRef Directory, ChecksumKind CSK, StringRef CS,
+                         StringRef Directory,
+                         Optional<ChecksumInfo<StringRef>> CS,
+                         Optional<StringRef> Source,
                          StorageType Storage, bool ShouldCreate = true) {
+    Optional<ChecksumInfo<MDString *>> MDChecksum;
+    if (CS)
+      MDChecksum.emplace(CS->Kind, getCanonicalMDString(Context, CS->Value));
     return getImpl(Context, getCanonicalMDString(Context, Filename),
-                   getCanonicalMDString(Context, Directory), CSK,
-                   getCanonicalMDString(Context, CS), Storage, ShouldCreate);
+                   getCanonicalMDString(Context, Directory), MDChecksum,
+                   Source ? Optional<MDString *>(getCanonicalMDString(Context, *Source)) : None,
+                   Storage, ShouldCreate);
   }
   static DIFile *getImpl(LLVMContext &Context, MDString *Filename,
-                         MDString *Directory, ChecksumKind CSK, MDString *CS,
-                         StorageType Storage, bool ShouldCreate = true);
+                         MDString *Directory,
+                         Optional<ChecksumInfo<MDString *>> CS,
+                         Optional<MDString *> Source, StorageType Storage,
+                         bool ShouldCreate = true);
 
   TempDIFile cloneImpl() const {
     return getTemporary(getContext(), getFilename(), getDirectory(),
-                        getChecksumKind(), getChecksum());
+                        getChecksum(), getSource());
   }
 
 public:
   DEFINE_MDNODE_GET(DIFile, (StringRef Filename, StringRef Directory,
-                             ChecksumKind CSK = CSK_None,
-                             StringRef CS = StringRef()),
-                    (Filename, Directory, CSK, CS))
+                             Optional<ChecksumInfo<StringRef>> CS = None,
+                             Optional<StringRef> Source = None),
+                    (Filename, Directory, CS, Source))
   DEFINE_MDNODE_GET(DIFile, (MDString * Filename, MDString *Directory,
-                             ChecksumKind CSK = CSK_None,
-                             MDString *CS = nullptr),
-                    (Filename, Directory, CSK, CS))
+                             Optional<ChecksumInfo<MDString *>> CS = None,
+                             Optional<MDString *> Source = None),
+                    (Filename, Directory, CS, Source))
 
   TempDIFile clone() const { return cloneImpl(); }
 
   StringRef getFilename() const { return getStringOperand(0); }
   StringRef getDirectory() const { return getStringOperand(1); }
-  StringRef getChecksum() const { return getStringOperand(2); }
-  ChecksumKind getChecksumKind() const { return CSKind; }
-  StringRef getChecksumKindAsString() const;
+  Optional<ChecksumInfo<StringRef>> getChecksum() const {
+    Optional<ChecksumInfo<StringRef>> StringRefChecksum;
+    if (Checksum)
+      StringRefChecksum.emplace(Checksum->Kind, Checksum->Value->getString());
+    return StringRefChecksum;
+  }
+  Optional<StringRef> getSource() const {
+    return Source ? Optional<StringRef>((*Source)->getString()) : None;
+  }
 
   MDString *getRawFilename() const { return getOperandAs<MDString>(0); }
   MDString *getRawDirectory() const { return getOperandAs<MDString>(1); }
-  MDString *getRawChecksum() const { return getOperandAs<MDString>(2); }
+  Optional<ChecksumInfo<MDString *>> getRawChecksum() const { return Checksum; }
+  Optional<MDString *> getRawSource() const { return Source; }
 
-  static ChecksumKind getChecksumKind(StringRef CSKindStr);
+  static StringRef getChecksumKindAsString(ChecksumKind CSKind);
+  static Optional<ChecksumKind> getChecksumKind(StringRef CSKindStr);
 
   static bool classof(const Metadata *MD) {
     return MD->getMetadataID() == DIFileKind;
@@ -548,6 +616,12 @@ StringRef DIScope::getDirectory() const {
   return "";
 }
 
+Optional<StringRef> DIScope::getSource() const {
+  if (auto *F = getFile())
+    return F->getSource();
+  return None;
+}
+
 /// Base class for types.
 ///
 /// TODO: Remove the hardcoded name and context, since many types don't use
@@ -605,9 +679,11 @@ public:
   Metadata *getRawScope() const { return getOperand(1); }
   MDString *getRawName() const { return getOperandAs<MDString>(2); }
 
-  void setFlags(DIFlags NewFlags) {
-    assert(!isUniqued() && "Cannot set flags on uniqued nodes");
-    Flags = NewFlags;
+  /// Returns a new temporary DIType with updated Flags
+  TempDIType cloneWithFlags(DIFlags NewFlags) const {
+    auto NewTy = clone();
+    NewTy->Flags = NewFlags;
+    return NewTy;
   }
 
   bool isPrivate() const {
@@ -633,6 +709,10 @@ public:
   bool isStaticMember() const { return getFlags() & FlagStaticMember; }
   bool isLValueReference() const { return getFlags() & FlagLValueReference; }
   bool isRValueReference() const { return getFlags() & FlagRValueReference; }
+  bool isTypePassByValue() const { return getFlags() & FlagTypePassByValue; }
+  bool isTypePassByReference() const {
+    return getFlags() & FlagTypePassByReference;
+  }
 
   static bool classof(const Metadata *MD) {
     switch (MD->getMetadataID()) {
@@ -698,6 +778,12 @@ public:
 
   unsigned getEncoding() const { return Encoding; }
 
+  enum class Signedness { Signed, Unsigned };
+
+  /// Return the signedness of this type, or None if this type is neither
+  /// signed nor unsigned.
+  Optional<Signedness> getSignedness() const;
+
   static bool classof(const Metadata *MD) {
     return MD->getMetadataID() == DIBasicTypeKind;
   }
@@ -713,7 +799,7 @@ class DIDerivedType : public DIType {
   friend class LLVMContextImpl;
   friend class MDNode;
 
-  /// \brief The DWARF address space of the memory pointed to or referenced by a
+  /// The DWARF address space of the memory pointed to or referenced by a
   /// pointer or reference type respectively.
   Optional<unsigned> DWARFAddressSpace;
 
@@ -788,7 +874,8 @@ public:
   /// Get extra data associated with this derived type.
   ///
   /// Class type for pointer-to-members, objective-c property node for ivars,
-  /// or global constant wrapper for static members.
+  /// global constant wrapper for static members, or virtual base pointer offset
+  /// for inheritance.
   ///
   /// TODO: Separate out types that need this extra operand: pointer-to-member
   /// types and member fields (static members and ivars).
@@ -806,6 +893,14 @@ public:
     return dyn_cast_or_null<DIObjCProperty>(getExtraData());
   }
 
+  uint32_t getVBPtrOffset() const {
+    assert(getTag() == dwarf::DW_TAG_inheritance);
+    if (auto *CM = cast_or_null<ConstantAsMetadata>(getExtraData()))
+      if (auto *CI = dyn_cast_or_null<ConstantInt>(CM->getValue()))
+        return static_cast<uint32_t>(CI->getZExtValue());
+    return 0;
+  }
+
   Constant *getStorageOffsetInBits() const {
     assert(getTag() == dwarf::DW_TAG_member && isBitField());
     if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
@@ -819,6 +914,12 @@ public:
       return C->getValue();
     return nullptr;
   }
+  Constant *getDiscriminantValue() const {
+    assert(getTag() == dwarf::DW_TAG_member && !isStaticMember());
+    if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
+      return C->getValue();
+    return nullptr;
+  }
   /// @}
 
   static bool classof(const Metadata *MD) {
@@ -861,12 +962,13 @@ class DICompositeType : public DIType {
           uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
           DIFlags Flags, DINodeArray Elements, unsigned RuntimeLang,
           DITypeRef VTableHolder, DITemplateParameterArray TemplateParams,
-          StringRef Identifier, StorageType Storage, bool ShouldCreate = true) {
+          StringRef Identifier, DIDerivedType *Discriminator,
+          StorageType Storage, bool ShouldCreate = true) {
     return getImpl(
         Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope,
         BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(),
         RuntimeLang, VTableHolder, TemplateParams.get(),
-        getCanonicalMDString(Context, Identifier), Storage, ShouldCreate);
+        getCanonicalMDString(Context, Identifier), Discriminator, Storage, ShouldCreate);
   }
   static DICompositeType *
   getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
@@ -874,14 +976,15 @@ class DICompositeType : public DIType {
           uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
           DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
           Metadata *VTableHolder, Metadata *TemplateParams,
-          MDString *Identifier, StorageType Storage, bool ShouldCreate = true);
+          MDString *Identifier, Metadata *Discriminator,
+          StorageType Storage, bool ShouldCreate = true);
 
   TempDICompositeType cloneImpl() const {
     return getTemporary(getContext(), getTag(), getName(), getFile(), getLine(),
                         getScope(), getBaseType(), getSizeInBits(),
                         getAlignInBits(), getOffsetInBits(), getFlags(),
                         getElements(), getRuntimeLang(), getVTableHolder(),
-                        getTemplateParams(), getIdentifier());
+                        getTemplateParams(), getIdentifier(), getDiscriminator());
   }
 
 public:
@@ -892,10 +995,10 @@ public:
                      DIFlags Flags, DINodeArray Elements, unsigned RuntimeLang,
                      DITypeRef VTableHolder,
                      DITemplateParameterArray TemplateParams = nullptr,
-                     StringRef Identifier = ""),
+                     StringRef Identifier = "", DIDerivedType *Discriminator = nullptr),
                     (Tag, Name, File, Line, Scope, BaseType, SizeInBits,
                      AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
-                     VTableHolder, TemplateParams, Identifier))
+                     VTableHolder, TemplateParams, Identifier, Discriminator))
   DEFINE_MDNODE_GET(DICompositeType,
                     (unsigned Tag, MDString *Name, Metadata *File,
                      unsigned Line, Metadata *Scope, Metadata *BaseType,
@@ -903,10 +1006,11 @@ public:
                      uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements,
                      unsigned RuntimeLang, Metadata *VTableHolder,
                      Metadata *TemplateParams = nullptr,
-                     MDString *Identifier = nullptr),
+                     MDString *Identifier = nullptr,
+                     Metadata *Discriminator = nullptr),
                     (Tag, Name, File, Line, Scope, BaseType, SizeInBits,
                      AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
-                     VTableHolder, TemplateParams, Identifier))
+                     VTableHolder, TemplateParams, Identifier, Discriminator))
 
   TempDICompositeType clone() const { return cloneImpl(); }
 
@@ -923,7 +1027,7 @@ public:
              Metadata *BaseType, uint64_t SizeInBits, uint32_t AlignInBits,
              uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements,
              unsigned RuntimeLang, Metadata *VTableHolder,
-             Metadata *TemplateParams);
+             Metadata *TemplateParams, Metadata *Discriminator);
   static DICompositeType *getODRTypeIfExists(LLVMContext &Context,
                                              MDString &Identifier);
 
@@ -942,7 +1046,7 @@ public:
                Metadata *BaseType, uint64_t SizeInBits, uint32_t AlignInBits,
                uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements,
                unsigned RuntimeLang, Metadata *VTableHolder,
-               Metadata *TemplateParams);
+               Metadata *TemplateParams, Metadata *Discriminator);
 
   DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); }
   DINodeArray getElements() const {
@@ -960,6 +1064,8 @@ public:
   Metadata *getRawVTableHolder() const { return getOperand(5); }
   Metadata *getRawTemplateParams() const { return getOperand(6); }
   MDString *getRawIdentifier() const { return getOperandAs<MDString>(7); }
+  Metadata *getRawDiscriminator() const { return getOperand(8); }
+  DIDerivedType *getDiscriminator() const { return getOperandAs<DIDerivedType>(8); }
 
   /// Replace operands.
   ///
@@ -1060,7 +1166,7 @@ public:
   };
 
   static Optional<DebugEmissionKind> getEmissionKind(StringRef Str);
-  static const char *EmissionKindString(DebugEmissionKind EK);
+  static const char *emissionKindString(DebugEmissionKind EK);
 
 private:
   unsigned SourceLanguage;
@@ -1337,6 +1443,7 @@ public:
   DIFile *getFile() const { return getScope()->getFile(); }
   StringRef getFilename() const { return getScope()->getFilename(); }
   StringRef getDirectory() const { return getScope()->getDirectory(); }
+  Optional<StringRef> getSource() const { return getScope()->getSource(); }
 
   /// Get the scope where this is inlined.
   ///
@@ -1380,7 +1487,7 @@ public:
   ///
   /// The above 3 components are encoded into a 32bit unsigned integer in
   /// order. If the lowest bit is 1, the current component is empty, and the
-  /// next component will start in the next bit. Otherwise, the the current
+  /// next component will start in the next bit. Otherwise, the current
   /// component is non-empty, and its content starts in the next bit. The
   /// length of each components is either 5 bit or 12 bit: if the 7th bit
   /// is 0, the bit 2~6 (5 bits) are used to represent the component; if the
@@ -1408,26 +1515,25 @@ public:
   /// discriminator.
   inline const DILocation *cloneWithDuplicationFactor(unsigned DF) const;
 
+  enum { NoGeneratedLocation = false, WithGeneratedLocation = true };
+
   /// When two instructions are combined into a single instruction we also
   /// need to combine the original locations into a single location.
   ///
   /// When the locations are the same we can use either location. When they
-  /// differ, we need a third location which is distinct from either. If
-  /// they have the same file/line but have a different discriminator we
-  /// could create a location with a new discriminator. If they are from
-  /// different files/lines the location is ambiguous and can't be
-  /// represented in a single line entry.  In this case, no location
-  /// should be set, unless the merged instruction is a call, which we will
-  /// set the merged debug location as line 0 of the nearest common scope
-  /// where 2 locations are inlined from. This only applies to Instruction;
-  /// for MachineInstruction, as it is post-inline, we will treat the call
-  /// instruction the same way as other instructions.
+  /// differ, we need a third location which is distinct from either. If they
+  /// have the same file/line but have a different discriminator we could
+  /// create a location with a new discriminator. If they are from different
+  /// files/lines the location is ambiguous and can't be represented in a line
+  /// entry. In this case, if \p GenerateLocation is true, we will set the
+  /// merged debug location as line 0 of the nearest common scope where the two
+  /// locations are inlined from.
   ///
-  /// \p ForInst: The Instruction the merged DILocation is for. If the
-  /// Instruction is unavailable or non-existent, use nullptr.
+  /// \p GenerateLocation: Whether the merged location can be generated when
+  /// \p LocA and \p LocB differ.
   static const DILocation *
   getMergedLocation(const DILocation *LocA, const DILocation *LocB,
-                    const Instruction *ForInst = nullptr);
+                    bool GenerateLocation = NoGeneratedLocation);
 
   /// Returns the base discriminator for a given encoded discriminator \p D.
   static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) {
@@ -1521,13 +1627,13 @@ class DISubprogram : public DILocalScope {
           unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
           bool IsOptimized, DICompileUnit *Unit,
           DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
-          DILocalVariableArray Variables, DITypeArray ThrownTypes,
+          DINodeArray RetainedNodes, DITypeArray ThrownTypes,
           StorageType Storage, bool ShouldCreate = true) {
     return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
                    getCanonicalMDString(Context, LinkageName), File, Line, Type,
                    IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
                    Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
-                   Unit, TemplateParams.get(), Declaration, Variables.get(),
+                   Unit, TemplateParams.get(), Declaration, RetainedNodes.get(),
                    ThrownTypes.get(), Storage, ShouldCreate);
   }
   static DISubprogram *
@@ -1536,7 +1642,7 @@ class DISubprogram : public DILocalScope {
           bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
           Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
           int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit,
-          Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables,
+          Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes,
           Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate = true);
 
   TempDISubprogram cloneImpl() const {
@@ -1545,7 +1651,7 @@ class DISubprogram : public DILocalScope {
                         isDefinition(), getScopeLine(), getContainingType(),
                         getVirtuality(), getVirtualIndex(), getThisAdjustment(),
                         getFlags(), isOptimized(), getUnit(),
-                        getTemplateParams(), getDeclaration(), getVariables(),
+                        getTemplateParams(), getDeclaration(), getRetainedNodes(),
                         getThrownTypes());
   }
 
@@ -1559,12 +1665,12 @@ public:
                      bool IsOptimized, DICompileUnit *Unit,
                      DITemplateParameterArray TemplateParams = nullptr,
                      DISubprogram *Declaration = nullptr,
-                     DILocalVariableArray Variables = nullptr,
+                     DINodeArray RetainedNodes = nullptr,
                      DITypeArray ThrownTypes = nullptr),
                     (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
                      IsDefinition, ScopeLine, ContainingType, Virtuality,
                      VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit,
-                     TemplateParams, Declaration, Variables, ThrownTypes))
+                     TemplateParams, Declaration, RetainedNodes, ThrownTypes))
   DEFINE_MDNODE_GET(
       DISubprogram,
       (Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File,
@@ -1572,15 +1678,22 @@ public:
        unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality,
        unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
        bool IsOptimized, Metadata *Unit, Metadata *TemplateParams = nullptr,
-       Metadata *Declaration = nullptr, Metadata *Variables = nullptr,
+       Metadata *Declaration = nullptr, Metadata *RetainedNodes = nullptr,
        Metadata *ThrownTypes = nullptr),
       (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
        ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment,
-       Flags, IsOptimized, Unit, TemplateParams, Declaration, Variables,
+       Flags, IsOptimized, Unit, TemplateParams, Declaration, RetainedNodes,
        ThrownTypes))
 
   TempDISubprogram clone() const { return cloneImpl(); }
 
+  /// Returns a new temporary DISubprogram with updated Flags
+  TempDISubprogram cloneWithFlags(DIFlags NewFlags) const {
+    auto NewSP = clone();
+    NewSP->Flags = NewFlags;
+    return NewSP;
+  }
+
 public:
   unsigned getLine() const { return Line; }
   unsigned getVirtuality() const { return Virtuality; }
@@ -1623,6 +1736,11 @@ public:
   /// Return true if this subprogram is C++11 noreturn or C11 _Noreturn
   bool isNoReturn() const { return getFlags() & FlagNoReturn; }
 
+  // Check if this routine is a compiler-generated thunk.
+  //
+  // Returns true if this subprogram is a thunk generated by the compiler.
+  bool isThunk() const { return getFlags() & FlagThunk; }
+
   DIScopeRef getScope() const { return DIScopeRef(getRawScope()); }
 
   StringRef getName() const { return getStringOperand(2); }
@@ -1645,8 +1763,8 @@ public:
   DISubprogram *getDeclaration() const {
     return cast_or_null<DISubprogram>(getRawDeclaration());
   }
-  DILocalVariableArray getVariables() const {
-    return cast_or_null<MDTuple>(getRawVariables());
+  DINodeArray getRetainedNodes() const {
+    return cast_or_null<MDTuple>(getRawRetainedNodes());
   }
   DITypeArray getThrownTypes() const {
     return cast_or_null<MDTuple>(getRawThrownTypes());
@@ -1658,7 +1776,7 @@ public:
   Metadata *getRawType() const { return getOperand(4); }
   Metadata *getRawUnit() const { return getOperand(5); }
   Metadata *getRawDeclaration() const { return getOperand(6); }
-  Metadata *getRawVariables() const { return getOperand(7); }
+  Metadata *getRawRetainedNodes() const { return getOperand(7); }
   Metadata *getRawContainingType() const {
     return getNumOperands() > 8 ? getOperandAs<Metadata>(8) : nullptr;
   }
@@ -2094,6 +2212,14 @@ public:
   /// Determines the size of the variable's type.
   Optional<uint64_t> getSizeInBits() const;
 
+  /// Return the signedness of this variable's type, or None if this type is
+  /// neither signed nor unsigned.
+  Optional<DIBasicType::Signedness> getSignedness() const {
+    if (auto *BT = dyn_cast<DIBasicType>(getType().resolve()))
+      return BT->getSignedness();
+    return None;
+  }
+
   StringRef getFilename() const {
     if (auto *F = getFile())
       return F->getFilename();
@@ -2106,6 +2232,12 @@ public:
     return "";
   }
 
+  Optional<StringRef> getSource() const {
+    if (auto *F = getFile())
+      return F->getSource();
+    return None;
+  }
+
   Metadata *getRawScope() const { return getOperand(0); }
   MDString *getRawName() const { return getOperandAs<MDString>(1); }
   Metadata *getRawFile() const { return getOperand(2); }
@@ -2194,6 +2326,11 @@ public:
     ///
     /// Return the number of elements in the operand (1 + args).
     unsigned getSize() const;
+
+    /// Append the elements of this operand to \p V.
+    void appendToVector(SmallVectorImpl<uint64_t> &V) const {
+      V.append(get(), get() + getSize());
+    }
   };
 
   /// An iterator for expression operands.
@@ -2297,10 +2434,29 @@ public:
 
   /// Prepend \p DIExpr with a deref and offset operation and optionally turn it
   /// into a stack value.
-  static DIExpression *prepend(const DIExpression *DIExpr, bool DerefBefore,
+  static DIExpression *prepend(const DIExpression *Expr, bool DerefBefore,
                                int64_t Offset = 0, bool DerefAfter = false,
                                bool StackValue = false);
 
+  /// Prepend \p DIExpr with the given opcodes and optionally turn it into a
+  /// stack value.
+  static DIExpression *prependOpcodes(const DIExpression *Expr,
+                                      SmallVectorImpl<uint64_t> &Ops,
+                                      bool StackValue = false);
+
+  /// Append the opcodes \p Ops to \p DIExpr. Unlike \ref appendToStack, the
+  /// returned expression is a stack value only if \p DIExpr is a stack value.
+  /// If \p DIExpr describes a fragment, the returned expression will describe
+  /// the same fragment.
+  static DIExpression *append(const DIExpression *Expr, ArrayRef<uint64_t> Ops);
+
+  /// Convert \p DIExpr into a stack value if it isn't one already by appending
+  /// DW_OP_deref if needed, and appending \p Ops to the resulting expression.
+  /// If \p DIExpr describes a fragment, the returned expression will describe
+  /// the same fragment.
+  static DIExpression *appendToStack(const DIExpression *Expr,
+                                     ArrayRef<uint64_t> Ops);
+
   /// Create a DIExpression to describe one part of an aggregate variable that
   /// is fragmented across multiple Values. The DW_OP_LLVM_fragment operation
   /// will be appended to the elements of \c Expr. If \c Expr already contains
@@ -2314,6 +2470,32 @@ public:
   static Optional<DIExpression *>
   createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits,
                            unsigned SizeInBits);
+
+  /// Determine the relative position of the fragments described by this
+  /// DIExpression and \p Other.
+  /// Returns -1 if this is entirely before Other, 0 if this and Other overlap,
+  /// 1 if this is entirely after Other.
+  int fragmentCmp(const DIExpression *Other) const {
+    auto Fragment1 = *getFragmentInfo();
+    auto Fragment2 = *Other->getFragmentInfo();
+    unsigned l1 = Fragment1.OffsetInBits;
+    unsigned l2 = Fragment2.OffsetInBits;
+    unsigned r1 = l1 + Fragment1.SizeInBits;
+    unsigned r2 = l2 + Fragment2.SizeInBits;
+    if (r1 <= l2)
+      return -1;
+    else if (r2 <= l1)
+      return 1;
+    else
+      return 0;
+  }
+
+  /// Check if fragments overlap between this DIExpression and \p Other.
+  bool fragmentsOverlap(const DIExpression *Other) const {
+    if (!isFragment() || !Other->isFragment())
+      return true;
+    return fragmentCmp(Other) == 0;
+  }
 };
 
 /// Global variables.
@@ -2476,6 +2658,76 @@ public:
   }
 };
 
+/// Label.
+///
+class DILabel : public DINode {
+  friend class LLVMContextImpl;
+  friend class MDNode;
+
+  unsigned Line;
+
+  DILabel(LLVMContext &C, StorageType Storage, unsigned Line,
+          ArrayRef<Metadata *> Ops)
+      : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {}
+  ~DILabel() = default;
+
+  static DILabel *getImpl(LLVMContext &Context, DIScope *Scope,
+                          StringRef Name, DIFile *File, unsigned Line,
+                          StorageType Storage,
+                          bool ShouldCreate = true) {
+    return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
+                   Line, Storage, ShouldCreate);
+  }
+  static DILabel *getImpl(LLVMContext &Context, Metadata *Scope,
+                          MDString *Name, Metadata *File, unsigned Line,
+                          StorageType Storage,
+                          bool ShouldCreate = true);
+
+  TempDILabel cloneImpl() const {
+    return getTemporary(getContext(), getScope(), getName(), getFile(),
+                        getLine());
+  }
+
+public:
+  DEFINE_MDNODE_GET(DILabel,
+                    (DILocalScope * Scope, StringRef Name, DIFile *File,
+                     unsigned Line),
+                    (Scope, Name, File, Line))
+  DEFINE_MDNODE_GET(DILabel,
+                    (Metadata * Scope, MDString *Name, Metadata *File,
+                     unsigned Line),
+                    (Scope, Name, File, Line))
+
+  TempDILabel clone() const { return cloneImpl(); }
+
+  /// Get the local scope for this label.
+  ///
+  /// Labels must be defined in a local scope.
+  DILocalScope *getScope() const {
+    return cast_or_null<DILocalScope>(getRawScope());
+  }
+  unsigned getLine() const { return Line; }
+  StringRef getName() const { return getStringOperand(1); }
+  DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
+
+  Metadata *getRawScope() const { return getOperand(0); }
+  MDString *getRawName() const { return getOperandAs<MDString>(1); }
+  Metadata *getRawFile() const { return getOperand(2); }
+
+  /// Check that a location is valid for this label.
+  ///
+  /// Check that \c DL exists, is in the same subprogram, and has the same
+  /// inlined-at location as \c this.  (Otherwise, it's not a valid attachment
+  /// to a \a DbgInfoIntrinsic.)
+  bool isValidLocationForIntrinsic(const DILocation *DL) const {
+    return DL && getScope()->getSubprogram() == DL->getScope()->getSubprogram();
+  }
+
+  static bool classof(const Metadata *MD) {
+    return MD->getMetadataID() == DILabelKind;
+  }
+};
+
 class DIObjCProperty : public DINode {
   friend class LLVMContextImpl;
   friend class MDNode;
@@ -2547,6 +2799,12 @@ public:
     return "";
   }
 
+  Optional<StringRef> getSource() const {
+    if (auto *F = getFile())
+      return F->getSource();
+    return None;
+  }
+
   MDString *getRawName() const { return getOperandAs<MDString>(0); }
   Metadata *getRawFile() const { return getOperand(1); }
   MDString *getRawGetterName() const { return getOperandAs<MDString>(2); }
diff --git a/include/llvm/IR/DebugLoc.h b/include/llvm/IR/DebugLoc.h
index eef1212abc4b..9f619ffc5c4d 100644
--- a/include/llvm/IR/DebugLoc.h
+++ b/include/llvm/IR/DebugLoc.h
@@ -24,7 +24,7 @@ namespace llvm {
   class raw_ostream;
   class DILocation;
 
-  /// \brief A debug info location.
+  /// A debug info location.
   ///
   /// This class is a wrapper around a tracking reference to an \a DILocation
   /// pointer.
@@ -37,10 +37,10 @@ namespace llvm {
   public:
     DebugLoc() = default;
 
-    /// \brief Construct from an \a DILocation.
+    /// Construct from an \a DILocation.
     DebugLoc(const DILocation *L);
 
-    /// \brief Construct from an \a MDNode.
+    /// Construct from an \a MDNode.
     ///
     /// Note: if \c N is not an \a DILocation, a verifier check will fail, and
     /// accessors will crash.  However, construction from other nodes is
@@ -48,7 +48,7 @@ namespace llvm {
     /// IR.
     explicit DebugLoc(const MDNode *N);
 
-    /// \brief Get the underlying \a DILocation.
+    /// Get the underlying \a DILocation.
     ///
     /// \pre !*this or \c isa<DILocation>(getAsMDNode()).
     /// @{
@@ -58,7 +58,7 @@ namespace llvm {
     DILocation &operator*() const { return *get(); }
     /// @}
 
-    /// \brief Check for null.
+    /// Check for null.
     ///
     /// Check for null in a way that is safe with broken debug info.  Unlike
     /// the conversion to \c DILocation, this doesn't require that \c Loc is of
@@ -66,10 +66,10 @@ namespace llvm {
     /// \a Instruction::hasMetadata().
     explicit operator bool() const { return Loc; }
 
-    /// \brief Check whether this has a trivial destructor.
+    /// Check whether this has a trivial destructor.
     bool hasTrivialDestructor() const { return Loc.hasTrivialDestructor(); }
 
-    /// \brief Create a new DebugLoc.
+    /// Create a new DebugLoc.
     ///
     /// Create a new DebugLoc at the specified line/col and scope/inline.  This
     /// forwards to \a DILocation::get().
@@ -95,12 +95,12 @@ namespace llvm {
     MDNode *getScope() const;
     DILocation *getInlinedAt() const;
 
-    /// \brief Get the fully inlined-at scope for a DebugLoc.
+    /// Get the fully inlined-at scope for a DebugLoc.
     ///
     /// Gets the inlined-at scope for a DebugLoc.
     MDNode *getInlinedAtScope() const;
 
-    /// \brief Find the debug info location for the start of the function.
+    /// Find the debug info location for the start of the function.
     ///
     /// Walk up the scope chain of given debug loc and find line number info
     /// for the function.
@@ -109,7 +109,7 @@ namespace llvm {
     /// find the subprogram, and then DILocation::get().
     DebugLoc getFnDebugLoc() const;
 
-    /// \brief Return \c this as a bar \a MDNode.
+    /// Return \c this as a bar \a MDNode.
     MDNode *getAsMDNode() const { return Loc; }
 
     bool operator==(const DebugLoc &DL) const { return Loc == DL.Loc; }
@@ -117,7 +117,7 @@ namespace llvm {
 
     void dump() const;
 
-    /// \brief prints source location /path/to/file.exe:line:col @[inlined at]
+    /// prints source location /path/to/file.exe:line:col @[inlined at]
     void print(raw_ostream &OS) const;
   };
 
diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h
index 6e5e085873ab..9526d6287d2f 100644
--- a/include/llvm/IR/DerivedTypes.h
+++ b/include/llvm/IR/DerivedTypes.h
@@ -36,7 +36,7 @@ class LLVMContext;
 /// Class to represent integer types. Note that this class is also used to
 /// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and
 /// Int64Ty.
-/// @brief Integer representation type
+/// Integer representation type
 class IntegerType : public Type {
   friend class LLVMContextImpl;
 
@@ -59,10 +59,10 @@ public:
   /// If an IntegerType with the same NumBits value was previously instantiated,
   /// that instance will be returned. Otherwise a new one will be created. Only
   /// one instance with a given NumBits value is ever created.
-  /// @brief Get or create an IntegerType instance.
+  /// Get or create an IntegerType instance.
   static IntegerType *get(LLVMContext &C, unsigned NumBits);
 
-  /// @brief Get the number of bits in this IntegerType
+  /// Get the number of bits in this IntegerType
   unsigned getBitWidth() const { return getSubclassData(); }
 
   /// Return a bitmask with ones set for all of the bits that can be set by an
@@ -79,13 +79,13 @@ public:
 
   /// For example, this is 0xFF for an 8 bit integer, 0xFFFF for i16, etc.
   /// @returns a bit mask with ones set for all the bits of this type.
-  /// @brief Get a bit mask for this type.
+  /// Get a bit mask for this type.
   APInt getMask() const;
 
   /// This method determines if the width of this IntegerType is a power-of-2
   /// in terms of 8 bit bytes.
   /// @returns true if this is a power-of-2 byte width.
-  /// @brief Is this a power-of-2 byte-width IntegerType ?
+  /// Is this a power-of-2 byte-width IntegerType ?
   bool isPowerOf2ByteWidth() const;
 
   /// Methods for support type inquiry through isa, cast, and dyn_cast.
@@ -193,7 +193,7 @@ public:
 /// StructType::create() forms.
 ///
 /// Independent of what kind of struct you have, the body of a struct type are
-/// laid out in memory consequtively with the elements directly one after the
+/// laid out in memory consecutively with the elements directly one after the
 /// other (if the struct is packed) or (if not packed) with padding between the
 /// elements as defined by DataLayout (which is required to match what the code
 /// generator for a target expects).
diff --git a/include/llvm/IR/DiagnosticHandler.h b/include/llvm/IR/DiagnosticHandler.h
index 9256d4850df1..51873bea3d41 100644
--- a/include/llvm/IR/DiagnosticHandler.h
+++ b/include/llvm/IR/DiagnosticHandler.h
@@ -18,7 +18,7 @@
 namespace llvm {
 class DiagnosticInfo;
 
-/// \brief This is the base class for diagnostic handling in LLVM.
+/// This is the base class for diagnostic handling in LLVM.
 /// The handleDiagnostics method must be overriden by the subclasses to handle
 /// diagnostic. The *RemarkEnabled methods can be overriden to control
 /// which remarks are enabled.
diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h
index 020b67d6b711..81d4ae84bf01 100644
--- a/include/llvm/IR/DiagnosticInfo.h
+++ b/include/llvm/IR/DiagnosticInfo.h
@@ -39,7 +39,7 @@ class LLVMContext;
 class Module;
 class SMDiagnostic;
 
-/// \brief Defines the different supported severity of a diagnostic.
+/// Defines the different supported severity of a diagnostic.
 enum DiagnosticSeverity : char {
   DS_Error,
   DS_Warning,
@@ -49,7 +49,7 @@ enum DiagnosticSeverity : char {
   DS_Note
 };
 
-/// \brief Defines the different supported kind of a diagnostic.
+/// Defines the different supported kind of a diagnostic.
 /// This enum should be extended with a new ID for each added concrete subclass.
 enum DiagnosticKind {
   DK_InlineAsm,
@@ -79,7 +79,7 @@ enum DiagnosticKind {
   DK_FirstPluginKind
 };
 
-/// \brief Get the next available kind ID for a plugin diagnostic.
+/// Get the next available kind ID for a plugin diagnostic.
 /// Each time this function is called, it returns a different number.
 /// Therefore, a plugin that wants to "identify" its own classes
 /// with a dynamic identifier, just have to use this method to get a new ID
@@ -89,7 +89,7 @@ enum DiagnosticKind {
 /// DiagnosticKind values.
 int getNextAvailablePluginDiagnosticKind();
 
-/// \brief This is the base abstract class for diagnostic reporting in
+/// This is the base abstract class for diagnostic reporting in
 /// the backend.
 /// The print method must be overloaded by the subclasses to print a
 /// user-friendly message in the client of the backend (let us call it a
@@ -389,20 +389,20 @@ private:
   DiagnosticLocation Loc;
 };
 
-/// \brief Common features for diagnostics dealing with optimization remarks
+/// Common features for diagnostics dealing with optimization remarks
 /// that are used by both IR and MIR passes.
 class DiagnosticInfoOptimizationBase : public DiagnosticInfoWithLocationBase {
 public:
-  /// \brief Used to set IsVerbose via the stream interface.
+  /// Used to set IsVerbose via the stream interface.
   struct setIsVerbose {};
 
-  /// \brief When an instance of this is inserted into the stream, the arguments
+  /// When an instance of this is inserted into the stream, the arguments
   /// following will not appear in the remark printed in the compiler output
   /// (-Rpass) but only in the optimization record file
   /// (-fsave-optimization-record).
   struct setExtraArgs {};
 
-  /// \brief Used in the streaming interface as the general argument type.  It
+  /// Used in the streaming interface as the general argument type.  It
   /// internally converts everything into a key-value pair.
   struct Argument {
     std::string Key;
@@ -415,6 +415,7 @@ public:
     Argument(StringRef Key, const Type *T);
     Argument(StringRef Key, StringRef S);
     Argument(StringRef Key, int N);
+    Argument(StringRef Key, float N);
     Argument(StringRef Key, long N);
     Argument(StringRef Key, long long N);
     Argument(StringRef Key, unsigned N);
@@ -503,7 +504,7 @@ protected:
   /// The remark is expected to be noisy.
   bool IsVerbose = false;
 
-  /// \brief If positive, the index of the first argument that only appear in
+  /// If positive, the index of the first argument that only appear in
   /// the optimization records and not in the remark printed in the compiler
   /// output.
   int FirstExtraArgIndex = -1;
@@ -586,7 +587,7 @@ operator<<(RemarkT &R,
   return R;
 }
 
-/// \brief Common features for diagnostics dealing with optimization remarks
+/// Common features for diagnostics dealing with optimization remarks
 /// that are used by IR passes.
 class DiagnosticInfoIROptimization : public DiagnosticInfoOptimizationBase {
 public:
@@ -608,7 +609,7 @@ public:
                                        Loc),
         CodeRegion(CodeRegion) {}
 
-  /// \brief This is ctor variant allows a pass to build an optimization remark
+  /// This is ctor variant allows a pass to build an optimization remark
   /// from an existing remark.
   ///
   /// This is useful when a transformation pass (e.g LV) wants to emit a remark
@@ -711,7 +712,7 @@ public:
                            const DiagnosticLocation &Loc,
                            const Value *CodeRegion);
 
-  /// \brief Same as above but \p Inst is used to derive code region and debug
+  /// Same as above but \p Inst is used to derive code region and debug
   /// location.
   OptimizationRemarkMissed(const char *PassName, StringRef RemarkName,
                            const Instruction *Inst);
@@ -752,7 +753,7 @@ public:
                              const DiagnosticLocation &Loc,
                              const Value *CodeRegion);
 
-  /// \brief This is ctor variant allows a pass to build an optimization remark
+  /// This is ctor variant allows a pass to build an optimization remark
   /// from an existing remark.
   ///
   /// This is useful when a transformation pass (e.g LV) wants to emit a remark
@@ -763,7 +764,7 @@ public:
                              const OptimizationRemarkAnalysis &Orig)
       : DiagnosticInfoIROptimization(PassName, Prepend, Orig) {}
 
-  /// \brief Same as above but \p Inst is used to derive code region and debug
+  /// Same as above but \p Inst is used to derive code region and debug
   /// location.
   OptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
                              const Instruction *Inst);
diff --git a/include/llvm/IR/DiagnosticPrinter.h b/include/llvm/IR/DiagnosticPrinter.h
index 59c83291affa..25c47cdd1a12 100644
--- a/include/llvm/IR/DiagnosticPrinter.h
+++ b/include/llvm/IR/DiagnosticPrinter.h
@@ -28,7 +28,7 @@ class StringRef;
 class Twine;
 class Value;
 
-/// \brief Interface for custom diagnostic printing.
+/// Interface for custom diagnostic printing.
 class DiagnosticPrinter {
 public:
   virtual ~DiagnosticPrinter() = default;
@@ -58,7 +58,7 @@ public:
   virtual DiagnosticPrinter &operator<<(const SMDiagnostic &Diag) = 0;
 };
 
-/// \brief Basic diagnostic printer that uses an underlying raw_ostream.
+/// Basic diagnostic printer that uses an underlying raw_ostream.
 class DiagnosticPrinterRawOStream : public DiagnosticPrinter {
 protected:
   raw_ostream &Stream;
diff --git a/include/llvm/IR/DomTreeUpdater.h b/include/llvm/IR/DomTreeUpdater.h
new file mode 100644
index 000000000000..81ba670ac0f5
--- /dev/null
+++ b/include/llvm/IR/DomTreeUpdater.h
@@ -0,0 +1,259 @@
+//===- DomTreeUpdater.h - DomTree/Post DomTree Updater ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DomTreeUpdater class, which provides a uniform way to
+// update dominator tree related data structures.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DOMTREEUPDATER_H
+#define LLVM_DOMTREEUPDATER_H
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/GenericDomTree.h"
+#include <functional>
+#include <vector>
+
+namespace llvm {
+class DomTreeUpdater {
+public:
+  enum class UpdateStrategy : unsigned char { Eager = 0, Lazy = 1 };
+
+  explicit DomTreeUpdater(UpdateStrategy Strategy_) : Strategy(Strategy_) {}
+  DomTreeUpdater(DominatorTree &DT_, UpdateStrategy Strategy_)
+      : DT(&DT_), Strategy(Strategy_) {}
+  DomTreeUpdater(DominatorTree *DT_, UpdateStrategy Strategy_)
+      : DT(DT_), Strategy(Strategy_) {}
+  DomTreeUpdater(PostDominatorTree &PDT_, UpdateStrategy Strategy_)
+      : PDT(&PDT_), Strategy(Strategy_) {}
+  DomTreeUpdater(PostDominatorTree *PDT_, UpdateStrategy Strategy_)
+      : PDT(PDT_), Strategy(Strategy_) {}
+  DomTreeUpdater(DominatorTree &DT_, PostDominatorTree &PDT_,
+                 UpdateStrategy Strategy_)
+      : DT(&DT_), PDT(&PDT_), Strategy(Strategy_) {}
+  DomTreeUpdater(DominatorTree *DT_, PostDominatorTree *PDT_,
+                 UpdateStrategy Strategy_)
+      : DT(DT_), PDT(PDT_), Strategy(Strategy_) {}
+
+  ~DomTreeUpdater() { flush(); }
+
+  /// Returns true if the current strategy is Lazy.
+  bool isLazy() const { return Strategy == UpdateStrategy::Lazy; };
+
+  /// Returns true if the current strategy is Eager.
+  bool isEager() const { return Strategy == UpdateStrategy::Eager; };
+
+  /// Returns true if it holds a DominatorTree.
+  bool hasDomTree() const { return DT != nullptr; }
+
+  /// Returns true if it holds a PostDominatorTree.
+  bool hasPostDomTree() const { return PDT != nullptr; }
+
+  /// Returns true if there is BasicBlock awaiting deletion.
+  /// The deletion will only happen until a flush event and
+  /// all available trees are up-to-date.
+  /// Returns false under Eager UpdateStrategy.
+  bool hasPendingDeletedBB() const { return !DeletedBBs.empty(); }
+
+  /// Returns true if DelBB is awaiting deletion.
+  /// Returns false under Eager UpdateStrategy.
+  bool isBBPendingDeletion(BasicBlock *DelBB) const;
+
+  /// Returns true if either of DT or PDT is valid and the tree has at
+  /// least one update pending. If DT or PDT is nullptr it is treated
+  /// as having no pending updates. This function does not check
+  /// whether there is BasicBlock awaiting deletion.
+  /// Returns false under Eager UpdateStrategy.
+  bool hasPendingUpdates() const;
+
+  /// Returns true if there are DominatorTree updates queued.
+  /// Returns false under Eager UpdateStrategy or DT is nullptr.
+  bool hasPendingDomTreeUpdates() const;
+
+  /// Returns true if there are PostDominatorTree updates queued.
+  /// Returns false under Eager UpdateStrategy or PDT is nullptr.
+  bool hasPendingPostDomTreeUpdates() const;
+
+  /// Apply updates on all available trees. Under Eager UpdateStrategy with
+  /// ForceRemoveDuplicates enabled or under Lazy UpdateStrategy, it will
+  /// discard duplicated updates and self-dominance updates. If both DT and PDT
+  /// are nullptrs, this function discards all updates. The Eager Strategy
+  /// applies the updates immediately while the Lazy Strategy queues the
+  /// updates. It is required for the state of the LLVM IR to be updated
+  /// *before* applying the Updates because the internal update routine will
+  /// analyze the current state of the relationship between a pair of (From, To)
+  /// BasicBlocks to determine whether a single update needs to be discarded.
+  void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates,
+                    bool ForceRemoveDuplicates = false);
+
+  /// Notify all available trees on an edge insertion. If both DT and PDT are
+  /// nullptrs, this function discards the update. Under either Strategy,
+  /// self-dominance update will be removed. The Eager Strategy applies
+  /// the update immediately while the Lazy Strategy queues the update.
+  /// It is recommended to only use this method when you have exactly one
+  /// insertion (and no deletions). It is recommended to use applyUpdates() in
+  /// all other cases. This function has to be called *after* making the update
+  /// on the actual CFG. An internal functions checks if the edge exists in the
+  /// CFG in DEBUG mode.
+  void insertEdge(BasicBlock *From, BasicBlock *To);
+
+  /// Notify all available trees on an edge insertion.
+  /// Under either Strategy, the following updates will be discard silently
+  /// 1. Invalid - Inserting an edge that does not exist in the CFG.
+  /// 2. Self-dominance update.
+  /// 3. Both DT and PDT are nullptrs.
+  /// The Eager Strategy applies the update immediately while the Lazy Strategy
+  /// queues the update. It is recommended to only use this method when you have
+  /// exactly one insertion (and no deletions) and want to discard an invalid
+  /// update.
+  void insertEdgeRelaxed(BasicBlock *From, BasicBlock *To);
+
+  /// Notify all available trees on an edge deletion. If both DT and PDT are
+  /// nullptrs, this function discards the update. Under either Strategy,
+  /// self-dominance update will be removed. The Eager Strategy applies
+  /// the update immediately while the Lazy Strategy queues the update.
+  /// It is recommended to only use this method when you have exactly one
+  /// deletion (and no insertions). It is recommended to use applyUpdates() in
+  /// all other cases. This function has to be called *after* making the update
+  /// on the actual CFG. An internal functions checks if the edge doesn't exist
+  /// in the CFG in DEBUG mode.
+  void deleteEdge(BasicBlock *From, BasicBlock *To);
+
+  /// Notify all available trees on an edge deletion.
+  /// Under either Strategy, the following updates will be discard silently
+  /// 1. Invalid - Deleting an edge that still exists in the CFG.
+  /// 2. Self-dominance update.
+  /// 3. Both DT and PDT are nullptrs.
+  /// The Eager Strategy applies the update immediately while the Lazy Strategy
+  /// queues the update. It is recommended to only use this method when you have
+  /// exactly one deletion (and no insertions) and want to discard an invalid
+  /// update.
+  void deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To);
+
+  /// Delete DelBB. DelBB will be removed from its Parent and
+  /// erased from available trees if it exists and finally get deleted.
+  /// Under Eager UpdateStrategy, DelBB will be processed immediately.
+  /// Under Lazy UpdateStrategy, DelBB will be queued until a flush event and
+  /// all available trees are up-to-date. Assert if any instruction of DelBB is
+  /// modified while awaiting deletion. When both DT and PDT are nullptrs, DelBB
+  /// will be queued until flush() is called.
+  void deleteBB(BasicBlock *DelBB);
+
+  /// Delete DelBB. DelBB will be removed from its Parent and
+  /// erased from available trees if it exists. Then the callback will
+  /// be called. Finally, DelBB will be deleted.
+  /// Under Eager UpdateStrategy, DelBB will be processed immediately.
+  /// Under Lazy UpdateStrategy, DelBB will be queued until a flush event and
+  /// all available trees are up-to-date. Assert if any instruction of DelBB is
+  /// modified while awaiting deletion. Multiple callbacks can be queued for one
+  /// DelBB under Lazy UpdateStrategy.
+  void callbackDeleteBB(BasicBlock *DelBB,
+                        std::function<void(BasicBlock *)> Callback);
+
+  /// Recalculate all available trees.
+  /// Under Lazy Strategy, available trees will only be recalculated if there
+  /// are pending updates or there is BasicBlock awaiting deletion. Returns true
+  /// if at least one tree is recalculated.
+  bool recalculate(Function &F);
+
+  /// Flush DomTree updates and return DomTree.
+  /// It also flush out of date updates applied by all available trees
+  /// and flush Deleted BBs if both trees are up-to-date.
+  /// It must only be called when it has a DomTree.
+  DominatorTree &getDomTree();
+
+  /// Flush PostDomTree updates and return PostDomTree.
+  /// It also flush out of date updates applied by all available trees
+  /// and flush Deleted BBs if both trees are up-to-date.
+  /// It must only be called when it has a PostDomTree.
+  PostDominatorTree &getPostDomTree();
+
+  /// Apply all pending updates to available trees and flush all BasicBlocks
+  /// awaiting deletion.
+  /// Does nothing under Eager UpdateStrategy.
+  void flush();
+
+  /// Debug method to help view the internal state of this class.
+  LLVM_DUMP_METHOD void dump() const;
+
+private:
+  class CallBackOnDeletion final : public CallbackVH {
+  public:
+    CallBackOnDeletion(BasicBlock *V,
+                       std::function<void(BasicBlock *)> Callback)
+        : CallbackVH(V), DelBB(V), Callback_(Callback) {}
+
+  private:
+    BasicBlock *DelBB = nullptr;
+    std::function<void(BasicBlock *)> Callback_;
+
+    void deleted() override {
+      Callback_(DelBB);
+      CallbackVH::deleted();
+    }
+  };
+
+  SmallVector<DominatorTree::UpdateType, 16> PendUpdates;
+  size_t PendDTUpdateIndex = 0;
+  size_t PendPDTUpdateIndex = 0;
+  DominatorTree *DT = nullptr;
+  PostDominatorTree *PDT = nullptr;
+  const UpdateStrategy Strategy;
+  SmallPtrSet<BasicBlock *, 8> DeletedBBs;
+  std::vector<CallBackOnDeletion> Callbacks;
+  bool IsRecalculatingDomTree = false;
+  bool IsRecalculatingPostDomTree = false;
+
+  /// First remove all the instructions of DelBB and then make sure DelBB has a
+  /// valid terminator instruction which is necessary to have when DelBB still
+  /// has to be inside of its parent Function while awaiting deletion under Lazy
+  /// UpdateStrategy to prevent other routines from asserting the state of the
+  /// IR is inconsistent. Assert if DelBB is nullptr or has predecessors.
+  void validateDeleteBB(BasicBlock *DelBB);
+
+  /// Returns true if at least one BasicBlock is deleted.
+  bool forceFlushDeletedBB();
+
+  /// Deduplicate and remove unnecessary updates (no-ops) when using Lazy
+  /// UpdateStrategy. Returns true if the update is queued for update.
+  bool applyLazyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From,
+                       BasicBlock *To);
+
+  /// Helper function to apply all pending DomTree updates.
+  void applyDomTreeUpdates();
+
+  /// Helper function to apply all pending PostDomTree updates.
+  void applyPostDomTreeUpdates();
+
+  /// Helper function to flush deleted BasicBlocks if all available
+  /// trees are up-to-date.
+  void tryFlushDeletedBB();
+
+  /// Drop all updates applied by all available trees and delete BasicBlocks if
+  /// all available trees are up-to-date.
+  void dropOutOfDateUpdates();
+
+  /// Erase Basic Block node that has been unlinked from Function
+  /// in the DomTree and PostDomTree.
+  void eraseDelBBNode(BasicBlock *DelBB);
+
+  /// Returns true if the update appears in the LLVM IR.
+  /// It is used to check whether an update is valid in
+  /// insertEdge/deleteEdge or is unnecessary in the batch update.
+  bool isUpdateValid(DominatorTree::UpdateType Update) const;
+
+  /// Returns true if the update is self dominance.
+  bool isSelfDominance(DominatorTree::UpdateType Update) const;
+};
+} // namespace llvm
+
+#endif // LLVM_DOMTREEUPDATER_H
diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h
index 6ad99e516fba..f9e992b0ef0c 100644
--- a/include/llvm/IR/Dominators.h
+++ b/include/llvm/IR/Dominators.h
@@ -63,8 +63,10 @@ extern template void DeleteEdge<BBPostDomTree>(BBPostDomTree &DT,
 extern template void ApplyUpdates<BBDomTree>(BBDomTree &DT, BBUpdates);
 extern template void ApplyUpdates<BBPostDomTree>(BBPostDomTree &DT, BBUpdates);
 
-extern template bool Verify<BBDomTree>(const BBDomTree &DT);
-extern template bool Verify<BBPostDomTree>(const BBPostDomTree &DT);
+extern template bool Verify<BBDomTree>(const BBDomTree &DT,
+                                       BBDomTree::VerificationLevel VL);
+extern template bool Verify<BBPostDomTree>(const BBPostDomTree &DT,
+                                           BBPostDomTree::VerificationLevel VL);
 }  // namespace DomTreeBuilder
 
 using DomTreeNode = DomTreeNodeBase<BasicBlock>;
@@ -119,7 +121,7 @@ template <> struct DenseMapInfo<BasicBlockEdge> {
   }
 };
 
-/// \brief Concrete subclass of DominatorTreeBase that is used to compute a
+/// Concrete subclass of DominatorTreeBase that is used to compute a
 /// normal dominator tree.
 ///
 /// Definition: A block is said to be forward statically reachable if there is
@@ -148,19 +150,10 @@ class DominatorTree : public DominatorTreeBase<BasicBlock, false> {
   bool invalidate(Function &F, const PreservedAnalyses &PA,
                   FunctionAnalysisManager::Invalidator &);
 
-  /// \brief Returns *false* if the other dominator tree matches this dominator
-  /// tree.
-  inline bool compare(const DominatorTree &Other) const {
-    const DomTreeNode *R = getRootNode();
-    const DomTreeNode *OtherR = Other.getRootNode();
-    return !R || !OtherR || R->getBlock() != OtherR->getBlock() ||
-           Base::compare(Other);
-  }
-
   // Ensure base-class overloads are visible.
   using Base::dominates;
 
-  /// \brief Return true if Def dominates a use in User.
+  /// Return true if Def dominates a use in User.
   ///
   /// This performs the special checks necessary if Def and User are in the same
   /// basic block. Note that Def doesn't dominate a use in Def itself!
@@ -178,15 +171,9 @@ class DominatorTree : public DominatorTreeBase<BasicBlock, false> {
   // Ensure base class overloads are visible.
   using Base::isReachableFromEntry;
 
-  /// \brief Provide an overload for a Use.
+  /// Provide an overload for a Use.
   bool isReachableFromEntry(const Use &U) const;
 
-  /// \brief Verify the correctness of the domtree by re-computing it.
-  ///
-  /// This should only be used for debugging as it aborts the program if the
-  /// verification fails.
-  void verifyDomTree() const;
-
   // Pop up a GraphViz/gv window with the Dominator Tree rendered using `dot`.
   void viewGraph(const Twine &Name, const Twine &Title);
   void viewGraph();
@@ -234,20 +221,20 @@ template <> struct GraphTraits<DominatorTree*>
   }
 };
 
-/// \brief Analysis pass which computes a \c DominatorTree.
+/// Analysis pass which computes a \c DominatorTree.
 class DominatorTreeAnalysis : public AnalysisInfoMixin<DominatorTreeAnalysis> {
   friend AnalysisInfoMixin<DominatorTreeAnalysis>;
   static AnalysisKey Key;
 
 public:
-  /// \brief Provide the result typedef for this analysis pass.
+  /// Provide the result typedef for this analysis pass.
   using Result = DominatorTree;
 
-  /// \brief Run the analysis pass over a function and produce a dominator tree.
+  /// Run the analysis pass over a function and produce a dominator tree.
   DominatorTree run(Function &F, FunctionAnalysisManager &);
 };
 
-/// \brief Printer pass for the \c DominatorTree.
+/// Printer pass for the \c DominatorTree.
 class DominatorTreePrinterPass
     : public PassInfoMixin<DominatorTreePrinterPass> {
   raw_ostream &OS;
@@ -258,12 +245,12 @@ public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
-/// \brief Verifier pass for the \c DominatorTree.
+/// Verifier pass for the \c DominatorTree.
 struct DominatorTreeVerifierPass : PassInfoMixin<DominatorTreeVerifierPass> {
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
-/// \brief Legacy analysis pass which computes a \c DominatorTree.
+/// Legacy analysis pass which computes a \c DominatorTree.
 class DominatorTreeWrapperPass : public FunctionPass {
   DominatorTree DT;
 
@@ -290,6 +277,93 @@ public:
   void print(raw_ostream &OS, const Module *M = nullptr) const override;
 };
 
+//===-------------------------------------
+/// Class to defer updates to a DominatorTree.
+///
+/// Definition: Applying updates to every edge insertion and deletion is
+/// expensive and not necessary. When one needs the DominatorTree for analysis
+/// they can request a flush() to perform a larger batch update. This has the
+/// advantage of the DominatorTree inspecting the set of updates to find
+/// duplicates or unnecessary subtree updates.
+///
+/// The scope of DeferredDominance operates at a Function level.
+///
+/// It is not necessary for the user to scrub the updates for duplicates or
+/// updates that point to the same block (Delete, BB_A, BB_A). Performance
+/// can be gained if the caller attempts to batch updates before submitting
+/// to applyUpdates(ArrayRef) in cases where duplicate edge requests will
+/// occur.
+///
+/// It is required for the state of the LLVM IR to be applied *before*
+/// submitting updates. The update routines must analyze the current state
+/// between a pair of (From, To) basic blocks to determine if the update
+/// needs to be queued.
+/// Example (good):
+///     TerminatorInstructionBB->removeFromParent();
+///     DDT->deleteEdge(BB, Successor);
+/// Example (bad):
+///     DDT->deleteEdge(BB, Successor);
+///     TerminatorInstructionBB->removeFromParent();
+class DeferredDominance {
+public:
+  DeferredDominance(DominatorTree &DT_) : DT(DT_) {}
+
+  /// Queues multiple updates and discards duplicates.
+  void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates);
+
+  /// Helper method for a single edge insertion. It's almost always
+  /// better to batch updates and call applyUpdates to quickly remove duplicate
+  /// edges. This is best used when there is only a single insertion needed to
+  /// update Dominators.
+  void insertEdge(BasicBlock *From, BasicBlock *To);
+
+  /// Helper method for a single edge deletion. It's almost always better
+  /// to batch updates and call applyUpdates to quickly remove duplicate edges.
+  /// This is best used when there is only a single deletion needed to update
+  /// Dominators.
+  void deleteEdge(BasicBlock *From, BasicBlock *To);
+
+  /// Delays the deletion of a basic block until a flush() event.
+  void deleteBB(BasicBlock *DelBB);
+
+  /// Returns true if DelBB is awaiting deletion at a flush() event.
+  bool pendingDeletedBB(BasicBlock *DelBB);
+
+  /// Returns true if pending DT updates are queued for a flush() event.
+  bool pending();
+
+  /// Flushes all pending updates and block deletions. Returns a
+  /// correct DominatorTree reference to be used by the caller for analysis.
+  DominatorTree &flush();
+
+  /// Drops all internal state and forces a (slow) recalculation of the
+  /// DominatorTree based on the current state of the LLVM IR in F. This should
+  /// only be used in corner cases such as the Entry block of F being deleted.
+  void recalculate(Function &F);
+
+  /// Debug method to help view the state of pending updates.
+  LLVM_DUMP_METHOD void dump() const;
+
+private:
+  DominatorTree &DT;
+  SmallVector<DominatorTree::UpdateType, 16> PendUpdates;
+  SmallPtrSet<BasicBlock *, 8> DeletedBBs;
+
+  /// Apply an update (Kind, From, To) to the internal queued updates. The
+  /// update is only added when determined to be necessary. Checks for
+  /// self-domination, unnecessary updates, duplicate requests, and balanced
+  /// pairs of requests are all performed. Returns true if the update is
+  /// queued and false if it is discarded.
+  bool applyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From,
+                   BasicBlock *To);
+
+  /// Performs all pending basic block deletions. We have to defer the deletion
+  /// of these blocks until after the DominatorTree updates are applied. The
+  /// internal workings of the DominatorTree code expect every update's From
+  /// and To blocks to exist and to be a member of the same Function.
+  bool flushDelBB();
+};
+
 } // end namespace llvm
 
 #endif // LLVM_IR_DOMINATORS_H
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 79c56abe1c37..c8d6b0776fbf 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -141,6 +141,11 @@ public:
   // Provide fast operand accessors.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
+  /// Returns the number of non-debug IR instructions in this function.
+  /// This is equivalent to the sum of the sizes of each basic block contained
+  /// within this function.
+  unsigned getInstructionCount();
+
   /// Returns the FunctionType for me.
   FunctionType *getFunctionType() const {
     return cast<FunctionType>(getValueType());
@@ -181,7 +186,7 @@ public:
 
   static Intrinsic::ID lookupIntrinsicID(StringRef Name);
 
-  /// \brief Recalculate the ID for this function if it is an Intrinsic defined
+  /// Recalculate the ID for this function if it is an Intrinsic defined
   /// in llvm/Intrinsics.h.  Sets the intrinsic ID to Intrinsic::not_intrinsic
   /// if the name of this function does not match an intrinsic in that header.
   /// Note, this method does not need to be called directly, as it is called
@@ -201,52 +206,86 @@ public:
     setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4));
   }
 
-  /// @brief Return the attribute list for this Function.
+  /// Return the attribute list for this Function.
   AttributeList getAttributes() const { return AttributeSets; }
 
-  /// @brief Set the attribute list for this Function.
+  /// Set the attribute list for this Function.
   void setAttributes(AttributeList Attrs) { AttributeSets = Attrs; }
 
-  /// @brief Add function attributes to this function.
+  /// Add function attributes to this function.
   void addFnAttr(Attribute::AttrKind Kind) {
     addAttribute(AttributeList::FunctionIndex, Kind);
   }
 
-  /// @brief Add function attributes to this function.
+  /// Add function attributes to this function.
   void addFnAttr(StringRef Kind, StringRef Val = StringRef()) {
     addAttribute(AttributeList::FunctionIndex,
                  Attribute::get(getContext(), Kind, Val));
   }
 
+  /// Add function attributes to this function.
   void addFnAttr(Attribute Attr) {
     addAttribute(AttributeList::FunctionIndex, Attr);
   }
 
-  /// @brief Remove function attributes from this function.
+  /// Remove function attributes from this function.
   void removeFnAttr(Attribute::AttrKind Kind) {
     removeAttribute(AttributeList::FunctionIndex, Kind);
   }
 
-  /// @brief Remove function attribute from this function.
+  /// Remove function attribute from this function.
   void removeFnAttr(StringRef Kind) {
     setAttributes(getAttributes().removeAttribute(
         getContext(), AttributeList::FunctionIndex, Kind));
   }
 
-  /// \brief Set the entry count for this function.
+  enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic };
+
+  /// Class to represent profile counts.
+  ///
+  /// This class represents both real and synthetic profile counts.
+  class ProfileCount {
+  private:
+    uint64_t Count;
+    ProfileCountType PCT;
+    static ProfileCount Invalid;
+
+  public:
+    ProfileCount() : Count(-1), PCT(PCT_Invalid) {}
+    ProfileCount(uint64_t Count, ProfileCountType PCT)
+        : Count(Count), PCT(PCT) {}
+    bool hasValue() const { return PCT != PCT_Invalid; }
+    uint64_t getCount() const { return Count; }
+    ProfileCountType getType() const { return PCT; }
+    bool isSynthetic() const { return PCT == PCT_Synthetic; }
+    explicit operator bool() { return hasValue(); }
+    bool operator!() const { return !hasValue(); }
+    // Update the count retaining the same profile count type.
+    ProfileCount &setCount(uint64_t C) {
+      Count = C;
+      return *this;
+    }
+    static ProfileCount getInvalid() { return ProfileCount(-1, PCT_Invalid); }
+  };
+
+  /// Set the entry count for this function.
   ///
   /// Entry count is the number of times this function was executed based on
-  /// pgo data. \p Imports points to a set of GUIDs that needs to be imported
-  /// by the function for sample PGO, to enable the same inlines as the
-  /// profiled optimized binary.
-  void setEntryCount(uint64_t Count,
+  /// pgo data. \p Imports points to a set of GUIDs that needs to
+  /// be imported by the function for sample PGO, to enable the same inlines as
+  /// the profiled optimized binary.
+  void setEntryCount(ProfileCount Count,
+                     const DenseSet<GlobalValue::GUID> *Imports = nullptr);
+
+  /// A convenience wrapper for setting entry count
+  void setEntryCount(uint64_t Count, ProfileCountType Type = PCT_Real,
                      const DenseSet<GlobalValue::GUID> *Imports = nullptr);
 
-  /// \brief Get the entry count for this function.
+  /// Get the entry count for this function.
   ///
   /// Entry count is the number of times the function was executed based on
   /// pgo data.
-  Optional<uint64_t> getEntryCount() const;
+  ProfileCount getEntryCount() const;
 
   /// Return true if the function is annotated with profile data.
   ///
@@ -264,23 +303,27 @@ public:
   /// Get the section prefix for this function.
   Optional<StringRef> getSectionPrefix() const;
 
-  /// @brief Return true if the function has the attribute.
+  /// Return true if the function has the attribute.
   bool hasFnAttribute(Attribute::AttrKind Kind) const {
     return AttributeSets.hasFnAttribute(Kind);
   }
+
+  /// Return true if the function has the attribute.
   bool hasFnAttribute(StringRef Kind) const {
     return AttributeSets.hasFnAttribute(Kind);
   }
 
-  /// @brief Return the attribute for the given attribute kind.
+  /// Return the attribute for the given attribute kind.
   Attribute getFnAttribute(Attribute::AttrKind Kind) const {
     return getAttribute(AttributeList::FunctionIndex, Kind);
   }
+
+  /// Return the attribute for the given attribute kind.
   Attribute getFnAttribute(StringRef Kind) const {
     return getAttribute(AttributeList::FunctionIndex, Kind);
   }
 
-  /// \brief Return the stack alignment for the function.
+  /// Return the stack alignment for the function.
   unsigned getFnStackAlignment() const {
     if (!hasFnAttribute(Attribute::StackAlignment))
       return 0;
@@ -296,108 +339,110 @@ public:
   void setGC(std::string Str);
   void clearGC();
 
-  /// @brief adds the attribute to the list of attributes.
+  /// adds the attribute to the list of attributes.
   void addAttribute(unsigned i, Attribute::AttrKind Kind);
 
-  /// @brief adds the attribute to the list of attributes.
+  /// adds the attribute to the list of attributes.
   void addAttribute(unsigned i, Attribute Attr);
 
-  /// @brief adds the attributes to the list of attributes.
+  /// adds the attributes to the list of attributes.
   void addAttributes(unsigned i, const AttrBuilder &Attrs);
 
-  /// @brief adds the attribute to the list of attributes for the given arg.
+  /// adds the attribute to the list of attributes for the given arg.
   void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
 
-  /// @brief adds the attribute to the list of attributes for the given arg.
+  /// adds the attribute to the list of attributes for the given arg.
   void addParamAttr(unsigned ArgNo, Attribute Attr);
 
-  /// @brief adds the attributes to the list of attributes for the given arg.
+  /// adds the attributes to the list of attributes for the given arg.
   void addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
 
-  /// @brief removes the attribute from the list of attributes.
+  /// removes the attribute from the list of attributes.
   void removeAttribute(unsigned i, Attribute::AttrKind Kind);
 
-  /// @brief removes the attribute from the list of attributes.
+  /// removes the attribute from the list of attributes.
   void removeAttribute(unsigned i, StringRef Kind);
 
-  /// @brief removes the attributes from the list of attributes.
+  /// removes the attributes from the list of attributes.
   void removeAttributes(unsigned i, const AttrBuilder &Attrs);
 
-  /// @brief removes the attribute from the list of attributes.
+  /// removes the attribute from the list of attributes.
   void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
 
-  /// @brief removes the attribute from the list of attributes.
+  /// removes the attribute from the list of attributes.
   void removeParamAttr(unsigned ArgNo, StringRef Kind);
 
-  /// @brief removes the attribute from the list of attributes.
+  /// removes the attribute from the list of attributes.
   void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
 
-  /// @brief check if an attributes is in the list of attributes.
+  /// check if an attributes is in the list of attributes.
   bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const {
     return getAttributes().hasAttribute(i, Kind);
   }
 
-  /// @brief check if an attributes is in the list of attributes.
+  /// check if an attributes is in the list of attributes.
   bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
     return getAttributes().hasParamAttribute(ArgNo, Kind);
   }
 
+  /// gets the attribute from the list of attributes.
   Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
     return AttributeSets.getAttribute(i, Kind);
   }
 
+  /// gets the attribute from the list of attributes.
   Attribute getAttribute(unsigned i, StringRef Kind) const {
     return AttributeSets.getAttribute(i, Kind);
   }
 
-  /// @brief adds the dereferenceable attribute to the list of attributes.
+  /// adds the dereferenceable attribute to the list of attributes.
   void addDereferenceableAttr(unsigned i, uint64_t Bytes);
 
-  /// @brief adds the dereferenceable attribute to the list of attributes for
+  /// adds the dereferenceable attribute to the list of attributes for
   /// the given arg.
   void addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes);
 
-  /// @brief adds the dereferenceable_or_null attribute to the list of
+  /// adds the dereferenceable_or_null attribute to the list of
   /// attributes.
   void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
 
-  /// @brief adds the dereferenceable_or_null attribute to the list of
+  /// adds the dereferenceable_or_null attribute to the list of
   /// attributes for the given arg.
   void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes);
 
-  /// @brief Extract the alignment for a call or parameter (0=unknown).
+  /// Extract the alignment for a call or parameter (0=unknown).
   unsigned getParamAlignment(unsigned ArgNo) const {
     return AttributeSets.getParamAlignment(ArgNo);
   }
 
-  /// @brief Extract the number of dereferenceable bytes for a call or
+  /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   /// @param i AttributeList index, referring to a return value or argument.
   uint64_t getDereferenceableBytes(unsigned i) const {
     return AttributeSets.getDereferenceableBytes(i);
   }
 
-  /// @brief Extract the number of dereferenceable bytes for a parameter.
+  /// Extract the number of dereferenceable bytes for a parameter.
   /// @param ArgNo Index of an argument, with 0 being the first function arg.
   uint64_t getParamDereferenceableBytes(unsigned ArgNo) const {
     return AttributeSets.getParamDereferenceableBytes(ArgNo);
   }
 
-  /// @brief Extract the number of dereferenceable_or_null bytes for a call or
+  /// Extract the number of dereferenceable_or_null bytes for a call or
   /// parameter (0=unknown).
   /// @param i AttributeList index, referring to a return value or argument.
   uint64_t getDereferenceableOrNullBytes(unsigned i) const {
     return AttributeSets.getDereferenceableOrNullBytes(i);
   }
 
-  /// @brief Extract the number of dereferenceable_or_null bytes for a
+  /// Extract the number of dereferenceable_or_null bytes for a
   /// parameter.
   /// @param ArgNo AttributeList ArgNo, referring to an argument.
   uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const {
     return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo);
   }
 
-  /// @brief Determine if the function does not access memory.
+  /// Determine if the function does not access memory.
   bool doesNotAccessMemory() const {
     return hasFnAttribute(Attribute::ReadNone);
   }
@@ -405,7 +450,7 @@ public:
     addFnAttr(Attribute::ReadNone);
   }
 
-  /// @brief Determine if the function does not access or only reads memory.
+  /// Determine if the function does not access or only reads memory.
   bool onlyReadsMemory() const {
     return doesNotAccessMemory() || hasFnAttribute(Attribute::ReadOnly);
   }
@@ -413,7 +458,7 @@ public:
     addFnAttr(Attribute::ReadOnly);
   }
 
-  /// @brief Determine if the function does not access or only writes memory.
+  /// Determine if the function does not access or only writes memory.
   bool doesNotReadMemory() const {
     return doesNotAccessMemory() || hasFnAttribute(Attribute::WriteOnly);
   }
@@ -421,14 +466,14 @@ public:
     addFnAttr(Attribute::WriteOnly);
   }
 
-  /// @brief Determine if the call can access memmory only using pointers based
+  /// Determine if the call can access memmory only using pointers based
   /// on its arguments.
   bool onlyAccessesArgMemory() const {
     return hasFnAttribute(Attribute::ArgMemOnly);
   }
   void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); }
 
-  /// @brief Determine if the function may only access memory that is
+  /// Determine if the function may only access memory that is
   ///  inaccessible from the IR.
   bool onlyAccessesInaccessibleMemory() const {
     return hasFnAttribute(Attribute::InaccessibleMemOnly);
@@ -437,7 +482,7 @@ public:
     addFnAttr(Attribute::InaccessibleMemOnly);
   }
 
-  /// @brief Determine if the function may only access memory that is
+  /// Determine if the function may only access memory that is
   ///  either inaccessible from the IR or pointed to by its arguments.
   bool onlyAccessesInaccessibleMemOrArgMem() const {
     return hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly);
@@ -446,7 +491,7 @@ public:
     addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
   }
 
-  /// @brief Determine if the function cannot return.
+  /// Determine if the function cannot return.
   bool doesNotReturn() const {
     return hasFnAttribute(Attribute::NoReturn);
   }
@@ -454,7 +499,10 @@ public:
     addFnAttr(Attribute::NoReturn);
   }
 
-  /// @brief Determine if the function cannot unwind.
+  /// Determine if the function should not perform indirect branch tracking.
+  bool doesNoCfCheck() const { return hasFnAttribute(Attribute::NoCfCheck); }
+
+  /// Determine if the function cannot unwind.
   bool doesNotThrow() const {
     return hasFnAttribute(Attribute::NoUnwind);
   }
@@ -462,7 +510,7 @@ public:
     addFnAttr(Attribute::NoUnwind);
   }
 
-  /// @brief Determine if the call cannot be duplicated.
+  /// Determine if the call cannot be duplicated.
   bool cannotDuplicate() const {
     return hasFnAttribute(Attribute::NoDuplicate);
   }
@@ -470,7 +518,7 @@ public:
     addFnAttr(Attribute::NoDuplicate);
   }
 
-  /// @brief Determine if the call is convergent.
+  /// Determine if the call is convergent.
   bool isConvergent() const {
     return hasFnAttribute(Attribute::Convergent);
   }
@@ -481,7 +529,7 @@ public:
     removeFnAttr(Attribute::Convergent);
   }
 
-  /// @brief Determine if the call has sideeffects.
+  /// Determine if the call has sideeffects.
   bool isSpeculatable() const {
     return hasFnAttribute(Attribute::Speculatable);
   }
@@ -498,7 +546,7 @@ public:
     addFnAttr(Attribute::NoRecurse);
   }
 
-  /// @brief True if the ABI mandates (or the user requested) that this
+  /// True if the ABI mandates (or the user requested) that this
   /// function be in a unwind table.
   bool hasUWTable() const {
     return hasFnAttribute(Attribute::UWTable);
@@ -507,19 +555,19 @@ public:
     addFnAttr(Attribute::UWTable);
   }
 
-  /// @brief True if this function needs an unwind table.
+  /// True if this function needs an unwind table.
   bool needsUnwindTableEntry() const {
     return hasUWTable() || !doesNotThrow();
   }
 
-  /// @brief Determine if the function returns a structure through first
+  /// Determine if the function returns a structure through first
   /// or second pointer argument.
   bool hasStructRetAttr() const {
     return AttributeSets.hasParamAttribute(0, Attribute::StructRet) ||
            AttributeSets.hasParamAttribute(1, Attribute::StructRet);
   }
 
-  /// @brief Determine if the parameter or return value is marked with NoAlias
+  /// Determine if the parameter or return value is marked with NoAlias
   /// attribute.
   bool returnDoesNotAlias() const {
     return AttributeSets.hasAttribute(AttributeList::ReturnIndex,
@@ -636,30 +684,30 @@ public:
   size_t arg_size() const { return NumArgs; }
   bool arg_empty() const { return arg_size() == 0; }
 
-  /// \brief Check whether this function has a personality function.
+  /// Check whether this function has a personality function.
   bool hasPersonalityFn() const {
     return getSubclassDataFromValue() & (1<<3);
   }
 
-  /// \brief Get the personality function associated with this function.
+  /// Get the personality function associated with this function.
   Constant *getPersonalityFn() const;
   void setPersonalityFn(Constant *Fn);
 
-  /// \brief Check whether this function has prefix data.
+  /// Check whether this function has prefix data.
   bool hasPrefixData() const {
     return getSubclassDataFromValue() & (1<<1);
   }
 
-  /// \brief Get the prefix data associated with this function.
+  /// Get the prefix data associated with this function.
   Constant *getPrefixData() const;
   void setPrefixData(Constant *PrefixData);
 
-  /// \brief Check whether this function has prologue data.
+  /// Check whether this function has prologue data.
   bool hasPrologueData() const {
     return getSubclassDataFromValue() & (1<<2);
   }
 
-  /// \brief Get the prologue data associated with this function.
+  /// Get the prologue data associated with this function.
   Constant *getPrologueData() const;
   void setPrologueData(Constant *PrologueData);
 
@@ -719,12 +767,12 @@ public:
   /// setjmp or other function that gcc recognizes as "returning twice".
   bool callsFunctionThatReturnsTwice() const;
 
-  /// \brief Set the attached subprogram.
+  /// Set the attached subprogram.
   ///
   /// Calls \a setMetadata() with \a LLVMContext::MD_dbg.
   void setSubprogram(DISubprogram *SP);
 
-  /// \brief Get the attached subprogram.
+  /// Get the attached subprogram.
   ///
   /// Calls \a getMetadata() with \a LLVMContext::MD_dbg and casts the result
   /// to \a DISubprogram.
@@ -733,6 +781,12 @@ public:
   /// Returns true if we should emit debug info for profiling.
   bool isDebugInfoForProfiling() const;
 
+  /// Check if null pointer dereferencing is considered undefined behavior for
+  /// the function.
+  /// Return value: false => null pointer dereference is undefined.
+  /// Return value: true =>  null pointer dereference is not undefined.
+  bool nullPointerIsDefined() const;
+
 private:
   void allocHungoffUselist();
   template<int Idx> void setHungoffOperand(Constant *C);
@@ -745,6 +799,13 @@ private:
   void setValueSubclassDataBit(unsigned Bit, bool On);
 };
 
+/// Check whether null pointer dereferencing is considered undefined behavior
+/// for a given function or an address space.
+/// Null pointer access in non-zero address space is not considered undefined.
+/// Return value: false => null pointer dereference is undefined.
+/// Return value: true =>  null pointer dereference is not undefined.
+bool NullPointerIsDefined(const Function *F, unsigned AS = 0);
+
 template <>
 struct OperandTraits<Function> : public HungoffOperandTraits<3> {};
 
diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h
index 278b193567f1..1fd3568100c2 100644
--- a/include/llvm/IR/GlobalObject.h
+++ b/include/llvm/IR/GlobalObject.h
@@ -105,6 +105,14 @@ public:
   /// Check if this has any metadata.
   bool hasMetadata() const { return hasMetadataHashEntry(); }
 
+  /// Check if this has any metadata of the given kind.
+  bool hasMetadata(unsigned KindID) const {
+    return getMetadata(KindID) != nullptr;
+  }
+  bool hasMetadata(StringRef Kind) const {
+    return getMetadata(Kind) != nullptr;
+  }
+
   /// Get the current metadata attachments for the given kind, if any.
   ///
   /// These functions require that the function have at most a single attachment
@@ -143,7 +151,9 @@ public:
   getAllMetadata(SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const;
 
   /// Erase all metadata attachments with the given kind.
-  void eraseMetadata(unsigned KindID);
+  ///
+  /// \returns true if any metadata was removed.
+  bool eraseMetadata(unsigned KindID);
 
   /// Copy metadata from Src, adjusting offsets by Offset.
   void copyMetadata(const GlobalObject *Src, unsigned Offset);
diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h
index 1793de7887fc..9d9f4f65a6b5 100644
--- a/include/llvm/IR/GlobalValue.h
+++ b/include/llvm/IR/GlobalValue.h
@@ -44,7 +44,7 @@ namespace Intrinsic {
 
 class GlobalValue : public Constant {
 public:
-  /// @brief An enumeration for the kinds of linkage for global values.
+  /// An enumeration for the kinds of linkage for global values.
   enum LinkageTypes {
     ExternalLinkage = 0,///< Externally visible function
     AvailableExternallyLinkage, ///< Available for inspection, not emission.
@@ -59,14 +59,14 @@ public:
     CommonLinkage       ///< Tentative definitions.
   };
 
-  /// @brief An enumeration for the kinds of visibility of global values.
+  /// An enumeration for the kinds of visibility of global values.
   enum VisibilityTypes {
     DefaultVisibility = 0,  ///< The GV is visible
     HiddenVisibility,       ///< The GV is hidden
     ProtectedVisibility     ///< The GV is protected
   };
 
-  /// @brief Storage classes of global values for PE targets.
+  /// Storage classes of global values for PE targets.
   enum DLLStorageClassTypes {
     DefaultStorageClass   = 0,
     DLLImportStorageClass = 1, ///< Function to be imported from DLL
@@ -77,11 +77,12 @@ protected:
   GlobalValue(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
               LinkageTypes Linkage, const Twine &Name, unsigned AddressSpace)
       : Constant(PointerType::get(Ty, AddressSpace), VTy, Ops, NumOps),
-        ValueType(Ty), Linkage(Linkage), Visibility(DefaultVisibility),
+        ValueType(Ty), Visibility(DefaultVisibility),
         UnnamedAddrVal(unsigned(UnnamedAddr::None)),
         DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal),
-        HasLLVMReservedName(false), IsDSOLocal(false),
-        IntID((Intrinsic::ID)0U), Parent(nullptr) {
+        HasLLVMReservedName(false), IsDSOLocal(false), IntID((Intrinsic::ID)0U),
+        Parent(nullptr) {
+    setLinkage(Linkage);
     setName(Name);
   }
 
@@ -109,12 +110,12 @@ protected:
   unsigned IsDSOLocal : 1;
 
 private:
-  friend class Constant;
-
   // Give subclasses access to what otherwise would be wasted padding.
   // (17 + 4 + 2 + 2 + 2 + 3 + 1 + 1) == 32.
   unsigned SubClassData : GlobalValueSubClassDataBits;
 
+  friend class Constant;
+
   void destroyConstantImpl();
   Value *handleOperandChangeImpl(Value *From, Value *To);
 
@@ -142,8 +143,14 @@ private:
     llvm_unreachable("Fully covered switch above!");
   }
 
+  void maybeSetDsoLocal() {
+    if (hasLocalLinkage() ||
+        (!hasDefaultVisibility() && !hasExternalWeakLinkage()))
+      setDSOLocal(true);
+  }
+
 protected:
-  /// \brief The intrinsic ID for this subclass (which must be a Function).
+  /// The intrinsic ID for this subclass (which must be a Function).
   ///
   /// This member is defined by this class, but not used for anything.
   /// Subclasses can use it to store their intrinsic ID, if they have one.
@@ -232,6 +239,7 @@ public:
     assert((!hasLocalLinkage() || V == DefaultVisibility) &&
            "local linkage requires default visibility");
     Visibility = V;
+    maybeSetDsoLocal();
   }
 
   /// If the value is "Thread Local", its value isn't shared by the threads.
@@ -437,6 +445,7 @@ public:
     if (isLocalLinkage(LT))
       Visibility = DefaultVisibility;
     Linkage = LT;
+    maybeSetDsoLocal();
   }
   LinkageTypes getLinkage() const { return LinkageTypes(Linkage); }
 
@@ -563,6 +572,13 @@ public:
            V->getValueID() == Value::GlobalAliasVal ||
            V->getValueID() == Value::GlobalIFuncVal;
   }
+
+  /// True if GV can be left out of the object symbol table. This is the case
+  /// for linkonce_odr values whose address is not significant. While legal, it
+  /// is not normally profitable to omit them from the .o symbol table. Using
+  /// this analysis makes sense when the information can be passed down to the
+  /// linker or we are in LTO.
+  bool canBeOmittedFromSymbolTable() const;
 };
 
 } // end namespace llvm
diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h
index 34ace6f2b4f4..03b9ec46ebb4 100644
--- a/include/llvm/IR/GlobalVariable.h
+++ b/include/llvm/IR/GlobalVariable.h
@@ -68,9 +68,6 @@ public:
 
   ~GlobalVariable() {
     dropAllReferences();
-
-    // FIXME: needed by operator delete
-    setGlobalVariableNumOperands(1);
   }
 
   // allocate space for exactly one operand
@@ -78,6 +75,16 @@ public:
     return User::operator new(s, 1);
   }
 
+  // delete space for exactly one operand as created in the corresponding new operator
+  void operator delete(void *ptr){
+    assert(ptr != nullptr && "must not be nullptr");
+    User *Obj = static_cast<User *>(ptr);
+    // Number of operands can be set to 0 after construction and initialization. Make sure
+    // that number of operands is reset to 1, as this is needed in User::operator delete
+    Obj->setGlobalVariableNumOperands(1);
+    User::operator delete(Obj);
+  }
+
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index e687ca689d46..70641ba25d2e 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -1,4 +1,4 @@
-//===---- llvm/IRBuilder.h - Builder for LLVM Instructions ------*- C++ -*-===//
+//===- llvm/IRBuilder.h - Builder for LLVM Instructions ---------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -42,20 +42,19 @@
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Casting.h"
-#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <functional>
+#include <utility>
 
 namespace llvm {
 
 class APInt;
 class MDNode;
-class Module;
 class Use;
 
-/// \brief This provides the default implementation of the IRBuilder
+/// This provides the default implementation of the IRBuilder
 /// 'InsertHelper' method that is called whenever an instruction is created by
 /// IRBuilder and needs to be inserted.
 ///
@@ -86,7 +85,7 @@ protected:
   }
 };
 
-/// \brief Common base class shared among various IRBuilders.
+/// Common base class shared among various IRBuilders.
 class IRBuilderBase {
   DebugLoc CurDbgLocation;
 
@@ -112,7 +111,7 @@ public:
   // Builder configuration methods
   //===--------------------------------------------------------------------===//
 
-  /// \brief Clear the insertion point: created instructions will not be
+  /// Clear the insertion point: created instructions will not be
   /// inserted into a block.
   void ClearInsertionPoint() {
     BB = nullptr;
@@ -123,14 +122,14 @@ public:
   BasicBlock::iterator GetInsertPoint() const { return InsertPt; }
   LLVMContext &getContext() const { return Context; }
 
-  /// \brief This specifies that created instructions should be appended to the
+  /// This specifies that created instructions should be appended to the
   /// end of the specified block.
   void SetInsertPoint(BasicBlock *TheBB) {
     BB = TheBB;
     InsertPt = BB->end();
   }
 
-  /// \brief This specifies that created instructions should be inserted before
+  /// This specifies that created instructions should be inserted before
   /// the specified instruction.
   void SetInsertPoint(Instruction *I) {
     BB = I->getParent();
@@ -139,7 +138,7 @@ public:
     SetCurrentDebugLocation(I->getDebugLoc());
   }
 
-  /// \brief This specifies that created instructions should be inserted at the
+  /// This specifies that created instructions should be inserted at the
   /// specified point.
   void SetInsertPoint(BasicBlock *TheBB, BasicBlock::iterator IP) {
     BB = TheBB;
@@ -148,20 +147,20 @@ public:
       SetCurrentDebugLocation(IP->getDebugLoc());
   }
 
-  /// \brief Set location information used by debugging information.
+  /// Set location information used by debugging information.
   void SetCurrentDebugLocation(DebugLoc L) { CurDbgLocation = std::move(L); }
 
-  /// \brief Get location information used by debugging information.
+  /// Get location information used by debugging information.
   const DebugLoc &getCurrentDebugLocation() const { return CurDbgLocation; }
 
-  /// \brief If this builder has a current debug location, set it on the
+  /// If this builder has a current debug location, set it on the
   /// specified instruction.
   void SetInstDebugLocation(Instruction *I) const {
     if (CurDbgLocation)
       I->setDebugLoc(CurDbgLocation);
   }
 
-  /// \brief Get the return type of the current function that we're emitting
+  /// Get the return type of the current function that we're emitting
   /// into.
   Type *getCurrentFunctionReturnType() const;
 
@@ -171,33 +170,33 @@ public:
     BasicBlock::iterator Point;
 
   public:
-    /// \brief Creates a new insertion point which doesn't point to anything.
+    /// Creates a new insertion point which doesn't point to anything.
     InsertPoint() = default;
 
-    /// \brief Creates a new insertion point at the given location.
+    /// Creates a new insertion point at the given location.
     InsertPoint(BasicBlock *InsertBlock, BasicBlock::iterator InsertPoint)
-      : Block(InsertBlock), Point(InsertPoint) {}
+        : Block(InsertBlock), Point(InsertPoint) {}
 
-    /// \brief Returns true if this insert point is set.
+    /// Returns true if this insert point is set.
     bool isSet() const { return (Block != nullptr); }
 
     BasicBlock *getBlock() const { return Block; }
     BasicBlock::iterator getPoint() const { return Point; }
   };
 
-  /// \brief Returns the current insert point.
+  /// Returns the current insert point.
   InsertPoint saveIP() const {
     return InsertPoint(GetInsertBlock(), GetInsertPoint());
   }
 
-  /// \brief Returns the current insert point, clearing it in the process.
+  /// Returns the current insert point, clearing it in the process.
   InsertPoint saveAndClearIP() {
     InsertPoint IP(GetInsertBlock(), GetInsertPoint());
     ClearInsertionPoint();
     return IP;
   }
 
-  /// \brief Sets the current insert point to a previously-saved location.
+  /// Sets the current insert point to a previously-saved location.
   void restoreIP(InsertPoint IP) {
     if (IP.isSet())
       SetInsertPoint(IP.getBlock(), IP.getPoint());
@@ -205,26 +204,26 @@ public:
       ClearInsertionPoint();
   }
 
-  /// \brief Get the floating point math metadata being used.
+  /// Get the floating point math metadata being used.
   MDNode *getDefaultFPMathTag() const { return DefaultFPMathTag; }
 
-  /// \brief Get the flags to be applied to created floating point ops
+  /// Get the flags to be applied to created floating point ops
   FastMathFlags getFastMathFlags() const { return FMF; }
 
-  /// \brief Clear the fast-math flags.
+  /// Clear the fast-math flags.
   void clearFastMathFlags() { FMF.clear(); }
 
-  /// \brief Set the floating point math metadata to be used.
+  /// Set the floating point math metadata to be used.
   void setDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; }
 
-  /// \brief Set the fast-math flags to be used with generated fp-math operators
+  /// Set the fast-math flags to be used with generated fp-math operators
   void setFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; }
 
   //===--------------------------------------------------------------------===//
   // RAII helpers.
   //===--------------------------------------------------------------------===//
 
-  // \brief RAII object that stores the current insertion point and restores it
+  // RAII object that stores the current insertion point and restores it
   // when the object is destroyed. This includes the debug location.
   class InsertPointGuard {
     IRBuilderBase &Builder;
@@ -246,7 +245,7 @@ public:
     }
   };
 
-  // \brief RAII object that stores the current fast math settings and restores
+  // RAII object that stores the current fast math settings and restores
   // them when the object is destroyed.
   class FastMathFlagGuard {
     IRBuilderBase &Builder;
@@ -270,7 +269,7 @@ public:
   // Miscellaneous creation methods.
   //===--------------------------------------------------------------------===//
 
-  /// \brief Make a new global variable with initializer type i8*
+  /// Make a new global variable with initializer type i8*
   ///
   /// Make a new global variable with an initializer that has array of i8 type
   /// filled in with the null terminated string value specified.  The new global
@@ -279,48 +278,48 @@ public:
   GlobalVariable *CreateGlobalString(StringRef Str, const Twine &Name = "",
                                      unsigned AddressSpace = 0);
 
-  /// \brief Get a constant value representing either true or false.
+  /// Get a constant value representing either true or false.
   ConstantInt *getInt1(bool V) {
     return ConstantInt::get(getInt1Ty(), V);
   }
 
-  /// \brief Get the constant value for i1 true.
+  /// Get the constant value for i1 true.
   ConstantInt *getTrue() {
     return ConstantInt::getTrue(Context);
   }
 
-  /// \brief Get the constant value for i1 false.
+  /// Get the constant value for i1 false.
   ConstantInt *getFalse() {
     return ConstantInt::getFalse(Context);
   }
 
-  /// \brief Get a constant 8-bit value.
+  /// Get a constant 8-bit value.
   ConstantInt *getInt8(uint8_t C) {
     return ConstantInt::get(getInt8Ty(), C);
   }
 
-  /// \brief Get a constant 16-bit value.
+  /// Get a constant 16-bit value.
   ConstantInt *getInt16(uint16_t C) {
     return ConstantInt::get(getInt16Ty(), C);
   }
 
-  /// \brief Get a constant 32-bit value.
+  /// Get a constant 32-bit value.
   ConstantInt *getInt32(uint32_t C) {
     return ConstantInt::get(getInt32Ty(), C);
   }
 
-  /// \brief Get a constant 64-bit value.
+  /// Get a constant 64-bit value.
   ConstantInt *getInt64(uint64_t C) {
     return ConstantInt::get(getInt64Ty(), C);
   }
 
-  /// \brief Get a constant N-bit value, zero extended or truncated from
+  /// Get a constant N-bit value, zero extended or truncated from
   /// a 64-bit value.
   ConstantInt *getIntN(unsigned N, uint64_t C) {
     return ConstantInt::get(getIntNTy(N), C);
   }
 
-  /// \brief Get a constant integer value.
+  /// Get a constant integer value.
   ConstantInt *getInt(const APInt &AI) {
     return ConstantInt::get(Context, AI);
   }
@@ -329,65 +328,65 @@ public:
   // Type creation methods
   //===--------------------------------------------------------------------===//
 
-  /// \brief Fetch the type representing a single bit
+  /// Fetch the type representing a single bit
   IntegerType *getInt1Ty() {
     return Type::getInt1Ty(Context);
   }
 
-  /// \brief Fetch the type representing an 8-bit integer.
+  /// Fetch the type representing an 8-bit integer.
   IntegerType *getInt8Ty() {
     return Type::getInt8Ty(Context);
   }
 
-  /// \brief Fetch the type representing a 16-bit integer.
+  /// Fetch the type representing a 16-bit integer.
   IntegerType *getInt16Ty() {
     return Type::getInt16Ty(Context);
   }
 
-  /// \brief Fetch the type representing a 32-bit integer.
+  /// Fetch the type representing a 32-bit integer.
   IntegerType *getInt32Ty() {
     return Type::getInt32Ty(Context);
   }
 
-  /// \brief Fetch the type representing a 64-bit integer.
+  /// Fetch the type representing a 64-bit integer.
   IntegerType *getInt64Ty() {
     return Type::getInt64Ty(Context);
   }
 
-  /// \brief Fetch the type representing a 128-bit integer.
+  /// Fetch the type representing a 128-bit integer.
   IntegerType *getInt128Ty() { return Type::getInt128Ty(Context); }
 
-  /// \brief Fetch the type representing an N-bit integer.
+  /// Fetch the type representing an N-bit integer.
   IntegerType *getIntNTy(unsigned N) {
     return Type::getIntNTy(Context, N);
   }
 
-  /// \brief Fetch the type representing a 16-bit floating point value.
+  /// Fetch the type representing a 16-bit floating point value.
   Type *getHalfTy() {
     return Type::getHalfTy(Context);
   }
 
-  /// \brief Fetch the type representing a 32-bit floating point value.
+  /// Fetch the type representing a 32-bit floating point value.
   Type *getFloatTy() {
     return Type::getFloatTy(Context);
   }
 
-  /// \brief Fetch the type representing a 64-bit floating point value.
+  /// Fetch the type representing a 64-bit floating point value.
   Type *getDoubleTy() {
     return Type::getDoubleTy(Context);
   }
 
-  /// \brief Fetch the type representing void.
+  /// Fetch the type representing void.
   Type *getVoidTy() {
     return Type::getVoidTy(Context);
   }
 
-  /// \brief Fetch the type representing a pointer to an 8-bit integer value.
+  /// Fetch the type representing a pointer to an 8-bit integer value.
   PointerType *getInt8PtrTy(unsigned AddrSpace = 0) {
     return Type::getInt8PtrTy(Context, AddrSpace);
   }
 
-  /// \brief Fetch the type representing a pointer to an integer value.
+  /// Fetch the type representing a pointer to an integer value.
   IntegerType *getIntPtrTy(const DataLayout &DL, unsigned AddrSpace = 0) {
     return DL.getIntPtrType(Context, AddrSpace);
   }
@@ -396,7 +395,7 @@ public:
   // Intrinsic creation methods
   //===--------------------------------------------------------------------===//
 
-  /// \brief Create and insert a memset to the specified pointer and the
+  /// Create and insert a memset to the specified pointer and the
   /// specified value.
   ///
   /// If the pointer isn't an i8*, it will be converted. If a TBAA tag is
@@ -415,27 +414,54 @@ public:
                          MDNode *ScopeTag = nullptr,
                          MDNode *NoAliasTag = nullptr);
 
-  /// \brief Create and insert a memcpy between the specified pointers.
+  /// Create and insert an element unordered-atomic memset of the region of
+  /// memory starting at the given pointer to the given value.
+  ///
+  /// If the pointer isn't an i8*, it will be converted. If a TBAA tag is
+  /// specified, it will be added to the instruction. Likewise with alias.scope
+  /// and noalias tags.
+  CallInst *CreateElementUnorderedAtomicMemSet(Value *Ptr, Value *Val,
+                                               uint64_t Size, unsigned Align,
+                                               uint32_t ElementSize,
+                                               MDNode *TBAATag = nullptr,
+                                               MDNode *ScopeTag = nullptr,
+                                               MDNode *NoAliasTag = nullptr) {
+    return CreateElementUnorderedAtomicMemSet(Ptr, Val, getInt64(Size), Align,
+                                              ElementSize, TBAATag, ScopeTag,
+                                              NoAliasTag);
+  }
+
+  CallInst *CreateElementUnorderedAtomicMemSet(Value *Ptr, Value *Val,
+                                               Value *Size, unsigned Align,
+                                               uint32_t ElementSize,
+                                               MDNode *TBAATag = nullptr,
+                                               MDNode *ScopeTag = nullptr,
+                                               MDNode *NoAliasTag = nullptr);
+
+  /// Create and insert a memcpy between the specified pointers.
   ///
   /// If the pointers aren't i8*, they will be converted.  If a TBAA tag is
   /// specified, it will be added to the instruction. Likewise with alias.scope
   /// and noalias tags.
-  CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
+  CallInst *CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src,
+                         unsigned SrcAlign, uint64_t Size,
                          bool isVolatile = false, MDNode *TBAATag = nullptr,
                          MDNode *TBAAStructTag = nullptr,
                          MDNode *ScopeTag = nullptr,
                          MDNode *NoAliasTag = nullptr) {
-    return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag,
-                        TBAAStructTag, ScopeTag, NoAliasTag);
+    return CreateMemCpy(Dst, DstAlign, Src, SrcAlign, getInt64(Size),
+                        isVolatile, TBAATag, TBAAStructTag, ScopeTag,
+                        NoAliasTag);
   }
 
-  CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+  CallInst *CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src,
+                         unsigned SrcAlign, Value *Size,
                          bool isVolatile = false, MDNode *TBAATag = nullptr,
                          MDNode *TBAAStructTag = nullptr,
                          MDNode *ScopeTag = nullptr,
                          MDNode *NoAliasTag = nullptr);
 
-  /// \brief Create and insert an element unordered-atomic memcpy between the
+  /// Create and insert an element unordered-atomic memcpy between the
   /// specified pointers.
   ///
   /// DstAlign/SrcAlign are the alignments of the Dst/Src pointers, respectively.
@@ -459,70 +485,95 @@ public:
       MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr,
       MDNode *NoAliasTag = nullptr);
 
-  /// \brief Create and insert a memmove between the specified
+  /// Create and insert a memmove between the specified
   /// pointers.
   ///
   /// If the pointers aren't i8*, they will be converted.  If a TBAA tag is
   /// specified, it will be added to the instruction. Likewise with alias.scope
   /// and noalias tags.
-  CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
-                          bool isVolatile = false, MDNode *TBAATag = nullptr,
-                          MDNode *ScopeTag = nullptr,
+  CallInst *CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
+                          uint64_t Size, bool isVolatile = false,
+                          MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr,
                           MDNode *NoAliasTag = nullptr) {
-    return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile,
+    return CreateMemMove(Dst, DstAlign, Src, SrcAlign, getInt64(Size), isVolatile,
                          TBAATag, ScopeTag, NoAliasTag);
   }
 
-  CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
-                          bool isVolatile = false, MDNode *TBAATag = nullptr,
+  CallInst *CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
+                          Value *Size, bool isVolatile = false, MDNode *TBAATag = nullptr,
                           MDNode *ScopeTag = nullptr,
                           MDNode *NoAliasTag = nullptr);
 
-  /// \brief Create a vector fadd reduction intrinsic of the source vector.
+  /// \brief Create and insert an element unordered-atomic memmove between the
+  /// specified pointers.
+  ///
+  /// DstAlign/SrcAlign are the alignments of the Dst/Src pointers,
+  /// respectively.
+  ///
+  /// If the pointers aren't i8*, they will be converted.  If a TBAA tag is
+  /// specified, it will be added to the instruction. Likewise with alias.scope
+  /// and noalias tags.
+  CallInst *CreateElementUnorderedAtomicMemMove(
+      Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
+      uint64_t Size, uint32_t ElementSize, MDNode *TBAATag = nullptr,
+      MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr,
+      MDNode *NoAliasTag = nullptr) {
+    return CreateElementUnorderedAtomicMemMove(
+        Dst, DstAlign, Src, SrcAlign, getInt64(Size), ElementSize, TBAATag,
+        TBAAStructTag, ScopeTag, NoAliasTag);
+  }
+
+  CallInst *CreateElementUnorderedAtomicMemMove(
+      Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, Value *Size,
+      uint32_t ElementSize, MDNode *TBAATag = nullptr,
+      MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr,
+      MDNode *NoAliasTag = nullptr);
+
+  /// Create a vector fadd reduction intrinsic of the source vector.
   /// The first parameter is a scalar accumulator value for ordered reductions.
   CallInst *CreateFAddReduce(Value *Acc, Value *Src);
 
-  /// \brief Create a vector fmul reduction intrinsic of the source vector.
+  /// Create a vector fmul reduction intrinsic of the source vector.
   /// The first parameter is a scalar accumulator value for ordered reductions.
   CallInst *CreateFMulReduce(Value *Acc, Value *Src);
 
-  /// \brief Create a vector int add reduction intrinsic of the source vector.
+  /// Create a vector int add reduction intrinsic of the source vector.
   CallInst *CreateAddReduce(Value *Src);
 
-  /// \brief Create a vector int mul reduction intrinsic of the source vector.
+  /// Create a vector int mul reduction intrinsic of the source vector.
   CallInst *CreateMulReduce(Value *Src);
 
-  /// \brief Create a vector int AND reduction intrinsic of the source vector.
+  /// Create a vector int AND reduction intrinsic of the source vector.
   CallInst *CreateAndReduce(Value *Src);
 
-  /// \brief Create a vector int OR reduction intrinsic of the source vector.
+  /// Create a vector int OR reduction intrinsic of the source vector.
   CallInst *CreateOrReduce(Value *Src);
 
-  /// \brief Create a vector int XOR reduction intrinsic of the source vector.
+  /// Create a vector int XOR reduction intrinsic of the source vector.
   CallInst *CreateXorReduce(Value *Src);
 
-  /// \brief Create a vector integer max reduction intrinsic of the source
+  /// Create a vector integer max reduction intrinsic of the source
   /// vector.
   CallInst *CreateIntMaxReduce(Value *Src, bool IsSigned = false);
 
-  /// \brief Create a vector integer min reduction intrinsic of the source
+  /// Create a vector integer min reduction intrinsic of the source
   /// vector.
   CallInst *CreateIntMinReduce(Value *Src, bool IsSigned = false);
 
-  /// \brief Create a vector float max reduction intrinsic of the source
+  /// Create a vector float max reduction intrinsic of the source
   /// vector.
   CallInst *CreateFPMaxReduce(Value *Src, bool NoNaN = false);
 
-  /// \brief Create a vector float min reduction intrinsic of the source
+  /// Create a vector float min reduction intrinsic of the source
   /// vector.
   CallInst *CreateFPMinReduce(Value *Src, bool NoNaN = false);
 
-  /// \brief Create a lifetime.start intrinsic.
+  /// Create a lifetime.start intrinsic.
   ///
   /// If the pointer isn't i8* it will be converted.
   CallInst *CreateLifetimeStart(Value *Ptr, ConstantInt *Size = nullptr);
 
-  /// \brief Create a lifetime.end intrinsic.
+  /// Create a lifetime.end intrinsic.
   ///
   /// If the pointer isn't i8* it will be converted.
   CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = nullptr);
@@ -532,29 +583,29 @@ public:
   /// If the pointer isn't i8* it will be converted.
   CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr);
 
-  /// \brief Create a call to Masked Load intrinsic
+  /// Create a call to Masked Load intrinsic
   CallInst *CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask,
                              Value *PassThru = nullptr, const Twine &Name = "");
 
-  /// \brief Create a call to Masked Store intrinsic
+  /// Create a call to Masked Store intrinsic
   CallInst *CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align,
                               Value *Mask);
 
-  /// \brief Create a call to Masked Gather intrinsic
+  /// Create a call to Masked Gather intrinsic
   CallInst *CreateMaskedGather(Value *Ptrs, unsigned Align,
                                Value *Mask = nullptr,
                                Value *PassThru = nullptr,
                                const Twine& Name = "");
 
-  /// \brief Create a call to Masked Scatter intrinsic
+  /// Create a call to Masked Scatter intrinsic
   CallInst *CreateMaskedScatter(Value *Val, Value *Ptrs, unsigned Align,
                                 Value *Mask = nullptr);
 
-  /// \brief Create an assume intrinsic call that allows the optimizer to
+  /// Create an assume intrinsic call that allows the optimizer to
   /// assume that the provided condition will be true.
   CallInst *CreateAssumption(Value *Cond);
 
-  /// \brief Create a call to the experimental.gc.statepoint intrinsic to
+  /// Create a call to the experimental.gc.statepoint intrinsic to
   /// start a new statepoint sequence.
   CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes,
                                    Value *ActualCallee,
@@ -563,7 +614,7 @@ public:
                                    ArrayRef<Value *> GCArgs,
                                    const Twine &Name = "");
 
-  /// \brief Create a call to the experimental.gc.statepoint intrinsic to
+  /// Create a call to the experimental.gc.statepoint intrinsic to
   /// start a new statepoint sequence.
   CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes,
                                    Value *ActualCallee, uint32_t Flags,
@@ -573,16 +624,16 @@ public:
                                    ArrayRef<Value *> GCArgs,
                                    const Twine &Name = "");
 
-  // \brief Conveninence function for the common case when CallArgs are filled
-  // in using makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be
-  // .get()'ed to get the Value pointer.
+  /// Conveninence function for the common case when CallArgs are filled
+  /// in using makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be
+  /// .get()'ed to get the Value pointer.
   CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes,
                                    Value *ActualCallee, ArrayRef<Use> CallArgs,
                                    ArrayRef<Value *> DeoptArgs,
                                    ArrayRef<Value *> GCArgs,
                                    const Twine &Name = "");
 
-  /// brief Create an invoke to the experimental.gc.statepoint intrinsic to
+  /// Create an invoke to the experimental.gc.statepoint intrinsic to
   /// start a new statepoint sequence.
   InvokeInst *
   CreateGCStatepointInvoke(uint64_t ID, uint32_t NumPatchBytes,
@@ -591,7 +642,7 @@ public:
                            ArrayRef<Value *> DeoptArgs,
                            ArrayRef<Value *> GCArgs, const Twine &Name = "");
 
-  /// brief Create an invoke to the experimental.gc.statepoint intrinsic to
+  /// Create an invoke to the experimental.gc.statepoint intrinsic to
   /// start a new statepoint sequence.
   InvokeInst *CreateGCStatepointInvoke(
       uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
@@ -610,13 +661,13 @@ public:
                            ArrayRef<Value *> DeoptArgs,
                            ArrayRef<Value *> GCArgs, const Twine &Name = "");
 
-  /// \brief Create a call to the experimental.gc.result intrinsic to extract
+  /// Create a call to the experimental.gc.result intrinsic to extract
   /// the result from a call wrapped in a statepoint.
   CallInst *CreateGCResult(Instruction *Statepoint,
                            Type *ResultType,
                            const Twine &Name = "");
 
-  /// \brief Create a call to the experimental.gc.relocate intrinsics to
+  /// Create a call to the experimental.gc.relocate intrinsics to
   /// project the relocated value of one pointer from the statepoint.
   CallInst *CreateGCRelocate(Instruction *Statepoint,
                              int BaseOffset,
@@ -630,6 +681,18 @@ public:
                                   Value *LHS, Value *RHS,
                                   const Twine &Name = "");
 
+  /// Create a call to intrinsic \p ID with no operands.
+  CallInst *CreateIntrinsic(Intrinsic::ID ID,
+                            Instruction *FMFSource = nullptr,
+                            const Twine &Name = "");
+
+  /// Create a call to intrinsic \p ID with 1 or more operands assuming the
+  /// intrinsic and all operands have the same type. If \p FMFSource is
+  /// provided, copy fast-math-flags from that instruction to the intrinsic.
+  CallInst *CreateIntrinsic(Intrinsic::ID ID, ArrayRef<Value *> Args,
+                            Instruction *FMFSource = nullptr,
+                            const Twine &Name = "");
+
   /// Create call to the minnum intrinsic.
   CallInst *CreateMinNum(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, Name);
@@ -637,11 +700,11 @@ public:
 
   /// Create call to the maxnum intrinsic.
   CallInst *CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name = "") {
-    return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, Name);
+    return CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS, Name);
   }
 
 private:
-  /// \brief Create a call to a masked intrinsic with given Id.
+  /// Create a call to a masked intrinsic with given Id.
   CallInst *CreateMaskedIntrinsic(Intrinsic::ID Id, ArrayRef<Value *> Ops,
                                   ArrayRef<Type *> OverloadedTypes,
                                   const Twine &Name = "");
@@ -649,7 +712,7 @@ private:
   Value *getCastedInt8PtrValue(Value *Ptr);
 };
 
-/// \brief This provides a uniform API for creating instructions and inserting
+/// This provides a uniform API for creating instructions and inserting
 /// them into a basic block: either at the end of a BasicBlock, or at a specific
 /// iterator location in a block.
 ///
@@ -677,7 +740,7 @@ public:
 
   explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr,
                      ArrayRef<OperandBundleDef> OpBundles = None)
-      : IRBuilderBase(C, FPMathTag, OpBundles), Folder() {}
+      : IRBuilderBase(C, FPMathTag, OpBundles) {}
 
   explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr,
                      ArrayRef<OperandBundleDef> OpBundles = None)
@@ -687,13 +750,13 @@ public:
 
   explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr,
                      ArrayRef<OperandBundleDef> OpBundles = None)
-      : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
+      : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles) {
     SetInsertPoint(TheBB);
   }
 
   explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr,
                      ArrayRef<OperandBundleDef> OpBundles = None)
-      : IRBuilderBase(IP->getContext(), FPMathTag, OpBundles), Folder() {
+      : IRBuilderBase(IP->getContext(), FPMathTag, OpBundles) {
     SetInsertPoint(IP);
   }
 
@@ -707,14 +770,14 @@ public:
   IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP,
             MDNode *FPMathTag = nullptr,
             ArrayRef<OperandBundleDef> OpBundles = None)
-      : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
+      : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles) {
     SetInsertPoint(TheBB, IP);
   }
 
-  /// \brief Get the constant folder being used.
+  /// Get the constant folder being used.
   const T &getFolder() { return Folder; }
 
-  /// \brief Insert and return the specified instruction.
+  /// Insert and return the specified instruction.
   template<typename InstTy>
   InstTy *Insert(InstTy *I, const Twine &Name = "") const {
     this->InsertHelper(I, Name, BB, InsertPt);
@@ -722,7 +785,7 @@ public:
     return I;
   }
 
-  /// \brief No-op overload to handle constants.
+  /// No-op overload to handle constants.
   Constant *Insert(Constant *C, const Twine& = "") const {
     return C;
   }
@@ -732,7 +795,7 @@ public:
   //===--------------------------------------------------------------------===//
 
 private:
-  /// \brief Helper to add branch weight and unpredictable metadata onto an
+  /// Helper to add branch weight and unpredictable metadata onto an
   /// instruction.
   /// \returns The annotated instruction.
   template <typename InstTy>
@@ -745,17 +808,17 @@ private:
   }
 
 public:
-  /// \brief Create a 'ret void' instruction.
+  /// Create a 'ret void' instruction.
   ReturnInst *CreateRetVoid() {
     return Insert(ReturnInst::Create(Context));
   }
 
-  /// \brief Create a 'ret <val>' instruction.
+  /// Create a 'ret <val>' instruction.
   ReturnInst *CreateRet(Value *V) {
     return Insert(ReturnInst::Create(Context, V));
   }
 
-  /// \brief Create a sequence of N insertvalue instructions,
+  /// Create a sequence of N insertvalue instructions,
   /// with one Value from the retVals array each, that build a aggregate
   /// return value one value at a time, and a ret instruction to return
   /// the resulting aggregate value.
@@ -769,12 +832,12 @@ public:
     return Insert(ReturnInst::Create(Context, V));
   }
 
-  /// \brief Create an unconditional 'br label X' instruction.
+  /// Create an unconditional 'br label X' instruction.
   BranchInst *CreateBr(BasicBlock *Dest) {
     return Insert(BranchInst::Create(Dest));
   }
 
-  /// \brief Create a conditional 'br Cond, TrueDest, FalseDest'
+  /// Create a conditional 'br Cond, TrueDest, FalseDest'
   /// instruction.
   BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False,
                            MDNode *BranchWeights = nullptr,
@@ -783,7 +846,7 @@ public:
                                     BranchWeights, Unpredictable));
   }
 
-  /// \brief Create a conditional 'br Cond, TrueDest, FalseDest'
+  /// Create a conditional 'br Cond, TrueDest, FalseDest'
   /// instruction. Copy branch meta data if available.
   BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False,
                            Instruction *MDSrc) {
@@ -796,7 +859,7 @@ public:
     return Insert(Br);
   }
 
-  /// \brief Create a switch instruction with the specified value, default dest,
+  /// Create a switch instruction with the specified value, default dest,
   /// and with a hint for the number of cases that will be added (for efficient
   /// allocation).
   SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10,
@@ -806,14 +869,14 @@ public:
                                     BranchWeights, Unpredictable));
   }
 
-  /// \brief Create an indirect branch instruction with the specified address
+  /// Create an indirect branch instruction with the specified address
   /// operand, with an optional hint for the number of destinations that will be
   /// added (for efficient allocation).
   IndirectBrInst *CreateIndirectBr(Value *Addr, unsigned NumDests = 10) {
     return Insert(IndirectBrInst::Create(Addr, NumDests));
   }
 
-  /// \brief Create an invoke instruction.
+  /// Create an invoke instruction.
   InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
                            BasicBlock *UnwindDest,
                            ArrayRef<Value *> Args = None,
@@ -878,150 +941,128 @@ private:
     return BO;
   }
 
-  Instruction *AddFPMathAttributes(Instruction *I,
-                                   MDNode *FPMathTag,
-                                   FastMathFlags FMF) const {
-    if (!FPMathTag)
-      FPMathTag = DefaultFPMathTag;
-    if (FPMathTag)
-      I->setMetadata(LLVMContext::MD_fpmath, FPMathTag);
+  Instruction *setFPAttrs(Instruction *I, MDNode *FPMD,
+                          FastMathFlags FMF) const {
+    if (!FPMD)
+      FPMD = DefaultFPMathTag;
+    if (FPMD)
+      I->setMetadata(LLVMContext::MD_fpmath, FPMD);
     I->setFastMathFlags(FMF);
     return I;
   }
 
+  Value *foldConstant(Instruction::BinaryOps Opc, Value *L,
+                      Value *R, const Twine &Name = nullptr) const {
+    auto *LC = dyn_cast<Constant>(L);
+    auto *RC = dyn_cast<Constant>(R);
+    return (LC && RC) ? Insert(Folder.CreateBinOp(Opc, LC, RC), Name) : nullptr;
+  }
+
 public:
   Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateAdd(LC, RC, HasNUW, HasNSW), Name);
     return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name,
                                    HasNUW, HasNSW);
   }
+
   Value *CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateAdd(LHS, RHS, Name, false, true);
   }
+
   Value *CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateAdd(LHS, RHS, Name, true, false);
   }
-  Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "",
-                    MDNode *FPMathTag = nullptr) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Insert(Folder.CreateFAdd(LC, RC), Name);
-    return Insert(AddFPMathAttributes(BinaryOperator::CreateFAdd(LHS, RHS),
-                                      FPMathTag, FMF), Name);
-  }
+
   Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateSub(LC, RC, HasNUW, HasNSW), Name);
     return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name,
                                    HasNUW, HasNSW);
   }
+
   Value *CreateNSWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateSub(LHS, RHS, Name, false, true);
   }
+
   Value *CreateNUWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateSub(LHS, RHS, Name, true, false);
   }
-  Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "",
-                    MDNode *FPMathTag = nullptr) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Insert(Folder.CreateFSub(LC, RC), Name);
-    return Insert(AddFPMathAttributes(BinaryOperator::CreateFSub(LHS, RHS),
-                                      FPMathTag, FMF), Name);
-  }
+
   Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateMul(LC, RC, HasNUW, HasNSW), Name);
     return CreateInsertNUWNSWBinOp(Instruction::Mul, LHS, RHS, Name,
                                    HasNUW, HasNSW);
   }
+
   Value *CreateNSWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateMul(LHS, RHS, Name, false, true);
   }
+
   Value *CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateMul(LHS, RHS, Name, true, false);
   }
-  Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "",
-                    MDNode *FPMathTag = nullptr) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Insert(Folder.CreateFMul(LC, RC), Name);
-    return Insert(AddFPMathAttributes(BinaryOperator::CreateFMul(LHS, RHS),
-                                      FPMathTag, FMF), Name);
-  }
+
   Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateUDiv(LC, RC, isExact), Name);
     if (!isExact)
       return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name);
     return Insert(BinaryOperator::CreateExactUDiv(LHS, RHS), Name);
   }
+
   Value *CreateExactUDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateUDiv(LHS, RHS, Name, true);
   }
+
   Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateSDiv(LC, RC, isExact), Name);
     if (!isExact)
       return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name);
     return Insert(BinaryOperator::CreateExactSDiv(LHS, RHS), Name);
   }
+
   Value *CreateExactSDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateSDiv(LHS, RHS, Name, true);
   }
-  Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "",
-                    MDNode *FPMathTag = nullptr) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Insert(Folder.CreateFDiv(LC, RC), Name);
-    return Insert(AddFPMathAttributes(BinaryOperator::CreateFDiv(LHS, RHS),
-                                      FPMathTag, FMF), Name);
-  }
+
   Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Insert(Folder.CreateURem(LC, RC), Name);
+    if (Value *V = foldConstant(Instruction::URem, LHS, RHS, Name)) return V;
     return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
   }
+
   Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Insert(Folder.CreateSRem(LC, RC), Name);
+    if (Value *V = foldConstant(Instruction::SRem, LHS, RHS, Name)) return V;
     return Insert(BinaryOperator::CreateSRem(LHS, RHS), Name);
   }
-  Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "",
-                    MDNode *FPMathTag = nullptr) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Insert(Folder.CreateFRem(LC, RC), Name);
-    return Insert(AddFPMathAttributes(BinaryOperator::CreateFRem(LHS, RHS),
-                                      FPMathTag, FMF), Name);
-  }
 
   Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name);
     return CreateInsertNUWNSWBinOp(Instruction::Shl, LHS, RHS, Name,
                                    HasNUW, HasNSW);
   }
+
   Value *CreateShl(Value *LHS, const APInt &RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
     return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
                      HasNUW, HasNSW);
   }
+
   Value *CreateShl(Value *LHS, uint64_t RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
     return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
@@ -1030,17 +1071,19 @@ public:
 
   Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateLShr(LC, RC, isExact), Name);
     if (!isExact)
       return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
     return Insert(BinaryOperator::CreateExactLShr(LHS, RHS), Name);
   }
+
   Value *CreateLShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
                     bool isExact = false) {
     return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
   }
+
   Value *CreateLShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
                     bool isExact = false) {
     return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
@@ -1048,103 +1091,196 @@ public:
 
   Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateAShr(LC, RC, isExact), Name);
     if (!isExact)
       return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
     return Insert(BinaryOperator::CreateExactAShr(LHS, RHS), Name);
   }
+
   Value *CreateAShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
                     bool isExact = false) {
     return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
   }
+
   Value *CreateAShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
                     bool isExact = false) {
     return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
   }
 
   Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *RC = dyn_cast<Constant>(RHS)) {
+    if (auto *RC = dyn_cast<Constant>(RHS)) {
       if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isMinusOne())
         return LHS;  // LHS & -1 -> LHS
-      if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (auto *LC = dyn_cast<Constant>(LHS))
         return Insert(Folder.CreateAnd(LC, RC), Name);
     }
     return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name);
   }
+
   Value *CreateAnd(Value *LHS, const APInt &RHS, const Twine &Name = "") {
     return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
+
   Value *CreateAnd(Value *LHS, uint64_t RHS, const Twine &Name = "") {
     return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
 
   Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *RC = dyn_cast<Constant>(RHS)) {
+    if (auto *RC = dyn_cast<Constant>(RHS)) {
       if (RC->isNullValue())
         return LHS;  // LHS | 0 -> LHS
-      if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (auto *LC = dyn_cast<Constant>(LHS))
         return Insert(Folder.CreateOr(LC, RC), Name);
     }
     return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
   }
+
   Value *CreateOr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
     return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
+
   Value *CreateOr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
     return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
 
   Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Insert(Folder.CreateXor(LC, RC), Name);
+    if (Value *V = foldConstant(Instruction::Xor, LHS, RHS, Name)) return V;
     return Insert(BinaryOperator::CreateXor(LHS, RHS), Name);
   }
+
   Value *CreateXor(Value *LHS, const APInt &RHS, const Twine &Name = "") {
     return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
+
   Value *CreateXor(Value *LHS, uint64_t RHS, const Twine &Name = "") {
     return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
 
+  Value *CreateFAdd(Value *L, Value *R, const Twine &Name = "",
+                    MDNode *FPMD = nullptr) {
+    if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), FPMD, FMF);
+    return Insert(I, Name);
+  }
+
+  /// Copy fast-math-flags from an instruction rather than using the builder's
+  /// default FMF.
+  Value *CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource,
+                       const Twine &Name = "") {
+    if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), nullptr,
+                                FMFSource->getFastMathFlags());
+    return Insert(I, Name);
+  }
+
+  Value *CreateFSub(Value *L, Value *R, const Twine &Name = "",
+                    MDNode *FPMD = nullptr) {
+    if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), FPMD, FMF);
+    return Insert(I, Name);
+  }
+
+  /// Copy fast-math-flags from an instruction rather than using the builder's
+  /// default FMF.
+  Value *CreateFSubFMF(Value *L, Value *R, Instruction *FMFSource,
+                       const Twine &Name = "") {
+    if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), nullptr,
+                                FMFSource->getFastMathFlags());
+    return Insert(I, Name);
+  }
+
+  Value *CreateFMul(Value *L, Value *R, const Twine &Name = "",
+                    MDNode *FPMD = nullptr) {
+    if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), FPMD, FMF);
+    return Insert(I, Name);
+  }
+
+  /// Copy fast-math-flags from an instruction rather than using the builder's
+  /// default FMF.
+  Value *CreateFMulFMF(Value *L, Value *R, Instruction *FMFSource,
+                       const Twine &Name = "") {
+    if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), nullptr,
+                                FMFSource->getFastMathFlags());
+    return Insert(I, Name);
+  }
+
+  Value *CreateFDiv(Value *L, Value *R, const Twine &Name = "",
+                    MDNode *FPMD = nullptr) {
+    if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), FPMD, FMF);
+    return Insert(I, Name);
+  }
+
+  /// Copy fast-math-flags from an instruction rather than using the builder's
+  /// default FMF.
+  Value *CreateFDivFMF(Value *L, Value *R, Instruction *FMFSource,
+                       const Twine &Name = "") {
+    if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), nullptr,
+                                FMFSource->getFastMathFlags());
+    return Insert(I, Name);
+  }
+
+  Value *CreateFRem(Value *L, Value *R, const Twine &Name = "",
+                    MDNode *FPMD = nullptr) {
+    if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), FPMD, FMF);
+    return Insert(I, Name);
+  }
+
+  /// Copy fast-math-flags from an instruction rather than using the builder's
+  /// default FMF.
+  Value *CreateFRemFMF(Value *L, Value *R, Instruction *FMFSource,
+                       const Twine &Name = "") {
+    if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V;
+    Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), nullptr,
+                                FMFSource->getFastMathFlags());
+    return Insert(I, Name);
+  }
+
   Value *CreateBinOp(Instruction::BinaryOps Opc,
                      Value *LHS, Value *RHS, const Twine &Name = "",
                      MDNode *FPMathTag = nullptr) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Insert(Folder.CreateBinOp(Opc, LC, RC), Name);
+    if (Value *V = foldConstant(Opc, LHS, RHS, Name)) return V;
     Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS);
     if (isa<FPMathOperator>(BinOp))
-      BinOp = AddFPMathAttributes(BinOp, FPMathTag, FMF);
+      BinOp = setFPAttrs(BinOp, FPMathTag, FMF);
     return Insert(BinOp, Name);
   }
 
   Value *CreateNeg(Value *V, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name);
     BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name);
     if (HasNUW) BO->setHasNoUnsignedWrap();
     if (HasNSW) BO->setHasNoSignedWrap();
     return BO;
   }
+
   Value *CreateNSWNeg(Value *V, const Twine &Name = "") {
     return CreateNeg(V, Name, false, true);
   }
+
   Value *CreateNUWNeg(Value *V, const Twine &Name = "") {
     return CreateNeg(V, Name, true, false);
   }
+
   Value *CreateFNeg(Value *V, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateFNeg(VC), Name);
-    return Insert(AddFPMathAttributes(BinaryOperator::CreateFNeg(V),
-                                      FPMathTag, FMF), Name);
+    return Insert(setFPAttrs(BinaryOperator::CreateFNeg(V), FPMathTag, FMF),
+                  Name);
   }
+
   Value *CreateNot(Value *V, const Twine &Name = "") {
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateNot(VC), Name);
     return Insert(BinaryOperator::CreateNot(V), Name);
   }
@@ -1163,26 +1299,32 @@ public:
     const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
     return Insert(new AllocaInst(Ty, DL.getAllocaAddrSpace(), ArraySize), Name);
   }
-  // \brief Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
-  // converting the string to 'bool' for the isVolatile parameter.
+
+  /// Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
+  /// converting the string to 'bool' for the isVolatile parameter.
   LoadInst *CreateLoad(Value *Ptr, const char *Name) {
     return Insert(new LoadInst(Ptr), Name);
   }
+
   LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") {
     return Insert(new LoadInst(Ptr), Name);
   }
+
   LoadInst *CreateLoad(Type *Ty, Value *Ptr, const Twine &Name = "") {
     return Insert(new LoadInst(Ty, Ptr), Name);
   }
+
   LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const Twine &Name = "") {
     return Insert(new LoadInst(Ptr, nullptr, isVolatile), Name);
   }
+
   StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
     return Insert(new StoreInst(Val, Ptr, isVolatile));
   }
-  // \brief Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")'
-  // correctly, instead of converting the string to 'bool' for the isVolatile
-  // parameter.
+
+  /// Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")'
+  /// correctly, instead of converting the string to 'bool' for the isVolatile
+  /// parameter.
   LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) {
     LoadInst *LI = CreateLoad(Ptr, Name);
     LI->setAlignment(Align);
@@ -1200,17 +1342,20 @@ public:
     LI->setAlignment(Align);
     return LI;
   }
+
   StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align,
                                 bool isVolatile = false) {
     StoreInst *SI = CreateStore(Val, Ptr, isVolatile);
     SI->setAlignment(Align);
     return SI;
   }
+
   FenceInst *CreateFence(AtomicOrdering Ordering,
                          SyncScope::ID SSID = SyncScope::System,
                          const Twine &Name = "") {
     return Insert(new FenceInst(Context, Ordering, SSID), Name);
   }
+
   AtomicCmpXchgInst *
   CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New,
                       AtomicOrdering SuccessOrdering,
@@ -1219,18 +1364,21 @@ public:
     return Insert(new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering,
                                         FailureOrdering, SSID));
   }
+
   AtomicRMWInst *CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val,
                                  AtomicOrdering Ordering,
                                  SyncScope::ID SSID = SyncScope::System) {
     return Insert(new AtomicRMWInst(Op, Ptr, Val, Ordering, SSID));
   }
+
   Value *CreateGEP(Value *Ptr, ArrayRef<Value *> IdxList,
                    const Twine &Name = "") {
     return CreateGEP(nullptr, Ptr, IdxList, Name);
   }
+
   Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
                    const Twine &Name = "") {
-    if (Constant *PC = dyn_cast<Constant>(Ptr)) {
+    if (auto *PC = dyn_cast<Constant>(Ptr)) {
       // Every index must be constant.
       size_t i, e;
       for (i = 0, e = IdxList.size(); i != e; ++i)
@@ -1241,13 +1389,15 @@ public:
     }
     return Insert(GetElementPtrInst::Create(Ty, Ptr, IdxList), Name);
   }
+
   Value *CreateInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList,
                            const Twine &Name = "") {
     return CreateInBoundsGEP(nullptr, Ptr, IdxList, Name);
   }
+
   Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
                            const Twine &Name = "") {
-    if (Constant *PC = dyn_cast<Constant>(Ptr)) {
+    if (auto *PC = dyn_cast<Constant>(Ptr)) {
       // Every index must be constant.
       size_t i, e;
       for (i = 0, e = IdxList.size(); i != e; ++i)
@@ -1259,43 +1409,50 @@ public:
     }
     return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, IdxList), Name);
   }
+
   Value *CreateGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
     return CreateGEP(nullptr, Ptr, Idx, Name);
   }
+
   Value *CreateGEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "") {
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
-      if (Constant *IC = dyn_cast<Constant>(Idx))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
+      if (auto *IC = dyn_cast<Constant>(Idx))
         return Insert(Folder.CreateGetElementPtr(Ty, PC, IC), Name);
     return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name);
   }
+
   Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, Value *Idx,
                            const Twine &Name = "") {
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
-      if (Constant *IC = dyn_cast<Constant>(Idx))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
+      if (auto *IC = dyn_cast<Constant>(Idx))
         return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, IC), Name);
     return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
   }
+
   Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name = "") {
     return CreateConstGEP1_32(nullptr, Ptr, Idx0, Name);
   }
+
   Value *CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0,
                             const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
 
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(Ty, PC, Idx), Name);
 
     return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name);
   }
+
   Value *CreateConstInBoundsGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0,
                                     const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
 
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idx), Name);
 
     return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
   }
+
   Value *CreateConstGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1,
                             const Twine &Name = "") {
     Value *Idxs[] = {
@@ -1303,11 +1460,12 @@ public:
       ConstantInt::get(Type::getInt32Ty(Context), Idx1)
     };
 
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(Ty, PC, Idxs), Name);
 
     return Insert(GetElementPtrInst::Create(Ty, Ptr, Idxs), Name);
   }
+
   Value *CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0,
                                     unsigned Idx1, const Twine &Name = "") {
     Value *Idxs[] = {
@@ -1315,28 +1473,31 @@ public:
       ConstantInt::get(Type::getInt32Ty(Context), Idx1)
     };
 
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idxs), Name);
 
     return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name);
   }
+
   Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
 
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idx), Name);
 
     return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idx), Name);
   }
+
   Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
                                     const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
 
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idx), Name);
 
     return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idx), Name);
   }
+
   Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
                     const Twine &Name = "") {
     Value *Idxs[] = {
@@ -1344,11 +1505,12 @@ public:
       ConstantInt::get(Type::getInt64Ty(Context), Idx1)
     };
 
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idxs), Name);
 
     return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idxs), Name);
   }
+
   Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
                                     const Twine &Name = "") {
     Value *Idxs[] = {
@@ -1356,25 +1518,31 @@ public:
       ConstantInt::get(Type::getInt64Ty(Context), Idx1)
     };
 
-    if (Constant *PC = dyn_cast<Constant>(Ptr))
+    if (auto *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idxs),
                     Name);
 
     return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idxs), Name);
   }
+
   Value *CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx,
                          const Twine &Name = "") {
     return CreateConstInBoundsGEP2_32(Ty, Ptr, 0, Idx, Name);
   }
 
-  /// \brief Same as CreateGlobalString, but return a pointer with "i8*" type
+  Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = "") {
+    return CreateConstInBoundsGEP2_32(nullptr, Ptr, 0, Idx, Name);
+  }
+
+  /// Same as CreateGlobalString, but return a pointer with "i8*" type
   /// instead of a pointer to array of i8.
-  Value *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "",
-                               unsigned AddressSpace = 0) {
-    GlobalVariable *gv = CreateGlobalString(Str, Name, AddressSpace);
-    Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
-    Value *Args[] = { zero, zero };
-    return CreateInBoundsGEP(gv->getValueType(), gv, Args, Name);
+  Constant *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "",
+                                  unsigned AddressSpace = 0) {
+    GlobalVariable *GV = CreateGlobalString(Str, Name, AddressSpace);
+    Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
+    Constant *Indices[] = {Zero, Zero};
+    return ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV,
+                                                  Indices);
   }
 
   //===--------------------------------------------------------------------===//
@@ -1384,13 +1552,16 @@ public:
   Value *CreateTrunc(Value *V, Type *DestTy, const Twine &Name = "") {
     return CreateCast(Instruction::Trunc, V, DestTy, Name);
   }
+
   Value *CreateZExt(Value *V, Type *DestTy, const Twine &Name = "") {
     return CreateCast(Instruction::ZExt, V, DestTy, Name);
   }
+
   Value *CreateSExt(Value *V, Type *DestTy, const Twine &Name = "") {
     return CreateCast(Instruction::SExt, V, DestTy, Name);
   }
-  /// \brief Create a ZExt or Trunc from the integer value V to DestTy. Return
+
+  /// Create a ZExt or Trunc from the integer value V to DestTy. Return
   /// the value untouched if the type of V is already DestTy.
   Value *CreateZExtOrTrunc(Value *V, Type *DestTy,
                            const Twine &Name = "") {
@@ -1404,7 +1575,8 @@ public:
       return CreateTrunc(V, DestTy, Name);
     return V;
   }
-  /// \brief Create a SExt or Trunc from the integer value V to DestTy. Return
+
+  /// Create a SExt or Trunc from the integer value V to DestTy. Return
   /// the value untouched if the type of V is already DestTy.
   Value *CreateSExtOrTrunc(Value *V, Type *DestTy,
                            const Twine &Name = "") {
@@ -1418,78 +1590,93 @@ public:
       return CreateTrunc(V, DestTy, Name);
     return V;
   }
+
   Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = ""){
     return CreateCast(Instruction::FPToUI, V, DestTy, Name);
   }
+
   Value *CreateFPToSI(Value *V, Type *DestTy, const Twine &Name = ""){
     return CreateCast(Instruction::FPToSI, V, DestTy, Name);
   }
+
   Value *CreateUIToFP(Value *V, Type *DestTy, const Twine &Name = ""){
     return CreateCast(Instruction::UIToFP, V, DestTy, Name);
   }
+
   Value *CreateSIToFP(Value *V, Type *DestTy, const Twine &Name = ""){
     return CreateCast(Instruction::SIToFP, V, DestTy, Name);
   }
+
   Value *CreateFPTrunc(Value *V, Type *DestTy,
                        const Twine &Name = "") {
     return CreateCast(Instruction::FPTrunc, V, DestTy, Name);
   }
+
   Value *CreateFPExt(Value *V, Type *DestTy, const Twine &Name = "") {
     return CreateCast(Instruction::FPExt, V, DestTy, Name);
   }
+
   Value *CreatePtrToInt(Value *V, Type *DestTy,
                         const Twine &Name = "") {
     return CreateCast(Instruction::PtrToInt, V, DestTy, Name);
   }
+
   Value *CreateIntToPtr(Value *V, Type *DestTy,
                         const Twine &Name = "") {
     return CreateCast(Instruction::IntToPtr, V, DestTy, Name);
   }
+
   Value *CreateBitCast(Value *V, Type *DestTy,
                        const Twine &Name = "") {
     return CreateCast(Instruction::BitCast, V, DestTy, Name);
   }
+
   Value *CreateAddrSpaceCast(Value *V, Type *DestTy,
                              const Twine &Name = "") {
     return CreateCast(Instruction::AddrSpaceCast, V, DestTy, Name);
   }
+
   Value *CreateZExtOrBitCast(Value *V, Type *DestTy,
                              const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateZExtOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
   }
+
   Value *CreateSExtOrBitCast(Value *V, Type *DestTy,
                              const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateSExtOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
   }
+
   Value *CreateTruncOrBitCast(Value *V, Type *DestTy,
                               const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateTruncOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name);
   }
+
   Value *CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy,
                     const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateCast(Op, VC, DestTy), Name);
     return Insert(CastInst::Create(Op, V, DestTy), Name);
   }
+
   Value *CreatePointerCast(Value *V, Type *DestTy,
                            const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreatePointerCast(VC, DestTy), Name);
     return Insert(CastInst::CreatePointerCast(V, DestTy), Name);
   }
@@ -1499,7 +1686,7 @@ public:
     if (V->getType() == DestTy)
       return V;
 
-    if (Constant *VC = dyn_cast<Constant>(V)) {
+    if (auto *VC = dyn_cast<Constant>(V)) {
       return Insert(Folder.CreatePointerBitCastOrAddrSpaceCast(VC, DestTy),
                     Name);
     }
@@ -1512,7 +1699,7 @@ public:
                        const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateIntCast(VC, DestTy, isSigned), Name);
     return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
   }
@@ -1529,16 +1716,15 @@ public:
     return CreateBitCast(V, DestTy, Name);
   }
 
-public:
   Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
-    if (Constant *VC = dyn_cast<Constant>(V))
+    if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateFPCast(VC, DestTy), Name);
     return Insert(CastInst::CreateFPCast(V, DestTy), Name);
   }
 
-  // \brief Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a
+  // Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a
   // compile time error, instead of converting the string to bool for the
   // isSigned parameter.
   Value *CreateIntCast(Value *, Type *, const char *) = delete;
@@ -1550,30 +1736,39 @@ public:
   Value *CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_EQ, LHS, RHS, Name);
   }
+
   Value *CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_NE, LHS, RHS, Name);
   }
+
   Value *CreateICmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_UGT, LHS, RHS, Name);
   }
+
   Value *CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_UGE, LHS, RHS, Name);
   }
+
   Value *CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_ULT, LHS, RHS, Name);
   }
+
   Value *CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_ULE, LHS, RHS, Name);
   }
+
   Value *CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_SGT, LHS, RHS, Name);
   }
+
   Value *CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_SGE, LHS, RHS, Name);
   }
+
   Value *CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_SLT, LHS, RHS, Name);
   }
+
   Value *CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name = "") {
     return CreateICmp(ICmpInst::ICMP_SLE, LHS, RHS, Name);
   }
@@ -1582,54 +1777,67 @@ public:
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_OEQ, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpOGT(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_OGT, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_OGE, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_OLT, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpOLE(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_OLE, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpONE(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_ONE, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpORD(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_ORD, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_UNO, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_UEQ, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpUGT(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_UGT, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpUGE(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_UGE, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpULT(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_ULT, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpULE(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_ULE, LHS, RHS, Name, FPMathTag);
   }
+
   Value *CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name = "",
                        MDNode *FPMathTag = nullptr) {
     return CreateFCmp(FCmpInst::FCMP_UNE, LHS, RHS, Name, FPMathTag);
@@ -1637,18 +1845,18 @@ public:
 
   Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
                     const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateICmp(P, LC, RC), Name);
     return Insert(new ICmpInst(P, LHS, RHS), Name);
   }
+
   Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
                     const Twine &Name = "", MDNode *FPMathTag = nullptr) {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
+    if (auto *LC = dyn_cast<Constant>(LHS))
+      if (auto *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFCmp(P, LC, RC), Name);
-    return Insert(AddFPMathAttributes(new FCmpInst(P, LHS, RHS),
-                                      FPMathTag, FMF), Name);
+    return Insert(setFPAttrs(new FCmpInst(P, LHS, RHS), FPMathTag, FMF), Name);
   }
 
   //===--------------------------------------------------------------------===//
@@ -1662,8 +1870,8 @@ public:
 
   CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
                        const Twine &Name = "", MDNode *FPMathTag = nullptr) {
-    PointerType *PTy = cast<PointerType>(Callee->getType());
-    FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+    auto *PTy = cast<PointerType>(Callee->getType());
+    auto *FTy = cast<FunctionType>(PTy->getElementType());
     return CreateCall(FTy, Callee, Args, Name, FPMathTag);
   }
 
@@ -1672,7 +1880,7 @@ public:
                        MDNode *FPMathTag = nullptr) {
     CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
     if (isa<FPMathOperator>(CI))
-      CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
+      CI = cast<CallInst>(setFPAttrs(CI, FPMathTag, FMF));
     return Insert(CI, Name);
   }
 
@@ -1681,7 +1889,7 @@ public:
                        const Twine &Name = "", MDNode *FPMathTag = nullptr) {
     CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
     if (isa<FPMathOperator>(CI))
-      CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
+      CI = cast<CallInst>(setFPAttrs(CI, FPMathTag, FMF));
     return Insert(CI, Name);
   }
 
@@ -1692,9 +1900,9 @@ public:
 
   Value *CreateSelect(Value *C, Value *True, Value *False,
                       const Twine &Name = "", Instruction *MDFrom = nullptr) {
-    if (Constant *CC = dyn_cast<Constant>(C))
-      if (Constant *TC = dyn_cast<Constant>(True))
-        if (Constant *FC = dyn_cast<Constant>(False))
+    if (auto *CC = dyn_cast<Constant>(C))
+      if (auto *TC = dyn_cast<Constant>(True))
+        if (auto *FC = dyn_cast<Constant>(False))
           return Insert(Folder.CreateSelect(CC, TC, FC), Name);
 
     SelectInst *Sel = SelectInst::Create(C, True, False);
@@ -1712,8 +1920,8 @@ public:
 
   Value *CreateExtractElement(Value *Vec, Value *Idx,
                               const Twine &Name = "") {
-    if (Constant *VC = dyn_cast<Constant>(Vec))
-      if (Constant *IC = dyn_cast<Constant>(Idx))
+    if (auto *VC = dyn_cast<Constant>(Vec))
+      if (auto *IC = dyn_cast<Constant>(Idx))
         return Insert(Folder.CreateExtractElement(VC, IC), Name);
     return Insert(ExtractElementInst::Create(Vec, Idx), Name);
   }
@@ -1725,9 +1933,9 @@ public:
 
   Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
                              const Twine &Name = "") {
-    if (Constant *VC = dyn_cast<Constant>(Vec))
-      if (Constant *NC = dyn_cast<Constant>(NewElt))
-        if (Constant *IC = dyn_cast<Constant>(Idx))
+    if (auto *VC = dyn_cast<Constant>(Vec))
+      if (auto *NC = dyn_cast<Constant>(NewElt))
+        if (auto *IC = dyn_cast<Constant>(Idx))
           return Insert(Folder.CreateInsertElement(VC, NC, IC), Name);
     return Insert(InsertElementInst::Create(Vec, NewElt, Idx), Name);
   }
@@ -1739,9 +1947,9 @@ public:
 
   Value *CreateShuffleVector(Value *V1, Value *V2, Value *Mask,
                              const Twine &Name = "") {
-    if (Constant *V1C = dyn_cast<Constant>(V1))
-      if (Constant *V2C = dyn_cast<Constant>(V2))
-        if (Constant *MC = dyn_cast<Constant>(Mask))
+    if (auto *V1C = dyn_cast<Constant>(V1))
+      if (auto *V2C = dyn_cast<Constant>(V2))
+        if (auto *MC = dyn_cast<Constant>(Mask))
           return Insert(Folder.CreateShuffleVector(V1C, V2C, MC), Name);
     return Insert(new ShuffleVectorInst(V1, V2, Mask), Name);
   }
@@ -1755,7 +1963,7 @@ public:
   Value *CreateExtractValue(Value *Agg,
                             ArrayRef<unsigned> Idxs,
                             const Twine &Name = "") {
-    if (Constant *AggC = dyn_cast<Constant>(Agg))
+    if (auto *AggC = dyn_cast<Constant>(Agg))
       return Insert(Folder.CreateExtractValue(AggC, Idxs), Name);
     return Insert(ExtractValueInst::Create(Agg, Idxs), Name);
   }
@@ -1763,8 +1971,8 @@ public:
   Value *CreateInsertValue(Value *Agg, Value *Val,
                            ArrayRef<unsigned> Idxs,
                            const Twine &Name = "") {
-    if (Constant *AggC = dyn_cast<Constant>(Agg))
-      if (Constant *ValC = dyn_cast<Constant>(Val))
+    if (auto *AggC = dyn_cast<Constant>(Agg))
+      if (auto *ValC = dyn_cast<Constant>(Val))
         return Insert(Folder.CreateInsertValue(AggC, ValC, Idxs), Name);
     return Insert(InsertValueInst::Create(Agg, Val, Idxs), Name);
   }
@@ -1778,19 +1986,19 @@ public:
   // Utility creation methods
   //===--------------------------------------------------------------------===//
 
-  /// \brief Return an i1 value testing if \p Arg is null.
+  /// Return an i1 value testing if \p Arg is null.
   Value *CreateIsNull(Value *Arg, const Twine &Name = "") {
     return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()),
                         Name);
   }
 
-  /// \brief Return an i1 value testing if \p Arg is not null.
+  /// Return an i1 value testing if \p Arg is not null.
   Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") {
     return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()),
                         Name);
   }
 
-  /// \brief Return the i64 difference between two pointer values, dividing out
+  /// Return the i64 difference between two pointer values, dividing out
   /// the size of the pointed-to objects.
   ///
   /// This is intended to implement C-style pointer subtraction. As such, the
@@ -1799,7 +2007,7 @@ public:
   Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "") {
     assert(LHS->getType() == RHS->getType() &&
            "Pointer subtraction operand types must match!");
-    PointerType *ArgType = cast<PointerType>(LHS->getType());
+    auto *ArgType = cast<PointerType>(LHS->getType());
     Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
     Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
     Value *Difference = CreateSub(LHS_int, RHS_int);
@@ -1808,35 +2016,62 @@ public:
                            Name);
   }
 
-  /// \brief Create an invariant.group.barrier intrinsic call, that stops
-  /// optimizer to propagate equality using invariant.group metadata.
-  /// If Ptr type is different from pointer to i8, it's casted to pointer to i8
-  /// in the same address space before call and casted back to Ptr type after
-  /// call.
-  Value *CreateInvariantGroupBarrier(Value *Ptr) {
+  /// Create a launder.invariant.group intrinsic call. If Ptr type is
+  /// different from pointer to i8, it's casted to pointer to i8 in the same
+  /// address space before call and casted back to Ptr type after call.
+  Value *CreateLaunderInvariantGroup(Value *Ptr) {
     assert(isa<PointerType>(Ptr->getType()) &&
-           "invariant.group.barrier only applies to pointers.");
+           "launder.invariant.group only applies to pointers.");
+    // FIXME: we could potentially avoid casts to/from i8*.
     auto *PtrType = Ptr->getType();
     auto *Int8PtrTy = getInt8PtrTy(PtrType->getPointerAddressSpace());
     if (PtrType != Int8PtrTy)
       Ptr = CreateBitCast(Ptr, Int8PtrTy);
     Module *M = BB->getParent()->getParent();
-    Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(
-        M, Intrinsic::invariant_group_barrier, {Int8PtrTy});
+    Function *FnLaunderInvariantGroup = Intrinsic::getDeclaration(
+        M, Intrinsic::launder_invariant_group, {Int8PtrTy});
 
-    assert(FnInvariantGroupBarrier->getReturnType() == Int8PtrTy &&
-           FnInvariantGroupBarrier->getFunctionType()->getParamType(0) ==
+    assert(FnLaunderInvariantGroup->getReturnType() == Int8PtrTy &&
+           FnLaunderInvariantGroup->getFunctionType()->getParamType(0) ==
                Int8PtrTy &&
-           "InvariantGroupBarrier should take and return the same type");
+           "LaunderInvariantGroup should take and return the same type");
 
-    CallInst *Fn = CreateCall(FnInvariantGroupBarrier, {Ptr});
+    CallInst *Fn = CreateCall(FnLaunderInvariantGroup, {Ptr});
 
     if (PtrType != Int8PtrTy)
       return CreateBitCast(Fn, PtrType);
     return Fn;
   }
 
-  /// \brief Return a vector value that contains \arg V broadcasted to \p
+  /// \brief Create a strip.invariant.group intrinsic call. If Ptr type is
+  /// different from pointer to i8, it's casted to pointer to i8 in the same
+  /// address space before call and casted back to Ptr type after call.
+  Value *CreateStripInvariantGroup(Value *Ptr) {
+    assert(isa<PointerType>(Ptr->getType()) &&
+           "strip.invariant.group only applies to pointers.");
+
+    // FIXME: we could potentially avoid casts to/from i8*.
+    auto *PtrType = Ptr->getType();
+    auto *Int8PtrTy = getInt8PtrTy(PtrType->getPointerAddressSpace());
+    if (PtrType != Int8PtrTy)
+      Ptr = CreateBitCast(Ptr, Int8PtrTy);
+    Module *M = BB->getParent()->getParent();
+    Function *FnStripInvariantGroup = Intrinsic::getDeclaration(
+        M, Intrinsic::strip_invariant_group, {Int8PtrTy});
+
+    assert(FnStripInvariantGroup->getReturnType() == Int8PtrTy &&
+           FnStripInvariantGroup->getFunctionType()->getParamType(0) ==
+               Int8PtrTy &&
+           "StripInvariantGroup should take and return the same type");
+
+    CallInst *Fn = CreateCall(FnStripInvariantGroup, {Ptr});
+
+    if (PtrType != Int8PtrTy)
+      return CreateBitCast(Fn, PtrType);
+    return Fn;
+  }
+
+  /// Return a vector value that contains \arg V broadcasted to \p
   /// NumElts elements.
   Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") {
     assert(NumElts > 0 && "Cannot splat to an empty vector!");
@@ -1852,11 +2087,11 @@ public:
     return CreateShuffleVector(V, Undef, Zeros, Name + ".splat");
   }
 
-  /// \brief Return a value that has been extracted from a larger integer type.
+  /// Return a value that has been extracted from a larger integer type.
   Value *CreateExtractInteger(const DataLayout &DL, Value *From,
                               IntegerType *ExtractedTy, uint64_t Offset,
                               const Twine &Name) {
-    IntegerType *IntTy = cast<IntegerType>(From->getType());
+    auto *IntTy = cast<IntegerType>(From->getType());
     assert(DL.getTypeStoreSize(ExtractedTy) + Offset <=
                DL.getTypeStoreSize(IntTy) &&
            "Element extends past full value");
@@ -1877,7 +2112,7 @@ public:
   }
 
 private:
-  /// \brief Helper function that creates an assume intrinsic call that
+  /// Helper function that creates an assume intrinsic call that
   /// represents an alignment assumption on the provided Ptr, Mask, Type
   /// and Offset.
   CallInst *CreateAlignmentAssumptionHelper(const DataLayout &DL,
@@ -1888,7 +2123,7 @@ private:
 
     if (OffsetValue) {
       bool IsOffsetZero = false;
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(OffsetValue))
+      if (const auto *CI = dyn_cast<ConstantInt>(OffsetValue))
         IsOffsetZero = CI->isZero();
 
       if (!IsOffsetZero) {
@@ -1906,7 +2141,7 @@ private:
   }
 
 public:
-  /// \brief Create an assume intrinsic call that represents an alignment
+  /// Create an assume intrinsic call that represents an alignment
   /// assumption on the provided pointer.
   ///
   /// An optional offset can be provided, and if it is provided, the offset
@@ -1917,15 +2152,15 @@ public:
                                       Value *OffsetValue = nullptr) {
     assert(isa<PointerType>(PtrValue->getType()) &&
            "trying to create an alignment assumption on a non-pointer?");
-    PointerType *PtrTy = cast<PointerType>(PtrValue->getType());
+    auto *PtrTy = cast<PointerType>(PtrValue->getType());
     Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
 
     Value *Mask = ConstantInt::get(IntPtrTy, Alignment > 0 ? Alignment - 1 : 0);
     return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
                                            OffsetValue);
   }
-  //
-  /// \brief Create an assume intrinsic call that represents an alignment
+
+  /// Create an assume intrinsic call that represents an alignment
   /// assumption on the provided pointer.
   ///
   /// An optional offset can be provided, and if it is provided, the offset
@@ -1939,7 +2174,7 @@ public:
                                       Value *OffsetValue = nullptr) {
     assert(isa<PointerType>(PtrValue->getType()) &&
            "trying to create an alignment assumption on a non-pointer?");
-    PointerType *PtrTy = cast<PointerType>(PtrValue->getType());
+    auto *PtrTy = cast<PointerType>(PtrValue->getType());
     Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
 
     if (Alignment->getType() != IntPtrTy)
diff --git a/include/llvm/IR/IRPrintingPasses.h b/include/llvm/IR/IRPrintingPasses.h
index 0825e0696cac..e4ac5d4d88a3 100644
--- a/include/llvm/IR/IRPrintingPasses.h
+++ b/include/llvm/IR/IRPrintingPasses.h
@@ -23,6 +23,7 @@
 #include <string>
 
 namespace llvm {
+class Pass;
 class BasicBlockPass;
 class Function;
 class FunctionPass;
@@ -32,18 +33,18 @@ class PreservedAnalyses;
 class raw_ostream;
 template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager;
 
-/// \brief Create and return a pass that writes the module to the specified
+/// Create and return a pass that writes the module to the specified
 /// \c raw_ostream.
 ModulePass *createPrintModulePass(raw_ostream &OS,
                                   const std::string &Banner = "",
                                   bool ShouldPreserveUseListOrder = false);
 
-/// \brief Create and return a pass that prints functions to the specified
+/// Create and return a pass that prints functions to the specified
 /// \c raw_ostream as they are processed.
 FunctionPass *createPrintFunctionPass(raw_ostream &OS,
                                       const std::string &Banner = "");
 
-/// \brief Create and return a pass that writes the BB to the specified
+/// Create and return a pass that writes the BB to the specified
 /// \c raw_ostream.
 BasicBlockPass *createPrintBasicBlockPass(raw_ostream &OS,
                                           const std::string &Banner = "");
@@ -54,7 +55,10 @@ BasicBlockPass *createPrintBasicBlockPass(raw_ostream &OS,
 /// non-printable characters in it.
 void printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name);
 
-/// \brief Pass for printing a Module as LLVM's text IR assembly.
+/// Return true if a pass is for IR printing.
+bool isIRPrintingPass(Pass *P);
+
+/// Pass for printing a Module as LLVM's text IR assembly.
 ///
 /// Note: This pass is for use with the new pass manager. Use the create...Pass
 /// functions above to create passes for use with the legacy pass manager.
@@ -73,7 +77,7 @@ public:
   static StringRef name() { return "PrintModulePass"; }
 };
 
-/// \brief Pass for printing a Function as LLVM's text IR assembly.
+/// Pass for printing a Function as LLVM's text IR assembly.
 ///
 /// Note: This pass is for use with the new pass manager. Use the create...Pass
 /// functions above to create passes for use with the legacy pass manager.
diff --git a/include/llvm/IR/InstVisitor.h b/include/llvm/IR/InstVisitor.h
index 55579819fd34..65074025a083 100644
--- a/include/llvm/IR/InstVisitor.h
+++ b/include/llvm/IR/InstVisitor.h
@@ -32,7 +32,7 @@ namespace llvm {
                visit##CLASS_TO_VISIT(static_cast<CLASS_TO_VISIT&>(I))
 
 
-/// @brief Base class for instruction visitors
+/// Base class for instruction visitors
 ///
 /// Instruction visitors are used when you want to perform different actions
 /// for different kinds of instructions without having to use lots of casts
@@ -213,6 +213,7 @@ public:
   // Handle the special instrinsic instruction classes.
   RetTy visitDbgDeclareInst(DbgDeclareInst &I)    { DELEGATE(DbgInfoIntrinsic);}
   RetTy visitDbgValueInst(DbgValueInst &I)        { DELEGATE(DbgInfoIntrinsic);}
+  RetTy visitDbgLabelInst(DbgLabelInst &I)        { DELEGATE(DbgInfoIntrinsic);}
   RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { DELEGATE(IntrinsicInst); }
   RetTy visitMemSetInst(MemSetInst &I)            { DELEGATE(MemIntrinsic); }
   RetTy visitMemCpyInst(MemCpyInst &I)            { DELEGATE(MemTransferInst); }
@@ -272,6 +273,7 @@ private:
       default:                     DELEGATE(IntrinsicInst);
       case Intrinsic::dbg_declare: DELEGATE(DbgDeclareInst);
       case Intrinsic::dbg_value:   DELEGATE(DbgValueInst);
+      case Intrinsic::dbg_label:   DELEGATE(DbgLabelInst);
       case Intrinsic::memcpy:      DELEGATE(MemCpyInst);
       case Intrinsic::memmove:     DELEGATE(MemMoveInst);
       case Intrinsic::memset:      DELEGATE(MemSetInst);
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index 871f702f95f2..ad0012048ac9 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -25,6 +25,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
@@ -80,7 +81,7 @@ public:
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
 
-  // \brief Returns true if this terminator relates to exception handling.
+  // Returns true if this terminator relates to exception handling.
   bool isExceptional() const {
     switch (getOpcode()) {
     case Instruction::CatchSwitch:
@@ -117,7 +118,7 @@ public:
       return idx < TermInst->getNumSuccessors();
     }
 
-    /// \brief Proxy object to allow write access in operator[]
+    /// Proxy object to allow write access in operator[]
     class SuccessorProxy {
       Self it;
 
@@ -391,6 +392,37 @@ public:
     return BO;
   }
 
+  static BinaryOperator *CreateFAddFMF(Value *V1, Value *V2,
+                                       BinaryOperator *FMFSource,
+                                       const Twine &Name = "") {
+    return CreateWithCopiedFlags(Instruction::FAdd, V1, V2, FMFSource, Name);
+  }
+  static BinaryOperator *CreateFSubFMF(Value *V1, Value *V2,
+                                       BinaryOperator *FMFSource,
+                                       const Twine &Name = "") {
+    return CreateWithCopiedFlags(Instruction::FSub, V1, V2, FMFSource, Name);
+  }
+  static BinaryOperator *CreateFMulFMF(Value *V1, Value *V2,
+                                       BinaryOperator *FMFSource,
+                                       const Twine &Name = "") {
+    return CreateWithCopiedFlags(Instruction::FMul, V1, V2, FMFSource, Name);
+  }
+  static BinaryOperator *CreateFDivFMF(Value *V1, Value *V2,
+                                       BinaryOperator *FMFSource,
+                                       const Twine &Name = "") {
+    return CreateWithCopiedFlags(Instruction::FDiv, V1, V2, FMFSource, Name);
+  }
+  static BinaryOperator *CreateFRemFMF(Value *V1, Value *V2,
+                                       BinaryOperator *FMFSource,
+                                       const Twine &Name = "") {
+    return CreateWithCopiedFlags(Instruction::FRem, V1, V2, FMFSource, Name);
+  }
+  static BinaryOperator *CreateFNegFMF(Value *Op, BinaryOperator *FMFSource,
+                                       const Twine &Name = "") {
+    Value *Zero = ConstantFP::getNegativeZero(Op->getType());
+    return CreateWithCopiedFlags(Instruction::FSub, Zero, Op, FMFSource);
+  }
+
   static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
                                    const Twine &Name = "") {
     BinaryOperator *BO = Create(Opc, V1, V2, Name);
@@ -556,16 +588,16 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
 /// can be performed with code like:
 ///
 /// if (isa<CastInst>(Instr)) { ... }
-/// @brief Base class of casting instructions.
+/// Base class of casting instructions.
 class CastInst : public UnaryInstruction {
 protected:
-  /// @brief Constructor with insert-before-instruction semantics for subclasses
+  /// Constructor with insert-before-instruction semantics for subclasses
   CastInst(Type *Ty, unsigned iType, Value *S,
            const Twine &NameStr = "", Instruction *InsertBefore = nullptr)
     : UnaryInstruction(Ty, iType, S, InsertBefore) {
     setName(NameStr);
   }
-  /// @brief Constructor with insert-at-end-of-block semantics for subclasses
+  /// Constructor with insert-at-end-of-block semantics for subclasses
   CastInst(Type *Ty, unsigned iType, Value *S,
            const Twine &NameStr, BasicBlock *InsertAtEnd)
     : UnaryInstruction(Ty, iType, S, InsertAtEnd) {
@@ -578,7 +610,7 @@ public:
   /// CastOps category (Instruction::isCast(opcode) returns true). This
   /// constructor has insert-before-instruction semantics to automatically
   /// insert the new CastInst before InsertBefore (if it is non-null).
-  /// @brief Construct any of the CastInst subclasses
+  /// Construct any of the CastInst subclasses
   static CastInst *Create(
     Instruction::CastOps,    ///< The opcode of the cast instruction
     Value *S,                ///< The value to be casted (operand 0)
@@ -591,7 +623,7 @@ public:
   /// CastOps category. This constructor has insert-at-end-of-block semantics
   /// to automatically insert the new CastInst at the end of InsertAtEnd (if
   /// its non-null).
-  /// @brief Construct any of the CastInst subclasses
+  /// Construct any of the CastInst subclasses
   static CastInst *Create(
     Instruction::CastOps,    ///< The opcode for the cast instruction
     Value *S,                ///< The value to be casted (operand 0)
@@ -600,7 +632,7 @@ public:
     BasicBlock *InsertAtEnd  ///< The block to insert the instruction into
   );
 
-  /// @brief Create a ZExt or BitCast cast instruction
+  /// Create a ZExt or BitCast cast instruction
   static CastInst *CreateZExtOrBitCast(
     Value *S,                ///< The value to be casted (operand 0)
     Type *Ty,          ///< The type to which cast should be made
@@ -608,7 +640,7 @@ public:
     Instruction *InsertBefore = nullptr ///< Place to insert the instruction
   );
 
-  /// @brief Create a ZExt or BitCast cast instruction
+  /// Create a ZExt or BitCast cast instruction
   static CastInst *CreateZExtOrBitCast(
     Value *S,                ///< The value to be casted (operand 0)
     Type *Ty,          ///< The type to which operand is casted
@@ -616,7 +648,7 @@ public:
     BasicBlock *InsertAtEnd  ///< The block to insert the instruction into
   );
 
-  /// @brief Create a SExt or BitCast cast instruction
+  /// Create a SExt or BitCast cast instruction
   static CastInst *CreateSExtOrBitCast(
     Value *S,                ///< The value to be casted (operand 0)
     Type *Ty,          ///< The type to which cast should be made
@@ -624,7 +656,7 @@ public:
     Instruction *InsertBefore = nullptr ///< Place to insert the instruction
   );
 
-  /// @brief Create a SExt or BitCast cast instruction
+  /// Create a SExt or BitCast cast instruction
   static CastInst *CreateSExtOrBitCast(
     Value *S,                ///< The value to be casted (operand 0)
     Type *Ty,          ///< The type to which operand is casted
@@ -632,7 +664,7 @@ public:
     BasicBlock *InsertAtEnd  ///< The block to insert the instruction into
   );
 
-  /// @brief Create a BitCast AddrSpaceCast, or a PtrToInt cast instruction.
+  /// Create a BitCast AddrSpaceCast, or a PtrToInt cast instruction.
   static CastInst *CreatePointerCast(
     Value *S,                ///< The pointer value to be casted (operand 0)
     Type *Ty,          ///< The type to which operand is casted
@@ -640,7 +672,7 @@ public:
     BasicBlock *InsertAtEnd  ///< The block to insert the instruction into
   );
 
-  /// @brief Create a BitCast, AddrSpaceCast or a PtrToInt cast instruction.
+  /// Create a BitCast, AddrSpaceCast or a PtrToInt cast instruction.
   static CastInst *CreatePointerCast(
     Value *S,                ///< The pointer value to be casted (operand 0)
     Type *Ty,          ///< The type to which cast should be made
@@ -648,7 +680,7 @@ public:
     Instruction *InsertBefore = nullptr ///< Place to insert the instruction
   );
 
-  /// @brief Create a BitCast or an AddrSpaceCast cast instruction.
+  /// Create a BitCast or an AddrSpaceCast cast instruction.
   static CastInst *CreatePointerBitCastOrAddrSpaceCast(
     Value *S,                ///< The pointer value to be casted (operand 0)
     Type *Ty,          ///< The type to which operand is casted
@@ -656,7 +688,7 @@ public:
     BasicBlock *InsertAtEnd  ///< The block to insert the instruction into
   );
 
-  /// @brief Create a BitCast or an AddrSpaceCast cast instruction.
+  /// Create a BitCast or an AddrSpaceCast cast instruction.
   static CastInst *CreatePointerBitCastOrAddrSpaceCast(
     Value *S,                ///< The pointer value to be casted (operand 0)
     Type *Ty,          ///< The type to which cast should be made
@@ -664,7 +696,7 @@ public:
     Instruction *InsertBefore = nullptr ///< Place to insert the instruction
   );
 
-  /// @brief Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
+  /// Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
   ///
   /// If the value is a pointer type and the destination an integer type,
   /// creates a PtrToInt cast. If the value is an integer type and the
@@ -677,7 +709,7 @@ public:
     Instruction *InsertBefore = nullptr ///< Place to insert the instruction
   );
 
-  /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts.
+  /// Create a ZExt, BitCast, or Trunc for int -> int casts.
   static CastInst *CreateIntegerCast(
     Value *S,                ///< The pointer value to be casted (operand 0)
     Type *Ty,          ///< The type to which cast should be made
@@ -686,7 +718,7 @@ public:
     Instruction *InsertBefore = nullptr ///< Place to insert the instruction
   );
 
-  /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts.
+  /// Create a ZExt, BitCast, or Trunc for int -> int casts.
   static CastInst *CreateIntegerCast(
     Value *S,                ///< The integer value to be casted (operand 0)
     Type *Ty,          ///< The integer type to which operand is casted
@@ -695,7 +727,7 @@ public:
     BasicBlock *InsertAtEnd  ///< The block to insert the instruction into
   );
 
-  /// @brief Create an FPExt, BitCast, or FPTrunc for fp -> fp casts
+  /// Create an FPExt, BitCast, or FPTrunc for fp -> fp casts
   static CastInst *CreateFPCast(
     Value *S,                ///< The floating point value to be casted
     Type *Ty,          ///< The floating point type to cast to
@@ -703,7 +735,7 @@ public:
     Instruction *InsertBefore = nullptr ///< Place to insert the instruction
   );
 
-  /// @brief Create an FPExt, BitCast, or FPTrunc for fp -> fp casts
+  /// Create an FPExt, BitCast, or FPTrunc for fp -> fp casts
   static CastInst *CreateFPCast(
     Value *S,                ///< The floating point value to be casted
     Type *Ty,          ///< The floating point type to cast to
@@ -711,7 +743,7 @@ public:
     BasicBlock *InsertAtEnd  ///< The block to insert the instruction into
   );
 
-  /// @brief Create a Trunc or BitCast cast instruction
+  /// Create a Trunc or BitCast cast instruction
   static CastInst *CreateTruncOrBitCast(
     Value *S,                ///< The value to be casted (operand 0)
     Type *Ty,          ///< The type to which cast should be made
@@ -719,7 +751,7 @@ public:
     Instruction *InsertBefore = nullptr ///< Place to insert the instruction
   );
 
-  /// @brief Create a Trunc or BitCast cast instruction
+  /// Create a Trunc or BitCast cast instruction
   static CastInst *CreateTruncOrBitCast(
     Value *S,                ///< The value to be casted (operand 0)
     Type *Ty,          ///< The type to which operand is casted
@@ -727,19 +759,19 @@ public:
     BasicBlock *InsertAtEnd  ///< The block to insert the instruction into
   );
 
-  /// @brief Check whether it is valid to call getCastOpcode for these types.
+  /// Check whether it is valid to call getCastOpcode for these types.
   static bool isCastable(
     Type *SrcTy, ///< The Type from which the value should be cast.
     Type *DestTy ///< The Type to which the value should be cast.
   );
 
-  /// @brief Check whether a bitcast between these types is valid
+  /// Check whether a bitcast between these types is valid
   static bool isBitCastable(
     Type *SrcTy, ///< The Type from which the value should be cast.
     Type *DestTy ///< The Type to which the value should be cast.
   );
 
-  /// @brief Check whether a bitcast, inttoptr, or ptrtoint cast between these
+  /// Check whether a bitcast, inttoptr, or ptrtoint cast between these
   /// types is valid and a no-op.
   ///
   /// This ensures that any pointer<->integer cast has enough bits in the
@@ -751,7 +783,7 @@ public:
 
   /// Returns the opcode necessary to cast Val into Ty using usual casting
   /// rules.
-  /// @brief Infer the opcode for cast operand and type
+  /// Infer the opcode for cast operand and type
   static Instruction::CastOps getCastOpcode(
     const Value *Val, ///< The value to cast
     bool SrcIsSigned, ///< Whether to treat the source as signed
@@ -763,14 +795,14 @@ public:
   /// only deals with integer source and destination types. To simplify that
   /// logic, this method is provided.
   /// @returns true iff the cast has only integral typed operand and dest type.
-  /// @brief Determine if this is an integer-only cast.
+  /// Determine if this is an integer-only cast.
   bool isIntegerCast() const;
 
   /// A lossless cast is one that does not alter the basic value. It implies
   /// a no-op cast but is more stringent, preventing things like int->float,
   /// long->double, or int->ptr.
   /// @returns true iff the cast is lossless.
-  /// @brief Determine if this is a lossless cast.
+  /// Determine if this is a lossless cast.
   bool isLosslessCast() const;
 
   /// A no-op cast is one that can be effected without changing any bits.
@@ -779,7 +811,7 @@ public:
   /// involving Integer and Pointer types. They are no-op casts if the integer
   /// is the same size as the pointer. However, pointer size varies with
   /// platform.
-  /// @brief Determine if the described cast is a no-op cast.
+  /// Determine if the described cast is a no-op cast.
   static bool isNoopCast(
     Instruction::CastOps Opcode, ///< Opcode of cast
     Type *SrcTy,         ///< SrcTy of cast
@@ -787,7 +819,7 @@ public:
     const DataLayout &DL ///< DataLayout to get the Int Ptr type from.
   );
 
-  /// @brief Determine if this cast is a no-op cast.
+  /// Determine if this cast is a no-op cast.
   ///
   /// \param DL is the DataLayout to determine pointer size.
   bool isNoopCast(const DataLayout &DL) const;
@@ -797,7 +829,7 @@ public:
   /// @returns 0 if the CastInst pair can't be eliminated, otherwise
   /// returns Instruction::CastOps value for a cast that can replace
   /// the pair, casting SrcTy to DstTy.
-  /// @brief Determine if a cast pair is eliminable
+  /// Determine if a cast pair is eliminable
   static unsigned isEliminableCastPair(
     Instruction::CastOps firstOpcode,  ///< Opcode of first cast
     Instruction::CastOps secondOpcode, ///< Opcode of second cast
@@ -809,23 +841,23 @@ public:
     Type *DstIntPtrTy  ///< Integer type corresponding to Ptr DstTy, or null
   );
 
-  /// @brief Return the opcode of this CastInst
+  /// Return the opcode of this CastInst
   Instruction::CastOps getOpcode() const {
     return Instruction::CastOps(Instruction::getOpcode());
   }
 
-  /// @brief Return the source type, as a convenience
+  /// Return the source type, as a convenience
   Type* getSrcTy() const { return getOperand(0)->getType(); }
-  /// @brief Return the destination type, as a convenience
+  /// Return the destination type, as a convenience
   Type* getDestTy() const { return getType(); }
 
   /// This method can be used to determine if a cast from S to DstTy using
   /// Opcode op is valid or not.
   /// @returns true iff the proposed cast is valid.
-  /// @brief Determine if a cast is valid without creating one.
+  /// Determine if a cast is valid without creating one.
   static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy);
 
-  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Instruction *I) {
     return I->isCast();
   }
@@ -839,7 +871,7 @@ public:
 //===----------------------------------------------------------------------===//
 
 /// This class is the base class for the comparison instructions.
-/// @brief Abstract base class of comparison instructions.
+/// Abstract base class of comparison instructions.
 class CmpInst : public Instruction {
 public:
   /// This enumeration lists the possible predicates for CmpInst subclasses.
@@ -905,7 +937,7 @@ public:
   /// the two operands.  Optionally (if InstBefore is specified) insert the
   /// instruction into a BasicBlock right before the specified instruction.
   /// The specified Instruction is allowed to be a dereferenced end iterator.
-  /// @brief Create a CmpInst
+  /// Create a CmpInst
   static CmpInst *Create(OtherOps Op,
                          Predicate predicate, Value *S1,
                          Value *S2, const Twine &Name = "",
@@ -914,21 +946,21 @@ public:
   /// Construct a compare instruction, given the opcode, the predicate and the
   /// two operands.  Also automatically insert this instruction to the end of
   /// the BasicBlock specified.
-  /// @brief Create a CmpInst
+  /// Create a CmpInst
   static CmpInst *Create(OtherOps Op, Predicate predicate, Value *S1,
                          Value *S2, const Twine &Name, BasicBlock *InsertAtEnd);
 
-  /// @brief Get the opcode casted to the right type
+  /// Get the opcode casted to the right type
   OtherOps getOpcode() const {
     return static_cast<OtherOps>(Instruction::getOpcode());
   }
 
-  /// @brief Return the predicate for this instruction.
+  /// Return the predicate for this instruction.
   Predicate getPredicate() const {
     return Predicate(getSubclassDataFromInstruction());
   }
 
-  /// @brief Set the predicate for this instruction to the specified value.
+  /// Set the predicate for this instruction to the specified value.
   void setPredicate(Predicate P) { setInstructionSubclassData(P); }
 
   static bool isFPPredicate(Predicate P) {
@@ -947,7 +979,7 @@ public:
   /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
   ///              OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
   /// @returns the inverse predicate for the instruction's current predicate.
-  /// @brief Return the inverse of the instruction's predicate.
+  /// Return the inverse of the instruction's predicate.
   Predicate getInversePredicate() const {
     return getInversePredicate(getPredicate());
   }
@@ -955,7 +987,7 @@ public:
   /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
   ///              OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
   /// @returns the inverse predicate for predicate provided in \p pred.
-  /// @brief Return the inverse of a given predicate
+  /// Return the inverse of a given predicate
   static Predicate getInversePredicate(Predicate pred);
 
   /// For example, EQ->EQ, SLE->SGE, ULT->UGT,
@@ -963,81 +995,109 @@ public:
   /// @returns the predicate that would be the result of exchanging the two
   /// operands of the CmpInst instruction without changing the result
   /// produced.
-  /// @brief Return the predicate as if the operands were swapped
+  /// Return the predicate as if the operands were swapped
   Predicate getSwappedPredicate() const {
     return getSwappedPredicate(getPredicate());
   }
 
   /// This is a static version that you can use without an instruction
   /// available.
-  /// @brief Return the predicate as if the operands were swapped.
+  /// Return the predicate as if the operands were swapped.
   static Predicate getSwappedPredicate(Predicate pred);
 
-  /// @brief Provide more efficient getOperand methods.
+  /// For predicate of kind "is X or equal to 0" returns the predicate "is X".
+  /// For predicate of kind "is X" returns the predicate "is X or equal to 0".
+  /// does not support other kind of predicates.
+  /// @returns the predicate that does not contains is equal to zero if
+  /// it had and vice versa.
+  /// Return the flipped strictness of predicate
+  Predicate getFlippedStrictnessPredicate() const {
+    return getFlippedStrictnessPredicate(getPredicate());
+  }
+
+  /// This is a static version that you can use without an instruction
+  /// available.
+  /// Return the flipped strictness of predicate
+  static Predicate getFlippedStrictnessPredicate(Predicate pred);
+
+  /// For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
+  /// Returns the non-strict version of strict comparisons.
+  Predicate getNonStrictPredicate() const {
+    return getNonStrictPredicate(getPredicate());
+  }
+
+  /// This is a static version that you can use without an instruction
+  /// available.
+  /// @returns the non-strict version of comparison provided in \p pred.
+  /// If \p pred is not a strict comparison predicate, returns \p pred.
+  /// Returns the non-strict version of strict comparisons.
+  static Predicate getNonStrictPredicate(Predicate pred);
+
+  /// Provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
   /// This is just a convenience that dispatches to the subclasses.
-  /// @brief Swap the operands and adjust predicate accordingly to retain
+  /// Swap the operands and adjust predicate accordingly to retain
   /// the same comparison.
   void swapOperands();
 
   /// This is just a convenience that dispatches to the subclasses.
-  /// @brief Determine if this CmpInst is commutative.
+  /// Determine if this CmpInst is commutative.
   bool isCommutative() const;
 
   /// This is just a convenience that dispatches to the subclasses.
-  /// @brief Determine if this is an equals/not equals predicate.
+  /// Determine if this is an equals/not equals predicate.
   bool isEquality() const;
 
   /// @returns true if the comparison is signed, false otherwise.
-  /// @brief Determine if this instruction is using a signed comparison.
+  /// Determine if this instruction is using a signed comparison.
   bool isSigned() const {
     return isSigned(getPredicate());
   }
 
   /// @returns true if the comparison is unsigned, false otherwise.
-  /// @brief Determine if this instruction is using an unsigned comparison.
+  /// Determine if this instruction is using an unsigned comparison.
   bool isUnsigned() const {
     return isUnsigned(getPredicate());
   }
 
   /// For example, ULT->SLT, ULE->SLE, UGT->SGT, UGE->SGE, SLT->Failed assert
   /// @returns the signed version of the unsigned predicate pred.
-  /// @brief return the signed version of a predicate
+  /// return the signed version of a predicate
   static Predicate getSignedPredicate(Predicate pred);
 
   /// For example, ULT->SLT, ULE->SLE, UGT->SGT, UGE->SGE, SLT->Failed assert
   /// @returns the signed version of the predicate for this instruction (which
   /// has to be an unsigned predicate).
-  /// @brief return the signed version of a predicate
+  /// return the signed version of a predicate
   Predicate getSignedPredicate() {
     return getSignedPredicate(getPredicate());
   }
 
   /// This is just a convenience.
-  /// @brief Determine if this is true when both operands are the same.
+  /// Determine if this is true when both operands are the same.
   bool isTrueWhenEqual() const {
     return isTrueWhenEqual(getPredicate());
   }
 
   /// This is just a convenience.
-  /// @brief Determine if this is false when both operands are the same.
+  /// Determine if this is false when both operands are the same.
   bool isFalseWhenEqual() const {
     return isFalseWhenEqual(getPredicate());
   }
 
   /// @returns true if the predicate is unsigned, false otherwise.
-  /// @brief Determine if the predicate is an unsigned operation.
+  /// Determine if the predicate is an unsigned operation.
   static bool isUnsigned(Predicate predicate);
 
   /// @returns true if the predicate is signed, false otherwise.
-  /// @brief Determine if the predicate is an signed operation.
+  /// Determine if the predicate is an signed operation.
   static bool isSigned(Predicate predicate);
 
-  /// @brief Determine if the predicate is an ordered operation.
+  /// Determine if the predicate is an ordered operation.
   static bool isOrdered(Predicate predicate);
 
-  /// @brief Determine if the predicate is an unordered operation.
+  /// Determine if the predicate is an unordered operation.
   static bool isUnordered(Predicate predicate);
 
   /// Determine if the predicate is true when comparing a value with itself.
@@ -1054,7 +1114,7 @@ public:
   /// operands.
   static bool isImpliedFalseByMatchingCmp(Predicate Pred1, Predicate Pred2);
 
-  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Instruction *I) {
     return I->getOpcode() == Instruction::ICmp ||
            I->getOpcode() == Instruction::FCmp;
@@ -1063,7 +1123,7 @@ public:
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
 
-  /// @brief Create a result type for fcmp/icmp
+  /// Create a result type for fcmp/icmp
   static Type* makeCmpResultType(Type* opnd_type) {
     if (VectorType* vt = dyn_cast<VectorType>(opnd_type)) {
       return VectorType::get(Type::getInt1Ty(opnd_type->getContext()),
@@ -1121,7 +1181,7 @@ public:
 
   /// Convenience accessors
 
-  /// \brief Return the outer EH-pad this funclet is nested within.
+  /// Return the outer EH-pad this funclet is nested within.
   ///
   /// Note: This returns the associated CatchSwitchInst if this FuncletPadInst
   /// is a CatchPadInst.
@@ -1157,7 +1217,7 @@ struct OperandTraits<FuncletPadInst>
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(FuncletPadInst, Value)
 
-/// \brief A lightweight accessor for an operand bundle meant to be passed
+/// A lightweight accessor for an operand bundle meant to be passed
 /// around by value.
 struct OperandBundleUse {
   ArrayRef<Use> Inputs;
@@ -1166,7 +1226,7 @@ struct OperandBundleUse {
   explicit OperandBundleUse(StringMapEntry<uint32_t> *Tag, ArrayRef<Use> Inputs)
       : Inputs(Inputs), Tag(Tag) {}
 
-  /// \brief Return true if the operand at index \p Idx in this operand bundle
+  /// Return true if the operand at index \p Idx in this operand bundle
   /// has the attribute A.
   bool operandHasAttr(unsigned Idx, Attribute::AttrKind A) const {
     if (isDeoptOperandBundle())
@@ -1177,12 +1237,12 @@ struct OperandBundleUse {
     return false;
   }
 
-  /// \brief Return the tag of this operand bundle as a string.
+  /// Return the tag of this operand bundle as a string.
   StringRef getTagName() const {
     return Tag->getKey();
   }
 
-  /// \brief Return the tag of this operand bundle as an integer.
+  /// Return the tag of this operand bundle as an integer.
   ///
   /// Operand bundle tags are interned by LLVMContextImpl::getOrInsertBundleTag,
   /// and this function returns the unique integer getOrInsertBundleTag
@@ -1191,22 +1251,22 @@ struct OperandBundleUse {
     return Tag->getValue();
   }
 
-  /// \brief Return true if this is a "deopt" operand bundle.
+  /// Return true if this is a "deopt" operand bundle.
   bool isDeoptOperandBundle() const {
     return getTagID() == LLVMContext::OB_deopt;
   }
 
-  /// \brief Return true if this is a "funclet" operand bundle.
+  /// Return true if this is a "funclet" operand bundle.
   bool isFuncletOperandBundle() const {
     return getTagID() == LLVMContext::OB_funclet;
   }
 
 private:
-  /// \brief Pointer to an entry in LLVMContextImpl::getOrInsertBundleTag.
+  /// Pointer to an entry in LLVMContextImpl::getOrInsertBundleTag.
   StringMapEntry<uint32_t> *Tag;
 };
 
-/// \brief A container for an operand bundle being viewed as a set of values
+/// A container for an operand bundle being viewed as a set of values
 /// rather than a set of uses.
 ///
 /// Unlike OperandBundleUse, OperandBundleDefT owns the memory it carries, and
@@ -1241,7 +1301,7 @@ public:
 using OperandBundleDef = OperandBundleDefT<Value *>;
 using ConstOperandBundleDef = OperandBundleDefT<const Value *>;
 
-/// \brief A mixin to add operand bundle functionality to llvm instruction
+/// A mixin to add operand bundle functionality to llvm instruction
 /// classes.
 ///
 /// OperandBundleUser uses the descriptor area co-allocated with the host User
@@ -1289,21 +1349,21 @@ using ConstOperandBundleDef = OperandBundleDefT<const Value *>;
 /// Currently operand bundle users with hung-off operands are not supported.
 template <typename InstrTy, typename OpIteratorTy> class OperandBundleUser {
 public:
-  /// \brief Return the number of operand bundles associated with this User.
+  /// Return the number of operand bundles associated with this User.
   unsigned getNumOperandBundles() const {
     return std::distance(bundle_op_info_begin(), bundle_op_info_end());
   }
 
-  /// \brief Return true if this User has any operand bundles.
+  /// Return true if this User has any operand bundles.
   bool hasOperandBundles() const { return getNumOperandBundles() != 0; }
 
-  /// \brief Return the index of the first bundle operand in the Use array.
+  /// Return the index of the first bundle operand in the Use array.
   unsigned getBundleOperandsStartIndex() const {
     assert(hasOperandBundles() && "Don't call otherwise!");
     return bundle_op_info_begin()->Begin;
   }
 
-  /// \brief Return the index of the last bundle operand in the Use array.
+  /// Return the index of the last bundle operand in the Use array.
   unsigned getBundleOperandsEndIndex() const {
     assert(hasOperandBundles() && "Don't call otherwise!");
     return bundle_op_info_end()[-1].End;
@@ -1315,7 +1375,7 @@ public:
            Idx < getBundleOperandsEndIndex();
   }
 
-  /// \brief Return the total number operands (not operand bundles) used by
+  /// Return the total number operands (not operand bundles) used by
   /// every operand bundle in this OperandBundleUser.
   unsigned getNumTotalBundleOperands() const {
     if (!hasOperandBundles())
@@ -1328,13 +1388,13 @@ public:
     return End - Begin;
   }
 
-  /// \brief Return the operand bundle at a specific index.
+  /// Return the operand bundle at a specific index.
   OperandBundleUse getOperandBundleAt(unsigned Index) const {
     assert(Index < getNumOperandBundles() && "Index out of bounds!");
     return operandBundleFromBundleOpInfo(*(bundle_op_info_begin() + Index));
   }
 
-  /// \brief Return the number of operand bundles with the tag Name attached to
+  /// Return the number of operand bundles with the tag Name attached to
   /// this instruction.
   unsigned countOperandBundlesOfType(StringRef Name) const {
     unsigned Count = 0;
@@ -1345,7 +1405,7 @@ public:
     return Count;
   }
 
-  /// \brief Return the number of operand bundles with the tag ID attached to
+  /// Return the number of operand bundles with the tag ID attached to
   /// this instruction.
   unsigned countOperandBundlesOfType(uint32_t ID) const {
     unsigned Count = 0;
@@ -1356,7 +1416,7 @@ public:
     return Count;
   }
 
-  /// \brief Return an operand bundle by name, if present.
+  /// Return an operand bundle by name, if present.
   ///
   /// It is an error to call this for operand bundle types that may have
   /// multiple instances of them on the same instruction.
@@ -1372,7 +1432,7 @@ public:
     return None;
   }
 
-  /// \brief Return an operand bundle by tag ID, if present.
+  /// Return an operand bundle by tag ID, if present.
   ///
   /// It is an error to call this for operand bundle types that may have
   /// multiple instances of them on the same instruction.
@@ -1388,7 +1448,7 @@ public:
     return None;
   }
 
-  /// \brief Return the list of operand bundles attached to this instruction as
+  /// Return the list of operand bundles attached to this instruction as
   /// a vector of OperandBundleDefs.
   ///
   /// This function copies the OperandBundeUse instances associated with this
@@ -1400,7 +1460,7 @@ public:
       Defs.emplace_back(getOperandBundleAt(i));
   }
 
-  /// \brief Return the operand bundle for the operand at index OpIdx.
+  /// Return the operand bundle for the operand at index OpIdx.
   ///
   /// It is an error to call this with an OpIdx that does not correspond to an
   /// bundle operand.
@@ -1408,7 +1468,7 @@ public:
     return operandBundleFromBundleOpInfo(getBundleOpInfoForOperand(OpIdx));
   }
 
-  /// \brief Return true if this operand bundle user has operand bundles that
+  /// Return true if this operand bundle user has operand bundles that
   /// may read from the heap.
   bool hasReadingOperandBundles() const {
     // Implementation note: this is a conservative implementation of operand
@@ -1417,7 +1477,7 @@ public:
     return hasOperandBundles();
   }
 
-  /// \brief Return true if this operand bundle user has operand bundles that
+  /// Return true if this operand bundle user has operand bundles that
   /// may write to the heap.
   bool hasClobberingOperandBundles() const {
     for (auto &BOI : bundle_op_infos()) {
@@ -1433,7 +1493,7 @@ public:
     return false;
   }
 
-  /// \brief Return true if the bundle operand at index \p OpIdx has the
+  /// Return true if the bundle operand at index \p OpIdx has the
   /// attribute \p A.
   bool bundleOperandHasAttr(unsigned OpIdx,  Attribute::AttrKind A) const {
     auto &BOI = getBundleOpInfoForOperand(OpIdx);
@@ -1441,7 +1501,7 @@ public:
     return OBU.operandHasAttr(OpIdx - BOI.Begin, A);
   }
 
-  /// \brief Return true if \p Other has the same sequence of operand bundle
+  /// Return true if \p Other has the same sequence of operand bundle
   /// tags with the same number of operands on each one of them as this
   /// OperandBundleUser.
   bool hasIdenticalOperandBundleSchema(
@@ -1453,7 +1513,7 @@ public:
                       Other.bundle_op_info_begin());
   }
 
-  /// \brief Return true if this operand bundle user contains operand bundles
+  /// Return true if this operand bundle user contains operand bundles
   /// with tags other than those specified in \p IDs.
   bool hasOperandBundlesOtherThan(ArrayRef<uint32_t> IDs) const {
     for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) {
@@ -1465,7 +1525,7 @@ public:
   }
 
 protected:
-  /// \brief Is the function attribute S disallowed by some operand bundle on
+  /// Is the function attribute S disallowed by some operand bundle on
   /// this operand bundle user?
   bool isFnAttrDisallowedByOpBundle(StringRef S) const {
     // Operand bundles only possibly disallow readnone, readonly and argmenonly
@@ -1473,7 +1533,7 @@ protected:
     return false;
   }
 
-  /// \brief Is the function attribute A disallowed by some operand bundle on
+  /// Is the function attribute A disallowed by some operand bundle on
   /// this operand bundle user?
   bool isFnAttrDisallowedByOpBundle(Attribute::AttrKind A) const {
     switch (A) {
@@ -1499,18 +1559,18 @@ protected:
     llvm_unreachable("switch has a default case!");
   }
 
-  /// \brief Used to keep track of an operand bundle.  See the main comment on
+  /// Used to keep track of an operand bundle.  See the main comment on
   /// OperandBundleUser above.
   struct BundleOpInfo {
-    /// \brief The operand bundle tag, interned by
+    /// The operand bundle tag, interned by
     /// LLVMContextImpl::getOrInsertBundleTag.
     StringMapEntry<uint32_t> *Tag;
 
-    /// \brief The index in the Use& vector where operands for this operand
+    /// The index in the Use& vector where operands for this operand
     /// bundle starts.
     uint32_t Begin;
 
-    /// \brief The index in the Use& vector where operands for this operand
+    /// The index in the Use& vector where operands for this operand
     /// bundle ends.
     uint32_t End;
 
@@ -1519,7 +1579,7 @@ protected:
     }
   };
 
-  /// \brief Simple helper function to map a BundleOpInfo to an
+  /// Simple helper function to map a BundleOpInfo to an
   /// OperandBundleUse.
   OperandBundleUse
   operandBundleFromBundleOpInfo(const BundleOpInfo &BOI) const {
@@ -1531,7 +1591,7 @@ protected:
   using bundle_op_iterator = BundleOpInfo *;
   using const_bundle_op_iterator = const BundleOpInfo *;
 
-  /// \brief Return the start of the list of BundleOpInfo instances associated
+  /// Return the start of the list of BundleOpInfo instances associated
   /// with this OperandBundleUser.
   bundle_op_iterator bundle_op_info_begin() {
     if (!static_cast<InstrTy *>(this)->hasDescriptor())
@@ -1541,7 +1601,7 @@ protected:
     return reinterpret_cast<bundle_op_iterator>(BytesBegin);
   }
 
-  /// \brief Return the start of the list of BundleOpInfo instances associated
+  /// Return the start of the list of BundleOpInfo instances associated
   /// with this OperandBundleUser.
   const_bundle_op_iterator bundle_op_info_begin() const {
     auto *NonConstThis =
@@ -1549,7 +1609,7 @@ protected:
     return NonConstThis->bundle_op_info_begin();
   }
 
-  /// \brief Return the end of the list of BundleOpInfo instances associated
+  /// Return the end of the list of BundleOpInfo instances associated
   /// with this OperandBundleUser.
   bundle_op_iterator bundle_op_info_end() {
     if (!static_cast<InstrTy *>(this)->hasDescriptor())
@@ -1559,7 +1619,7 @@ protected:
     return reinterpret_cast<bundle_op_iterator>(BytesEnd);
   }
 
-  /// \brief Return the end of the list of BundleOpInfo instances associated
+  /// Return the end of the list of BundleOpInfo instances associated
   /// with this OperandBundleUser.
   const_bundle_op_iterator bundle_op_info_end() const {
     auto *NonConstThis =
@@ -1567,17 +1627,17 @@ protected:
     return NonConstThis->bundle_op_info_end();
   }
 
-  /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end).
+  /// Return the range [\p bundle_op_info_begin, \p bundle_op_info_end).
   iterator_range<bundle_op_iterator> bundle_op_infos() {
     return make_range(bundle_op_info_begin(), bundle_op_info_end());
   }
 
-  /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end).
+  /// Return the range [\p bundle_op_info_begin, \p bundle_op_info_end).
   iterator_range<const_bundle_op_iterator> bundle_op_infos() const {
     return make_range(bundle_op_info_begin(), bundle_op_info_end());
   }
 
-  /// \brief Populate the BundleOpInfo instances and the Use& vector from \p
+  /// Populate the BundleOpInfo instances and the Use& vector from \p
   /// Bundles.  Return the op_iterator pointing to the Use& one past the last
   /// last bundle operand use.
   ///
@@ -1608,7 +1668,7 @@ protected:
     return It;
   }
 
-  /// \brief Return the BundleOpInfo for the operand at index OpIdx.
+  /// Return the BundleOpInfo for the operand at index OpIdx.
   ///
   /// It is an error to call this with an OpIdx that does not correspond to an
   /// bundle operand.
@@ -1620,7 +1680,7 @@ protected:
     llvm_unreachable("Did not find operand bundle for operand!");
   }
 
-  /// \brief Return the total number of values used in \p Bundles.
+  /// Return the total number of values used in \p Bundles.
   static unsigned CountBundleInputs(ArrayRef<OperandBundleDef> Bundles) {
     unsigned Total = 0;
     for (auto &B : Bundles)
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index 6af9cbfae5de..a3bf25056ee5 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -128,6 +128,7 @@ public:
   const char *getOpcodeName() const { return getOpcodeName(getOpcode()); }
   bool isTerminator() const { return isTerminator(getOpcode()); }
   bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
+  bool isIntDivRem() const { return isIntDivRem(getOpcode()); }
   bool isShift() { return isShift(getOpcode()); }
   bool isCast() const { return isCast(getOpcode()); }
   bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
@@ -142,6 +143,10 @@ public:
     return Opcode >= BinaryOpsBegin && Opcode < BinaryOpsEnd;
   }
 
+  static inline bool isIntDivRem(unsigned Opcode) {
+    return Opcode == UDiv || Opcode == SDiv || Opcode == URem || Opcode == SRem;
+  }
+
   /// Determine if the Opcode is one of the shift instructions.
   static inline bool isShift(unsigned Opcode) {
     return Opcode >= Shl && Opcode <= AShr;
@@ -284,7 +289,7 @@ public:
   /// Return the debug location for this node as a DebugLoc.
   const DebugLoc &getDebugLoc() const { return DbgLoc; }
 
-  /// Set or clear the nsw flag on this instruction, which must be an operator
+  /// Set or clear the nuw flag on this instruction, which must be an operator
   /// which supports this flag. See LangRef.html for the meaning of this flag.
   void setHasNoUnsignedWrap(bool b = true);
 
@@ -535,6 +540,14 @@ public:
   /// matters, isSafeToSpeculativelyExecute may be more appropriate.
   bool mayHaveSideEffects() const { return mayWriteToMemory() || mayThrow(); }
 
+  /// Return true if the instruction can be removed if the result is unused.
+  ///
+  /// When constant folding some instructions cannot be removed even if their
+  /// results are unused. Specifically terminator instructions and calls that
+  /// may have side effects cannot be removed without semantically changing the
+  /// generated program.
+  bool isSafeToRemove() const;
+  
   /// Return true if the instruction is a variety of EH-block.
   bool isEHPad() const {
     switch (getOpcode()) {
@@ -548,6 +561,14 @@ public:
     }
   }
 
+  /// Return a pointer to the next non-debug instruction in the same basic
+  /// block as 'this', or nullptr if no such instruction exists.
+  const Instruction *getNextNonDebugInstruction() const;
+  Instruction *getNextNonDebugInstruction() {
+    return const_cast<Instruction *>(
+        static_cast<const Instruction *>(this)->getNextNonDebugInstruction());
+  }
+
   /// Create a copy of 'this' instruction that is identical in all ways except
   /// the following:
   ///   * The instruction has no parent
@@ -582,7 +603,7 @@ public:
   /// be identical.
   /// @returns true if the specified instruction is the same operation as
   /// the current one.
-  /// @brief Determine if one instruction is the same operation as another.
+  /// Determine if one instruction is the same operation as another.
   bool isSameOperationAs(const Instruction *I, unsigned flags = 0) const;
 
   /// Return true if there are any uses of this instruction in blocks other than
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index c1122d137f24..a2cb84a071f2 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -98,6 +98,10 @@ public:
     return cast<PointerType>(Instruction::getType());
   }
 
+  /// Get allocation size in bits. Returns None if size can't be determined,
+  /// e.g. in case of a VLA.
+  Optional<uint64_t> getAllocationSizeInBits(const DataLayout &DL) const;
+
   /// Return the type that is being allocated by the instruction.
   Type *getAllocatedType() const { return AllocatedType; }
   /// for use only in special circumstances that need to generically
@@ -1346,224 +1350,71 @@ public:
   }
 };
 
-//===----------------------------------------------------------------------===//
-/// This class represents a function call, abstracting a target
-/// machine's calling convention.  This class uses low bit of the SubClassData
-/// field to indicate whether or not this is a tail call.  The rest of the bits
-/// hold the calling convention of the call.
-///
-class CallInst : public Instruction,
-                 public OperandBundleUser<CallInst, User::op_iterator> {
-  friend class OperandBundleUser<CallInst, User::op_iterator>;
-
-  AttributeList Attrs; ///< parameter attributes for call
-  FunctionType *FTy;
-
-  CallInst(const CallInst &CI);
-
-  /// Construct a CallInst given a range of arguments.
-  /// Construct a CallInst from a range of arguments
-  inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
-                  ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
-                  Instruction *InsertBefore);
+class CallInst;
+class InvokeInst;
 
-  inline CallInst(Value *Func, ArrayRef<Value *> Args,
-                  ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
-                  Instruction *InsertBefore)
-      : CallInst(cast<FunctionType>(
-                     cast<PointerType>(Func->getType())->getElementType()),
-                 Func, Args, Bundles, NameStr, InsertBefore) {}
-
-  inline CallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
-                  Instruction *InsertBefore)
-      : CallInst(Func, Args, None, NameStr, InsertBefore) {}
-
-  /// Construct a CallInst given a range of arguments.
-  /// Construct a CallInst from a range of arguments
-  inline CallInst(Value *Func, ArrayRef<Value *> Args,
-                  ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
-                  BasicBlock *InsertAtEnd);
-
-  explicit CallInst(Value *F, const Twine &NameStr,
-                    Instruction *InsertBefore);
-
-  CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
-
-  void init(Value *Func, ArrayRef<Value *> Args,
-            ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr) {
-    init(cast<FunctionType>(
-             cast<PointerType>(Func->getType())->getElementType()),
-         Func, Args, Bundles, NameStr);
-  }
-  void init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
-            ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
-  void init(Value *Func, const Twine &NameStr);
+template <class T> struct CallBaseParent { using type = Instruction; };
 
-  bool hasDescriptor() const { return HasDescriptor; }
+template <> struct CallBaseParent<InvokeInst> { using type = TerminatorInst; };
 
+//===----------------------------------------------------------------------===//
+/// Base class for all callable instructions (InvokeInst and CallInst)
+/// Holds everything related to calling a function, abstracting from the base
+/// type @p BaseInstTy and the concrete instruction @p InstTy
+///
+template <class InstTy>
+class CallBase : public CallBaseParent<InstTy>::type,
+                 public OperandBundleUser<InstTy, User::op_iterator> {
 protected:
-  // Note: Instruction needs to be a friend here to call cloneImpl.
-  friend class Instruction;
-
-  CallInst *cloneImpl() const;
-
-public:
-  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
-                          ArrayRef<OperandBundleDef> Bundles = None,
-                          const Twine &NameStr = "",
-                          Instruction *InsertBefore = nullptr) {
-    return Create(cast<FunctionType>(
-                      cast<PointerType>(Func->getType())->getElementType()),
-                  Func, Args, Bundles, NameStr, InsertBefore);
-  }
-
-  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
-                          const Twine &NameStr,
-                          Instruction *InsertBefore = nullptr) {
-    return Create(cast<FunctionType>(
-                      cast<PointerType>(Func->getType())->getElementType()),
-                  Func, Args, None, NameStr, InsertBefore);
-  }
-
-  static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
-                          const Twine &NameStr,
-                          Instruction *InsertBefore = nullptr) {
-    return new (unsigned(Args.size() + 1))
-        CallInst(Ty, Func, Args, None, NameStr, InsertBefore);
-  }
-
-  static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
-                          ArrayRef<OperandBundleDef> Bundles = None,
-                          const Twine &NameStr = "",
-                          Instruction *InsertBefore = nullptr) {
-    const unsigned TotalOps =
-        unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
-    const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
-
-    return new (TotalOps, DescriptorBytes)
-        CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore);
-  }
+  AttributeList Attrs; ///< parameter attributes for callable
+  FunctionType *FTy;
+  using BaseInstTy = typename CallBaseParent<InstTy>::type;
 
-  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
-                          ArrayRef<OperandBundleDef> Bundles,
-                          const Twine &NameStr, BasicBlock *InsertAtEnd) {
-    const unsigned TotalOps =
-        unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
-    const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+  template <class... ArgsTy>
+  CallBase(AttributeList const &A, FunctionType *FT, ArgsTy &&... Args)
+      : BaseInstTy(std::forward<ArgsTy>(Args)...), Attrs(A), FTy(FT) {}
+  bool hasDescriptor() const { return Value::HasDescriptor; }
 
-    return new (TotalOps, DescriptorBytes)
-        CallInst(Func, Args, Bundles, NameStr, InsertAtEnd);
-  }
+  using BaseInstTy::BaseInstTy;
 
-  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
-                          const Twine &NameStr, BasicBlock *InsertAtEnd) {
-    return new (unsigned(Args.size() + 1))
-        CallInst(Func, Args, None, NameStr, InsertAtEnd);
-  }
+  using OperandBundleUser<InstTy,
+                          User::op_iterator>::isFnAttrDisallowedByOpBundle;
+  using OperandBundleUser<InstTy, User::op_iterator>::getNumTotalBundleOperands;
+  using OperandBundleUser<InstTy, User::op_iterator>::bundleOperandHasAttr;
+  using Instruction::getSubclassDataFromInstruction;
+  using Instruction::setInstructionSubclassData;
 
-  static CallInst *Create(Value *F, const Twine &NameStr = "",
-                          Instruction *InsertBefore = nullptr) {
-    return new(1) CallInst(F, NameStr, InsertBefore);
-  }
+public:
+  using Instruction::getContext;
+  using OperandBundleUser<InstTy, User::op_iterator>::hasOperandBundles;
+  using OperandBundleUser<InstTy,
+                          User::op_iterator>::getBundleOperandsStartIndex;
 
-  static CallInst *Create(Value *F, const Twine &NameStr,
-                          BasicBlock *InsertAtEnd) {
-    return new(1) CallInst(F, NameStr, InsertAtEnd);
+  static bool classof(const Instruction *I) {
+    llvm_unreachable(
+        "CallBase is not meant to be used as part of the classof hierarchy");
   }
 
-  /// Create a clone of \p CI with a different set of operand bundles and
-  /// insert it before \p InsertPt.
+public:
+  /// Return the parameter attributes for this call.
   ///
-  /// The returned call instruction is identical \p CI in every way except that
-  /// the operand bundles for the new instruction are set to the operand bundles
-  /// in \p Bundles.
-  static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
-                          Instruction *InsertPt = nullptr);
+  AttributeList getAttributes() const { return Attrs; }
 
-  /// Generate the IR for a call to malloc:
-  /// 1. Compute the malloc call's argument as the specified type's size,
-  ///    possibly multiplied by the array size if the array size is not
-  ///    constant 1.
-  /// 2. Call malloc with that argument.
-  /// 3. Bitcast the result of the malloc call to the specified type.
-  static Instruction *CreateMalloc(Instruction *InsertBefore,
-                                   Type *IntPtrTy, Type *AllocTy,
-                                   Value *AllocSize, Value *ArraySize = nullptr,
-                                   Function* MallocF = nullptr,
-                                   const Twine &Name = "");
-  static Instruction *CreateMalloc(BasicBlock *InsertAtEnd,
-                                   Type *IntPtrTy, Type *AllocTy,
-                                   Value *AllocSize, Value *ArraySize = nullptr,
-                                   Function* MallocF = nullptr,
-                                   const Twine &Name = "");
-  static Instruction *CreateMalloc(Instruction *InsertBefore,
-                                   Type *IntPtrTy, Type *AllocTy,
-                                   Value *AllocSize, Value *ArraySize = nullptr,
-                                   ArrayRef<OperandBundleDef> Bundles = None,
-                                   Function* MallocF = nullptr,
-                                   const Twine &Name = "");
-  static Instruction *CreateMalloc(BasicBlock *InsertAtEnd,
-                                   Type *IntPtrTy, Type *AllocTy,
-                                   Value *AllocSize, Value *ArraySize = nullptr,
-                                   ArrayRef<OperandBundleDef> Bundles = None,
-                                   Function* MallocF = nullptr,
-                                   const Twine &Name = "");
-  /// Generate the IR for a call to the builtin free function.
-  static Instruction *CreateFree(Value *Source,
-                                 Instruction *InsertBefore);
-  static Instruction *CreateFree(Value *Source,
-                                 BasicBlock *InsertAtEnd);
-  static Instruction *CreateFree(Value *Source,
-                                 ArrayRef<OperandBundleDef> Bundles,
-                                 Instruction *InsertBefore);
-  static Instruction *CreateFree(Value *Source,
-                                 ArrayRef<OperandBundleDef> Bundles,
-                                 BasicBlock *InsertAtEnd);
+  /// Set the parameter attributes for this call.
+  ///
+  void setAttributes(AttributeList A) { Attrs = A; }
 
   FunctionType *getFunctionType() const { return FTy; }
 
   void mutateFunctionType(FunctionType *FTy) {
-    mutateType(FTy->getReturnType());
+    Value::mutateType(FTy->getReturnType());
     this->FTy = FTy;
   }
 
-  // Note that 'musttail' implies 'tail'.
-  enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2,
-                      TCK_NoTail = 3 };
-  TailCallKind getTailCallKind() const {
-    return TailCallKind(getSubclassDataFromInstruction() & 3);
-  }
-
-  bool isTailCall() const {
-    unsigned Kind = getSubclassDataFromInstruction() & 3;
-    return Kind == TCK_Tail || Kind == TCK_MustTail;
-  }
-
-  bool isMustTailCall() const {
-    return (getSubclassDataFromInstruction() & 3) == TCK_MustTail;
-  }
-
-  bool isNoTailCall() const {
-    return (getSubclassDataFromInstruction() & 3) == TCK_NoTail;
-  }
-
-  void setTailCall(bool isTC = true) {
-    setInstructionSubclassData((getSubclassDataFromInstruction() & ~3) |
-                               unsigned(isTC ? TCK_Tail : TCK_None));
-  }
-
-  void setTailCallKind(TailCallKind TCK) {
-    setInstructionSubclassData((getSubclassDataFromInstruction() & ~3) |
-                               unsigned(TCK));
-  }
-
-  /// Provide fast operand accessors
-  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
   /// Return the number of call arguments.
   ///
   unsigned getNumArgOperands() const {
-    return getNumOperands() - getNumTotalBundleOperands() - 1;
+    return getNumOperands() - getNumTotalBundleOperands() - InstTy::ArgOffset;
   }
 
   /// getArgOperand/setArgOperand - Return/set the i-th call argument.
@@ -1578,46 +1429,112 @@ public:
   }
 
   /// Return the iterator pointing to the beginning of the argument list.
-  op_iterator arg_begin() { return op_begin(); }
+  User::op_iterator arg_begin() { return op_begin(); }
 
   /// Return the iterator pointing to the end of the argument list.
-  op_iterator arg_end() {
+  User::op_iterator arg_end() {
     // [ call args ], [ operand bundles ], callee
-    return op_end() - getNumTotalBundleOperands() - 1;
+    return op_end() - getNumTotalBundleOperands() - InstTy::ArgOffset;
   }
 
   /// Iteration adapter for range-for loops.
-  iterator_range<op_iterator> arg_operands() {
+  iterator_range<User::op_iterator> arg_operands() {
     return make_range(arg_begin(), arg_end());
   }
 
   /// Return the iterator pointing to the beginning of the argument list.
-  const_op_iterator arg_begin() const { return op_begin(); }
+  User::const_op_iterator arg_begin() const { return op_begin(); }
 
   /// Return the iterator pointing to the end of the argument list.
-  const_op_iterator arg_end() const {
+  User::const_op_iterator arg_end() const {
     // [ call args ], [ operand bundles ], callee
-    return op_end() - getNumTotalBundleOperands() - 1;
+    return op_end() - getNumTotalBundleOperands() - InstTy::ArgOffset;
   }
 
   /// Iteration adapter for range-for loops.
-  iterator_range<const_op_iterator> arg_operands() const {
+  iterator_range<User::const_op_iterator> arg_operands() const {
     return make_range(arg_begin(), arg_end());
   }
 
   /// Wrappers for getting the \c Use of a call argument.
   const Use &getArgOperandUse(unsigned i) const {
     assert(i < getNumArgOperands() && "Out of bounds!");
-    return getOperandUse(i);
+    return User::getOperandUse(i);
   }
   Use &getArgOperandUse(unsigned i) {
     assert(i < getNumArgOperands() && "Out of bounds!");
-    return getOperandUse(i);
+    return User::getOperandUse(i);
   }
 
   /// If one of the arguments has the 'returned' attribute, return its
   /// operand value. Otherwise, return nullptr.
-  Value *getReturnedArgOperand() const;
+  Value *getReturnedArgOperand() const {
+    unsigned Index;
+
+    if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
+      return getArgOperand(Index - AttributeList::FirstArgIndex);
+    if (const Function *F = getCalledFunction())
+      if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
+          Index)
+        return getArgOperand(Index - AttributeList::FirstArgIndex);
+
+    return nullptr;
+  }
+
+  User::op_iterator op_begin() {
+    return OperandTraits<CallBase>::op_begin(this);
+  }
+
+  User::const_op_iterator op_begin() const {
+    return OperandTraits<CallBase>::op_begin(const_cast<CallBase *>(this));
+  }
+
+  User::op_iterator op_end() { return OperandTraits<CallBase>::op_end(this); }
+
+  User::const_op_iterator op_end() const {
+    return OperandTraits<CallBase>::op_end(const_cast<CallBase *>(this));
+  }
+
+  Value *getOperand(unsigned i_nocapture) const {
+    assert(i_nocapture < OperandTraits<CallBase>::operands(this) &&
+           "getOperand() out of range!");
+    return cast_or_null<Value>(OperandTraits<CallBase>::op_begin(
+                                   const_cast<CallBase *>(this))[i_nocapture]
+                                   .get());
+  }
+
+  void setOperand(unsigned i_nocapture, Value *Val_nocapture) {
+    assert(i_nocapture < OperandTraits<CallBase>::operands(this) &&
+           "setOperand() out of range!");
+    OperandTraits<CallBase>::op_begin(this)[i_nocapture] = Val_nocapture;
+  }
+
+  unsigned getNumOperands() const {
+    return OperandTraits<CallBase>::operands(this);
+  }
+  template <int Idx_nocapture> Use &Op() {
+    return User::OpFrom<Idx_nocapture>(this);
+  }
+  template <int Idx_nocapture> const Use &Op() const {
+    return User::OpFrom<Idx_nocapture>(this);
+  }
+
+  /// Return the function called, or null if this is an
+  /// indirect function invocation.
+  ///
+  Function *getCalledFunction() const {
+    return dyn_cast<Function>(Op<-InstTy::ArgOffset>());
+  }
+
+  /// Determine whether this call has the given attribute.
+  bool hasFnAttr(Attribute::AttrKind Kind) const {
+    assert(Kind != Attribute::NoBuiltin &&
+           "Use CallBase::isNoBuiltin() to check for Attribute::NoBuiltin");
+    return hasFnAttrImpl(Kind);
+  }
+
+  /// Determine whether this call has the given attribute.
+  bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); }
 
   /// getCallingConv/setCallingConv - Get or set the calling convention of this
   /// function call.
@@ -1631,62 +1548,103 @@ public:
                                (ID << 2));
   }
 
-  /// Return the parameter attributes for this call.
-  ///
-  AttributeList getAttributes() const { return Attrs; }
-
-  /// Set the parameter attributes for this call.
-  ///
-  void setAttributes(AttributeList A) { Attrs = A; }
 
   /// adds the attribute to the list of attributes.
-  void addAttribute(unsigned i, Attribute::AttrKind Kind);
+  void addAttribute(unsigned i, Attribute::AttrKind Kind) {
+    AttributeList PAL = getAttributes();
+    PAL = PAL.addAttribute(getContext(), i, Kind);
+    setAttributes(PAL);
+  }
 
   /// adds the attribute to the list of attributes.
-  void addAttribute(unsigned i, Attribute Attr);
+  void addAttribute(unsigned i, Attribute Attr) {
+    AttributeList PAL = getAttributes();
+    PAL = PAL.addAttribute(getContext(), i, Attr);
+    setAttributes(PAL);
+  }
 
   /// Adds the attribute to the indicated argument
-  void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
+  void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+    assert(ArgNo < getNumArgOperands() && "Out of bounds");
+    AttributeList PAL = getAttributes();
+    PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
+    setAttributes(PAL);
+  }
 
   /// Adds the attribute to the indicated argument
-  void addParamAttr(unsigned ArgNo, Attribute Attr);
+  void addParamAttr(unsigned ArgNo, Attribute Attr) {
+    assert(ArgNo < getNumArgOperands() && "Out of bounds");
+    AttributeList PAL = getAttributes();
+    PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
+    setAttributes(PAL);
+  }
 
   /// removes the attribute from the list of attributes.
-  void removeAttribute(unsigned i, Attribute::AttrKind Kind);
+  void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
+    AttributeList PAL = getAttributes();
+    PAL = PAL.removeAttribute(getContext(), i, Kind);
+    setAttributes(PAL);
+  }
 
   /// removes the attribute from the list of attributes.
-  void removeAttribute(unsigned i, StringRef Kind);
+  void removeAttribute(unsigned i, StringRef Kind) {
+    AttributeList PAL = getAttributes();
+    PAL = PAL.removeAttribute(getContext(), i, Kind);
+    setAttributes(PAL);
+  }
 
   /// Removes the attribute from the given argument
-  void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
+  void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+    assert(ArgNo < getNumArgOperands() && "Out of bounds");
+    AttributeList PAL = getAttributes();
+    PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
+    setAttributes(PAL);
+  }
 
   /// Removes the attribute from the given argument
-  void removeParamAttr(unsigned ArgNo, StringRef Kind);
+  void removeParamAttr(unsigned ArgNo, StringRef Kind) {
+    assert(ArgNo < getNumArgOperands() && "Out of bounds");
+    AttributeList PAL = getAttributes();
+    PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
+    setAttributes(PAL);
+  }
 
   /// adds the dereferenceable attribute to the list of attributes.
-  void addDereferenceableAttr(unsigned i, uint64_t Bytes);
+  void addDereferenceableAttr(unsigned i, uint64_t Bytes) {
+    AttributeList PAL = getAttributes();
+    PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
+    setAttributes(PAL);
+  }
 
   /// adds the dereferenceable_or_null attribute to the list of
   /// attributes.
-  void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
-
-  /// Determine whether this call has the given attribute.
-  bool hasFnAttr(Attribute::AttrKind Kind) const {
-    assert(Kind != Attribute::NoBuiltin &&
-           "Use CallInst::isNoBuiltin() to check for Attribute::NoBuiltin");
-    return hasFnAttrImpl(Kind);
-  }
-
-  /// Determine whether this call has the given attribute.
-  bool hasFnAttr(StringRef Kind) const {
-    return hasFnAttrImpl(Kind);
+  void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
+    AttributeList PAL = getAttributes();
+    PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
+    setAttributes(PAL);
   }
 
   /// Determine whether the return value has the given attribute.
-  bool hasRetAttr(Attribute::AttrKind Kind) const;
+  bool hasRetAttr(Attribute::AttrKind Kind) const {
+    if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
+      return true;
+
+    // Look at the callee, if available.
+    if (const Function *F = getCalledFunction())
+      return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
+    return false;
+  }
 
   /// Determine whether the argument or parameter has the given attribute.
-  bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const;
+  bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+    assert(ArgNo < getNumArgOperands() && "Param index out of bounds!");
+
+    if (Attrs.hasParamAttribute(ArgNo, Kind))
+      return true;
+    if (const Function *F = getCalledFunction())
+      return F->getAttributes().hasParamAttribute(ArgNo, Kind);
+    return false;
+  }
 
   /// Get the attribute of a given kind at a position.
   Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
@@ -1709,7 +1667,6 @@ public:
     assert(ArgNo < getNumArgOperands() && "Out of bounds");
     return getAttributes().getParamAttr(ArgNo, Kind);
   }
-
   /// Return true if the data operand at index \p i has the attribute \p
   /// A.
   ///
@@ -1723,7 +1680,28 @@ public:
   ///  \p i in [1, arg_size + 1)  -> argument number (\p i - 1)
   ///  \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
   ///     (\p i - 1) in the operand list.
-  bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const;
+  bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
+    // There are getNumOperands() - (InstTy::ArgOffset - 1) data operands.
+    // The last operand is the callee.
+    assert(i < (getNumOperands() - InstTy::ArgOffset + 1) &&
+           "Data operand index out of bounds!");
+
+    // The attribute A can either be directly specified, if the operand in
+    // question is a call argument; or be indirectly implied by the kind of its
+    // containing operand bundle, if the operand is a bundle operand.
+
+    if (i == AttributeList::ReturnIndex)
+      return hasRetAttr(Kind);
+
+    // FIXME: Avoid these i - 1 calculations and update the API to use
+    // zero-based indices.
+    if (i < (getNumArgOperands() + 1))
+      return paramHasAttr(i - 1, Kind);
+
+    assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
+           "Must be either a call argument or an operand bundle!");
+    return bundleOperandHasAttr(i - 1, Kind);
+  }
 
   /// Extract the alignment of the return value.
   unsigned getRetAlignment() const { return Attrs.getRetAlignment(); }
@@ -1745,7 +1723,7 @@ public:
     return Attrs.getDereferenceableOrNullBytes(i);
   }
 
-  /// @brief Determine if the return value is marked with NoAlias attribute.
+  /// Determine if the return value is marked with NoAlias attribute.
   bool returnDoesNotAlias() const {
     return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
   }
@@ -1765,15 +1743,6 @@ public:
   void setIsNoInline() {
     addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
   }
-
-  /// Return true if the call can return twice
-  bool canReturnTwice() const {
-    return hasFnAttr(Attribute::ReturnsTwice);
-  }
-  void setCanReturnTwice() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice);
-  }
-
   /// Determine if the call does not access memory.
   bool doesNotAccessMemory() const {
     return hasFnAttr(Attribute::ReadNone);
@@ -1798,7 +1767,7 @@ public:
     addAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly);
   }
 
-  /// @brief Determine if the call can access memmory only using pointers based
+  /// Determine if the call can access memmory only using pointers based
   /// on its arguments.
   bool onlyAccessesArgMemory() const {
     return hasFnAttr(Attribute::ArgMemOnly);
@@ -1807,7 +1776,7 @@ public:
     addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly);
   }
 
-  /// @brief Determine if the function may only access memory that is
+  /// Determine if the function may only access memory that is
   /// inaccessible from the IR.
   bool onlyAccessesInaccessibleMemory() const {
     return hasFnAttr(Attribute::InaccessibleMemOnly);
@@ -1816,7 +1785,7 @@ public:
     addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly);
   }
 
-  /// @brief Determine if the function may only access memory that is
+  /// Determine if the function may only access memory that is
   /// either inaccessible from the IR or pointed to by its arguments.
   bool onlyAccessesInaccessibleMemOrArgMem() const {
     return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
@@ -1824,26 +1793,28 @@ public:
   void setOnlyAccessesInaccessibleMemOrArgMem() {
     addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOrArgMemOnly);
   }
-
   /// Determine if the call cannot return.
   bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
   void setDoesNotReturn() {
     addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
   }
 
+  /// Determine if the call should not perform indirect branch tracking.
+  bool doesNoCfCheck() const { return hasFnAttr(Attribute::NoCfCheck); }
+
   /// Determine if the call cannot unwind.
   bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
   void setDoesNotThrow() {
     addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
   }
 
-  /// Determine if the call cannot be duplicated.
+  /// Determine if the invoke cannot be duplicated.
   bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); }
   void setCannotDuplicate() {
     addAttribute(AttributeList::FunctionIndex, Attribute::NoDuplicate);
   }
 
-  /// Determine if the call is convergent
+  /// Determine if the invoke is convergent
   bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
   void setConvergent() {
     addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
@@ -1866,18 +1837,10 @@ public:
   bool hasByValArgument() const {
     return Attrs.hasAttrSomewhere(Attribute::ByVal);
   }
-
-  /// Return the function called, or null if this is an
-  /// indirect function invocation.
-  ///
-  Function *getCalledFunction() const {
-    return dyn_cast<Function>(Op<-1>());
-  }
-
   /// Get a pointer to the function that is invoked by this
   /// instruction.
-  const Value *getCalledValue() const { return Op<-1>(); }
-        Value *getCalledValue()       { return Op<-1>(); }
+  const Value *getCalledValue() const { return Op<-InstTy::ArgOffset>(); }
+  Value *getCalledValue() { return Op<-InstTy::ArgOffset>(); }
 
   /// Set the function called.
   void setCalledFunction(Value* Fn) {
@@ -1889,23 +1852,10 @@ public:
     this->FTy = FTy;
     assert(FTy == cast<FunctionType>(
                       cast<PointerType>(Fn->getType())->getElementType()));
-    Op<-1>() = Fn;
+    Op<-InstTy::ArgOffset>() = Fn;
   }
 
-  /// Check if this call is an inline asm statement.
-  bool isInlineAsm() const {
-    return isa<InlineAsm>(Op<-1>());
-  }
-
-  // Methods for support type inquiry through isa, cast, and dyn_cast:
-  static bool classof(const Instruction *I) {
-    return I->getOpcode() == Instruction::Call;
-  }
-  static bool classof(const Value *V) {
-    return isa<Instruction>(V) && classof(cast<Instruction>(V));
-  }
-
-private:
+protected:
   template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
     if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind))
       return true;
@@ -1920,7 +1870,227 @@ private:
                                              Kind);
     return false;
   }
+};
+
+//===----------------------------------------------------------------------===//
+/// This class represents a function call, abstracting a target
+/// machine's calling convention.  This class uses low bit of the SubClassData
+/// field to indicate whether or not this is a tail call.  The rest of the bits
+/// hold the calling convention of the call.
+///
+class CallInst : public CallBase<CallInst> {
+  friend class OperandBundleUser<CallInst, User::op_iterator>;
+
+  CallInst(const CallInst &CI);
+
+  /// Construct a CallInst given a range of arguments.
+  /// Construct a CallInst from a range of arguments
+  inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+                  ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
+                  Instruction *InsertBefore);
+
+  inline CallInst(Value *Func, ArrayRef<Value *> Args,
+                  ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
+                  Instruction *InsertBefore)
+      : CallInst(cast<FunctionType>(
+                     cast<PointerType>(Func->getType())->getElementType()),
+                 Func, Args, Bundles, NameStr, InsertBefore) {}
+
+  inline CallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
+                  Instruction *InsertBefore)
+      : CallInst(Func, Args, None, NameStr, InsertBefore) {}
+
+  /// Construct a CallInst given a range of arguments.
+  /// Construct a CallInst from a range of arguments
+  inline CallInst(Value *Func, ArrayRef<Value *> Args,
+                  ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
+                  BasicBlock *InsertAtEnd);
+
+  explicit CallInst(Value *F, const Twine &NameStr, Instruction *InsertBefore);
+
+  CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+  void init(Value *Func, ArrayRef<Value *> Args,
+            ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr) {
+    init(cast<FunctionType>(
+             cast<PointerType>(Func->getType())->getElementType()),
+         Func, Args, Bundles, NameStr);
+  }
+  void init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
+            ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
+  void init(Value *Func, const Twine &NameStr);
+
+protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+
+  CallInst *cloneImpl() const;
+
+public:
+  static constexpr int ArgOffset = 1;
+
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles = None,
+                          const Twine &NameStr = "",
+                          Instruction *InsertBefore = nullptr) {
+    return Create(cast<FunctionType>(
+                      cast<PointerType>(Func->getType())->getElementType()),
+                  Func, Args, Bundles, NameStr, InsertBefore);
+  }
+
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          const Twine &NameStr,
+                          Instruction *InsertBefore = nullptr) {
+    return Create(cast<FunctionType>(
+                      cast<PointerType>(Func->getType())->getElementType()),
+                  Func, Args, None, NameStr, InsertBefore);
+  }
+
+  static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+                          const Twine &NameStr,
+                          Instruction *InsertBefore = nullptr) {
+    return new (unsigned(Args.size() + 1))
+        CallInst(Ty, Func, Args, None, NameStr, InsertBefore);
+  }
+
+  static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles = None,
+                          const Twine &NameStr = "",
+                          Instruction *InsertBefore = nullptr) {
+    const unsigned TotalOps =
+        unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+    const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (TotalOps, DescriptorBytes)
+        CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore);
+  }
+
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles,
+                          const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    const unsigned TotalOps =
+        unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+    const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (TotalOps, DescriptorBytes)
+        CallInst(Func, Args, Bundles, NameStr, InsertAtEnd);
+  }
+
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    return new (unsigned(Args.size() + 1))
+        CallInst(Func, Args, None, NameStr, InsertAtEnd);
+  }
+
+  static CallInst *Create(Value *F, const Twine &NameStr = "",
+                          Instruction *InsertBefore = nullptr) {
+    return new (1) CallInst(F, NameStr, InsertBefore);
+  }
+
+  static CallInst *Create(Value *F, const Twine &NameStr,
+                          BasicBlock *InsertAtEnd) {
+    return new (1) CallInst(F, NameStr, InsertAtEnd);
+  }
+
+  /// Create a clone of \p CI with a different set of operand bundles and
+  /// insert it before \p InsertPt.
+  ///
+  /// The returned call instruction is identical \p CI in every way except that
+  /// the operand bundles for the new instruction are set to the operand bundles
+  /// in \p Bundles.
+  static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
+                          Instruction *InsertPt = nullptr);
+
+  /// Generate the IR for a call to malloc:
+  /// 1. Compute the malloc call's argument as the specified type's size,
+  ///    possibly multiplied by the array size if the array size is not
+  ///    constant 1.
+  /// 2. Call malloc with that argument.
+  /// 3. Bitcast the result of the malloc call to the specified type.
+  static Instruction *CreateMalloc(Instruction *InsertBefore, Type *IntPtrTy,
+                                   Type *AllocTy, Value *AllocSize,
+                                   Value *ArraySize = nullptr,
+                                   Function *MallocF = nullptr,
+                                   const Twine &Name = "");
+  static Instruction *CreateMalloc(BasicBlock *InsertAtEnd, Type *IntPtrTy,
+                                   Type *AllocTy, Value *AllocSize,
+                                   Value *ArraySize = nullptr,
+                                   Function *MallocF = nullptr,
+                                   const Twine &Name = "");
+  static Instruction *CreateMalloc(Instruction *InsertBefore, Type *IntPtrTy,
+                                   Type *AllocTy, Value *AllocSize,
+                                   Value *ArraySize = nullptr,
+                                   ArrayRef<OperandBundleDef> Bundles = None,
+                                   Function *MallocF = nullptr,
+                                   const Twine &Name = "");
+  static Instruction *CreateMalloc(BasicBlock *InsertAtEnd, Type *IntPtrTy,
+                                   Type *AllocTy, Value *AllocSize,
+                                   Value *ArraySize = nullptr,
+                                   ArrayRef<OperandBundleDef> Bundles = None,
+                                   Function *MallocF = nullptr,
+                                   const Twine &Name = "");
+  /// Generate the IR for a call to the builtin free function.
+  static Instruction *CreateFree(Value *Source, Instruction *InsertBefore);
+  static Instruction *CreateFree(Value *Source, BasicBlock *InsertAtEnd);
+  static Instruction *CreateFree(Value *Source,
+                                 ArrayRef<OperandBundleDef> Bundles,
+                                 Instruction *InsertBefore);
+  static Instruction *CreateFree(Value *Source,
+                                 ArrayRef<OperandBundleDef> Bundles,
+                                 BasicBlock *InsertAtEnd);
+
+  // Note that 'musttail' implies 'tail'.
+  enum TailCallKind {
+    TCK_None = 0,
+    TCK_Tail = 1,
+    TCK_MustTail = 2,
+    TCK_NoTail = 3
+  };
+  TailCallKind getTailCallKind() const {
+    return TailCallKind(getSubclassDataFromInstruction() & 3);
+  }
+
+  bool isTailCall() const {
+    unsigned Kind = getSubclassDataFromInstruction() & 3;
+    return Kind == TCK_Tail || Kind == TCK_MustTail;
+  }
+
+  bool isMustTailCall() const {
+    return (getSubclassDataFromInstruction() & 3) == TCK_MustTail;
+  }
+
+  bool isNoTailCall() const {
+    return (getSubclassDataFromInstruction() & 3) == TCK_NoTail;
+  }
+
+  void setTailCall(bool isTC = true) {
+    setInstructionSubclassData((getSubclassDataFromInstruction() & ~3) |
+                               unsigned(isTC ? TCK_Tail : TCK_None));
+  }
+
+  void setTailCallKind(TailCallKind TCK) {
+    setInstructionSubclassData((getSubclassDataFromInstruction() & ~3) |
+                               unsigned(TCK));
+  }
+
+  /// Return true if the call can return twice
+  bool canReturnTwice() const { return hasFnAttr(Attribute::ReturnsTwice); }
+  void setCanReturnTwice() {
+    addAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice);
+  }
+
+  /// Check if this call is an inline asm statement.
+  bool isInlineAsm() const { return isa<InlineAsm>(Op<-1>()); }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::Call;
+  }
+  static bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
 
+private:
   // Shadow Instruction::setInstructionSubclassData with a private forwarding
   // method so that subclasses cannot accidentally use it.
   void setInstructionSubclassData(unsigned short D) {
@@ -1929,17 +2099,19 @@ private:
 };
 
 template <>
-struct OperandTraits<CallInst> : public VariadicOperandTraits<CallInst, 1> {
-};
+struct OperandTraits<CallBase<CallInst>>
+    : public VariadicOperandTraits<CallBase<CallInst>, 1> {};
 
 CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
                    ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
                    BasicBlock *InsertAtEnd)
-    : Instruction(
-          cast<FunctionType>(cast<PointerType>(Func->getType())
-                                 ->getElementType())->getReturnType(),
-          Instruction::Call, OperandTraits<CallInst>::op_end(this) -
-                                 (Args.size() + CountBundleInputs(Bundles) + 1),
+    : CallBase<CallInst>(
+          cast<FunctionType>(
+              cast<PointerType>(Func->getType())->getElementType())
+              ->getReturnType(),
+          Instruction::Call,
+          OperandTraits<CallBase<CallInst>>::op_end(this) -
+              (Args.size() + CountBundleInputs(Bundles) + 1),
           unsigned(Args.size() + CountBundleInputs(Bundles) + 1), InsertAtEnd) {
   init(Func, Args, Bundles, NameStr);
 }
@@ -1947,19 +2119,14 @@ CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
 CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
                    ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
                    Instruction *InsertBefore)
-    : Instruction(Ty->getReturnType(), Instruction::Call,
-                  OperandTraits<CallInst>::op_end(this) -
-                      (Args.size() + CountBundleInputs(Bundles) + 1),
-                  unsigned(Args.size() + CountBundleInputs(Bundles) + 1),
-                  InsertBefore) {
+    : CallBase<CallInst>(Ty->getReturnType(), Instruction::Call,
+                         OperandTraits<CallBase<CallInst>>::op_end(this) -
+                             (Args.size() + CountBundleInputs(Bundles) + 1),
+                         unsigned(Args.size() + CountBundleInputs(Bundles) + 1),
+                         InsertBefore) {
   init(Ty, Func, Args, Bundles, NameStr);
 }
 
-// Note: if you get compile errors about private methods then
-//       please update your code to use the high-level operand
-//       interfaces. See line 943 above.
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CallInst, Value)
-
 //===----------------------------------------------------------------------===//
 //                               SelectInst Class
 //===----------------------------------------------------------------------===//
@@ -2263,7 +2430,7 @@ public:
 
   /// Return the shuffle mask value for the specified element of the mask.
   /// Return -1 if the element is undef.
-  static int getMaskValue(Constant *Mask, unsigned Elt);
+  static int getMaskValue(const Constant *Mask, unsigned Elt);
 
   /// Return the shuffle mask value of this instruction for the given element
   /// index. Return -1 if the element is undef.
@@ -2273,7 +2440,8 @@ public:
 
   /// Convert the input shuffle mask operand to a vector of integers. Undefined
   /// elements of the mask are returned as -1.
-  static void getShuffleMask(Constant *Mask, SmallVectorImpl<int> &Result);
+  static void getShuffleMask(const Constant *Mask,
+                             SmallVectorImpl<int> &Result);
 
   /// Return the mask for this instruction as a vector of integers. Undefined
   /// elements of the mask are returned as -1.
@@ -2287,6 +2455,176 @@ public:
     return Mask;
   }
 
+  /// Return true if this shuffle returns a vector with a different number of
+  /// elements than its source elements.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2>
+  bool changesLength() const {
+    unsigned NumSourceElts = Op<0>()->getType()->getVectorNumElements();
+    unsigned NumMaskElts = getMask()->getType()->getVectorNumElements();
+    return NumSourceElts != NumMaskElts;
+  }
+
+  /// Return true if this shuffle mask chooses elements from exactly one source
+  /// vector.
+  /// Example: <7,5,undef,7>
+  /// This assumes that vector operands are the same length as the mask.
+  static bool isSingleSourceMask(ArrayRef<int> Mask);
+  static bool isSingleSourceMask(const Constant *Mask) {
+    assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+    SmallVector<int, 16> MaskAsInts;
+    getShuffleMask(Mask, MaskAsInts);
+    return isSingleSourceMask(MaskAsInts);
+  }
+
+  /// Return true if this shuffle chooses elements from exactly one source
+  /// vector without changing the length of that vector.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <3,0,undef,3>
+  /// TODO: Optionally allow length-changing shuffles.
+  bool isSingleSource() const {
+    return !changesLength() && isSingleSourceMask(getMask());
+  }
+
+  /// Return true if this shuffle mask chooses elements from exactly one source
+  /// vector without lane crossings. A shuffle using this mask is not
+  /// necessarily a no-op because it may change the number of elements from its
+  /// input vectors or it may provide demanded bits knowledge via undef lanes.
+  /// Example: <undef,undef,2,3>
+  static bool isIdentityMask(ArrayRef<int> Mask);
+  static bool isIdentityMask(const Constant *Mask) {
+    assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+    SmallVector<int, 16> MaskAsInts;
+    getShuffleMask(Mask, MaskAsInts);
+    return isIdentityMask(MaskAsInts);
+  }
+
+  /// Return true if this shuffle mask chooses elements from exactly one source
+  /// vector without lane crossings and does not change the number of elements
+  /// from its input vectors.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef>
+  /// TODO: Optionally allow length-changing shuffles.
+  bool isIdentity() const {
+    return !changesLength() && isIdentityMask(getShuffleMask());
+  }
+
+  /// Return true if this shuffle mask chooses elements from its source vectors
+  /// without lane crossings. A shuffle using this mask would be
+  /// equivalent to a vector select with a constant condition operand.
+  /// Example: <4,1,6,undef>
+  /// This returns false if the mask does not choose from both input vectors.
+  /// In that case, the shuffle is better classified as an identity shuffle.
+  /// This assumes that vector operands are the same length as the mask
+  /// (a length-changing shuffle can never be equivalent to a vector select).
+  static bool isSelectMask(ArrayRef<int> Mask);
+  static bool isSelectMask(const Constant *Mask) {
+    assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+    SmallVector<int, 16> MaskAsInts;
+    getShuffleMask(Mask, MaskAsInts);
+    return isSelectMask(MaskAsInts);
+  }
+
+  /// Return true if this shuffle chooses elements from its source vectors
+  /// without lane crossings and all operands have the same number of elements.
+  /// In other words, this shuffle is equivalent to a vector select with a
+  /// constant condition operand.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,1,6,3>
+  /// This returns false if the mask does not choose from both input vectors.
+  /// In that case, the shuffle is better classified as an identity shuffle.
+  /// TODO: Optionally allow length-changing shuffles.
+  bool isSelect() const {
+    return !changesLength() && isSelectMask(getMask());
+  }
+
+  /// Return true if this shuffle mask swaps the order of elements from exactly
+  /// one source vector.
+  /// Example: <7,6,undef,4>
+  /// This assumes that vector operands are the same length as the mask.
+  static bool isReverseMask(ArrayRef<int> Mask);
+  static bool isReverseMask(const Constant *Mask) {
+    assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+    SmallVector<int, 16> MaskAsInts;
+    getShuffleMask(Mask, MaskAsInts);
+    return isReverseMask(MaskAsInts);
+  }
+
+  /// Return true if this shuffle swaps the order of elements from exactly
+  /// one source vector.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <3,undef,1,undef>
+  /// TODO: Optionally allow length-changing shuffles.
+  bool isReverse() const {
+    return !changesLength() && isReverseMask(getMask());
+  }
+
+  /// Return true if this shuffle mask chooses all elements with the same value
+  /// as the first element of exactly one source vector.
+  /// Example: <4,undef,undef,4>
+  /// This assumes that vector operands are the same length as the mask.
+  static bool isZeroEltSplatMask(ArrayRef<int> Mask);
+  static bool isZeroEltSplatMask(const Constant *Mask) {
+    assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+    SmallVector<int, 16> MaskAsInts;
+    getShuffleMask(Mask, MaskAsInts);
+    return isZeroEltSplatMask(MaskAsInts);
+  }
+
+  /// Return true if all elements of this shuffle are the same value as the
+  /// first element of exactly one source vector without changing the length
+  /// of that vector.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,0,undef,0>
+  /// TODO: Optionally allow length-changing shuffles.
+  /// TODO: Optionally allow splats from other elements.
+  bool isZeroEltSplat() const {
+    return !changesLength() && isZeroEltSplatMask(getMask());
+  }
+
+  /// Return true if this shuffle mask is a transpose mask.
+  /// Transpose vector masks transpose a 2xn matrix. They read corresponding
+  /// even- or odd-numbered vector elements from two n-dimensional source
+  /// vectors and write each result into consecutive elements of an
+  /// n-dimensional destination vector. Two shuffles are necessary to complete
+  /// the transpose, one for the even elements and another for the odd elements.
+  /// This description closely follows how the TRN1 and TRN2 AArch64
+  /// instructions operate.
+  ///
+  /// For example, a simple 2x2 matrix can be transposed with:
+  ///
+  ///   ; Original matrix
+  ///   m0 = < a, b >
+  ///   m1 = < c, d >
+  ///
+  ///   ; Transposed matrix
+  ///   t0 = < a, c > = shufflevector m0, m1, < 0, 2 >
+  ///   t1 = < b, d > = shufflevector m0, m1, < 1, 3 >
+  ///
+  /// For matrices having greater than n columns, the resulting nx2 transposed
+  /// matrix is stored in two result vectors such that one vector contains
+  /// interleaved elements from all the even-numbered rows and the other vector
+  /// contains interleaved elements from all the odd-numbered rows. For example,
+  /// a 2x4 matrix can be transposed with:
+  ///
+  ///   ; Original matrix
+  ///   m0 = < a, b, c, d >
+  ///   m1 = < e, f, g, h >
+  ///
+  ///   ; Transposed matrix
+  ///   t0 = < a, e, c, g > = shufflevector m0, m1 < 0, 4, 2, 6 >
+  ///   t1 = < b, f, d, h > = shufflevector m0, m1 < 1, 5, 3, 7 >
+  static bool isTransposeMask(ArrayRef<int> Mask);
+  static bool isTransposeMask(const Constant *Mask) {
+    assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+    SmallVector<int, 16> MaskAsInts;
+    getShuffleMask(Mask, MaskAsInts);
+    return isTransposeMask(MaskAsInts);
+  }
+
+  /// Return true if this shuffle transposes the elements of its inputs without
+  /// changing the length of the vectors. This operation may also be known as a
+  /// merge or interleave. See the description for isTransposeMask() for the
+  /// exact specification.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <0,4,2,6>
+  bool isTranspose() const {
+    return !changesLength() && isTransposeMask(getMask());
+  }
+
   /// Change values in a shuffle permute mask assuming the two vector operands
   /// of length InVecNumElts have swapped position.
   static void commuteShuffleMask(MutableArrayRef<int> Mask,
@@ -3547,13 +3885,9 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
 /// Invoke instruction.  The SubclassData field is used to hold the
 /// calling convention of the call.
 ///
-class InvokeInst : public TerminatorInst,
-                   public OperandBundleUser<InvokeInst, User::op_iterator> {
+class InvokeInst : public CallBase<InvokeInst> {
   friend class OperandBundleUser<InvokeInst, User::op_iterator>;
 
-  AttributeList Attrs;
-  FunctionType *FTy;
-
   InvokeInst(const InvokeInst &BI);
 
   /// Construct an InvokeInst given a range of arguments.
@@ -3580,7 +3914,6 @@ class InvokeInst : public TerminatorInst,
                     unsigned Values, const Twine &NameStr,
                     BasicBlock *InsertAtEnd);
 
-  bool hasDescriptor() const { return HasDescriptor; }
 
   void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
             ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
@@ -3601,6 +3934,7 @@ protected:
   InvokeInst *cloneImpl() const;
 
 public:
+  static constexpr int ArgOffset = 3;
   static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
                             BasicBlock *IfException, ArrayRef<Value *> Args,
                             const Twine &NameStr,
@@ -3674,299 +4008,15 @@ public:
   static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles,
                             Instruction *InsertPt = nullptr);
 
-  /// Provide fast operand accessors
-  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
-  FunctionType *getFunctionType() const { return FTy; }
-
-  void mutateFunctionType(FunctionType *FTy) {
-    mutateType(FTy->getReturnType());
-    this->FTy = FTy;
-  }
-
-  /// Return the number of invoke arguments.
-  ///
-  unsigned getNumArgOperands() const {
-    return getNumOperands() - getNumTotalBundleOperands() - 3;
-  }
-
-  /// getArgOperand/setArgOperand - Return/set the i-th invoke argument.
-  ///
-  Value *getArgOperand(unsigned i) const {
-    assert(i < getNumArgOperands() && "Out of bounds!");
-    return getOperand(i);
-  }
-  void setArgOperand(unsigned i, Value *v) {
-    assert(i < getNumArgOperands() && "Out of bounds!");
-    setOperand(i, v);
-  }
-
-  /// Return the iterator pointing to the beginning of the argument list.
-  op_iterator arg_begin() { return op_begin(); }
-
-  /// Return the iterator pointing to the end of the argument list.
-  op_iterator arg_end() {
-    // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
-    return op_end() - getNumTotalBundleOperands() - 3;
-  }
-
-  /// Iteration adapter for range-for loops.
-  iterator_range<op_iterator> arg_operands() {
-    return make_range(arg_begin(), arg_end());
-  }
-
-  /// Return the iterator pointing to the beginning of the argument list.
-  const_op_iterator arg_begin() const { return op_begin(); }
-
-  /// Return the iterator pointing to the end of the argument list.
-  const_op_iterator arg_end() const {
-    // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
-    return op_end() - getNumTotalBundleOperands() - 3;
-  }
-
-  /// Iteration adapter for range-for loops.
-  iterator_range<const_op_iterator> arg_operands() const {
-    return make_range(arg_begin(), arg_end());
-  }
-
-  /// Wrappers for getting the \c Use of a invoke argument.
-  const Use &getArgOperandUse(unsigned i) const {
-    assert(i < getNumArgOperands() && "Out of bounds!");
-    return getOperandUse(i);
-  }
-  Use &getArgOperandUse(unsigned i) {
-    assert(i < getNumArgOperands() && "Out of bounds!");
-    return getOperandUse(i);
-  }
-
-  /// If one of the arguments has the 'returned' attribute, return its
-  /// operand value. Otherwise, return nullptr.
-  Value *getReturnedArgOperand() const;
-
-  /// getCallingConv/setCallingConv - Get or set the calling convention of this
-  /// function call.
-  CallingConv::ID getCallingConv() const {
-    return static_cast<CallingConv::ID>(getSubclassDataFromInstruction());
-  }
-  void setCallingConv(CallingConv::ID CC) {
-    auto ID = static_cast<unsigned>(CC);
-    assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
-    setInstructionSubclassData(ID);
-  }
-
-  /// Return the parameter attributes for this invoke.
-  ///
-  AttributeList getAttributes() const { return Attrs; }
-
-  /// Set the parameter attributes for this invoke.
-  ///
-  void setAttributes(AttributeList A) { Attrs = A; }
-
-  /// adds the attribute to the list of attributes.
-  void addAttribute(unsigned i, Attribute::AttrKind Kind);
-
-  /// adds the attribute to the list of attributes.
-  void addAttribute(unsigned i, Attribute Attr);
-
-  /// Adds the attribute to the indicated argument
-  void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
-
-  /// removes the attribute from the list of attributes.
-  void removeAttribute(unsigned i, Attribute::AttrKind Kind);
-
-  /// removes the attribute from the list of attributes.
-  void removeAttribute(unsigned i, StringRef Kind);
-
-  /// Removes the attribute from the given argument
-  void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
-
-  /// adds the dereferenceable attribute to the list of attributes.
-  void addDereferenceableAttr(unsigned i, uint64_t Bytes);
-
-  /// adds the dereferenceable_or_null attribute to the list of
-  /// attributes.
-  void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
-
-  /// Determine whether this call has the given attribute.
-  bool hasFnAttr(Attribute::AttrKind Kind) const {
-    assert(Kind != Attribute::NoBuiltin &&
-           "Use CallInst::isNoBuiltin() to check for Attribute::NoBuiltin");
-    return hasFnAttrImpl(Kind);
-  }
-
-  /// Determine whether this call has the given attribute.
-  bool hasFnAttr(StringRef Kind) const {
-    return hasFnAttrImpl(Kind);
-  }
-
-  /// Determine whether the return value has the given attribute.
-  bool hasRetAttr(Attribute::AttrKind Kind) const;
-
-  /// Determine whether the argument or parameter has the given attribute.
-  bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const;
-
-  /// Get the attribute of a given kind at a position.
-  Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
-    return getAttributes().getAttribute(i, Kind);
-  }
-
-  /// Get the attribute of a given kind at a position.
-  Attribute getAttribute(unsigned i, StringRef Kind) const {
-    return getAttributes().getAttribute(i, Kind);
-  }
-
-  /// Return true if the data operand at index \p i has the attribute \p
-  /// A.
-  ///
-  /// Data operands include invoke arguments and values used in operand bundles,
-  /// but does not include the invokee operand, or the two successor blocks.
-  /// This routine dispatches to the underlying AttributeList or the
-  /// OperandBundleUser as appropriate.
-  ///
-  /// The index \p i is interpreted as
-  ///
-  ///  \p i == Attribute::ReturnIndex  -> the return value
-  ///  \p i in [1, arg_size + 1)  -> argument number (\p i - 1)
-  ///  \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
-  ///     (\p i - 1) in the operand list.
-  bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const;
-
-  /// Extract the alignment of the return value.
-  unsigned getRetAlignment() const { return Attrs.getRetAlignment(); }
-
-  /// Extract the alignment for a call or parameter (0=unknown).
-  unsigned getParamAlignment(unsigned ArgNo) const {
-    return Attrs.getParamAlignment(ArgNo);
-  }
-
-  /// Extract the number of dereferenceable bytes for a call or
-  /// parameter (0=unknown).
-  uint64_t getDereferenceableBytes(unsigned i) const {
-    return Attrs.getDereferenceableBytes(i);
-  }
-
-  /// Extract the number of dereferenceable_or_null bytes for a call or
-  /// parameter (0=unknown).
-  uint64_t getDereferenceableOrNullBytes(unsigned i) const {
-    return Attrs.getDereferenceableOrNullBytes(i);
-  }
-
-  /// @brief Determine if the return value is marked with NoAlias attribute.
-  bool returnDoesNotAlias() const {
-    return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
-  }
-
-  /// Return true if the call should not be treated as a call to a
-  /// builtin.
-  bool isNoBuiltin() const {
-    // We assert in hasFnAttr if one passes in Attribute::NoBuiltin, so we have
-    // to check it by hand.
-    return hasFnAttrImpl(Attribute::NoBuiltin) &&
-      !hasFnAttrImpl(Attribute::Builtin);
-  }
-
-  /// Determine if the call requires strict floating point semantics.
-  bool isStrictFP() const { return hasFnAttr(Attribute::StrictFP); }
-
-  /// Return true if the call should not be inlined.
-  bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
-  void setIsNoInline() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
-  }
-
-  /// Determine if the call does not access memory.
-  bool doesNotAccessMemory() const {
-    return hasFnAttr(Attribute::ReadNone);
-  }
-  void setDoesNotAccessMemory() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
-  }
-
-  /// Determine if the call does not access or only reads memory.
-  bool onlyReadsMemory() const {
-    return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
-  }
-  void setOnlyReadsMemory() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
-  }
-
-  /// Determine if the call does not access or only writes memory.
-  bool doesNotReadMemory() const {
-    return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly);
-  }
-  void setDoesNotReadMemory() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly);
-  }
-
-  /// @brief Determine if the call access memmory only using it's pointer
-  /// arguments.
-  bool onlyAccessesArgMemory() const {
-    return hasFnAttr(Attribute::ArgMemOnly);
-  }
-  void setOnlyAccessesArgMemory() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly);
-  }
-
-  /// @brief Determine if the function may only access memory that is
-  /// inaccessible from the IR.
-  bool onlyAccessesInaccessibleMemory() const {
-    return hasFnAttr(Attribute::InaccessibleMemOnly);
-  }
-  void setOnlyAccessesInaccessibleMemory() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly);
-  }
-
-  /// @brief Determine if the function may only access memory that is
-  /// either inaccessible from the IR or pointed to by its arguments.
-  bool onlyAccessesInaccessibleMemOrArgMem() const {
-    return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
-  }
-  void setOnlyAccessesInaccessibleMemOrArgMem() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOrArgMemOnly);
-  }
-
-  /// Determine if the call cannot return.
-  bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
-  void setDoesNotReturn() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
-  }
+  /// Determine if the call should not perform indirect branch tracking.
+  bool doesNoCfCheck() const { return hasFnAttr(Attribute::NoCfCheck); }
 
   /// Determine if the call cannot unwind.
   bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
   void setDoesNotThrow() {
     addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
   }
-
-  /// Determine if the invoke cannot be duplicated.
-  bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); }
-  void setCannotDuplicate() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::NoDuplicate);
-  }
-
-  /// Determine if the invoke is convergent
-  bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
-  void setConvergent() {
-    addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
-  }
-  void setNotConvergent() {
-    removeAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
-  }
-
-  /// Determine if the call returns a structure through first
-  /// pointer argument.
-  bool hasStructRetAttr() const {
-    if (getNumArgOperands() == 0)
-      return false;
-
-    // Be friendly and also check the callee.
-    return paramHasAttr(0, Attribute::StructRet);
-  }
-
-  /// Determine if any call argument is an aggregate passed by value.
-  bool hasByValArgument() const {
-    return Attrs.hasAttrSomewhere(Attribute::ByVal);
-  }
-
+  
   /// Return the function called, or null if this is an
   /// indirect function invocation.
   ///
@@ -4031,20 +4081,6 @@ public:
   }
 
 private:
-  template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
-    if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind))
-      return true;
-
-    // Operand bundles override attributes on the called function, but don't
-    // override attributes directly present on the invoke instruction.
-    if (isFnAttrDisallowedByOpBundle(Kind))
-      return false;
-
-    if (const Function *F = getCalledFunction())
-      return F->getAttributes().hasAttribute(AttributeList::FunctionIndex,
-                                             Kind);
-    return false;
-  }
 
   // Shadow Instruction::setInstructionSubclassData with a private forwarding
   // method so that subclasses cannot accidentally use it.
@@ -4054,16 +4090,17 @@ private:
 };
 
 template <>
-struct OperandTraits<InvokeInst> : public VariadicOperandTraits<InvokeInst, 3> {
-};
+struct OperandTraits<CallBase<InvokeInst>>
+    : public VariadicOperandTraits<CallBase<InvokeInst>, 3> {};
 
 InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
                        BasicBlock *IfException, ArrayRef<Value *> Args,
                        ArrayRef<OperandBundleDef> Bundles, unsigned Values,
                        const Twine &NameStr, Instruction *InsertBefore)
-    : TerminatorInst(Ty->getReturnType(), Instruction::Invoke,
-                     OperandTraits<InvokeInst>::op_end(this) - Values, Values,
-                     InsertBefore) {
+    : CallBase<InvokeInst>(Ty->getReturnType(), Instruction::Invoke,
+                           OperandTraits<CallBase<InvokeInst>>::op_end(this) -
+                               Values,
+                           Values, InsertBefore) {
   init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr);
 }
 
@@ -4071,15 +4108,16 @@ InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal,
                        BasicBlock *IfException, ArrayRef<Value *> Args,
                        ArrayRef<OperandBundleDef> Bundles, unsigned Values,
                        const Twine &NameStr, BasicBlock *InsertAtEnd)
-    : TerminatorInst(
-          cast<FunctionType>(cast<PointerType>(Func->getType())
-                                 ->getElementType())->getReturnType(),
-          Instruction::Invoke, OperandTraits<InvokeInst>::op_end(this) - Values,
-          Values, InsertAtEnd) {
+    : CallBase<InvokeInst>(
+          cast<FunctionType>(
+              cast<PointerType>(Func->getType())->getElementType())
+              ->getReturnType(),
+          Instruction::Invoke,
+          OperandTraits<CallBase<InvokeInst>>::op_end(this) - Values, Values,
+          InsertAtEnd) {
   init(Func, IfNormal, IfException, Args, Bundles, NameStr);
 }
 
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
 
 //===----------------------------------------------------------------------===//
 //                              ResumeInst Class
@@ -5190,6 +5228,26 @@ public:
   }
 };
 
+/// A helper function that returns the pointer operand of a load or store
+/// instruction. Returns nullptr if not load or store.
+inline Value *getLoadStorePointerOperand(Value *V) {
+  if (auto *Load = dyn_cast<LoadInst>(V))
+    return Load->getPointerOperand();
+  if (auto *Store = dyn_cast<StoreInst>(V))
+    return Store->getPointerOperand();
+  return nullptr;
+}
+
+/// A helper function that returns the pointer operand of a load, store
+/// or GEP instruction. Returns nullptr if not load, store, or GEP.
+inline Value *getPointerOperand(Value *V) {
+  if (auto *Ptr = getLoadStorePointerOperand(V))
+    return Ptr;
+  if (auto *Gep = dyn_cast<GetElementPtrInst>(V))
+    return Gep->getPointerOperand();
+  return nullptr;
+}
+
 } // end namespace llvm
 
 #endif // LLVM_IR_INSTRUCTIONS_H
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index 2ca0a24cbae1..6650afcca7fb 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -93,6 +93,10 @@ namespace llvm {
       return cast<MetadataAsValue>(getArgOperand(2))->getMetadata();
     }
 
+    /// Get the size (in bits) of the variable, or fragment of the variable that
+    /// is described.
+    Optional<uint64_t> getFragmentSizeInBits() const;
+
     /// \name Casting methods
     /// @{
     static bool classof(const IntrinsicInst *I) {
@@ -100,6 +104,7 @@ namespace llvm {
       case Intrinsic::dbg_declare:
       case Intrinsic::dbg_value:
       case Intrinsic::dbg_addr:
+      case Intrinsic::dbg_label:
         return true;
       default: return false;
       }
@@ -159,6 +164,32 @@ namespace llvm {
     /// @}
   };
 
+  /// This represents the llvm.dbg.label instruction.
+  class DbgLabelInst : public DbgInfoIntrinsic {
+  public:
+    DILabel *getLabel() const {
+      return cast<DILabel>(getRawVariable());
+    }
+
+    Metadata *getRawVariable() const {
+      return cast<MetadataAsValue>(getArgOperand(0))->getMetadata();
+    }
+
+    Metadata *getRawExpression() const {
+      return nullptr;
+    }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    /// @{
+    static bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::dbg_label;
+    }
+    static bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+    /// @}
+  };
+
   /// This is the common base class for constrained floating point intrinsics.
   class ConstrainedFPIntrinsic : public IntrinsicInst {
   public:
@@ -243,6 +274,8 @@ namespace llvm {
       return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
     }
 
+    unsigned getDestAlignment() const { return getParamAlignment(ARG_DEST); }
+
     /// Set the specified arguments of the instruction.
     void setDest(Value *Ptr) {
       assert(getRawDest()->getType() == Ptr->getType() &&
@@ -250,6 +283,13 @@ namespace llvm {
       setArgOperand(ARG_DEST, Ptr);
     }
 
+    void setDestAlignment(unsigned Align) {
+      removeParamAttr(ARG_DEST, Attribute::Alignment);
+      if (Align > 0)
+        addParamAttr(ARG_DEST,
+                     Attribute::getWithAlignment(getContext(), Align));
+    }
+
     void setLength(Value *L) {
       assert(getLength()->getType() == L->getType() &&
              "setLength called with value of wrong type!");
@@ -257,6 +297,71 @@ namespace llvm {
     }
   };
 
+  /// Common base class for all memory transfer intrinsics. Simply provides
+  /// common methods.
+  template <class BaseCL> class MemTransferBase : public BaseCL {
+  private:
+    enum { ARG_SOURCE = 1 };
+
+  public:
+    /// Return the arguments to the instruction.
+    Value *getRawSource() const {
+      return const_cast<Value *>(BaseCL::getArgOperand(ARG_SOURCE));
+    }
+    const Use &getRawSourceUse() const {
+      return BaseCL::getArgOperandUse(ARG_SOURCE);
+    }
+    Use &getRawSourceUse() { return BaseCL::getArgOperandUse(ARG_SOURCE); }
+
+    /// This is just like getRawSource, but it strips off any cast
+    /// instructions that feed it, giving the original input.  The returned
+    /// value is guaranteed to be a pointer.
+    Value *getSource() const { return getRawSource()->stripPointerCasts(); }
+
+    unsigned getSourceAddressSpace() const {
+      return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
+    }
+
+    unsigned getSourceAlignment() const {
+      return BaseCL::getParamAlignment(ARG_SOURCE);
+    }
+
+    void setSource(Value *Ptr) {
+      assert(getRawSource()->getType() == Ptr->getType() &&
+             "setSource called with pointer of wrong type!");
+      BaseCL::setArgOperand(ARG_SOURCE, Ptr);
+    }
+
+    void setSourceAlignment(unsigned Align) {
+      BaseCL::removeParamAttr(ARG_SOURCE, Attribute::Alignment);
+      if (Align > 0)
+        BaseCL::addParamAttr(ARG_SOURCE, Attribute::getWithAlignment(
+                                             BaseCL::getContext(), Align));
+    }
+  };
+
+  /// Common base class for all memset intrinsics. Simply provides
+  /// common methods.
+  template <class BaseCL> class MemSetBase : public BaseCL {
+  private:
+    enum { ARG_VALUE = 1 };
+
+  public:
+    Value *getValue() const {
+      return const_cast<Value *>(BaseCL::getArgOperand(ARG_VALUE));
+    }
+    const Use &getValueUse() const {
+      return BaseCL::getArgOperandUse(ARG_VALUE);
+    }
+    Use &getValueUse() { return BaseCL::getArgOperandUse(ARG_VALUE); }
+
+    void setValue(Value *Val) {
+      assert(getValue()->getType() == Val->getType() &&
+             "setValue called with value of wrong type!");
+      BaseCL::setArgOperand(ARG_VALUE, Val);
+    }
+  };
+
   // The common base class for the atomic memset/memmove/memcpy intrinsics
   // i.e. llvm.element.unordered.atomic.memset/memcpy/memmove
   class AtomicMemIntrinsic : public MemIntrinsicBase<AtomicMemIntrinsic> {
@@ -299,23 +404,8 @@ namespace llvm {
 
   /// This class represents atomic memset intrinsic
   // i.e. llvm.element.unordered.atomic.memset
-  class AtomicMemSetInst : public AtomicMemIntrinsic {
-  private:
-    enum { ARG_VALUE = 1 };
-
+  class AtomicMemSetInst : public MemSetBase<AtomicMemIntrinsic> {
   public:
-    Value *getValue() const {
-      return const_cast<Value *>(getArgOperand(ARG_VALUE));
-    }
-    const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); }
-    Use &getValueUse() { return getArgOperandUse(ARG_VALUE); }
-
-    void setValue(Value *Val) {
-      assert(getValue()->getType() == Val->getType() &&
-             "setValue called with value of wrong type!");
-      setArgOperand(ARG_VALUE, Val);
-    }
-
     static bool classof(const IntrinsicInst *I) {
       return I->getIntrinsicID() == Intrinsic::memset_element_unordered_atomic;
     }
@@ -326,33 +416,8 @@ namespace llvm {
 
   // This class wraps the atomic memcpy/memmove intrinsics
   // i.e. llvm.element.unordered.atomic.memcpy/memmove
-  class AtomicMemTransferInst : public AtomicMemIntrinsic {
-  private:
-    enum { ARG_SOURCE = 1 };
-
+  class AtomicMemTransferInst : public MemTransferBase<AtomicMemIntrinsic> {
   public:
-    /// Return the arguments to the instruction.
-    Value *getRawSource() const {
-      return const_cast<Value *>(getArgOperand(ARG_SOURCE));
-    }
-    const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); }
-    Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); }
-
-    /// This is just like getRawSource, but it strips off any cast
-    /// instructions that feed it, giving the original input.  The returned
-    /// value is guaranteed to be a pointer.
-    Value *getSource() const { return getRawSource()->stripPointerCasts(); }
-
-    unsigned getSourceAddressSpace() const {
-      return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
-    }
-
-    void setSource(Value *Ptr) {
-      assert(getRawSource()->getType() == Ptr->getType() &&
-             "setSource called with pointer of wrong type!");
-      setArgOperand(ARG_SOURCE, Ptr);
-    }
-
     static bool classof(const IntrinsicInst *I) {
       switch (I->getIntrinsicID()) {
       case Intrinsic::memcpy_element_unordered_atomic:
@@ -394,17 +459,9 @@ namespace llvm {
   /// This is the common base class for memset/memcpy/memmove.
   class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
   private:
-    enum { ARG_ALIGN = 3, ARG_VOLATILE = 4 };
+    enum { ARG_VOLATILE = 3 };
 
   public:
-    ConstantInt *getAlignmentCst() const {
-      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(ARG_ALIGN)));
-    }
-
-    unsigned getAlignment() const {
-      return getAlignmentCst()->getZExtValue();
-    }
-
     ConstantInt *getVolatileCst() const {
       return cast<ConstantInt>(
           const_cast<Value *>(getArgOperand(ARG_VOLATILE)));
@@ -414,14 +471,8 @@ namespace llvm {
       return !getVolatileCst()->isZero();
     }
 
-    void setAlignment(Constant *A) { setArgOperand(ARG_ALIGN, A); }
-
     void setVolatile(Constant *V) { setArgOperand(ARG_VOLATILE, V); }
 
-    Type *getAlignmentType() const {
-      return getArgOperand(ARG_ALIGN)->getType();
-    }
-
     // Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const IntrinsicInst *I) {
       switch (I->getIntrinsicID()) {
@@ -438,19 +489,8 @@ namespace llvm {
   };
 
   /// This class wraps the llvm.memset intrinsic.
-  class MemSetInst : public MemIntrinsic {
+  class MemSetInst : public MemSetBase<MemIntrinsic> {
   public:
-    /// Return the arguments to the instruction.
-    Value *getValue() const { return const_cast<Value*>(getArgOperand(1)); }
-    const Use &getValueUse() const { return getArgOperandUse(1); }
-    Use &getValueUse() { return getArgOperandUse(1); }
-
-    void setValue(Value *Val) {
-      assert(getValue()->getType() == Val->getType() &&
-             "setValue called with value of wrong type!");
-      setArgOperand(1, Val);
-    }
-
     // Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const IntrinsicInst *I) {
       return I->getIntrinsicID() == Intrinsic::memset;
@@ -461,28 +501,8 @@ namespace llvm {
   };
 
   /// This class wraps the llvm.memcpy/memmove intrinsics.
-  class MemTransferInst : public MemIntrinsic {
+  class MemTransferInst : public MemTransferBase<MemIntrinsic> {
   public:
-    /// Return the arguments to the instruction.
-    Value *getRawSource() const { return const_cast<Value*>(getArgOperand(1)); }
-    const Use &getRawSourceUse() const { return getArgOperandUse(1); }
-    Use &getRawSourceUse() { return getArgOperandUse(1); }
-
-    /// This is just like getRawSource, but it strips off any cast
-    /// instructions that feed it, giving the original input.  The returned
-    /// value is guaranteed to be a pointer.
-    Value *getSource() const { return getRawSource()->stripPointerCasts(); }
-
-    unsigned getSourceAddressSpace() const {
-      return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
-    }
-
-    void setSource(Value *Ptr) {
-      assert(getRawSource()->getType() == Ptr->getType() &&
-             "setSource called with pointer of wrong type!");
-      setArgOperand(1, Ptr);
-    }
-
     // Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const IntrinsicInst *I) {
       return I->getIntrinsicID() == Intrinsic::memcpy ||
@@ -551,23 +571,8 @@ namespace llvm {
   /// This class represents any memset intrinsic
   // i.e. llvm.element.unordered.atomic.memset
   // and  llvm.memset
-  class AnyMemSetInst : public AnyMemIntrinsic {
-  private:
-    enum { ARG_VALUE = 1 };
-
+  class AnyMemSetInst : public MemSetBase<AnyMemIntrinsic> {
   public:
-    Value *getValue() const {
-      return const_cast<Value *>(getArgOperand(ARG_VALUE));
-    }
-    const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); }
-    Use &getValueUse() { return getArgOperandUse(ARG_VALUE); }
-
-    void setValue(Value *Val) {
-      assert(getValue()->getType() == Val->getType() &&
-             "setValue called with value of wrong type!");
-      setArgOperand(ARG_VALUE, Val);
-    }
-
     static bool classof(const IntrinsicInst *I) {
       switch (I->getIntrinsicID()) {
       case Intrinsic::memset:
@@ -585,33 +590,8 @@ namespace llvm {
   // This class wraps any memcpy/memmove intrinsics
   // i.e. llvm.element.unordered.atomic.memcpy/memmove
   // and  llvm.memcpy/memmove
-  class AnyMemTransferInst : public AnyMemIntrinsic {
-  private:
-    enum { ARG_SOURCE = 1 };
-
+  class AnyMemTransferInst : public MemTransferBase<AnyMemIntrinsic> {
   public:
-    /// Return the arguments to the instruction.
-    Value *getRawSource() const {
-      return const_cast<Value *>(getArgOperand(ARG_SOURCE));
-    }
-    const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); }
-    Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); }
-
-    /// This is just like getRawSource, but it strips off any cast
-    /// instructions that feed it, giving the original input.  The returned
-    /// value is guaranteed to be a pointer.
-    Value *getSource() const { return getRawSource()->stripPointerCasts(); }
-
-    unsigned getSourceAddressSpace() const {
-      return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
-    }
-
-    void setSource(Value *Ptr) {
-      assert(getRawSource()->getType() == Ptr->getType() &&
-             "setSource called with pointer of wrong type!");
-      setArgOperand(ARG_SOURCE, Ptr);
-    }
-
     static bool classof(const IntrinsicInst *I) {
       switch (I->getIntrinsicID()) {
       case Intrinsic::memcpy:
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index fc79da7ae0e6..e1e17f983ff8 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -39,7 +39,7 @@ namespace Intrinsic {
 
     // Get the intrinsic enums generated from Intrinsics.td
 #define GET_INTRINSIC_ENUM_VALUES
-#include "llvm/IR/Intrinsics.gen"
+#include "llvm/IR/IntrinsicEnums.inc"
 #undef GET_INTRINSIC_ENUM_VALUES
     , num_intrinsics
   };
@@ -97,7 +97,7 @@ namespace Intrinsic {
   /// intrinsic. This is returned by getIntrinsicInfoTableEntries.
   struct IITDescriptor {
     enum IITDescriptorKind {
-      Void, VarArg, MMX, Token, Metadata, Half, Float, Double,
+      Void, VarArg, MMX, Token, Metadata, Half, Float, Double, Quad,
       Integer, Vector, Pointer, Struct,
       Argument, ExtendArgument, TruncArgument, HalfVecArgument,
       SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index a2a1f26292ce..64455573ff19 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -117,6 +117,7 @@ def IntrHasSideEffects : IntrinsicProperty;
 
 class LLVMType<ValueType vt> {
   ValueType VT = vt;
+  int isAny = 0;
 }
 
 class LLVMQualPointerType<LLVMType elty, int addrspace>
@@ -131,6 +132,8 @@ class LLVMPointerType<LLVMType elty>
 class LLVMAnyPointerType<LLVMType elty>
   : LLVMType<iPTRAny>{
   LLVMType ElTy = elty;
+
+  let isAny = 1;
 }
 
 // Match the type of another intrinsic parameter.  Number is an index into the
@@ -163,10 +166,12 @@ class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
 class LLVMHalfElementsVectorType<int num> : LLVMMatchType<num>;
 
 def llvm_void_ty       : LLVMType<isVoid>;
-def llvm_any_ty        : LLVMType<Any>;
-def llvm_anyint_ty     : LLVMType<iAny>;
-def llvm_anyfloat_ty   : LLVMType<fAny>;
-def llvm_anyvector_ty  : LLVMType<vAny>;
+let isAny = 1 in {
+  def llvm_any_ty        : LLVMType<Any>;
+  def llvm_anyint_ty     : LLVMType<iAny>;
+  def llvm_anyfloat_ty   : LLVMType<fAny>;
+  def llvm_anyvector_ty  : LLVMType<vAny>;
+}
 def llvm_i1_ty         : LLVMType<i1>;
 def llvm_i8_ty         : LLVMType<i8>;
 def llvm_i16_ty        : LLVMType<i16>;
@@ -249,7 +254,6 @@ def llvm_v8f64_ty      : LLVMType<v8f64>;    //  8 x double
 
 def llvm_vararg_ty     : LLVMType<isVoid>;   // this means vararg here
 
-
 //===----------------------------------------------------------------------===//
 // Intrinsic Definitions.
 //===----------------------------------------------------------------------===//
@@ -390,17 +394,17 @@ def int_instrprof_value_profile : Intrinsic<[],
 
 def int_memcpy  : Intrinsic<[],
                              [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
-                              llvm_i32_ty, llvm_i1_ty],
+                              llvm_i1_ty],
                             [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
                              WriteOnly<0>, ReadOnly<1>]>;
 def int_memmove : Intrinsic<[],
                             [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
-                             llvm_i32_ty, llvm_i1_ty],
+                             llvm_i1_ty],
                             [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
                              ReadOnly<1>]>;
 def int_memset  : Intrinsic<[],
                             [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
-                             llvm_i32_ty, llvm_i1_ty],
+                             llvm_i1_ty],
                             [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
 
 // FIXME: Add version of these floating point intrinsics which allow non-default
@@ -573,6 +577,10 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
   def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
   def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
   def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+  def int_fshl : Intrinsic<[llvm_anyint_ty],
+      [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+  def int_fshr : Intrinsic<[llvm_anyint_ty],
+      [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
 }
 
 //===------------------------ Debugger Intrinsics -------------------------===//
@@ -595,6 +603,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
                                        [llvm_metadata_ty,
                                         llvm_metadata_ty,
                                         llvm_metadata_ty]>;
+  def int_dbg_label        : Intrinsic<[],
+                                       [llvm_metadata_ty]>;
 }
 
 //===------------------ Exception Handling Intrinsics----------------------===//
@@ -706,16 +716,26 @@ def int_invariant_end   : Intrinsic<[],
                                      llvm_anyptr_ty],
                                     [IntrArgMemOnly, NoCapture<2>]>;
 
-// invariant.group.barrier can't be marked with 'readnone' (IntrNoMem),
+// launder.invariant.group can't be marked with 'readnone' (IntrNoMem),
 // because it would cause CSE of two barriers with the same argument.
-// Readonly and argmemonly says that barrier only reads its argument and
-// it can be CSE only if memory didn't change between 2 barriers call,
-// which is valid.
+// Inaccessiblememonly says that the barrier doesn't read the argument,
+// but it changes state not accessible to this module. This way
+// we can DSE through the barrier because it doesn't read the value
+// after store. Although the barrier doesn't modify any memory it
+// can't be marked as readonly, because it would be possible to
+// CSE 2 barriers with store in between.
 // The argument also can't be marked with 'returned' attribute, because
 // it would remove barrier.
-def int_invariant_group_barrier : Intrinsic<[llvm_anyptr_ty],
+// Note that it is still experimental, which means that its semantics
+// might change in the future.
+def int_launder_invariant_group : Intrinsic<[llvm_anyptr_ty],
                                             [LLVMMatchType<0>],
-                                            [IntrReadMem, IntrArgMemOnly]>;
+                                            [IntrInaccessibleMemOnly, IntrSpeculatable]>;
+
+
+def int_strip_invariant_group : Intrinsic<[llvm_anyptr_ty],
+                                          [LLVMMatchType<0>],
+                                          [IntrSpeculatable, IntrNoMem]>;
 
 //===------------------------ Stackmap Intrinsics -------------------------===//
 //
@@ -768,6 +788,7 @@ def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
 def int_coro_end : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i1_ty], []>;
 
 def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
 
 def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
@@ -874,6 +895,10 @@ def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty],
                                       [llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty],
                                       [IntrNoMem]>;
 
+// Create a branch funnel that implements an indirect call to a limited set of
+// callees. This needs to be a musttail call.
+def int_icall_branch_funnel : Intrinsic<[], [llvm_vararg_ty], []>;
+
 def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
                                  [IntrReadMem, IntrArgMemOnly]>;
 
@@ -883,6 +908,10 @@ def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
 // Takes a pointer to a string and the length of the string.
 def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
                                      [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>;
+// Typed event logging for x-ray.
+// Takes a numeric type tag, a pointer to a string and the length of the string.
+def int_xray_typedevent : Intrinsic<[], [llvm_i16_ty, llvm_ptr_ty, llvm_i32_ty],
+                                        [NoCapture<1>, ReadOnly<1>, IntrWriteMem]>;
 //===----------------------------------------------------------------------===//
 
 //===------ Memory intrinsics with element-wise atomicity guarantees ------===//
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index 65c9aaab975d..688e863c1afe 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -146,6 +146,14 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
   class AdvSIMD_CvtFPToFx_Intrinsic
     : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
                 [IntrNoMem]>;
+
+  class AdvSIMD_1Arg_Intrinsic
+    : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+  class AdvSIMD_Dot_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
+                [IntrNoMem]>;
 }
 
 // Arithmetic ops
@@ -244,7 +252,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
   // Vector Max
   def int_aarch64_neon_smax : AdvSIMD_2VectorArg_Intrinsic;
   def int_aarch64_neon_umax : AdvSIMD_2VectorArg_Intrinsic;
-  def int_aarch64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic;
+  def int_aarch64_neon_fmax : AdvSIMD_2FloatArg_Intrinsic;
   def int_aarch64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic;
 
   // Vector Max Across Lanes
@@ -256,7 +264,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
   // Vector Min
   def int_aarch64_neon_smin : AdvSIMD_2VectorArg_Intrinsic;
   def int_aarch64_neon_umin : AdvSIMD_2VectorArg_Intrinsic;
-  def int_aarch64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic;
+  def int_aarch64_neon_fmin : AdvSIMD_2FloatArg_Intrinsic;
   def int_aarch64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic;
 
   // Vector Min/Max Number
@@ -354,7 +362,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
   def int_aarch64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic;
 
   // Vector Absolute Value
-  def int_aarch64_neon_abs : AdvSIMD_1IntArg_Intrinsic;
+  def int_aarch64_neon_abs : AdvSIMD_1Arg_Intrinsic;
 
   // Vector Saturating Absolute Value
   def int_aarch64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic;
@@ -412,6 +420,10 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
   // Scalar FP Inexact Narrowing
   def int_aarch64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty],
                                         [IntrNoMem]>;
+
+  // v8.2-A Dot Product
+  def int_aarch64_neon_udot : AdvSIMD_Dot_Intrinsic;
+  def int_aarch64_neon_sdot : AdvSIMD_Dot_Intrinsic;
 }
 
 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
@@ -572,6 +584,14 @@ def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
 def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
 
 let TargetPrefix = "aarch64" in {
+  class FPCR_Get_Intrinsic
+    : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+}
+
+// FPCR
+def int_aarch64_get_fpcr : FPCR_Get_Intrinsic;
+
+let TargetPrefix = "aarch64" in {
   class Crypto_AES_DataKey_Intrinsic
     : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index cc08fe683272..8555db01645f 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -17,6 +17,13 @@ class AMDGPUReadPreloadRegisterIntrinsic
 class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
   : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<name>;
 
+// Used to tag image and resource intrinsics with information used to generate
+// mem operands.
+class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = 0> {
+  int RsrcArg = rsrcarg;
+  bit IsImage = isimage;
+}
+
 let TargetPrefix = "r600" in {
 
 multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
@@ -69,6 +76,59 @@ def int_r600_cube : Intrinsic<
   [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
 >;
 
+def int_r600_store_stream_output : Intrinsic<
+  [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []
+>;
+
+class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [
+  llvm_v4f32_ty, // Coord
+  llvm_i32_ty,   // offset_x
+  llvm_i32_ty,   // offset_y,
+  llvm_i32_ty,   // offset_z,
+  llvm_i32_ty,   // resource_id
+  llvm_i32_ty,   // samplerid
+  llvm_i32_ty,   // coord_type_x
+  llvm_i32_ty,   // coord_type_y
+  llvm_i32_ty,   // coord_type_z
+  llvm_i32_ty],  // coord_type_w
+  [IntrNoMem]
+>;
+
+class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [
+    llvm_v4i32_ty, // Coord
+    llvm_i32_ty,   // offset_x
+    llvm_i32_ty,   // offset_y,
+    llvm_i32_ty,   // offset_z,
+    llvm_i32_ty,   // resource_id
+    llvm_i32_ty,   // samplerid
+    llvm_i32_ty,   // coord_type_x
+    llvm_i32_ty,   // coord_type_y
+    llvm_i32_ty,   // coord_type_z
+    llvm_i32_ty],  // coord_type_w
+    [IntrNoMem]
+>;
+
+def int_r600_store_swizzle :
+  Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []
+>;
+
+def int_r600_tex : TextureIntrinsicFloatInput;
+def int_r600_texc : TextureIntrinsicFloatInput;
+def int_r600_txl : TextureIntrinsicFloatInput;
+def int_r600_txlc : TextureIntrinsicFloatInput;
+def int_r600_txb : TextureIntrinsicFloatInput;
+def int_r600_txbc : TextureIntrinsicFloatInput;
+def int_r600_txf : TextureIntrinsicInt32Input;
+def int_r600_txq : TextureIntrinsicInt32Input;
+def int_r600_ddx : TextureIntrinsicFloatInput;
+def int_r600_ddy : TextureIntrinsicFloatInput;
+
+def int_r600_dot4 : Intrinsic<[llvm_float_ty],
+  [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_r600_kill : Intrinsic<[], [llvm_float_ty], []>;
+
 } // End TargetPrefix = "r600"
 
 let TargetPrefix = "amdgcn" in {
@@ -83,22 +143,22 @@ defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
 
 def int_amdgcn_dispatch_ptr :
   GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
   [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_queue_ptr :
   GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
   [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_kernarg_segment_ptr :
   GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
   [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_implicitarg_ptr :
   GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
   [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_groupstaticsize :
@@ -111,7 +171,7 @@ def int_amdgcn_dispatch_id :
 
 def int_amdgcn_implicit_buffer_ptr :
   GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
   [IntrNoMem, IntrSpeculatable]>;
 
 // Set EXEC to the 64-bit value given.
@@ -238,6 +298,26 @@ def int_amdgcn_cvt_pkrtz : Intrinsic<
   [IntrNoMem, IntrSpeculatable]
 >;
 
+def int_amdgcn_cvt_pknorm_i16 : Intrinsic<
+  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_cvt_pknorm_u16 : Intrinsic<
+  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_cvt_pk_i16 : Intrinsic<
+  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_cvt_pk_u16 : Intrinsic<
+  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
 def int_amdgcn_class : Intrinsic<
   [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
@@ -280,6 +360,12 @@ def int_amdgcn_sffbh :
   [IntrNoMem, IntrSpeculatable]
 >;
 
+// v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support.
+def int_amdgcn_fmad_ftz :
+  Intrinsic<[llvm_anyfloat_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]
+>;
 
 // Fields should mirror atomicrmw
 class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
@@ -295,163 +381,414 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
 def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
 def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
 
-class AMDGPUImageLoad<bit NoMem = 0> : Intrinsic <
-  [llvm_anyfloat_ty], // vdata(VGPR)
-  [llvm_anyint_ty,    // vaddr(VGPR)
-   llvm_anyint_ty,    // rsrc(SGPR)
-   llvm_i32_ty,       // dmask(imm)
-   llvm_i1_ty,        // glc(imm)
-   llvm_i1_ty,        // slc(imm)
-   llvm_i1_ty,        // lwe(imm)
-   llvm_i1_ty],       // da(imm)
-  !if(NoMem, [IntrNoMem], [IntrReadMem])>;
+class AMDGPULDSF32Intrin<string clang_builtin> :
+  GCCBuiltin<clang_builtin>,
+  Intrinsic<[llvm_float_ty],
+    [LLVMQualPointerType<llvm_float_ty, 3>,
+    llvm_float_ty,
+    llvm_i32_ty, // ordering
+    llvm_i32_ty, // scope
+    llvm_i1_ty], // isVolatile
+    [IntrArgMemOnly, NoCapture<0>]
+>;
 
-def int_amdgcn_image_load : AMDGPUImageLoad;
-def int_amdgcn_image_load_mip : AMDGPUImageLoad;
-def int_amdgcn_image_getresinfo : AMDGPUImageLoad<1>;
+def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
+def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
+def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
 
-class AMDGPUImageStore : Intrinsic <
-  [],
-  [llvm_anyfloat_ty,  // vdata(VGPR)
-   llvm_anyint_ty,    // vaddr(VGPR)
-   llvm_anyint_ty,    // rsrc(SGPR)
-   llvm_i32_ty,       // dmask(imm)
-   llvm_i1_ty,        // glc(imm)
-   llvm_i1_ty,        // slc(imm)
-   llvm_i1_ty,        // lwe(imm)
-   llvm_i1_ty],       // da(imm)
-  []>;
-
-def int_amdgcn_image_store : AMDGPUImageStore;
-def int_amdgcn_image_store_mip : AMDGPUImageStore;
-
-class AMDGPUImageSample<bit NoMem = 0> : Intrinsic <
-    [llvm_anyfloat_ty], // vdata(VGPR)
-    [llvm_anyfloat_ty,  // vaddr(VGPR)
-     llvm_anyint_ty,    // rsrc(SGPR)
-     llvm_v4i32_ty,     // sampler(SGPR)
-     llvm_i32_ty,       // dmask(imm)
-     llvm_i1_ty,        // unorm(imm)
-     llvm_i1_ty,        // glc(imm)
-     llvm_i1_ty,        // slc(imm)
-     llvm_i1_ty,        // lwe(imm)
-     llvm_i1_ty],       // da(imm)
-     !if(NoMem, [IntrNoMem], [IntrReadMem])>;
-
-// Basic sample
-def int_amdgcn_image_sample : AMDGPUImageSample;
-def int_amdgcn_image_sample_cl : AMDGPUImageSample;
-def int_amdgcn_image_sample_d : AMDGPUImageSample;
-def int_amdgcn_image_sample_d_cl : AMDGPUImageSample;
-def int_amdgcn_image_sample_l : AMDGPUImageSample;
-def int_amdgcn_image_sample_b : AMDGPUImageSample;
-def int_amdgcn_image_sample_b_cl : AMDGPUImageSample;
-def int_amdgcn_image_sample_lz : AMDGPUImageSample;
-def int_amdgcn_image_sample_cd : AMDGPUImageSample;
-def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample;
-
-// Sample with comparison
-def int_amdgcn_image_sample_c : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_cl : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_d : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_l : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_b : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_lz : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_cd : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample;
-
-// Sample with offsets
-def int_amdgcn_image_sample_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_d_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_l_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_b_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_lz_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_cd_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample;
-
-// Sample with comparison and offsets
-def int_amdgcn_image_sample_c_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample;
-def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample;
-
-// Basic gather4
-def int_amdgcn_image_gather4 : AMDGPUImageSample;
-def int_amdgcn_image_gather4_cl : AMDGPUImageSample;
-def int_amdgcn_image_gather4_l : AMDGPUImageSample;
-def int_amdgcn_image_gather4_b : AMDGPUImageSample;
-def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample;
-def int_amdgcn_image_gather4_lz : AMDGPUImageSample;
-
-// Gather4 with comparison
-def int_amdgcn_image_gather4_c : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_l : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_b : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample;
-
-// Gather4 with offsets
-def int_amdgcn_image_gather4_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_l_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_b_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample;
-
-// Gather4 with comparison and offsets
-def int_amdgcn_image_gather4_c_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample;
-def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample;
-
-def int_amdgcn_image_getlod : AMDGPUImageSample<1>;
-
-class AMDGPUImageAtomic : Intrinsic <
-  [llvm_i32_ty],
-  [llvm_i32_ty,       // vdata(VGPR)
-   llvm_anyint_ty,    // vaddr(VGPR)
-   llvm_v8i32_ty,     // rsrc(SGPR)
-   llvm_i1_ty,        // r128(imm)
-   llvm_i1_ty,        // da(imm)
-   llvm_i1_ty],       // slc(imm)
-  []>;
-
-def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_sub : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_smin : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_umin : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_smax : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_umax : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_and : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_or : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_xor : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_inc : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_dec : AMDGPUImageAtomic;
-def int_amdgcn_image_atomic_cmpswap : Intrinsic <
-  [llvm_i32_ty],
-  [llvm_i32_ty,       // src(VGPR)
-   llvm_i32_ty,       // cmp(VGPR)
-   llvm_anyint_ty,    // vaddr(VGPR)
-   llvm_v8i32_ty,     // rsrc(SGPR)
-   llvm_i1_ty,        // r128(imm)
-   llvm_i1_ty,        // da(imm)
-   llvm_i1_ty],       // slc(imm)
-  []>;
+} // TargetPrefix = "amdgcn"
+
+// New-style image intrinsics
+
+//////////////////////////////////////////////////////////////////////////
+// Dimension-aware image intrinsics framework
+//////////////////////////////////////////////////////////////////////////
+
+// Helper class to represent (type, name) combinations of arguments. The
+// argument names are explanatory and used as DAG operand names for codegen
+// pattern matching.
+class AMDGPUArg<LLVMType ty, string name> {
+  LLVMType Type = ty;
+  string Name = name;
+}
+
+// Return [AMDGPUArg<basety, names[0]>, AMDGPUArg<LLVMMatchType<0>, names[1]>, ...]
+class makeArgList<list<string> names, LLVMType basety> {
+  list<AMDGPUArg> ret =
+    !listconcat([AMDGPUArg<basety, names[0]>],
+                !foreach(name, !tail(names), AMDGPUArg<LLVMMatchType<0>, name>));
+}
+
+// Return arglist, with LLVMMatchType's references shifted by 'shift'.
+class arglistmatchshift<list<AMDGPUArg> arglist, int shift> {
+  list<AMDGPUArg> ret =
+    !foreach(arg, arglist,
+             !if(!isa<LLVMMatchType>(arg.Type),
+                 AMDGPUArg<LLVMMatchType<!add(!cast<LLVMMatchType>(arg.Type).Number, shift)>,
+                           arg.Name>,
+                 arg));
+}
+
+// Return the concatenation of the given arglists. LLVMMatchType's are adjusted
+// accordingly, and shifted by an additional 'shift'.
+class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {
+  list<AMDGPUArg> ret =
+    !foldl([]<AMDGPUArg>, arglists, lhs, rhs,
+           !listconcat(
+             lhs,
+             arglistmatchshift<rhs,
+                               !add(shift, !foldl(0, lhs, a, b,
+                                                  !add(a, b.Type.isAny)))>.ret));
+}
+
+// Represent texture/image types / dimensionality.
+class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_names> {
+  AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
+  string Name = name; // e.g. "2darraymsaa"
+  bit DA = 0; // DA bit in MIMG encoding
+
+  list<AMDGPUArg> CoordSliceArgs =
+    makeArgList<!listconcat(coord_names, slice_names), llvm_anyfloat_ty>.ret;
+  list<AMDGPUArg> CoordSliceIntArgs =
+    makeArgList<!listconcat(coord_names, slice_names), llvm_anyint_ty>.ret;
+  list<AMDGPUArg> GradientArgs =
+    makeArgList<!listconcat(!foreach(name, coord_names, "d" # name # "dh"),
+                            !foreach(name, coord_names, "d" # name # "dv")),
+                llvm_anyfloat_ty>.ret;
+
+  bits<8> NumCoords = !size(CoordSliceArgs);
+  bits<8> NumGradients = !size(GradientArgs);
+}
+
+def AMDGPUDim1D : AMDGPUDimProps<"1d", ["s"], []>;
+def AMDGPUDim2D : AMDGPUDimProps<"2d", ["s", "t"], []>;
+def AMDGPUDim3D : AMDGPUDimProps<"3d", ["s", "t", "r"], []>;
+let DA = 1 in {
+  def AMDGPUDimCube : AMDGPUDimProps<"cube", ["s", "t"], ["face"]>;
+  def AMDGPUDim1DArray : AMDGPUDimProps<"1darray", ["s"], ["slice"]>;
+  def AMDGPUDim2DArray : AMDGPUDimProps<"2darray", ["s", "t"], ["slice"]>;
+}
+def AMDGPUDim2DMsaa : AMDGPUDimProps<"2dmsaa", ["s", "t"], ["fragid"]>;
+let DA = 1 in {
+  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<"2darraymsaa", ["s", "t"], ["slice", "fragid"]>;
+}
+
+def AMDGPUDims {
+  list<AMDGPUDimProps> NoMsaa = [AMDGPUDim1D, AMDGPUDim2D, AMDGPUDim3D,
+                                 AMDGPUDimCube, AMDGPUDim1DArray,
+                                 AMDGPUDim2DArray];
+  list<AMDGPUDimProps> Msaa = [AMDGPUDim2DMsaa, AMDGPUDim2DArrayMsaa];
+  list<AMDGPUDimProps> All = !listconcat(NoMsaa, Msaa);
+}
+
+// Represent sample variants, i.e. _C, _O, _B, ... and combinations thereof.
+class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr> {
+  string UpperCaseMod = ucmod;
+  string LowerCaseMod = lcmod;
+
+  // {offset} {bias} {z-compare}
+  list<AMDGPUArg> ExtraAddrArgs = extra_addr;
+  bit Gradients = 0;
+
+  // Name of the {lod} or {clamp} argument that is appended to the coordinates,
+  // if any.
+  string LodOrClamp = "";
+}
+
+// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
+// AMDGPUSampleVariantsNoGradients: variants supported by IMAGE_GATHER4
+defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
+  multiclass AMDGPUSampleHelper_Offset<string ucmod, string lcmod,
+                                       list<AMDGPUArg> extra_addr> {
+    def NAME#lcmod : AMDGPUSampleVariant<ucmod, lcmod, extra_addr>;
+    def NAME#lcmod#_o : AMDGPUSampleVariant<
+        ucmod#"_O", lcmod#"_o", !listconcat([AMDGPUArg<llvm_i32_ty, "offset">], extra_addr)>;
+  }
+
+  multiclass AMDGPUSampleHelper_Compare<string ucmod, string lcmod,
+                                        list<AMDGPUArg> extra_addr> {
+    defm NAME : AMDGPUSampleHelper_Offset<ucmod, lcmod, extra_addr>;
+    defm NAME : AMDGPUSampleHelper_Offset<
+        "_C"#ucmod, "_c"#lcmod, !listconcat(extra_addr, [AMDGPUArg<llvm_float_ty, "zcompare">])>;
+  }
+
+  multiclass AMDGPUSampleHelper_Clamp<string ucmod, string lcmod,
+                                      list<AMDGPUArg> extra_addr> {
+    defm NAME : AMDGPUSampleHelper_Compare<ucmod, lcmod, extra_addr>;
+    let LodOrClamp = "clamp" in
+    defm NAME : AMDGPUSampleHelper_Compare<ucmod#"_CL", lcmod#"_cl", extra_addr>;
+  }
+
+  defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
+    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
+    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
+        "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
+    let LodOrClamp = "lod" in
+    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_L", "_l", []>;
+    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>;
+  }
+
+  let Gradients = 1 in {
+    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>;
+    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>;
+  }
+}
+
+// Helper class to capture the profile of a dimension-aware image intrinsic.
+// This information is used to generate the intrinsic's type and to inform
+// codegen pattern matching.
+class AMDGPUDimProfile<string opmod,
+                       AMDGPUDimProps dim> {
+  AMDGPUDimProps Dim = dim;
+  string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod
+
+  // These are entended to be overwritten by subclasses
+  bit IsSample = 0;
+  bit IsAtomic = 0;
+  list<LLVMType> RetTypes = [];
+  list<AMDGPUArg> DataArgs = [];
+  list<AMDGPUArg> ExtraAddrArgs = [];
+  bit Gradients = 0;
+  string LodClampMip = "";
+
+  int NumRetAndDataAnyTypes =
+    !foldl(0, !listconcat(RetTypes, !foreach(arg, DataArgs, arg.Type)), a, b,
+           !add(a, b.isAny));
+
+  list<AMDGPUArg> AddrArgs =
+    arglistconcat<[ExtraAddrArgs,
+                   !if(Gradients, dim.GradientArgs, []),
+                   !listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs),
+                               !if(!eq(LodClampMip, ""),
+                                   []<AMDGPUArg>,
+                                   [AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))],
+                  NumRetAndDataAnyTypes>.ret;
+  list<LLVMType> AddrTypes = !foreach(arg, AddrArgs, arg.Type);
+  list<AMDGPUArg> AddrDefaultArgs =
+    !foreach(arg, AddrArgs,
+             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
+                           !if(IsSample, llvm_float_ty, llvm_i32_ty), arg.Type),
+                       arg.Name>);
+  list<AMDGPUArg> AddrA16Args =
+    !foreach(arg, AddrArgs,
+             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
+                           !if(IsSample, llvm_half_ty, llvm_i16_ty), arg.Type),
+                       arg.Name>);
+}
+
+class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod, base.Dim> {
+  let IsSample = base.IsSample;
+  let IsAtomic = base.IsAtomic;
+  let RetTypes = base.RetTypes;
+  let DataArgs = base.DataArgs;
+  let ExtraAddrArgs = base.ExtraAddrArgs;
+  let Gradients = base.Gradients;
+  let LodClampMip = base.LodClampMip;
+}
+
+class AMDGPUDimSampleProfile<string opmod,
+                             AMDGPUDimProps dim,
+                             AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
+  let IsSample = 1;
+  let RetTypes = [llvm_anyfloat_ty];
+  let ExtraAddrArgs = sample.ExtraAddrArgs;
+  let Gradients = sample.Gradients;
+  let LodClampMip = sample.LodOrClamp;
+}
+
+class AMDGPUDimNoSampleProfile<string opmod,
+                               AMDGPUDimProps dim,
+                               list<LLVMType> retty,
+                               list<AMDGPUArg> dataargs,
+                               bit Mip = 0> : AMDGPUDimProfile<opmod, dim> {
+  let RetTypes = retty;
+  let DataArgs = dataargs;
+  let LodClampMip = !if(Mip, "mip", "");
+}
+
+class AMDGPUDimAtomicProfile<string opmod,
+                             AMDGPUDimProps dim,
+                             list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> {
+  let RetTypes = [llvm_anyint_ty];
+  let DataArgs = dataargs;
+  let IsAtomic = 1;
+}
+
+class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
+  let RetTypes = [llvm_anyfloat_ty];
+  let DataArgs = [];
+  let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">];
+  let LodClampMip = "mip";
+}
+
+// All dimension-aware intrinsics are derived from this class.
+class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
+                              list<IntrinsicProperty> props,
+                              list<SDNodeProperty> sdnodeprops> : Intrinsic<
+    P_.RetTypes,        // vdata(VGPR) -- for load/atomic-with-return
+    !listconcat(
+      !foreach(arg, P_.DataArgs, arg.Type),      // vdata(VGPR) -- for store/atomic
+      !if(P_.IsAtomic, [], [llvm_i32_ty]),       // dmask(imm)
+      P_.AddrTypes,                              // vaddr(VGPR)
+      [llvm_v8i32_ty],                           // rsrc(SGPR)
+      !if(P_.IsSample, [llvm_v4i32_ty,           // samp(SGPR)
+                        llvm_i1_ty], []),        // unorm(imm)
+      [llvm_i32_ty,                              // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
+       llvm_i32_ty]),                            // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+      props, "", sdnodeprops>,
+  AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
+                           !if(P_.IsAtomic, 0, 1)), 1> {
+  AMDGPUDimProfile P = P_;
+
+  AMDGPUImageDimIntrinsic Intr = !cast<AMDGPUImageDimIntrinsic>(NAME);
+
+  let TargetPrefix = "amdgcn";
+}
+
+// Marker class for intrinsics with a DMask that determines the returned
+// channels.
+class AMDGPUImageDMaskIntrinsic;
+
+defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
+
+  //////////////////////////////////////////////////////////////////////////
+  // Load and store intrinsics
+  //////////////////////////////////////////////////////////////////////////
+  multiclass AMDGPUImageDimIntrinsicsNoMsaa<string opmod,
+                                            list<LLVMType> retty,
+                                            list<AMDGPUArg> dataargs,
+                                            list<IntrinsicProperty> props,
+                                            list<SDNodeProperty> sdnodeprops,
+                                            bit Mip = 0> {
+    foreach dim = AMDGPUDims.NoMsaa in {
+      def !strconcat(NAME, "_", dim.Name)
+        : AMDGPUImageDimIntrinsic<
+            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
+            props, sdnodeprops>;
+    }
+  }
+
+  multiclass AMDGPUImageDimIntrinsicsAll<string opmod,
+                                         list<LLVMType> retty,
+                                         list<AMDGPUArg> dataargs,
+                                         list<IntrinsicProperty> props,
+                                         list<SDNodeProperty> sdnodeprops,
+                                         bit Mip = 0> {
+    foreach dim = AMDGPUDims.All in {
+      def !strconcat(NAME, "_", dim.Name)
+        : AMDGPUImageDimIntrinsic<
+            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
+            props, sdnodeprops>;
+    }
+  }
+
+  defm int_amdgcn_image_load
+    : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_anyfloat_ty], [], [IntrReadMem],
+                                  [SDNPMemOperand]>,
+      AMDGPUImageDMaskIntrinsic;
+  defm int_amdgcn_image_load_mip
+    : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_anyfloat_ty], [],
+                                     [IntrReadMem], [SDNPMemOperand], 1>,
+      AMDGPUImageDMaskIntrinsic;
+
+  defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
+              "STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
+              [IntrWriteMem], [SDNPMemOperand]>;
+  defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
+              "STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
+              [IntrWriteMem], [SDNPMemOperand], 1>;
+
+  //////////////////////////////////////////////////////////////////////////
+  // sample and getlod intrinsics
+  //////////////////////////////////////////////////////////////////////////
+  multiclass AMDGPUImageDimSampleDims<string opmod,
+                                      AMDGPUSampleVariant sample,
+                                      bit NoMem = 0> {
+    foreach dim = AMDGPUDims.NoMsaa in {
+      def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
+          AMDGPUDimSampleProfile<opmod, dim, sample>,
+          !if(NoMem, [IntrNoMem], [IntrReadMem]),
+          !if(NoMem, [], [SDNPMemOperand])>;
+    }
+  }
+
+  foreach sample = AMDGPUSampleVariants in {
+    defm int_amdgcn_image_sample # sample.LowerCaseMod
+      : AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>,
+        AMDGPUImageDMaskIntrinsic;
+  }
+
+  defm int_amdgcn_image_getlod
+    : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
+      AMDGPUImageDMaskIntrinsic;
+
+  //////////////////////////////////////////////////////////////////////////
+  // getresinfo intrinsics
+  //////////////////////////////////////////////////////////////////////////
+  foreach dim = AMDGPUDims.All in {
+    def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
+      : AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>,
+        AMDGPUImageDMaskIntrinsic;
+  }
+
+  //////////////////////////////////////////////////////////////////////////
+  // gather4 intrinsics
+  //////////////////////////////////////////////////////////////////////////
+  foreach sample = AMDGPUSampleVariantsNoGradients in {
+    foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {
+      def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:
+          AMDGPUImageDimIntrinsic<
+              AMDGPUDimSampleProfile<"GATHER4" # sample.UpperCaseMod, dim, sample>,
+              [IntrReadMem], [SDNPMemOperand]>;
+    }
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////
+// atomic intrinsics
+//////////////////////////////////////////////////////////////////////////
+defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
+  multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> {
+    foreach dim = AMDGPUDims.All in {
+      def !strconcat(NAME, "_", dim.Name)
+        : AMDGPUImageDimIntrinsic<
+            AMDGPUDimAtomicProfile<opmod, dim, dataargs>,
+            [], [SDNPMemOperand]>;
+    }
+  }
+
+  multiclass AMDGPUImageDimAtomic<string opmod> {
+    defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>;
+  }
+
+  defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
+  defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
+  defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
+  defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
+  defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
+  defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
+  defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
+  defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
+  defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
+  defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
+
+  // TODO: INC/DEC are weird: they seem to have a vdata argument in hardware,
+  //       even though it clearly shouldn't be needed
+  defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">;
+  defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">;
+
+  defm int_amdgcn_image_atomic_cmpswap :
+      AMDGPUImageDimAtomicX<"ATOMIC_CMPSWAP", [AMDGPUArg<LLVMMatchType<0>, "src">,
+                                               AMDGPUArg<LLVMMatchType<0>, "cmp">]>;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Buffer intrinsics
+//////////////////////////////////////////////////////////////////////////
+
+let TargetPrefix = "amdgcn" in {
+
+defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {
 
 class AMDGPUBufferLoad : Intrinsic <
   [llvm_anyfloat_ty],
@@ -460,7 +797,8 @@ class AMDGPUBufferLoad : Intrinsic <
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty,        // glc(imm)
    llvm_i1_ty],       // slc(imm)
-  [IntrReadMem]>;
+  [IntrReadMem], "", [SDNPMemOperand]>,
+  AMDGPURsrcIntrinsic<0>;
 def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
 def int_amdgcn_buffer_load : AMDGPUBufferLoad;
 
@@ -472,7 +810,8 @@ class AMDGPUBufferStore : Intrinsic <
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty,        // glc(imm)
    llvm_i1_ty],       // slc(imm)
-  [IntrWriteMem]>;
+  [IntrWriteMem], "", [SDNPMemOperand]>,
+  AMDGPURsrcIntrinsic<1>;
 def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
 def int_amdgcn_buffer_store : AMDGPUBufferStore;
 
@@ -487,7 +826,8 @@ def int_amdgcn_tbuffer_load : Intrinsic <
      llvm_i32_ty,     // nfmt(imm)
      llvm_i1_ty,     // glc(imm)
      llvm_i1_ty],    // slc(imm)
-    []>;
+    [IntrReadMem], "", [SDNPMemOperand]>,
+  AMDGPURsrcIntrinsic<0>;
 
 def int_amdgcn_tbuffer_store : Intrinsic <
     [],
@@ -501,7 +841,8 @@ def int_amdgcn_tbuffer_store : Intrinsic <
      llvm_i32_ty,    // nfmt(imm)
      llvm_i1_ty,     // glc(imm)
      llvm_i1_ty],    // slc(imm)
-    []>;
+    [IntrWriteMem], "", [SDNPMemOperand]>,
+  AMDGPURsrcIntrinsic<1>;
 
 class AMDGPUBufferAtomic : Intrinsic <
   [llvm_i32_ty],
@@ -510,7 +851,8 @@ class AMDGPUBufferAtomic : Intrinsic <
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty],       // slc(imm)
-  []>;
+  [], "", [SDNPMemOperand]>,
+  AMDGPURsrcIntrinsic<1, 0>;
 def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
 def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
 def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
@@ -529,7 +871,10 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty],       // slc(imm)
-  []>;
+  [], "", [SDNPMemOperand]>,
+  AMDGPURsrcIntrinsic<2, 0>;
+
+} // defset AMDGPUBufferIntrinsics
 
 // Uses that do not set the done bit should set IntrWriteMem on the
 // call site.
@@ -731,6 +1076,19 @@ def int_amdgcn_readlane :
   GCCBuiltin<"__builtin_amdgcn_readlane">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
 
+// The value to write and lane select arguments must be uniform across the
+// currently active threads of the current wave. Otherwise, the result is
+// undefined.
+def int_amdgcn_writelane :
+  GCCBuiltin<"__builtin_amdgcn_writelane">,
+  Intrinsic<[llvm_i32_ty], [
+    llvm_i32_ty,    // uniform value to write: returned by the selected lane
+    llvm_i32_ty,    // uniform lane select
+    llvm_i32_ty     // returned by all lanes other than the selected one
+  ],
+  [IntrNoMem, IntrConvergent]
+>;
+
 def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
@@ -829,6 +1187,109 @@ def int_amdgcn_ds_bpermute :
   GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
 
+//===----------------------------------------------------------------------===//
+// Deep learning intrinsics.
+//===----------------------------------------------------------------------===//
+
+// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c)
+//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
+def int_amdgcn_fdot2 :
+  GCCBuiltin<"__builtin_amdgcn_fdot2">,
+  Intrinsic<
+    [llvm_float_ty], // %r
+    [
+      llvm_v2f16_ty, // %a
+      llvm_v2f16_ty, // %b
+      llvm_float_ty  // %c
+    ],
+    [IntrNoMem, IntrSpeculatable]
+  >;
+
+// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c)
+//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
+def int_amdgcn_sdot2 :
+  GCCBuiltin<"__builtin_amdgcn_sdot2">,
+  Intrinsic<
+    [llvm_i32_ty], // %r
+    [
+      llvm_v2i16_ty, // %a
+      llvm_v2i16_ty, // %b
+      llvm_i32_ty    // %c
+    ],
+    [IntrNoMem, IntrSpeculatable]
+  >;
+
+// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c)
+//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
+def int_amdgcn_udot2 :
+  GCCBuiltin<"__builtin_amdgcn_udot2">,
+  Intrinsic<
+    [llvm_i32_ty], // %r
+    [
+      llvm_v2i16_ty, // %a
+      llvm_v2i16_ty, // %b
+      llvm_i32_ty    // %c
+    ],
+    [IntrNoMem, IntrSpeculatable]
+  >;
+
+// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c)
+//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
+def int_amdgcn_sdot4 :
+  GCCBuiltin<"__builtin_amdgcn_sdot4">,
+  Intrinsic<
+    [llvm_i32_ty], // %r
+    [
+      llvm_i32_ty, // %a
+      llvm_i32_ty, // %b
+      llvm_i32_ty  // %c
+    ],
+    [IntrNoMem, IntrSpeculatable]
+  >;
+
+// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c)
+//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
+def int_amdgcn_udot4 :
+  GCCBuiltin<"__builtin_amdgcn_udot4">,
+  Intrinsic<
+    [llvm_i32_ty], // %r
+    [
+      llvm_i32_ty, // %a
+      llvm_i32_ty, // %b
+      llvm_i32_ty  // %c
+    ],
+    [IntrNoMem, IntrSpeculatable]
+  >;
+
+// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c)
+//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
+//        %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
+def int_amdgcn_sdot8 :
+  GCCBuiltin<"__builtin_amdgcn_sdot8">,
+  Intrinsic<
+    [llvm_i32_ty], // %r
+    [
+      llvm_i32_ty, // %a
+      llvm_i32_ty, // %b
+      llvm_i32_ty  // %c
+    ],
+    [IntrNoMem, IntrSpeculatable]
+  >;
+
+// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c)
+//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
+//        %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
+def int_amdgcn_udot8 :
+  GCCBuiltin<"__builtin_amdgcn_udot8">,
+  Intrinsic<
+    [llvm_i32_ty], // %r
+    [
+      llvm_i32_ty, // %a
+      llvm_i32_ty, // %b
+      llvm_i32_ty  // %c
+    ],
+    [IntrNoMem, IntrSpeculatable]
+  >;
 
 //===----------------------------------------------------------------------===//
 // Special Intrinsics for backend internal use only. No frontend
diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index fe3861301689..f25d2f1dbb5d 100644
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -369,6 +369,10 @@ class Neon_3Arg_Long_Intrinsic
   : Intrinsic<[llvm_anyvector_ty],
               [LLVMMatchType<0>, LLVMTruncatedType<0>, LLVMTruncatedType<0>],
               [IntrNoMem]>;
+
+class Neon_1FloatArg_Intrinsic
+  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
 class Neon_CvtFxToFP_Intrinsic
   : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
 class Neon_CvtFPToFx_Intrinsic
@@ -591,8 +595,8 @@ def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
 def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
 def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
 
-// Vector Rounding
-def int_arm_neon_vrintn : Neon_1Arg_Intrinsic;
+// Vector and Scalar Rounding.
+def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
 def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
 def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
 def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
@@ -616,6 +620,18 @@ def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                   [llvm_anyptr_ty, llvm_i32_ty],
                                   [IntrReadMem, IntrArgMemOnly]>;
 
+def int_arm_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>],
+                                    [IntrReadMem, IntrArgMemOnly]>;
+def int_arm_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                     LLVMMatchType<0>],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>],
+                                    [IntrReadMem, IntrArgMemOnly]>;
+def int_arm_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                     LLVMMatchType<0>, LLVMMatchType<0>],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>],
+                                    [IntrReadMem, IntrArgMemOnly]>;
+
 // Vector load N-element structure to one lane.
 // Source operands are: the address, the N input vectors (since only one
 // lane is assigned), the lane number, and the alignment.
@@ -636,6 +652,20 @@ def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>, llvm_i32_ty,
                                        llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
 
+// Vector load N-element structure to all lanes.
+// Source operands are the address and alignment.
+def int_arm_neon_vld2dup : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                                     [llvm_anyptr_ty, llvm_i32_ty],
+                                     [IntrReadMem, IntrArgMemOnly]>;
+def int_arm_neon_vld3dup : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                      LLVMMatchType<0>],
+                                     [llvm_anyptr_ty, llvm_i32_ty],
+                                     [IntrReadMem, IntrArgMemOnly]>;
+def int_arm_neon_vld4dup : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                      LLVMMatchType<0>, LLVMMatchType<0>],
+                                     [llvm_anyptr_ty, llvm_i32_ty],
+                                     [IntrReadMem, IntrArgMemOnly]>;
+
 // Interleaving vector stores from N-element structures.
 // Source operands are: the address, the N vectors, and the alignment.
 def int_arm_neon_vst1 : Intrinsic<[],
@@ -655,6 +685,20 @@ def int_arm_neon_vst4 : Intrinsic<[],
                                    LLVMMatchType<1>, llvm_i32_ty],
                                   [IntrArgMemOnly]>;
 
+def int_arm_neon_vst1x2 : Intrinsic<[],
+                                    [llvm_anyptr_ty, llvm_anyvector_ty,
+                                     LLVMMatchType<1>],
+                                    [IntrArgMemOnly, NoCapture<0>]>;
+def int_arm_neon_vst1x3 : Intrinsic<[],
+                                    [llvm_anyptr_ty, llvm_anyvector_ty,
+                                     LLVMMatchType<1>, LLVMMatchType<1>],
+                                    [IntrArgMemOnly, NoCapture<0>]>;
+def int_arm_neon_vst1x4 : Intrinsic<[],
+                                    [llvm_anyptr_ty, llvm_anyvector_ty,
+                                     LLVMMatchType<1>, LLVMMatchType<1>,
+                                     LLVMMatchType<1>],
+                                    [IntrArgMemOnly, NoCapture<0>]>;
+
 // Vector store N-element structure from one lane.
 // Source operands are: the address, the N vectors, the lane number, and
 // the alignment.
@@ -713,4 +757,14 @@ def int_arm_neon_sha256h: SHA_3Arg_v4i32_Intrinsic;
 def int_arm_neon_sha256h2: SHA_3Arg_v4i32_Intrinsic;
 def int_arm_neon_sha256su1: SHA_3Arg_v4i32_Intrinsic;
 
+// Armv8.2-A dot product instructions
+class Neon_Dot_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty],
+              [LLVMMatchType<0>, llvm_anyvector_ty,
+               LLVMMatchType<1>],
+              [IntrNoMem]>;
+def int_arm_neon_udot : Neon_Dot_Intrinsic;
+def int_arm_neon_sdot : Neon_Dot_Intrinsic;
+
+
 } // end TargetPrefix
diff --git a/include/llvm/IR/IntrinsicsHexagon.td b/include/llvm/IR/IntrinsicsHexagon.td
index 5c96702bca76..25f4215d68a8 100644
--- a/include/llvm/IR/IntrinsicsHexagon.td
+++ b/include/llvm/IR/IntrinsicsHexagon.td
@@ -21,6 +21,13 @@ let TargetPrefix = "hexagon" in {
                               list<IntrinsicProperty> properties>
     : GCCBuiltin<!strconcat("__builtin_", GCCIntSuffix)>,
       Intrinsic<ret_types, param_types, properties>;
+
+  /// Hexagon_NonGCC_Intrinsic - Base class for bitcode convertible Hexagon
+  /// intrinsics.
+  class Hexagon_NonGCC_Intrinsic<list<LLVMType> ret_types,
+                                 list<LLVMType> param_types,
+                                 list<IntrinsicProperty> properties>
+    : Intrinsic<ret_types, param_types, properties>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -424,13 +431,13 @@ class Hexagon_mem_memsisi_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
                           [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty,
                            llvm_i32_ty],
-                          [IntrArgMemOnly]>;
+                          [IntrWriteMem]>;
 
 class Hexagon_mem_memdisi_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
                           [llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty,
                            llvm_i32_ty],
-                          [IntrArgMemOnly]>;
+                          [IntrWriteMem]>;
 
 class Hexagon_mem_memmemsisi_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
@@ -442,13 +449,13 @@ class Hexagon_mem_memsisisi_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
                           [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty,
                            llvm_i32_ty, llvm_i32_ty],
-                          [IntrArgMemOnly]>;
+                          [IntrWriteMem]>;
 
 class Hexagon_mem_memdisisi_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
                           [llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty,
                            llvm_i32_ty, llvm_i32_ty],
-                          [IntrArgMemOnly]>;
+                          [IntrWriteMem]>;
 
 class Hexagon_v256_v256v256_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
@@ -636,41 +643,6 @@ class Hexagon_df_dfdfdfqi_Intrinsic<string GCCIntSuffix>
 // This one below will not be auto-generated,
 // so make sure, you don't overwrite this one.
 //
-// BUILTIN_INFO(SI_to_SXTHI_asrh,SI_ftype_SI,1)
-//
-def int_hexagon_SI_to_SXTHI_asrh :
-Hexagon_si_si_Intrinsic<"SI_to_SXTHI_asrh">;
-//
-// BUILTIN_INFO_NONCONST(brev_ldd,PTR_ftype_PTRPTRSI,3)
-//
-def int_hexagon_brev_ldd :
-Hexagon_mem_memmemsi_Intrinsic<"brev_ldd">;
-//
-// BUILTIN_INFO_NONCONST(brev_ldw,PTR_ftype_PTRPTRSI,3)
-//
-def int_hexagon_brev_ldw :
-Hexagon_mem_memmemsi_Intrinsic<"brev_ldw">;
-//
-// BUILTIN_INFO_NONCONST(brev_ldh,PTR_ftype_PTRPTRSI,3)
-//
-def int_hexagon_brev_ldh :
-Hexagon_mem_memmemsi_Intrinsic<"brev_ldh">;
-//
-// BUILTIN_INFO_NONCONST(brev_lduh,PTR_ftype_PTRPTRSI,3)
-//
-def int_hexagon_brev_lduh :
-Hexagon_mem_memmemsi_Intrinsic<"brev_lduh">;
-//
-// BUILTIN_INFO_NONCONST(brev_ldb,PTR_ftype_PTRPTRSI,3)
-//
-def int_hexagon_brev_ldb :
-Hexagon_mem_memmemsi_Intrinsic<"brev_ldb">;
-//
-// BUILTIN_INFO_NONCONST(brev_ldub,PTR_ftype_PTRPTRSI,3)
-//
-def int_hexagon_brev_ldub :
-Hexagon_mem_memmemsi_Intrinsic<"brev_ldub">;
-//
 // BUILTIN_INFO_NONCONST(circ_ldd,PTR_ftype_PTRPTRSISI,4)
 //
 def int_hexagon_circ_ldd :
@@ -702,31 +674,6 @@ def int_hexagon_circ_ldub :
 Hexagon_mem_memmemsisi_Intrinsic<"circ_ldub">;
 
 //
-// BUILTIN_INFO_NONCONST(brev_stb,PTR_ftype_PTRSISI,3)
-//
-def int_hexagon_brev_stb :
-Hexagon_mem_memsisi_Intrinsic<"brev_stb">;
-//
-// BUILTIN_INFO_NONCONST(brev_sthhi,PTR_ftype_PTRSISI,3)
-//
-def int_hexagon_brev_sthhi :
-Hexagon_mem_memsisi_Intrinsic<"brev_sthhi">;
-//
-// BUILTIN_INFO_NONCONST(brev_sth,PTR_ftype_PTRSISI,3)
-//
-def int_hexagon_brev_sth :
-Hexagon_mem_memsisi_Intrinsic<"brev_sth">;
-//
-// BUILTIN_INFO_NONCONST(brev_stw,PTR_ftype_PTRSISI,3)
-//
-def int_hexagon_brev_stw :
-Hexagon_mem_memsisi_Intrinsic<"brev_stw">;
-//
-// BUILTIN_INFO_NONCONST(brev_std,PTR_ftype_PTRSISI,3)
-//
-def int_hexagon_brev_std :
-Hexagon_mem_memdisi_Intrinsic<"brev_std">;
-//
 // BUILTIN_INFO_NONCONST(circ_std,PTR_ftype_PTRDISISI,4)
 //
 def int_hexagon_circ_std :
@@ -9300,6 +9247,60 @@ Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentq_128B">;
 def int_hexagon_V6_vmaskedstorentnq_128B :
 Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentnq_128B">;
 
+multiclass Hexagon_custom_circ_ld_Intrinsic<LLVMType ElTy> {
+  def NAME#_pci : Hexagon_NonGCC_Intrinsic<
+    [ElTy, llvm_ptr_ty],
+    [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
+    [IntrArgMemOnly, NoCapture<3>]>;
+  def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
+    [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty],
+    [IntrArgMemOnly, NoCapture<2>]>;
+}
+
+defm int_hexagon_L2_loadrub : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadrb : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadruh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadrh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadri : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadrd : Hexagon_custom_circ_ld_Intrinsic<llvm_i64_ty>;
+
+multiclass Hexagon_custom_circ_st_Intrinsic<LLVMType ElTy> {
+  def NAME#_pci : Hexagon_NonGCC_Intrinsic<
+    [llvm_ptr_ty],
+    [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
+    [IntrArgMemOnly, NoCapture<4>]>;
+  def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
+    [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
+    [IntrArgMemOnly, NoCapture<3>]>;
+}
+
+defm int_hexagon_S2_storerb : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storerh : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storerf : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storeri : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storerd : Hexagon_custom_circ_st_Intrinsic<llvm_i64_ty>;
+
+// The front-end emits the intrinsic call with only two arguments. The third
+// argument from the builtin is already used by front-end to write to memory
+// by generating a store.
+class Hexagon_custom_brev_ld_Intrinsic<LLVMType ElTy>
+ : Hexagon_NonGCC_Intrinsic<
+    [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
+    [IntrReadMem]>;
+
+def int_hexagon_L2_loadrub_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadrb_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadruh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadrh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadri_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadrd_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i64_ty>;
+
+def int_hexagon_S2_storerb_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stb">;
+def int_hexagon_S2_storerh_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sth">;
+def int_hexagon_S2_storerf_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sthhi">;
+def int_hexagon_S2_storeri_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stw">;
+def int_hexagon_S2_storerd_pbr : Hexagon_mem_memdisi_Intrinsic<"brev_std">;
+
 
 ///
 /// HexagonV62 intrinsics
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
index 73622ce9303f..7f694f68969e 100644
--- a/include/llvm/IR/IntrinsicsNVVM.td
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -3884,96 +3884,100 @@ def int_nvvm_match_all_sync_i64p :
 //
 
 // WMMA.LOAD
-class NVVM_WMMA_LD_ALSTS<string Abc, string Layout, string Space,
-                         string Type, LLVMType regty, int WithStride>
+class NVVM_WMMA_LD_GALSTS<string Geometry, string Abc, string Layout,
+                          string Type, LLVMType regty, int WithStride>
   : Intrinsic<!if(!eq(Abc#Type,"cf16"),
                   [regty, regty, regty, regty],
                   [regty, regty, regty, regty,
                    regty, regty, regty, regty]),
-              !if(WithStride, [llvm_ptr_ty, llvm_i32_ty], [llvm_ptr_ty]),
-              [], // Properties must be set during instantiation.
-              "llvm.nvvm.wmma.load."#Abc#".sync."#Layout#".m16n16k16"
-                #Space
-                #!if(WithStride,".stride","")
-                #"."#Type>;
-
-multiclass NVVM_WMMA_LD_ALST<string Abc, string Layout, string Space,
-                           string Type, LLVMType regty> {
-  def _stride: NVVM_WMMA_LD_ALSTS<Abc, Layout, Space, Type, regty, 1>;
-  def NAME   : NVVM_WMMA_LD_ALSTS<Abc, Layout, Space, Type, regty, 0>;
+              !if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
+              [IntrReadMem, IntrArgMemOnly, ReadOnly<0>, NoCapture<0>],
+              "llvm.nvvm.wmma."
+                # Geometry
+                # ".load"
+                # "." # Abc
+                # "." # Layout
+                # !if(WithStride, ".stride", "")
+                # "." # Type>;
+
+multiclass NVVM_WMMA_LD_GALT<string Geometry, string Abc, string Layout,
+                             string Type, LLVMType regty> {
+  def _stride: NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 1>;
+  def NAME   : NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 0>;
 }
 
-multiclass NVVM_WMMA_LD_ALT<string Abc, string Layout,
-                        string Type, LLVMType regty> {
-  defm _global: NVVM_WMMA_LD_ALST<Abc, Layout, ".global", Type, regty>;
-  defm _shared: NVVM_WMMA_LD_ALST<Abc, Layout, ".shared", Type, regty>;
-  defm NAME:    NVVM_WMMA_LD_ALST<Abc, Layout,        "", Type, regty>;
+multiclass NVVM_WMMA_LD_GAT<string Geometry, string Abc,
+                           string Type, LLVMType regty> {
+  defm _row: NVVM_WMMA_LD_GALT<Geometry, Abc, "row", Type, regty>;
+  defm _col: NVVM_WMMA_LD_GALT<Geometry, Abc, "col", Type, regty>;
 }
 
-multiclass NVVM_WMMA_LD_AT<string Abc, string Type, LLVMType regty> {
-  defm _row: NVVM_WMMA_LD_ALT<Abc, "row", Type, regty>;
-  defm _col: NVVM_WMMA_LD_ALT<Abc, "col", Type, regty>;
+multiclass NVVM_WMMA_LD_G<string Geometry> {
+  defm _a_f16: NVVM_WMMA_LD_GAT<Geometry, "a", "f16", llvm_v2f16_ty>;
+  defm _b_f16: NVVM_WMMA_LD_GAT<Geometry, "b", "f16", llvm_v2f16_ty>;
+  defm _c_f16: NVVM_WMMA_LD_GAT<Geometry, "c", "f16", llvm_v2f16_ty>;
+  defm _c_f32: NVVM_WMMA_LD_GAT<Geometry, "c", "f32", llvm_float_ty>;
 }
 
-// For some reason ReadOnly<N> and NoCapture<N> confuses tblgen if they are
-// passed to Intrinsic<> form inside of a multiclass. Setting them globally
-// outside of the multiclass works.
-let IntrProperties = [IntrReadMem, IntrArgMemOnly,
-                      ReadOnly<0>, NoCapture<0>] in {
-  defm int_nvvm_wmma_load_a_f16: NVVM_WMMA_LD_AT<"a", "f16", llvm_v2f16_ty>;
-  defm int_nvvm_wmma_load_b_f16: NVVM_WMMA_LD_AT<"b", "f16", llvm_v2f16_ty>;
-  defm int_nvvm_wmma_load_c_f16: NVVM_WMMA_LD_AT<"c", "f16", llvm_v2f16_ty>;
-  defm int_nvvm_wmma_load_c_f32: NVVM_WMMA_LD_AT<"c", "f32", llvm_float_ty>;
+multiclass NVVM_WMMA_LD {
+  defm _m32n8k16_load: NVVM_WMMA_LD_G<"m32n8k16">;
+  defm _m16n16k16_load: NVVM_WMMA_LD_G<"m16n16k16">;
+  defm _m8n32k16_load: NVVM_WMMA_LD_G<"m8n32k16">;
 }
 
+defm int_nvvm_wmma: NVVM_WMMA_LD;
+
 // WMMA.STORE.D
-class NVVM_WMMA_STD_LSTS<string Layout, string Space,
-                         string Type, LLVMType regty, int WithStride,
-                         // This is only used to create a typed empty array we
-                         // need to pass to !if below.
-                         list<LLVMType>Empty=[]>
+class NVVM_WMMA_STD_GLSTS<string Geometry, string Layout,
+                          string Type, LLVMType regty, int WithStride,
+                          // This is only used to create a typed empty array we
+                          // need to pass to !if below.
+                          list<LLVMType>Empty=[]>
   : Intrinsic<[],
               !listconcat(
-                [llvm_ptr_ty],
+                [llvm_anyptr_ty],
                 !if(!eq(Type,"f16"),
                     [regty, regty, regty, regty],
                     [regty, regty, regty, regty,
                      regty, regty, regty, regty]),
                 !if(WithStride, [llvm_i32_ty], Empty)),
-              [], // Properties must be set during instantiation.
-              "llvm.nvvm.wmma.store.d.sync."#Layout
-                   #".m16n16k16"#Space
-                   #!if(WithStride,".stride","")
-                   #"."#Type>;
-
-multiclass NVVM_WMMA_STD_LST<string Layout, string Space,
-                            string Type, LLVMType regty> {
-  def _stride: NVVM_WMMA_STD_LSTS<Layout, Space, Type, regty, 1>;
-  def NAME:    NVVM_WMMA_STD_LSTS<Layout, Space, Type, regty, 0>;
+              [IntrWriteMem, IntrArgMemOnly, WriteOnly<0>, NoCapture<0>],
+              "llvm.nvvm.wmma."
+                   # Geometry
+                   # ".store.d"
+                   # "." # Layout
+                   # !if(WithStride, ".stride", "")
+                   # "." # Type>;
+
+multiclass NVVM_WMMA_STD_GLT<string Geometry, string Layout,
+                             string Type, LLVMType regty> {
+  def _stride: NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 1>;
+  def NAME:    NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 0>;
 }
 
-multiclass NVVM_WMMA_STD_LT<string Layout, string Type, LLVMType regty> {
-  defm _global: NVVM_WMMA_STD_LST<Layout, ".global", Type, regty>;
-  defm _shared: NVVM_WMMA_STD_LST<Layout, ".shared", Type, regty>;
-  defm    NAME: NVVM_WMMA_STD_LST<Layout,        "", Type, regty>;
+multiclass NVVM_WMMA_STD_GT<string Geometry, string Type, LLVMType regty> {
+  defm _row: NVVM_WMMA_STD_GLT<Geometry, "row", Type, regty>;
+  defm _col: NVVM_WMMA_STD_GLT<Geometry, "col", Type, regty>;
 }
-
-multiclass NVVM_WMMA_STD_T<string Type, LLVMType regty> {
-  defm _row: NVVM_WMMA_STD_LT<"row", Type, regty>;
-  defm _col: NVVM_WMMA_STD_LT<"col", Type, regty>;
+multiclass NVVM_WMMA_STD_G<string Geometry> {
+  defm _d_f16: NVVM_WMMA_STD_GT<Geometry, "f16", llvm_v2f16_ty>;
+  defm _d_f32: NVVM_WMMA_STD_GT<Geometry, "f32", llvm_float_ty>;
 }
 
-let IntrProperties = [IntrWriteMem, IntrArgMemOnly,
-                      WriteOnly<0>, NoCapture<0>] in {
-  defm int_nvvm_wmma_store_d_f16: NVVM_WMMA_STD_T<"f16", llvm_v2f16_ty>;
-  defm int_nvvm_wmma_store_d_f32: NVVM_WMMA_STD_T<"f32", llvm_float_ty>;
+multiclass NVVM_WMMA_STD {
+  defm _m32n8k16_store:  NVVM_WMMA_STD_G<"m32n8k16">;
+  defm _m16n16k16_store: NVVM_WMMA_STD_G<"m16n16k16">;
+  defm _m8n32k16_store:  NVVM_WMMA_STD_G<"m8n32k16">;
 }
 
+defm int_nvvm_wmma: NVVM_WMMA_STD;
+
 // WMMA.MMA
-class NVVM_WMMA_MMA_ABDCS<string ALayout, string BLayout,
-                          string DType, LLVMType d_regty,
-                          string CType, LLVMType c_regty,
-                          string Satfinite = "">
+class NVVM_WMMA_MMA_GABDCS<string Geometry,
+                           string ALayout, string BLayout,
+                           string DType, LLVMType d_regty,
+                           string CType, LLVMType c_regty,
+                           string Satfinite = "">
   : Intrinsic<!if(!eq(DType,"f16"),
                       [d_regty, d_regty, d_regty, d_regty],
                       [d_regty, d_regty, d_regty, d_regty,
@@ -3990,39 +3994,54 @@ class NVVM_WMMA_MMA_ABDCS<string ALayout, string BLayout,
                       [c_regty, c_regty, c_regty, c_regty,
                        c_regty, c_regty, c_regty, c_regty])),
               [IntrNoMem],
-              "llvm.nvvm.wmma.mma.sync."#ALayout#"."#BLayout
-                 #".m16n16k16."#DType#"."#CType#Satfinite>;
-
-multiclass NVVM_WMMA_MMA_ABDC<string ALayout, string BLayout,
-                              string DType, LLVMType d_regty,
-                              string CType, LLVMType c_regty> {
-  def NAME : NVVM_WMMA_MMA_ABDCS<ALayout, BLayout,
-                                 DType, d_regty,
-                                 CType, c_regty>;
-  def _satfinite: NVVM_WMMA_MMA_ABDCS<ALayout, BLayout,
-                                      DType, d_regty,
-                                      CType, c_regty,".satfinite">;
+              "llvm.nvvm.wmma."
+                # Geometry
+                # ".mma"
+                # "." # ALayout
+                # "." # BLayout
+                # "." # DType
+                # "." # CType
+                # Satfinite> {
 }
 
-multiclass NVVM_WMMA_MMA_ABD<string ALayout, string BLayout,
+multiclass NVVM_WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
+                               string DType, LLVMType d_regty,
+                               string CType, LLVMType c_regty> {
+  def NAME : NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
+                                  DType, d_regty, CType, c_regty>;
+  def _satfinite: NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
+                                       DType, d_regty, CType, c_regty,".satfinite">;
+}
+
+multiclass NVVM_WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
                               string DType, LLVMType d_regty> {
-  defm _f16: NVVM_WMMA_MMA_ABDC<ALayout, BLayout, DType, d_regty,
+  defm _f16: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
                                 "f16", llvm_v2f16_ty>;
-  defm _f32: NVVM_WMMA_MMA_ABDC<ALayout, BLayout, DType, d_regty,
+  defm _f32: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
                                 "f32", llvm_float_ty>;
 }
 
-multiclass NVVM_WMMA_MMA_AB<string ALayout, string BLayout> {
-  defm _f16: NVVM_WMMA_MMA_ABD<ALayout, BLayout, "f16", llvm_v2f16_ty>;
-  defm _f32: NVVM_WMMA_MMA_ABD<ALayout, BLayout, "f32", llvm_float_ty>;
+multiclass NVVM_WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
+  defm _f16: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", llvm_v2f16_ty>;
+  defm _f32: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", llvm_float_ty>;
+}
+
+multiclass NVVM_WMMA_MMA_GA<string Geometry, string ALayout> {
+  defm _col: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "col">;
+  defm _row: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "row">;
+}
+
+multiclass NVVM_WMMA_MMA_G<string Geometry> {
+  defm _col: NVVM_WMMA_MMA_GA<Geometry, "col">;
+  defm _row: NVVM_WMMA_MMA_GA<Geometry, "row">;
 }
 
-multiclass NVVM_WMMA_MMA_A<string ALayout> {
-  defm _col: NVVM_WMMA_MMA_AB<ALayout, "col">;
-  defm _row: NVVM_WMMA_MMA_AB<ALayout, "row">;
+multiclass NVVM_WMMA_MMA {
+  defm _m32n8k16_mma : NVVM_WMMA_MMA_G<"m32n8k16">;
+  defm _m16n16k16_mma : NVVM_WMMA_MMA_G<"m16n16k16">;
+  defm _m8n32k16_mma : NVVM_WMMA_MMA_G<"m8n32k16">;
 }
 
-defm int_nvvm_wmma_mma_sync_col: NVVM_WMMA_MMA_A<"col">;
-defm int_nvvm_wmma_mma_sync_row: NVVM_WMMA_MMA_A<"row">;
+defm int_nvvm_wmma : NVVM_WMMA_MMA;
 
 } // let TargetPrefix = "nvvm"
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index 6321bb81b8cb..c4e753af25ca 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -36,8 +36,12 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
 
   // Intrinsics used to generate ctr-based loops. These should only be
   // generated by the PowerPC backend!
+  // The branch intrinsic is marked as NoDuplicate because loop rotation will
+  // attempt to duplicate it forming loops where a block reachable from one
+  // instance of it can contain another.
   def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>;
-  def int_ppc_is_decremented_ctr_nonzero : Intrinsic<[llvm_i1_ty], [], []>;
+  def int_ppc_is_decremented_ctr_nonzero :
+    Intrinsic<[llvm_i1_ty], [], [IntrNoDuplicate]>;
 
   // Intrinsics for [double]word extended forms of divide instructions
   def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
@@ -57,6 +61,29 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
   def int_ppc_bpermd : GCCBuiltin<"__builtin_bpermd">,
                        Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
                                  [IntrNoMem]>;
+
+  def int_ppc_truncf128_round_to_odd
+      : GCCBuiltin<"__builtin_truncf128_round_to_odd">,
+        Intrinsic <[llvm_double_ty], [llvm_f128_ty], [IntrNoMem]>;
+  def int_ppc_sqrtf128_round_to_odd
+      : GCCBuiltin<"__builtin_sqrtf128_round_to_odd">,
+        Intrinsic <[llvm_f128_ty], [llvm_f128_ty], [IntrNoMem]>;
+  def int_ppc_addf128_round_to_odd
+      : GCCBuiltin<"__builtin_addf128_round_to_odd">,
+        Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
+  def int_ppc_subf128_round_to_odd
+      : GCCBuiltin<"__builtin_subf128_round_to_odd">,
+        Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
+  def int_ppc_mulf128_round_to_odd
+      : GCCBuiltin<"__builtin_mulf128_round_to_odd">,
+        Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
+  def int_ppc_divf128_round_to_odd
+      : GCCBuiltin<"__builtin_divf128_round_to_odd">,
+        Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
+  def int_ppc_fmaf128_round_to_odd
+      : GCCBuiltin<"__builtin_fmaf128_round_to_odd">,
+        Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
+
 }
 
 
diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td
index 640ef627bc46..7afc755a1e37 100644
--- a/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -8,19 +8,60 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// \brief This file defines all of the WebAssembly-specific intrinsics.
+/// This file defines all of the WebAssembly-specific intrinsics.
 ///
 //===----------------------------------------------------------------------===//
 
 let TargetPrefix = "wasm" in {  // All intrinsics start with "llvm.wasm.".
 
-// Note that current_memory is not IntrNoMem because it must be sequenced with
-// respect to grow_memory calls.
+// Query the current memory size, and increase the current memory size.
+// Note that memory.size is not IntrNoMem because it must be sequenced with
+// respect to memory.grow calls.
+def int_wasm_memory_size : Intrinsic<[llvm_anyint_ty],
+                                     [llvm_i32_ty],
+                                     [IntrReadMem]>;
+def int_wasm_memory_grow : Intrinsic<[llvm_anyint_ty],
+                                     [llvm_i32_ty, LLVMMatchType<0>],
+                                     []>;
+
+// These are the old names.
+def int_wasm_mem_size : Intrinsic<[llvm_anyint_ty],
+                                  [llvm_i32_ty],
+                                  [IntrReadMem]>;
+def int_wasm_mem_grow : Intrinsic<[llvm_anyint_ty],
+                                  [llvm_i32_ty, LLVMMatchType<0>],
+                                  []>;
+
+// These are the old old names. They also lack the immediate field.
 def int_wasm_current_memory : Intrinsic<[llvm_anyint_ty], [], [IntrReadMem]>;
 def int_wasm_grow_memory : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>;
 
+//===----------------------------------------------------------------------===//
 // Exception handling intrinsics
-def int_wasm_throw: Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [Throws]>;
-def int_wasm_rethrow: Intrinsic<[], [], [Throws]>;
+//===----------------------------------------------------------------------===//
+
+// throw / rethrow
+def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty],
+                               [Throws, IntrNoReturn]>;
+def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>;
+
+// Since wasm does not use landingpad instructions, these instructions return
+// exception pointer and selector values until we lower them in WasmEHPrepare.
+def int_wasm_get_exception : Intrinsic<[llvm_ptr_ty], [llvm_token_ty],
+                                       [IntrHasSideEffects]>;
+def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty],
+                                        [IntrHasSideEffects]>;
+
+// wasm.catch returns the pointer to the exception object caught by wasm 'catch'
+// instruction.
+def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty],
+                               [IntrHasSideEffects]>;
+
+// WebAssembly EH must maintain the landingpads in the order assigned to them
+// by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is
+// used in order to give them the indices in WasmEHPrepare.
+def int_wasm_landingpad_index: Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
 
+// Returns LSDA address of the current function.
+def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 }
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index bd6177c5b3d9..905afc130d8f 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -63,6 +63,12 @@ let TargetPrefix = "x86" in {
               Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>;
 }
 
+// Read processor ID.
+let TargetPrefix = "x86" in {
+  def int_x86_rdpid : GCCBuiltin<"__builtin_ia32_rdpid">,
+              Intrinsic<[llvm_i32_ty], [], []>;
+}
+
 //===----------------------------------------------------------------------===//
 // CET SS
 let TargetPrefix = "x86" in {
@@ -174,12 +180,6 @@ let TargetPrefix = "x86" in {
 
 // Arithmetic ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">,
-              Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_sse_sqrt_ps : GCCBuiltin<"__builtin_ia32_sqrtps">,
-              Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
-                        [IntrNoMem]>;
   def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
                         [IntrNoMem]>;
@@ -211,6 +211,8 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                          llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+  // NOTE: This comparison intrinsic is not used by clang as long as the
+  //       distinction in signaling behaviour is not implemented.
   def int_x86_sse_cmp_ps :
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                          llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
@@ -263,12 +265,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse_cvttss2si64 : GCCBuiltin<"__builtin_ia32_cvttss2si64">,
               Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvtsi2ss : // TODO: Remove this intrinsic.
-              Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvtsi642ss : // TODO: Remove this intrinsic.
-              Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_i64_ty], [IntrNoMem]>;
 
   def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
               Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
@@ -304,12 +300,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // FP arithmetic ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,
-              Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_sse2_sqrt_pd : GCCBuiltin<"__builtin_ia32_sqrtpd">,
-              Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
-                        [IntrNoMem]>;
   def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                          llvm_v2f64_ty], [IntrNoMem]>;
@@ -329,6 +319,8 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                          llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+  // NOTE: This comparison intrinsic is not used by clang as long as the
+  //       distinction in signaling behaviour is not implemented.
   def int_x86_sse2_cmp_pd :
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                          llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
@@ -402,9 +394,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem, Commutative]>;
-  def int_x86_sse2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq128">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty,
-                         llvm_v4i32_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem, Commutative]>;
@@ -468,8 +457,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // Conversion ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
-              Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">,
@@ -488,18 +475,9 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse2_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_cvttsd2si64">,
               Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtsi2sd : // TODO: Remove this intrinsic.
-              Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtsi642sd : // TODO: Remove this intrinsic.
-              Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_i64_ty], [IntrNoMem]>;
   def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                          llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtss2sd : // TODO: Remove this intrinsic.
-              Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
               Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse_cvttpd2pi: GCCBuiltin<"__builtin_ia32_cvttpd2pi">,
@@ -797,13 +775,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                         [IntrNoMem]>;
 }
 
-// Vector multiply
-let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse41_pmuldq          : GCCBuiltin<"__builtin_ia32_pmuldq128">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
-                        [IntrNoMem, Commutative]>;
-}
-
 // Vector insert
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_insertps       : GCCBuiltin<"__builtin_ia32_insertps128">,
@@ -982,11 +953,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
                   llvm_v8f32_ty], [IntrNoMem]>;
 
-  def int_x86_avx_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256">,
-        Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
-  def int_x86_avx_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256">,
-        Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-
   def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
         Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
 
@@ -1033,325 +999,99 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_vpermilvarps256">,
         Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermi2var_d_128 :
-       GCCBuiltin<"__builtin_ia32_vpermi2vard128_mask">,
-        Intrinsic<[llvm_v4i32_ty],
-        [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
-        [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_d_256 :
-        GCCBuiltin<"__builtin_ia32_vpermi2vard256_mask">,
-          Intrinsic<[llvm_v8i32_ty],
-          [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_d_512 :
-        GCCBuiltin<"__builtin_ia32_vpermi2vard512_mask">,
-          Intrinsic<[llvm_v16i32_ty],
-          [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_hi_128 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varhi128_mask">,
-          Intrinsic<[llvm_v8i16_ty],
-          [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_hi_256 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varhi256_mask">,
-          Intrinsic<[llvm_v16i16_ty],
-          [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_hi_512 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varhi512_mask">,
-          Intrinsic<[llvm_v32i16_ty],
-          [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_pd_128 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varpd128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_pd_256 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varpd256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_pd_512 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varpd512_mask">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_ps_128 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varps128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_ps_256 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varps256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_ps_512 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varps512_mask">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_q_128 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varq128_mask">,
-          Intrinsic<[llvm_v2i64_ty],
-          [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_q_256 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varq256_mask">,
-          Intrinsic<[llvm_v4i64_ty],
-          [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermi2var_q_512 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varq512_mask">,
-          Intrinsic<[llvm_v8i64_ty],
-          [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_d_512:
-        GCCBuiltin<"__builtin_ia32_vpermt2vard512_mask">,
-        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                  llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_q_512:
-        GCCBuiltin<"__builtin_ia32_vpermt2varq512_mask">,
-        Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                  llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_ps_512:
-        GCCBuiltin<"__builtin_ia32_vpermt2varps512_mask">,
-        Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty,
-                  llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_pd_512:
-        GCCBuiltin<"__builtin_ia32_vpermt2varpd512_mask">,
-        Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty,
-                  llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_d_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2vard128_mask">,
-          Intrinsic<[llvm_v4i32_ty],
-          [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_d_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2vard128_maskz">,
-          Intrinsic<[llvm_v4i32_ty],
-          [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_d_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2vard256_mask">,
-          Intrinsic<[llvm_v8i32_ty],
-          [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_d_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2vard256_maskz">,
-          Intrinsic<[llvm_v8i32_ty],
-          [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_d_512 :
-        GCCBuiltin<"__builtin_ia32_vpermt2vard512_maskz">,
-          Intrinsic<[llvm_v16i32_ty],
-          [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_hi_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varhi128_mask">,
-          Intrinsic<[llvm_v8i16_ty],
-          [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_hi_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varhi128_maskz">,
-          Intrinsic<[llvm_v8i16_ty],
-          [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_hi_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varhi256_mask">,
-          Intrinsic<[llvm_v16i16_ty],
-          [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_hi_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varhi256_maskz">,
-          Intrinsic<[llvm_v16i16_ty],
-          [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_hi_512 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varhi512_mask">,
-          Intrinsic<[llvm_v32i16_ty],
-          [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_hi_512 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varhi512_maskz">,
-          Intrinsic<[llvm_v32i16_ty],
-          [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_pd_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varpd128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_pd_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varpd128_maskz">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_pd_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varpd256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_pd_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varpd256_maskz">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_pd_512 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varpd512_maskz">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8i64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_ps_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varps128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_ps_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varps128_maskz">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_ps_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varps256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_ps_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varps256_maskz">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_ps_512 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varps512_maskz">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vpermt2var_q_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varq128_mask">,
-          Intrinsic<[llvm_v2i64_ty],
-          [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vpermt2var_q_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varq128_maskz">,
-          Intrinsic<[llvm_v2i64_ty],
-          [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_d_128 :
+       GCCBuiltin<"__builtin_ia32_vpermi2vard128">,
+       Intrinsic<[llvm_v4i32_ty],
+                 [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermt2var_q_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varq256_mask">,
-          Intrinsic<[llvm_v4i64_ty],
-          [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_d_256 :
+        GCCBuiltin<"__builtin_ia32_vpermi2vard256">,
+        Intrinsic<[llvm_v8i32_ty],
+                  [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_vpermt2var_q_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varq256_maskz">,
-          Intrinsic<[llvm_v4i64_ty],
-          [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_d_512 :
+        GCCBuiltin<"__builtin_ia32_vpermi2vard512">,
+        Intrinsic<[llvm_v16i32_ty],
+                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty],
+                  [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_vpermt2var_q_512 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varq512_maskz">,
-          Intrinsic<[llvm_v8i64_ty],
-          [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_hi_128 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varhi128">,
+        Intrinsic<[llvm_v8i16_ty],
+                  [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermi2var_qi_128 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varqi128_mask">,
-          Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
-          llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_hi_256 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varhi256">,
+        Intrinsic<[llvm_v16i16_ty],
+                  [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty],
+                  [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermt2var_qi_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varqi128_mask">,
-          Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
-          llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_hi_512 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varhi512">,
+        Intrinsic<[llvm_v32i16_ty],
+                  [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty],
+                  [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_vpermt2var_qi_128 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varqi128_maskz">,
-          Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
-          llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_pd_128 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varpd128">,
+        Intrinsic<[llvm_v2f64_ty],
+                  [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_vpermi2var_pd_256 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varpd256">,
+        Intrinsic<[llvm_v4f64_ty],
+                  [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_vpermi2var_pd_512 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varpd512">,
+        Intrinsic<[llvm_v8f64_ty],
+                  [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_vpermi2var_ps_128 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varps128">,
+        Intrinsic<[llvm_v4f32_ty],
+                  [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_vpermi2var_ps_256 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varps256">,
+        Intrinsic<[llvm_v8f32_ty],
+                  [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_vpermi2var_ps_512 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varps512">,
+        Intrinsic<[llvm_v16f32_ty],
+                  [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty],
+                  [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermi2var_qi_256 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varqi256_mask">,
-          Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
-          llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_q_128 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varq128">,
+        Intrinsic<[llvm_v2i64_ty],
+                  [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermt2var_qi_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varqi256_mask">,
-          Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
-          llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_q_256 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varq256">,
+        Intrinsic<[llvm_v4i64_ty],
+                  [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_vpermt2var_qi_256 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varqi256_maskz">,
-          Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
-          llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_q_512 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varq512">,
+        Intrinsic<[llvm_v8i64_ty],
+                  [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermi2var_qi_512 :
-        GCCBuiltin<"__builtin_ia32_vpermi2varqi512_mask">,
-          Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
-          llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_qi_128 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varqi128">,
+        Intrinsic<[llvm_v16i8_ty],
+                  [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermt2var_qi_512 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varqi512_mask">,
-          Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
-          llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_qi_256 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varqi256">,
+        Intrinsic<[llvm_v32i8_ty],
+                  [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_vpermt2var_qi_512 :
-        GCCBuiltin<"__builtin_ia32_vpermt2varqi512_maskz">,
-          Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
-          llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_vpermi2var_qi_512 :
+        GCCBuiltin<"__builtin_ia32_vpermi2varqi512">,
+        Intrinsic<[llvm_v64i8_ty],
+                  [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
 
   def int_x86_avx512_vpermilvar_pd_512 :
         GCCBuiltin<"__builtin_ia32_vpermilvarpd512">,
@@ -1450,8 +1190,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // Vector convert
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">,
-        Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
   def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
   def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
@@ -1512,29 +1250,23 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
                   llvm_v4i64_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_fpclass_pd_128 :
-         GCCBuiltin<"__builtin_ia32_fpclasspd128_mask">,
-          Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty],
+  def int_x86_avx512_fpclass_pd_128 :
+          Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_i32_ty],
           [IntrNoMem]>;
-  def int_x86_avx512_mask_fpclass_pd_256 :
-         GCCBuiltin<"__builtin_ia32_fpclasspd256_mask">,
-          Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_i8_ty],
+  def int_x86_avx512_fpclass_pd_256 :
+          Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_i32_ty],
           [IntrNoMem]>;
-  def int_x86_avx512_mask_fpclass_pd_512 :
-         GCCBuiltin<"__builtin_ia32_fpclasspd512_mask">,
-          Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_i8_ty],
+  def int_x86_avx512_fpclass_pd_512 :
+          Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_i32_ty],
           [IntrNoMem]>;
-  def int_x86_avx512_mask_fpclass_ps_128 :
-         GCCBuiltin<"__builtin_ia32_fpclassps128_mask">,
-          Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty],
+  def int_x86_avx512_fpclass_ps_128 :
+          Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_i32_ty],
           [IntrNoMem]>;
-  def int_x86_avx512_mask_fpclass_ps_256 :
-         GCCBuiltin<"__builtin_ia32_fpclassps256_mask">,
-          Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_i8_ty],
+  def int_x86_avx512_fpclass_ps_256 :
+          Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_i32_ty],
           [IntrNoMem]>;
-  def int_x86_avx512_mask_fpclass_ps_512 :
-         GCCBuiltin<"__builtin_ia32_fpclassps512_mask">,
-          Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty],
+  def int_x86_avx512_fpclass_ps_512 :
+          Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_i32_ty],
           [IntrNoMem]>;
   def int_x86_avx512_mask_fpclass_sd :
          GCCBuiltin<"__builtin_ia32_fpclasssd_mask">,
@@ -1600,11 +1332,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_maskstoreps256">,
         Intrinsic<[], [llvm_ptr_ty,
                   llvm_v8i32_ty, llvm_v8f32_ty], [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_store_ss :
-        GCCBuiltin<"__builtin_ia32_storess_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
 }
 
 // BITALG bits shuffle
@@ -1661,12 +1388,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">,
               Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
                          llvm_v16i16_ty], [IntrNoMem, Commutative]>;
-  def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">,
-              Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
-                         llvm_v8i32_ty], [IntrNoMem, Commutative]>;
-  def int_x86_avx2_pmul_dq : GCCBuiltin<"__builtin_ia32_pmuldq256">,
-              Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
-                         llvm_v8i32_ty], [IntrNoMem, Commutative]>;
   def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
                          llvm_v16i16_ty], [IntrNoMem, Commutative]>;
@@ -1870,15 +1591,9 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">,
               Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
                          llvm_v16i16_ty], [IntrNoMem, Commutative]>;
-  def int_x86_avx512_mask_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128_mask">,
-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                     llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pmul_hr_sw_256 : GCCBuiltin<"__builtin_ia32_pmulhrsw256_mask">,
-              Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
-                     llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pmul_hr_sw_512 : GCCBuiltin<"__builtin_ia32_pmulhrsw512_mask">,
-              Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
-                     llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pmul_hr_sw_512 : GCCBuiltin<"__builtin_ia32_pmulhrsw512">,
+              Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+                         llvm_v32i16_ty], [IntrNoMem, Commutative]>;
 }
 
 // Vector blend
@@ -2025,81 +1740,81 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
                         [IntrNoMem]>;
 
-  def int_x86_avx512_mask_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128_mask">,
+  def int_x86_avx512_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prorv_d_256 : GCCBuiltin<"__builtin_ia32_prorvd256_mask">,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prorv_d_256 : GCCBuiltin<"__builtin_ia32_prorvd256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prorv_d_512 : GCCBuiltin<"__builtin_ia32_prorvd512_mask">,
+                         llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prorv_d_512 : GCCBuiltin<"__builtin_ia32_prorvd512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prorv_q_128 : GCCBuiltin<"__builtin_ia32_prorvq128_mask">,
+                         llvm_v16i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prorv_q_128 : GCCBuiltin<"__builtin_ia32_prorvq128">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
-                         llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prorv_q_256 : GCCBuiltin<"__builtin_ia32_prorvq256_mask">,
+                         llvm_v2i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_prorv_q_256 : GCCBuiltin<"__builtin_ia32_prorvq256">,
               Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
-                         llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prorv_q_512 : GCCBuiltin<"__builtin_ia32_prorvq512_mask">,
+                         llvm_v4i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_prorv_q_512 : GCCBuiltin<"__builtin_ia32_prorvq512">,
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                         llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+                         llvm_v8i64_ty], [IntrNoMem]>;
 
-   def int_x86_avx512_mask_prol_d_128 : GCCBuiltin<"__builtin_ia32_prold128_mask">,
+   def int_x86_avx512_prol_d_128 : GCCBuiltin<"__builtin_ia32_prold128">,
               Intrinsic<[llvm_v4i32_ty] , [llvm_v4i32_ty,
-                         llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prol_d_256 : GCCBuiltin<"__builtin_ia32_prold256_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prol_d_256 : GCCBuiltin<"__builtin_ia32_prold256">,
               Intrinsic<[llvm_v8i32_ty] , [llvm_v8i32_ty,
-                         llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prol_d_512 : GCCBuiltin<"__builtin_ia32_prold512_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prol_d_512 : GCCBuiltin<"__builtin_ia32_prold512">,
               Intrinsic<[llvm_v16i32_ty] , [llvm_v16i32_ty,
-                         llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prol_q_128 : GCCBuiltin<"__builtin_ia32_prolq128_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prol_q_128 : GCCBuiltin<"__builtin_ia32_prolq128">,
               Intrinsic<[llvm_v2i64_ty] , [llvm_v2i64_ty,
-                         llvm_i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prol_q_256 : GCCBuiltin<"__builtin_ia32_prolq256_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prol_q_256 : GCCBuiltin<"__builtin_ia32_prolq256">,
               Intrinsic<[llvm_v4i64_ty] , [llvm_v4i64_ty,
-                         llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prol_q_512 : GCCBuiltin<"__builtin_ia32_prolq512_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prol_q_512 : GCCBuiltin<"__builtin_ia32_prolq512">,
               Intrinsic<[llvm_v8i64_ty] , [llvm_v8i64_ty,
-                         llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+                         llvm_i32_ty], [IntrNoMem]>;
 
 
-  def int_x86_avx512_mask_prolv_d_128 : GCCBuiltin<"__builtin_ia32_prolvd128_mask">,
+  def int_x86_avx512_prolv_d_128 : GCCBuiltin<"__builtin_ia32_prolvd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prolv_d_256 : GCCBuiltin<"__builtin_ia32_prolvd256_mask">,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prolv_d_256 : GCCBuiltin<"__builtin_ia32_prolvd256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prolv_d_512 : GCCBuiltin<"__builtin_ia32_prolvd512_mask">,
+                         llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prolv_d_512 : GCCBuiltin<"__builtin_ia32_prolvd512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prolv_q_128 : GCCBuiltin<"__builtin_ia32_prolvq128_mask">,
+                         llvm_v16i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_prolv_q_128 : GCCBuiltin<"__builtin_ia32_prolvq128">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
-                         llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prolv_q_256 : GCCBuiltin<"__builtin_ia32_prolvq256_mask">,
+                         llvm_v2i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_prolv_q_256 : GCCBuiltin<"__builtin_ia32_prolvq256">,
               Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
-                         llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_prolv_q_512 : GCCBuiltin<"__builtin_ia32_prolvq512_mask">,
+                         llvm_v4i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_prolv_q_512 : GCCBuiltin<"__builtin_ia32_prolvq512">,
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                         llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pror_d_128 : GCCBuiltin<"__builtin_ia32_prord128_mask">,
+                         llvm_v8i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_pror_d_128 : GCCBuiltin<"__builtin_ia32_prord128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
-                         llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pror_d_256 : GCCBuiltin<"__builtin_ia32_prord256_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pror_d_256 : GCCBuiltin<"__builtin_ia32_prord256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
-                         llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pror_d_512 : GCCBuiltin<"__builtin_ia32_prord512_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pror_d_512 : GCCBuiltin<"__builtin_ia32_prord512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                         llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pror_q_128 : GCCBuiltin<"__builtin_ia32_prorq128_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pror_q_128 : GCCBuiltin<"__builtin_ia32_prorq128">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
-                         llvm_i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pror_q_256 : GCCBuiltin<"__builtin_ia32_prorq256_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pror_q_256 : GCCBuiltin<"__builtin_ia32_prorq256">,
               Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
-                         llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pror_q_512 : GCCBuiltin<"__builtin_ia32_prorq512_mask">,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pror_q_512 : GCCBuiltin<"__builtin_ia32_prorq512">,
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                         llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+                         llvm_i32_ty], [IntrNoMem]>;
 
 }
 
@@ -2188,754 +1903,115 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // FMA3 and FMA4
 
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_fma_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss3">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd3">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma4_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">,
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">,
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
-                        [IntrNoMem]>;
-
-  def int_x86_fma_vfmsub_ss : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmsub_sd : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmsub_ps : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmsub_pd : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmsub_ps_256 : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmsub_pd_256 : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmadd_ss : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmadd_sd : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmadd_ps : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmadd_pd : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmadd_ps_256 : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmadd_pd_256 : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmsub_ss : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmsub_sd : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmsub_ps : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmsub_pd : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmsub_ps_256 : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfnmsub_pd_256 : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmaddsub_ps_256 :
-               GCCBuiltin<"__builtin_ia32_vfmaddsubps256">,
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmaddsub_pd_256 :
-              GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">,
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmsubadd_ps : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmsubadd_pd : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmsubadd_ps_256 : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_vfmsubadd_pd_256 : // TODO: remove this intrinsic
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
-                        [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmadd_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmadd_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask3">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmadd_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddpd128_maskz">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmadd_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmadd_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask3">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmadd_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddpd256_maskz">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmadd_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmadd_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask3">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmadd_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddpd512_maskz">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmadd_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmadd_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddps128_mask3">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmadd_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddps128_maskz">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmadd_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmadd_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddps256_mask3">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmadd_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddps256_maskz">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmadd_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmadd_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddps512_mask3">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmadd_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddps512_maskz">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmaddsub_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmaddsub_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask3">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmaddsub_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_maskz">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmaddsub_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmaddsub_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask3">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmaddsub_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_maskz">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmaddsub_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmaddsub_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask3">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmaddsub_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_maskz">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmaddsub_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmaddsub_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask3">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmaddsub_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubps128_maskz">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmaddsub_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmaddsub_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask3">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmaddsub_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubps256_maskz">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmaddsub_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmaddsub_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask3">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmaddsub_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfmaddsubps512_maskz">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-
-  def int_x86_avx512_mask_vfmadd_sd :
-         GCCBuiltin<"__builtin_ia32_vfmaddsd3_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfmadd_ss :
-         GCCBuiltin<"__builtin_ia32_vfmaddss3_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmadd_sd :
-         GCCBuiltin<"__builtin_ia32_vfmaddsd3_maskz">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_vfmadd_ss :
-         GCCBuiltin<"__builtin_ia32_vfmaddss3_maskz">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmadd_sd :
-         GCCBuiltin<"__builtin_ia32_vfmaddsd3_mask3">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmadd_ss :
-         GCCBuiltin<"__builtin_ia32_vfmaddss3_mask3">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsub_sd :
-         GCCBuiltin<"__builtin_ia32_vfmsubsd3_mask3">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsub_ss :
-         GCCBuiltin<"__builtin_ia32_vfmsubss3_mask3">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsub_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask3">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsub_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask3">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsub_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask3">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsub_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfmsubps128_mask3">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsub_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfmsubps256_mask3">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsub_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfmsubps512_mask3">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsubadd_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask3">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsubadd_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask3">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsubadd_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask3">,
+  def int_x86_avx512_vfmadd_pd_512 :
           Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsubadd_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask3">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask3_vfmsubadd_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask3">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfmsubadd_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask3">,
+  def int_x86_avx512_vfmadd_ps_512 :
           Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfnmadd_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfnmadd_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vfnmadd_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
+  // TODO: Can we use 2 vfmadds+shufflevector?
+  def int_x86_avx512_vfmaddsub_pd_512 :
           Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfnmadd_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vfnmadd_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfnmadd_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">,
+  def int_x86_avx512_vfmaddsub_ps_512 :
           Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfnmsub_sd :
-         GCCBuiltin<"__builtin_ia32_vfnmsubsd3_mask3">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfnmsub_ss :
-         GCCBuiltin<"__builtin_ia32_vfnmsubss3_mask3">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfnmsub_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfnmsub_pd_128 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask3">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfnmsub_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfnmsub_pd_256 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask3">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfnmsub_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfnmsub_pd_512 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask3">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfnmsub_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfnmsub_ps_128 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask3">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_vfnmsub_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfnmsub_ps_256 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask3">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vfnmsub_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask3_vfnmsub_ps_512 :
-         GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask3">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
-          llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vfmadd_f64 :
+          Intrinsic<[llvm_double_ty],
+                    [llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vfmadd_f32 :
+          Intrinsic<[llvm_float_ty],
+                    [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpmadd52h_uq_128 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52huq128_mask">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                         llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpmadd52h_uq_128 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52huq128_maskz">,
+  def int_x86_avx512_vpmadd52h_uq_128 :
+              GCCBuiltin<"__builtin_ia32_vpmadd52huq128">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                         llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpmadd52l_uq_128 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52luq128_mask">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                         llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpmadd52l_uq_128 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52luq128_maskz">,
+                         llvm_v2i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpmadd52l_uq_128 :
+              GCCBuiltin<"__builtin_ia32_vpmadd52luq128">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                         llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpmadd52h_uq_256 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52huq256_mask">,
-              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
-                         llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpmadd52h_uq_256 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52huq256_maskz">,
-              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
-                         llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpmadd52l_uq_256 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52luq256_mask">,
+                         llvm_v2i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpmadd52h_uq_256 :
+              GCCBuiltin<"__builtin_ia32_vpmadd52huq256">,
               Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
-                         llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpmadd52l_uq_256 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52luq256_maskz">,
+                         llvm_v4i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpmadd52l_uq_256 :
+              GCCBuiltin<"__builtin_ia32_vpmadd52luq256">,
               Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
-                         llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpmadd52h_uq_512 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52huq512_mask">,
+                         llvm_v4i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpmadd52h_uq_512 :
+              GCCBuiltin<"__builtin_ia32_vpmadd52huq512">,
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
-                         llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpmadd52h_uq_512 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52huq512_maskz">,
+                         llvm_v8i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpmadd52l_uq_512 :
+              GCCBuiltin<"__builtin_ia32_vpmadd52luq512">,
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
-                         llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpmadd52l_uq_512 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52luq512_mask">,
-              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
-                         llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpmadd52l_uq_512 :
-              GCCBuiltin<"__builtin_ia32_vpmadd52luq512_maskz">,
-              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
-                         llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+                         llvm_v8i64_ty], [IntrNoMem]>;
 }
 
 // VNNI
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx512_mask_vpdpbusd_128 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusd128_mask">,
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpbusd_128 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusd128_maskz">,
+  def int_x86_avx512_vpdpbusd_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpdpbusd_256 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusd256_mask">,
-              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpbusd_256 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusd256_maskz">,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpdpbusd_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusd256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpdpbusd_512 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusd512_mask">,
-              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpbusd_512 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusd512_maskz">,
+                         llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpdpbusd_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusd512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+                         llvm_v16i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpdpbusds_128 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusds128_mask">,
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpbusds_128 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusds128_maskz">,
+  def int_x86_avx512_vpdpbusds_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusds128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpdpbusds_256 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusds256_mask">,
-              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpbusds_256 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusds256_maskz">,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpdpbusds_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusds256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpdpbusds_512 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusds512_mask">,
-              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpbusds_512 :
-              GCCBuiltin<"__builtin_ia32_vpdpbusds512_maskz">,
+                         llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpdpbusds_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusds512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+                         llvm_v16i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpdpwssd_128 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssd128_mask">,
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpwssd_128 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssd128_maskz">,
+  def int_x86_avx512_vpdpwssd_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpdpwssd_256 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssd256_mask">,
-              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpwssd_256 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssd256_maskz">,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpdpwssd_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssd256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpdpwssd_512 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssd512_mask">,
-              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpwssd_512 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssd512_maskz">,
+                         llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpdpwssd_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssd512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+                         llvm_v16i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpdpwssds_128 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssds128_mask">,
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpwssds_128 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssds128_maskz">,
+  def int_x86_avx512_vpdpwssds_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssds128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpdpwssds_256 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssds256_mask">,
-              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpwssds_256 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssds256_maskz">,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpdpwssds_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssds256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpdpwssds_512 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssds512_mask">,
-              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_maskz_vpdpwssds_512 :
-              GCCBuiltin<"__builtin_ia32_vpdpwssds512_maskz">,
+                         llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpdpwssds_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssds512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+                         llvm_v16i32_ty], [IntrNoMem]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -3050,62 +2126,62 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               GCCBuiltin<"__builtin_ia32_vpmacsdd">,
               Intrinsic<[llvm_v4i32_ty],
                         [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacsdqh :
               GCCBuiltin<"__builtin_ia32_vpmacsdqh">,
               Intrinsic<[llvm_v2i64_ty],
                         [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacsdql :
               GCCBuiltin<"__builtin_ia32_vpmacsdql">,
               Intrinsic<[llvm_v2i64_ty],
                         [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacssdd :
               GCCBuiltin<"__builtin_ia32_vpmacssdd">,
               Intrinsic<[llvm_v4i32_ty],
                         [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacssdqh :
               GCCBuiltin<"__builtin_ia32_vpmacssdqh">,
               Intrinsic<[llvm_v2i64_ty],
                         [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacssdql :
               GCCBuiltin<"__builtin_ia32_vpmacssdql">,
               Intrinsic<[llvm_v2i64_ty],
                         [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacsswd :
               GCCBuiltin<"__builtin_ia32_vpmacsswd">,
               Intrinsic<[llvm_v4i32_ty],
                         [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacssww :
               GCCBuiltin<"__builtin_ia32_vpmacssww">,
               Intrinsic<[llvm_v8i16_ty],
                         [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacswd :
               GCCBuiltin<"__builtin_ia32_vpmacswd">,
               Intrinsic<[llvm_v4i32_ty],
                         [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmacsww :
               GCCBuiltin<"__builtin_ia32_vpmacsww">,
               Intrinsic<[llvm_v8i16_ty],
                         [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmadcsswd :
               GCCBuiltin<"__builtin_ia32_vpmadcsswd">,
               Intrinsic<[llvm_v4i32_ty],
                         [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpmadcswd :
               GCCBuiltin<"__builtin_ia32_vpmadcswd">,
               Intrinsic<[llvm_v4i32_ty],
                         [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, Commutative]>;
   def int_x86_xop_vpperm :
               GCCBuiltin<"__builtin_ia32_vpperm">,
               Intrinsic<[llvm_v16i8_ty],
@@ -3383,48 +2459,42 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 }
 // Permute
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx512_mask_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256_mask">,
+  def int_x86_avx512_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256">,
               Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
-                        llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512_mask">,
+                        llvm_v4i64_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512">,
               Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
-                        llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256_mask">,
+                        llvm_v8i64_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256">,
               Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
-                        llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512_mask">,
+                        llvm_v4i64_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512">,
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                        llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128_mask">,
+                        llvm_v8i64_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
-                        llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256_mask">,
+                        llvm_v8i16_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256">,
               Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
-                        llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512_mask">,
+                        llvm_v16i16_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512">,
               Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
-                        llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128_mask">,
+                        llvm_v32i16_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128">,
               Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
-                        llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256_mask">,
+                        llvm_v16i8_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256">,
               Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
-                        llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512_mask">,
+                        llvm_v32i8_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512">,
               Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
-                        llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_sf_256 : GCCBuiltin<"__builtin_ia32_permvarsf256_mask">,
-              Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
-                        llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512_mask">,
+                        llvm_v64i8_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512">,
               Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
-                        llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_si_256 : GCCBuiltin<"__builtin_ia32_permvarsi256_mask">,
-              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
-                        llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],  [IntrNoMem]>;
-  def int_x86_avx512_mask_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512_mask">,
+                        llvm_v16i32_ty],  [IntrNoMem]>;
+  def int_x86_avx512_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                        llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],  [IntrNoMem]>;
+                        llvm_v16i32_ty],  [IntrNoMem]>;
 }
 // Pack ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
@@ -3717,35 +2787,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 //===----------------------------------------------------------------------===//
 // AVX512
 
-// Mask ops
-let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  // Mask instructions
-  // 16-bit mask
-  def int_x86_avx512_kand_w : // TODO: remove this intrinsic
-              Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
-                         [IntrNoMem]>;
-  def int_x86_avx512_kandn_w : // TODO: remove this intrinsic
-              Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
-                         [IntrNoMem]>;
-  def int_x86_avx512_knot_w : // TODO: remove this intrinsic
-              Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_kor_w : // TODO: remove this intrinsic
-              Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
-                         [IntrNoMem]>;
-  def int_x86_avx512_kxor_w : // TODO: remove this intrinsic
-              Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
-                         [IntrNoMem]>;
-  def int_x86_avx512_kxnor_w : // TODO: remove this intrinsic
-              Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
-                         [IntrNoMem]>;
-  def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
-              Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
-                        [IntrNoMem]>;
-  def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">,
-              Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
-                        [IntrNoMem]>;
-}
-
 // Conversion ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
@@ -3770,9 +2811,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">,
               Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd32">,
-              Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                          llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -3801,35 +2839,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                          llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_cvtb2mask_128 : GCCBuiltin<"__builtin_ia32_cvtb2mask128">,
-              Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtb2mask_256 : GCCBuiltin<"__builtin_ia32_cvtb2mask256">,
-              Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtb2mask_512 : GCCBuiltin<"__builtin_ia32_cvtb2mask512">,
-              Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_cvtw2mask_128 : GCCBuiltin<"__builtin_ia32_cvtw2mask128">,
-              Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtw2mask_256 : GCCBuiltin<"__builtin_ia32_cvtw2mask256">,
-              Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtw2mask_512 : GCCBuiltin<"__builtin_ia32_cvtw2mask512">,
-              Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_cvtd2mask_128 : GCCBuiltin<"__builtin_ia32_cvtd2mask128">,
-              Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtd2mask_256 : GCCBuiltin<"__builtin_ia32_cvtd2mask256">,
-              Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtd2mask_512 : GCCBuiltin<"__builtin_ia32_cvtd2mask512">,
-              Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_cvtq2mask_128 : GCCBuiltin<"__builtin_ia32_cvtq2mask128">,
-              Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtq2mask_256 : GCCBuiltin<"__builtin_ia32_cvtq2mask256">,
-              Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtq2mask_512 : GCCBuiltin<"__builtin_ia32_cvtq2mask512">,
-              Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>;
-
 }
 
 // Pack ops.
@@ -3850,18 +2859,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // Vector convert
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx512_mask_cvtdq2ps_128 :
-        GCCBuiltin<"__builtin_ia32_cvtdq2ps128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4i32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtdq2ps_256 :
-        GCCBuiltin<"__builtin_ia32_cvtdq2ps256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8i32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvtdq2ps_512 :
         GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">,
           Intrinsic<[llvm_v16f32_ty],
@@ -3874,24 +2871,12 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v2f64_ty, llvm_v4i32_ty,  llvm_i8_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvtpd2dq_256 :
-        GCCBuiltin<"__builtin_ia32_cvtpd2dq256_mask">,
-          Intrinsic<[llvm_v4i32_ty],
-          [llvm_v4f64_ty, llvm_v4i32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvtpd2dq_512 :
         GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
           Intrinsic<[llvm_v8i32_ty],
           [llvm_v8f64_ty, llvm_v8i32_ty,  llvm_i8_ty,  llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvtpd2ps_256 :
-        GCCBuiltin<"__builtin_ia32_cvtpd2ps256_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4f64_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvtpd2ps_512 :
         GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">,
           Intrinsic<[llvm_v8f32_ty],
@@ -3988,18 +2973,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v16f32_ty, llvm_v16i32_ty,  llvm_i16_ty,  llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvtps2pd_128 :
-        GCCBuiltin<"__builtin_ia32_cvtps2pd128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v4f32_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtps2pd_256 :
-        GCCBuiltin<"__builtin_ia32_cvtps2pd256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4f32_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvtps2pd_512 :
         GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">,
           Intrinsic<[llvm_v8f64_ty],
@@ -4060,18 +3033,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v8f32_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvtqq2pd_128 :
-        GCCBuiltin<"__builtin_ia32_cvtqq2pd128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2i64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtqq2pd_256 :
-        GCCBuiltin<"__builtin_ia32_cvtqq2pd256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4i64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvtqq2pd_512 :
         GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">,
           Intrinsic<[llvm_v8f64_ty],
@@ -4102,12 +3063,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v2f64_ty, llvm_v4i32_ty,  llvm_i8_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvttpd2dq_256 :
-        GCCBuiltin<"__builtin_ia32_cvttpd2dq256_mask">,
-          Intrinsic<[llvm_v4i32_ty],
-          [llvm_v4f64_ty, llvm_v4i32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvttpd2dq_512 :
         GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">,
           Intrinsic<[llvm_v8i32_ty],
@@ -4168,18 +3123,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v8f64_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvttps2dq_128 :
-        GCCBuiltin<"__builtin_ia32_cvttps2dq128_mask">,
-          Intrinsic<[llvm_v4i32_ty],
-          [llvm_v4f32_ty, llvm_v4i32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvttps2dq_256 :
-        GCCBuiltin<"__builtin_ia32_cvttps2dq256_mask">,
-          Intrinsic<[llvm_v8i32_ty],
-          [llvm_v8f32_ty, llvm_v8i32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvttps2dq_512 :
         GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">,
           Intrinsic<[llvm_v16i32_ty],
@@ -4240,36 +3183,12 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v8f32_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvtudq2ps_128 :
-        GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4i32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtudq2ps_256 :
-        GCCBuiltin<"__builtin_ia32_cvtudq2ps256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8i32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvtudq2ps_512 :
         GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">,
           Intrinsic<[llvm_v16f32_ty],
           [llvm_v16i32_ty, llvm_v16f32_ty,  llvm_i16_ty,  llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvtuqq2pd_128 :
-        GCCBuiltin<"__builtin_ia32_cvtuqq2pd128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-          [llvm_v2i64_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtuqq2pd_256 :
-        GCCBuiltin<"__builtin_ia32_cvtuqq2pd256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-          [llvm_v4i64_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvtuqq2pd_512 :
         GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">,
           Intrinsic<[llvm_v8f64_ty],
@@ -4352,13 +3271,6 @@ def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mas
 
 // Vector load with broadcast
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  // TODO: Remove the broadcast intrinsics with no gcc builtin and autoupgrade
-  def int_x86_avx512_vbroadcast_ss_512 :
-        Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
-
-  def int_x86_avx512_vbroadcast_sd_512 :
-        Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
-
    def int_x86_avx512_broadcastmw_512 :
           GCCBuiltin<"__builtin_ia32_broadcastmw512">,
           Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>;
@@ -4382,42 +3294,43 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // Arithmetic ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
-  def int_x86_avx512_mask_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512_mask">,
+  def int_x86_avx512_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512_mask">,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem]>;
 
   def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
@@ -4505,31 +3418,17 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
                     llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss_round_mask">,
+  def int_x86_avx512_mask_sqrt_ss :
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
                                     llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd_round_mask">,
+  def int_x86_avx512_mask_sqrt_sd :
         Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
                                     llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">,
-        Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                                    llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256_mask">,
-        Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
-                                    llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">,
-        Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                                    llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_sqrt_ps_128 : GCCBuiltin<"__builtin_ia32_sqrtps128_mask">,
-        Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                                     llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256_mask">,
-        Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
-                                     llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">,
-        Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                                     llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_sqrt_pd_512 :
+        Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_sqrt_ps_512 :
+        Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_fixupimm_pd_128 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">,
           Intrinsic<[llvm_v2f64_ty],
@@ -4778,148 +3677,105 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 }
 // Integer arithmetic ops
 let TargetPrefix = "x86" in {
-  def int_x86_avx512_mask_padds_b_128 : GCCBuiltin<"__builtin_ia32_paddsb128_mask">,
+  def int_x86_avx512_mask_padds_b_128 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
                      llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_padds_b_256 : GCCBuiltin<"__builtin_ia32_paddsb256_mask">,
+  def int_x86_avx512_mask_padds_b_256 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
                      llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512_mask">,
           Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
                      llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_padds_w_128 : GCCBuiltin<"__builtin_ia32_paddsw128_mask">,
+  def int_x86_avx512_mask_padds_w_128 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                      llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_padds_w_256 : GCCBuiltin<"__builtin_ia32_paddsw256_mask">,
+  def int_x86_avx512_mask_padds_w_256 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
                      llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512_mask">,
           Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
                      llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_paddus_b_128 : GCCBuiltin<"__builtin_ia32_paddusb128_mask">,
+  def int_x86_avx512_mask_paddus_b_128 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
                      llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_paddus_b_256 : GCCBuiltin<"__builtin_ia32_paddusb256_mask">,
+  def int_x86_avx512_mask_paddus_b_256 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
                      llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_paddus_b_512 : GCCBuiltin<"__builtin_ia32_paddusb512_mask">,
           Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
                      llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_paddus_w_128 : GCCBuiltin<"__builtin_ia32_paddusw128_mask">,
+  def int_x86_avx512_mask_paddus_w_128 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                      llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_paddus_w_256 : GCCBuiltin<"__builtin_ia32_paddusw256_mask">,
+  def int_x86_avx512_mask_paddus_w_256 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
                      llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_paddus_w_512 : GCCBuiltin<"__builtin_ia32_paddusw512_mask">,
           Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
                      llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psubs_b_128 : GCCBuiltin<"__builtin_ia32_psubsb128_mask">,
+  def int_x86_avx512_mask_psubs_b_128 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
                      llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psubs_b_256 : GCCBuiltin<"__builtin_ia32_psubsb256_mask">,
+  def int_x86_avx512_mask_psubs_b_256 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
                      llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512_mask">,
           Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
                      llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psubs_w_128 : GCCBuiltin<"__builtin_ia32_psubsw128_mask">,
+  def int_x86_avx512_mask_psubs_w_128 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                      llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psubs_w_256 : GCCBuiltin<"__builtin_ia32_psubsw256_mask">,
+  def int_x86_avx512_mask_psubs_w_256 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
                      llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512_mask">,
           Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
                      llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psubus_b_128 : GCCBuiltin<"__builtin_ia32_psubusb128_mask">,
+  def int_x86_avx512_mask_psubus_b_128 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
                      llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psubus_b_256 : GCCBuiltin<"__builtin_ia32_psubusb256_mask">,
+  def int_x86_avx512_mask_psubus_b_256 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
                      llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_psubus_b_512 : GCCBuiltin<"__builtin_ia32_psubusb512_mask">,
           Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
                      llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psubus_w_128 : GCCBuiltin<"__builtin_ia32_psubusw128_mask">,
+  def int_x86_avx512_mask_psubus_w_128 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                      llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psubus_w_256 : GCCBuiltin<"__builtin_ia32_psubusw256_mask">,
+  def int_x86_avx512_mask_psubus_w_256 : // FIXME: remove this intrinsic
           Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
                      llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_psubus_w_512 : GCCBuiltin<"__builtin_ia32_psubusw512_mask">,
           Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
                      llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512">,
-              Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512">,
-              Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512_mask">,
-              Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
-                     llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pmulh_w_512 : GCCBuiltin<"__builtin_ia32_pmulhw512_mask">,
-              Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
-                     llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pmulhu_w_128 : GCCBuiltin<"__builtin_ia32_pmulhuw128_mask">,
-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                     llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pmulhu_w_256 : GCCBuiltin<"__builtin_ia32_pmulhuw256_mask">,
-              Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
-                     llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pmulh_w_128 : GCCBuiltin<"__builtin_ia32_pmulhw128_mask">,
-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                     llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pmulh_w_256 : GCCBuiltin<"__builtin_ia32_pmulhw256_mask">,
-              Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
-                     llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pmaddw_d_128 :
-         GCCBuiltin<"__builtin_ia32_pmaddwd128_mask">,
-          Intrinsic<[llvm_v4i32_ty],
-          [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-  def int_x86_avx512_mask_pmaddw_d_256 :
-         GCCBuiltin<"__builtin_ia32_pmaddwd256_mask">,
-          Intrinsic<[llvm_v8i32_ty],
-          [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v8i32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-  def int_x86_avx512_mask_pmaddw_d_512 :
-         GCCBuiltin<"__builtin_ia32_pmaddwd512_mask">,
-          Intrinsic<[llvm_v16i32_ty],
-          [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v16i32_ty,  llvm_i16_ty],
-          [IntrNoMem]>;
-  def int_x86_avx512_mask_pmaddubs_w_128 :
-         GCCBuiltin<"__builtin_ia32_pmaddubsw128_mask">,
-          Intrinsic<[llvm_v8i16_ty],
-          [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v8i16_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-  def int_x86_avx512_mask_pmaddubs_w_256 :
-         GCCBuiltin<"__builtin_ia32_pmaddubsw256_mask">,
-          Intrinsic<[llvm_v16i16_ty],
-          [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v16i16_ty,  llvm_i16_ty],
-          [IntrNoMem]>;
-  def int_x86_avx512_mask_pmaddubs_w_512 :
-         GCCBuiltin<"__builtin_ia32_pmaddubsw512_mask">,
-          Intrinsic<[llvm_v32i16_ty],
-          [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v32i16_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_dbpsadbw_128 :
-         GCCBuiltin<"__builtin_ia32_dbpsadbw128_mask">,
+  def int_x86_avx512_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512">,
+              Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+                         llvm_v32i16_ty], [IntrNoMem, Commutative]>;
+  def int_x86_avx512_pmulh_w_512 : GCCBuiltin<"__builtin_ia32_pmulhw512">,
+              Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+                         llvm_v32i16_ty], [IntrNoMem, Commutative]>;
+  def int_x86_avx512_pmaddw_d_512 : GCCBuiltin<"__builtin_ia32_pmaddwd512">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v32i16_ty,
+                         llvm_v32i16_ty], [IntrNoMem, Commutative]>;
+  def int_x86_avx512_pmaddubs_w_512 : GCCBuiltin<"__builtin_ia32_pmaddubsw512">,
+              Intrinsic<[llvm_v32i16_ty], [llvm_v64i8_ty,
+                         llvm_v64i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_dbpsadbw_128 :
+         GCCBuiltin<"__builtin_ia32_dbpsadbw128">,
           Intrinsic<[llvm_v8i16_ty],
-          [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v8i16_ty,
-           llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_dbpsadbw_256 :
-         GCCBuiltin<"__builtin_ia32_dbpsadbw256_mask">,
+  def int_x86_avx512_dbpsadbw_256 :
+         GCCBuiltin<"__builtin_ia32_dbpsadbw256">,
           Intrinsic<[llvm_v16i16_ty],
-          [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v16i16_ty,
-           llvm_i16_ty], [IntrNoMem]>;
+                    [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_dbpsadbw_512 :
-         GCCBuiltin<"__builtin_ia32_dbpsadbw512_mask">,
+  def int_x86_avx512_dbpsadbw_512 :
+         GCCBuiltin<"__builtin_ia32_dbpsadbw512">,
           Intrinsic<[llvm_v32i16_ty],
-          [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v32i16_ty,
-           llvm_i32_ty], [IntrNoMem]>;
+                    [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty], [IntrNoMem]>;
 }
 
 // Gather and Scatter ops
@@ -5290,31 +4146,6 @@ let TargetPrefix = "x86" in {
         Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                    llvm_i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_compress_store_ps_512 :
-                            GCCBuiltin<"__builtin_ia32_compressstoresf512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty,
-                   llvm_i16_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_pd_512 :
-                            GCCBuiltin<"__builtin_ia32_compressstoredf512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_ps_256 :
-                            GCCBuiltin<"__builtin_ia32_compressstoresf256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_pd_256 :
-                            GCCBuiltin<"__builtin_ia32_compressstoredf256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_ps_128 :
-                            GCCBuiltin<"__builtin_ia32_compressstoresf128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_pd_128 :
-                            GCCBuiltin<"__builtin_ia32_compressstoredf128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-
   def int_x86_avx512_mask_compress_d_512 :
                              GCCBuiltin<"__builtin_ia32_compresssi512_mask">,
         Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
@@ -5340,31 +4171,6 @@ let TargetPrefix = "x86" in {
         Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
                    llvm_i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_compress_store_d_512 :
-                            GCCBuiltin<"__builtin_ia32_compressstoresi512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty,
-                   llvm_i16_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_q_512 :
-                            GCCBuiltin<"__builtin_ia32_compressstoredi512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_d_256 :
-                            GCCBuiltin<"__builtin_ia32_compressstoresi256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_q_256 :
-                            GCCBuiltin<"__builtin_ia32_compressstoredi256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_d_128 :
-                            GCCBuiltin<"__builtin_ia32_compressstoresi128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_q_128 :
-                            GCCBuiltin<"__builtin_ia32_compressstoredi128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-
   def int_x86_avx512_mask_compress_b_512 :
                              GCCBuiltin<"__builtin_ia32_compressqi512_mask">,
         Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
@@ -5390,31 +4196,6 @@ let TargetPrefix = "x86" in {
         Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                    llvm_i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_compress_store_b_512 :
-                            GCCBuiltin<"__builtin_ia32_compressstoreqi512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v64i8_ty,
-                   llvm_i64_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_w_512 :
-                            GCCBuiltin<"__builtin_ia32_compressstorehi512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty,
-                   llvm_i32_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_b_256 :
-                            GCCBuiltin<"__builtin_ia32_compressstoreqi256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty,
-                   llvm_i32_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_w_256 :
-                            GCCBuiltin<"__builtin_ia32_compressstorehi256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty,
-                   llvm_i16_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_b_128 :
-                            GCCBuiltin<"__builtin_ia32_compressstoreqi128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16i8_ty,
-                   llvm_i16_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_w_128 :
-                            GCCBuiltin<"__builtin_ia32_compressstorehi128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty,
-                   llvm_i8_ty], [IntrArgMemOnly]>;
-
 // expand
   def int_x86_avx512_mask_expand_ps_512 :
                              GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
@@ -5441,31 +4222,6 @@ let TargetPrefix = "x86" in {
         Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                    llvm_i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_expand_load_ps_512 :
-                            GCCBuiltin<"__builtin_ia32_expandloadsf512_mask">,
-        Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty,
-                   llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_pd_512 :
-                            GCCBuiltin<"__builtin_ia32_expandloaddf512_mask">,
-        Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_ps_256 :
-                            GCCBuiltin<"__builtin_ia32_expandloadsf256_mask">,
-        Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_pd_256 :
-                            GCCBuiltin<"__builtin_ia32_expandloaddf256_mask">,
-        Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_ps_128 :
-                            GCCBuiltin<"__builtin_ia32_expandloadsf128_mask">,
-        Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_pd_128 :
-                            GCCBuiltin<"__builtin_ia32_expandloaddf128_mask">,
-        Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-
   def int_x86_avx512_mask_expand_d_512 :
                              GCCBuiltin<"__builtin_ia32_expandsi512_mask">,
         Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
@@ -5491,31 +4247,6 @@ let TargetPrefix = "x86" in {
         Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
                    llvm_i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_expand_load_d_512 :
-                            GCCBuiltin<"__builtin_ia32_expandloadsi512_mask">,
-        Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty, llvm_v16i32_ty,
-                   llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_q_512 :
-                            GCCBuiltin<"__builtin_ia32_expandloaddi512_mask">,
-        Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty, llvm_v8i64_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_d_256 :
-                            GCCBuiltin<"__builtin_ia32_expandloadsi256_mask">,
-        Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_q_256 :
-                            GCCBuiltin<"__builtin_ia32_expandloaddi256_mask">,
-        Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_d_128 :
-                            GCCBuiltin<"__builtin_ia32_expandloadsi128_mask">,
-        Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_q_128 :
-                            GCCBuiltin<"__builtin_ia32_expandloaddi128_mask">,
-        Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-
   def int_x86_avx512_mask_expand_b_512 :
                             GCCBuiltin<"__builtin_ia32_expandqi512_mask">,
         Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
@@ -5540,130 +4271,87 @@ let TargetPrefix = "x86" in {
                             GCCBuiltin<"__builtin_ia32_expandhi128_mask">,
         Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                    llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_expand_load_b_512 :
-                            GCCBuiltin<"__builtin_ia32_expandloadqi512_mask">,
-        Intrinsic<[llvm_v64i8_ty], [llvm_ptr_ty, llvm_v64i8_ty,
-                   llvm_i64_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_w_512 :
-                            GCCBuiltin<"__builtin_ia32_expandloadhi512_mask">,
-        Intrinsic<[llvm_v32i16_ty], [llvm_ptr_ty, llvm_v32i16_ty,
-                   llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_b_256 :
-                            GCCBuiltin<"__builtin_ia32_expandloadqi256_mask">,
-        Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_v32i8_ty,
-                   llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_w_256 :
-                            GCCBuiltin<"__builtin_ia32_expandloadhi256_mask">,
-        Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_v16i16_ty,
-                   llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_b_128 :
-                            GCCBuiltin<"__builtin_ia32_expandloadqi128_mask">,
-        Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_v16i8_ty,
-                   llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_w_128 :
-                            GCCBuiltin<"__builtin_ia32_expandloadhi128_mask">,
-        Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_v8i16_ty,
-                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
 }
 
 // VBMI2 Concat & Shift
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx512_mask_vpshld_q_512 :
-        GCCBuiltin<"__builtin_ia32_vpshldq512_mask">,
+  def int_x86_avx512_vpshld_q_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldq512">,
         Intrinsic<[llvm_v8i64_ty],
-                  [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshld_q_256 :
-        GCCBuiltin<"__builtin_ia32_vpshldq256_mask">,
+                  [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshld_q_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldq256">,
         Intrinsic<[llvm_v4i64_ty],
-                  [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshld_q_128 :
-        GCCBuiltin<"__builtin_ia32_vpshldq128_mask">,
+                  [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshld_q_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldq128">,
         Intrinsic<[llvm_v2i64_ty],
-                  [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
+                  [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpshld_d_512 :
-        GCCBuiltin<"__builtin_ia32_vpshldd512_mask">,
+  def int_x86_avx512_vpshld_d_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldd512">,
         Intrinsic<[llvm_v16i32_ty],
-                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshld_d_256 :
-        GCCBuiltin<"__builtin_ia32_vpshldd256_mask">,
+                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshld_d_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldd256">,
         Intrinsic<[llvm_v8i32_ty],
-                  [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshld_d_128 :
-        GCCBuiltin<"__builtin_ia32_vpshldd128_mask">,
+                  [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshld_d_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldd128">,
         Intrinsic<[llvm_v4i32_ty],
-                  [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
+                  [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpshld_w_512 :
-        GCCBuiltin<"__builtin_ia32_vpshldw512_mask">,
+  def int_x86_avx512_vpshld_w_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldw512">,
         Intrinsic<[llvm_v32i16_ty],
-                  [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_v32i16_ty,
-                   llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshld_w_256 :
-        GCCBuiltin<"__builtin_ia32_vpshldw256_mask">,
+                  [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshld_w_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldw256">,
         Intrinsic<[llvm_v16i16_ty],
-                  [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, llvm_v16i16_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshld_w_128 :
-        GCCBuiltin<"__builtin_ia32_vpshldw128_mask">,
+                  [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshld_w_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldw128">,
         Intrinsic<[llvm_v8i16_ty],
-                  [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
+                  [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpshrd_q_512 :
-        GCCBuiltin<"__builtin_ia32_vpshrdq512_mask">,
+  def int_x86_avx512_vpshrd_q_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdq512">,
         Intrinsic<[llvm_v8i64_ty],
-                  [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshrd_q_256 :
-        GCCBuiltin<"__builtin_ia32_vpshrdq256_mask">,
+                  [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshrd_q_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdq256">,
         Intrinsic<[llvm_v4i64_ty],
-                  [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshrd_q_128 :
-        GCCBuiltin<"__builtin_ia32_vpshrdq128_mask">,
+                  [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshrd_q_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdq128">,
         Intrinsic<[llvm_v2i64_ty],
-                  [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
+                  [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpshrd_d_512 :
-        GCCBuiltin<"__builtin_ia32_vpshrdd512_mask">,
+  def int_x86_avx512_vpshrd_d_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdd512">,
         Intrinsic<[llvm_v16i32_ty],
-                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshrd_d_256 :
-        GCCBuiltin<"__builtin_ia32_vpshrdd256_mask">,
+                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshrd_d_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdd256">,
         Intrinsic<[llvm_v8i32_ty],
-                  [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshrd_d_128 :
-        GCCBuiltin<"__builtin_ia32_vpshrdd128_mask">,
+                  [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshrd_d_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdd128">,
         Intrinsic<[llvm_v4i32_ty],
-                  [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
+                  [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpshrd_w_512 :
-        GCCBuiltin<"__builtin_ia32_vpshrdw512_mask">,
+  def int_x86_avx512_vpshrd_w_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdw512">,
         Intrinsic<[llvm_v32i16_ty],
-                  [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_v32i16_ty,
-                   llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshrd_w_256 :
-        GCCBuiltin<"__builtin_ia32_vpshrdw256_mask">,
+                  [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshrd_w_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdw256">,
         Intrinsic<[llvm_v16i16_ty],
-                  [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, llvm_v16i16_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_vpshrd_w_128 :
-        GCCBuiltin<"__builtin_ia32_vpshrdw128_mask">,
+                  [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vpshrd_w_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdw128">,
         Intrinsic<[llvm_v8i16_ty],
-                  [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
+                  [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
 
   def int_x86_avx512_mask_vpshldv_w_128 :
         GCCBuiltin<"__builtin_ia32_vpshldvw128_mask">,
@@ -5969,7 +4657,6 @@ let TargetPrefix = "x86" in {
                     [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
                     [IntrArgMemOnly]>;
   def int_x86_avx512_mask_pmov_qw_512 :
-          GCCBuiltin<"__builtin_ia32_pmovqw512_mask">,
           Intrinsic<[llvm_v8i16_ty],
                     [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
                     [IntrNoMem]>;
@@ -6028,8 +4715,7 @@ let TargetPrefix = "x86" in {
           Intrinsic<[],
                     [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
                     [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_pmov_qd_256 :
-          GCCBuiltin<"__builtin_ia32_pmovqd256_mask">,
+  def int_x86_avx512_mask_pmov_qd_256 : // FIXME: Replace with trunc+select.
           Intrinsic<[llvm_v4i32_ty],
                     [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
                     [IntrNoMem]>;
@@ -6058,8 +4744,7 @@ let TargetPrefix = "x86" in {
           Intrinsic<[],
                     [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
                     [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_pmov_qd_512 :
-          GCCBuiltin<"__builtin_ia32_pmovqd512_mask">,
+  def int_x86_avx512_mask_pmov_qd_512 : // FIXME: Replace with trunc+select.
           Intrinsic<[llvm_v8i32_ty],
                     [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
                     [IntrNoMem]>;
@@ -6149,7 +4834,6 @@ let TargetPrefix = "x86" in {
                     [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
                     [IntrArgMemOnly]>;
   def int_x86_avx512_mask_pmov_db_512 :
-          GCCBuiltin<"__builtin_ia32_pmovdb512_mask">,
           Intrinsic<[llvm_v16i8_ty],
                     [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
                     [IntrNoMem]>;
@@ -6239,7 +4923,6 @@ let TargetPrefix = "x86" in {
                     [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
                     [IntrArgMemOnly]>;
   def int_x86_avx512_mask_pmov_dw_512 :
-          GCCBuiltin<"__builtin_ia32_pmovdw512_mask">,
           Intrinsic<[llvm_v16i16_ty],
                     [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
                     [IntrNoMem]>;
@@ -6298,8 +4981,7 @@ let TargetPrefix = "x86" in {
           Intrinsic<[],
                     [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
                     [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_pmov_wb_256 :
-          GCCBuiltin<"__builtin_ia32_pmovwb256_mask">,
+  def int_x86_avx512_mask_pmov_wb_256 : // FIXME: Replace with trunc+select.
           Intrinsic<[llvm_v16i8_ty],
                     [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
                     [IntrNoMem]>;
@@ -6328,8 +5010,7 @@ let TargetPrefix = "x86" in {
           Intrinsic<[],
                     [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
                     [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_pmov_wb_512 :
-          GCCBuiltin<"__builtin_ia32_pmovwb512_mask">,
+  def int_x86_avx512_mask_pmov_wb_512 : // FIXME: Replace with trunc+select.
           Intrinsic<[llvm_v32i8_ty],
                     [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
                     [IntrNoMem]>;
@@ -6362,105 +5043,66 @@ let TargetPrefix = "x86" in {
 
 // Bitwise ternary logic
 let TargetPrefix = "x86" in {
-  def int_x86_avx512_mask_pternlog_d_128 :
-          GCCBuiltin<"__builtin_ia32_pternlogd128_mask">,
-          Intrinsic<[llvm_v4i32_ty],
-                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_pternlog_d_128 :
-          GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">,
+  def int_x86_avx512_pternlog_d_128 :
+          GCCBuiltin<"__builtin_ia32_pternlogd128">,
           Intrinsic<[llvm_v4i32_ty],
-                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_pternlog_d_256 :
-          GCCBuiltin<"__builtin_ia32_pternlogd256_mask">,
-          Intrinsic<[llvm_v8i32_ty],
-                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_pternlog_d_256 :
-          GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">,
+  def int_x86_avx512_pternlog_d_256 :
+          GCCBuiltin<"__builtin_ia32_pternlogd256">,
           Intrinsic<[llvm_v8i32_ty],
-                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_pternlog_d_512 :
-          GCCBuiltin<"__builtin_ia32_pternlogd512_mask">,
-          Intrinsic<[llvm_v16i32_ty],
-                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
-                     llvm_i16_ty], [IntrNoMem]>;
+                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_pternlog_d_512 :
-          GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">,
+  def int_x86_avx512_pternlog_d_512 :
+          GCCBuiltin<"__builtin_ia32_pternlogd512">,
           Intrinsic<[llvm_v16i32_ty],
-                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
-                     llvm_i16_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_pternlog_q_128 :
-          GCCBuiltin<"__builtin_ia32_pternlogq128_mask">,
-          Intrinsic<[llvm_v2i64_ty],
-                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty,
+                     llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_pternlog_q_128 :
-          GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">,
+  def int_x86_avx512_pternlog_q_128 :
+          GCCBuiltin<"__builtin_ia32_pternlogq128">,
           Intrinsic<[llvm_v2i64_ty],
-                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_pternlog_q_256 :
-          GCCBuiltin<"__builtin_ia32_pternlogq256_mask">,
-          Intrinsic<[llvm_v4i64_ty],
-                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_pternlog_q_256 :
-          GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">,
+  def int_x86_avx512_pternlog_q_256 :
+          GCCBuiltin<"__builtin_ia32_pternlogq256">,
           Intrinsic<[llvm_v4i64_ty],
-                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_pternlog_q_512 :
-          GCCBuiltin<"__builtin_ia32_pternlogq512_mask">,
-          Intrinsic<[llvm_v8i64_ty],
-                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
 
-  def int_x86_avx512_maskz_pternlog_q_512 :
-          GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">,
+  def int_x86_avx512_pternlog_q_512 :
+          GCCBuiltin<"__builtin_ia32_pternlogq512">,
           Intrinsic<[llvm_v8i64_ty],
-                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
 }
 
 // Misc.
 let TargetPrefix = "x86" in {
-  def int_x86_avx512_mask_cmp_ps_512 :
-        GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
-              Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                         llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_pd_512 :
-        GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
-              Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                         llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_ps_256 :
-        GCCBuiltin<"__builtin_ia32_cmpps256_mask">,
-              Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
-                         llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_pd_256 :
-        GCCBuiltin<"__builtin_ia32_cmppd256_mask">,
-              Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
-                         llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_ps_128 :
-        GCCBuiltin<"__builtin_ia32_cmpps128_mask">,
-            Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                       llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_cmp_pd_128 :
-        GCCBuiltin<"__builtin_ia32_cmppd128_mask">,
-            Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                       llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  // NOTE: These comparison intrinsics are not used by clang as long as the
+  //       distinction in signaling behaviour is not implemented.
+  def int_x86_avx512_cmp_ps_512 :
+              Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cmp_pd_512 :
+              Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cmp_ps_256 :
+              Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cmp_pd_256 :
+              Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+                         llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cmp_ps_128 :
+            Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+                       llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cmp_pd_128 :
+            Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+                       llvm_i32_ty], [IntrNoMem]>;
+
   def int_x86_avx512_mask_cmp_ss :
         GCCBuiltin<"__builtin_ia32_cmpss_mask">,
               Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
@@ -6509,3 +5151,65 @@ let TargetPrefix = "x86" in {
   def int_x86_clzero : GCCBuiltin<"__builtin_ia32_clzero">,
       Intrinsic<[], [llvm_ptr_ty], []>;
 }
+
+//===----------------------------------------------------------------------===//
+// Cache write back intrinsics
+
+let TargetPrefix = "x86" in {
+  // Write back and invalidate
+  def int_x86_wbinvd : GCCBuiltin<"__builtin_ia32_wbinvd">,
+      Intrinsic<[], [], []>;
+
+  // Write back no-invalidate
+  def int_x86_wbnoinvd : GCCBuiltin<"__builtin_ia32_wbnoinvd">,
+      Intrinsic<[], [], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Cache-line demote
+
+let TargetPrefix = "x86" in {
+  def int_x86_cldemote : GCCBuiltin<"__builtin_ia32_cldemote">,
+      Intrinsic<[], [llvm_ptr_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Wait and pause enhancements
+let TargetPrefix = "x86" in {
+  def int_x86_umonitor : GCCBuiltin<"__builtin_ia32_umonitor">,
+              Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_x86_umwait : GCCBuiltin<"__builtin_ia32_umwait">,
+              Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_x86_tpause : GCCBuiltin<"__builtin_ia32_tpause">,
+              Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Direct Move Instructions
+
+let TargetPrefix = "x86" in {
+  def int_x86_directstore32 : GCCBuiltin<"__builtin_ia32_directstore_u32">,
+      Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>;
+  def int_x86_directstore64 : GCCBuiltin<"__builtin_ia32_directstore_u64">,
+      Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>;
+  def int_x86_movdir64b : GCCBuiltin<"__builtin_ia32_movdir64b">,
+      Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// PTWrite - Write data to processor trace pocket
+
+let TargetPrefix = "x86" in {
+  def int_x86_ptwrite32 : GCCBuiltin<"__builtin_ia32_ptwrite32">,
+              Intrinsic<[], [llvm_i32_ty], []>;
+  def int_x86_ptwrite64 : GCCBuiltin<"__builtin_ia32_ptwrite64">,
+              Intrinsic<[], [llvm_i64_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// INVPCID - Invalidate Process-Context Identifier
+
+let TargetPrefix = "x86" in {
+  def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">,
+              Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
+}
diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h
index a95634d32c21..ebd445553167 100644
--- a/include/llvm/IR/LLVMContext.h
+++ b/include/llvm/IR/LLVMContext.h
@@ -31,7 +31,7 @@ class Function;
 class Instruction;
 class LLVMContextImpl;
 class Module;
-class OptBisect;
+class OptPassGate;
 template <typename T> class SmallVectorImpl;
 class SMDiagnostic;
 class StringRef;
@@ -76,7 +76,7 @@ public:
 
   // Pinned metadata names, which always have the same value.  This is a
   // compile-time performance optimization, not a correctness optimization.
-  enum {
+  enum : unsigned {
     MD_dbg = 0,                       // "dbg"
     MD_tbaa = 1,                      // "tbaa"
     MD_prof = 2,                      // "prof"
@@ -108,7 +108,7 @@ public:
   /// operand bundle tags that LLVM has special knowledge of are listed here.
   /// Additionally, this scheme allows LLVM to efficiently check for specific
   /// operand bundle tags without comparing strings.
-  enum {
+  enum : unsigned {
     OB_deopt = 0,         // "deopt"
     OB_funclet = 1,       // "funclet"
     OB_gc_transition = 2, // "gc-transition"
@@ -229,23 +229,23 @@ public:
   /// to caller.
   std::unique_ptr<DiagnosticHandler> getDiagnosticHandler();
 
-  /// \brief Return if a code hotness metric should be included in optimization
+  /// Return if a code hotness metric should be included in optimization
   /// diagnostics.
   bool getDiagnosticsHotnessRequested() const;
-  /// \brief Set if a code hotness metric should be included in optimization
+  /// Set if a code hotness metric should be included in optimization
   /// diagnostics.
   void setDiagnosticsHotnessRequested(bool Requested);
 
-  /// \brief Return the minimum hotness value a diagnostic would need in order
+  /// Return the minimum hotness value a diagnostic would need in order
   /// to be included in optimization diagnostics. If there is no minimum, this
   /// returns None.
   uint64_t getDiagnosticsHotnessThreshold() const;
 
-  /// \brief Set the minimum hotness value a diagnostic needs in order to be
+  /// Set the minimum hotness value a diagnostic needs in order to be
   /// included in optimization diagnostics.
   void setDiagnosticsHotnessThreshold(uint64_t Threshold);
 
-  /// \brief Return the YAML file used by the backend to save optimization
+  /// Return the YAML file used by the backend to save optimization
   /// diagnostics.  If null, diagnostics are not saved in a file but only
   /// emitted via the diagnostic handler.
   yaml::Output *getDiagnosticsOutputFile();
@@ -256,11 +256,11 @@ public:
   /// set, the handler is invoked for each diagnostic message.
   void setDiagnosticsOutputFile(std::unique_ptr<yaml::Output> F);
 
-  /// \brief Get the prefix that should be printed in front of a diagnostic of
+  /// Get the prefix that should be printed in front of a diagnostic of
   ///        the given \p Severity
   static const char *getDiagnosticMessagePrefix(DiagnosticSeverity Severity);
 
-  /// \brief Report a message to the currently installed diagnostic handler.
+  /// Report a message to the currently installed diagnostic handler.
   ///
   /// This function returns, in particular in the case of error reporting
   /// (DI.Severity == \a DS_Error), so the caller should leave the compilation
@@ -272,7 +272,7 @@ public:
   /// "warning: " for \a DS_Warning, and "note: " for \a DS_Note.
   void diagnose(const DiagnosticInfo &DI);
 
-  /// \brief Registers a yield callback with the given context.
+  /// Registers a yield callback with the given context.
   ///
   /// The yield callback function may be called by LLVM to transfer control back
   /// to the client that invoked the LLVM compilation. This can be used to yield
@@ -291,7 +291,7 @@ public:
   /// control to LLVM. Other LLVM contexts are unaffected by this restriction.
   void setYieldCallback(YieldCallbackTy Callback, void *OpaqueHandle);
 
-  /// \brief Calls the yield callback (if applicable).
+  /// Calls the yield callback (if applicable).
   ///
   /// This transfers control of the current thread back to the client, which may
   /// suspend the current thread. Only call this method when LLVM doesn't hold
@@ -307,7 +307,7 @@ public:
   void emitError(const Instruction *I, const Twine &ErrorStr);
   void emitError(const Twine &ErrorStr);
 
-  /// \brief Query for a debug option's value.
+  /// Query for a debug option's value.
   ///
   /// This function returns typed data populated from command line parsing.
   template <typename ValT, typename Base, ValT(Base::*Mem)>
@@ -315,9 +315,17 @@ public:
     return OptionRegistry::instance().template get<ValT, Base, Mem>();
   }
 
-  /// \brief Access the object which manages optimization bisection for failure
-  /// analysis.
-  OptBisect &getOptBisect();
+  /// Access the object which can disable optional passes and individual
+  /// optimizations at compile time.
+  OptPassGate &getOptPassGate() const;
+
+  /// Set the object which can disable optional passes and individual
+  /// optimizations at compile time.
+  ///
+  /// The lifetime of the object must be guaranteed to extend as long as the
+  /// LLVMContext is used by compilation.
+  void setOptPassGate(OptPassGate&);
+
 private:
   // Module needs access to the add/removeModule methods.
   friend class Module;
diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h
index 3dc4a776dba0..90036c6ce248 100644
--- a/include/llvm/IR/LegacyPassManagers.h
+++ b/include/llvm/IR/LegacyPassManagers.h
@@ -403,6 +403,15 @@ public:
       InheritedAnalysis[Index++] = (*I)->getAvailableAnalysis();
   }
 
+  /// Set the initial size of the module if the user has specified that they
+  /// want remarks for size.
+  /// Returns 0 if the remark was not requested.
+  unsigned initSizeRemarkInfo(Module &M);
+
+  /// Emit a remark signifying that the number of IR instructions in the module
+  /// changed.
+  void emitInstrCountChangedRemark(Pass *P, Module &M, unsigned CountBefore);
+
 protected:
   // Top level manager.
   PMTopLevelManager *TPM;
diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h
index dff1ca12407f..174616c7ab1d 100644
--- a/include/llvm/IR/MDBuilder.h
+++ b/include/llvm/IR/MDBuilder.h
@@ -38,17 +38,17 @@ class MDBuilder {
 public:
   MDBuilder(LLVMContext &context) : Context(context) {}
 
-  /// \brief Return the given string as metadata.
+  /// Return the given string as metadata.
   MDString *createString(StringRef Str);
 
-  /// \brief Return the given constant as metadata.
+  /// Return the given constant as metadata.
   ConstantAsMetadata *createConstant(Constant *C);
 
   //===------------------------------------------------------------------===//
   // FPMath metadata.
   //===------------------------------------------------------------------===//
 
-  /// \brief Return metadata with the given settings.  The special value 0.0
+  /// Return metadata with the given settings.  The special value 0.0
   /// for the Accuracy parameter indicates the default (maximal precision)
   /// setting.
   MDNode *createFPMath(float Accuracy);
@@ -57,19 +57,20 @@ public:
   // Prof metadata.
   //===------------------------------------------------------------------===//
 
-  /// \brief Return metadata containing two branch weights.
+  /// Return metadata containing two branch weights.
   MDNode *createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight);
 
-  /// \brief Return metadata containing a number of branch weights.
+  /// Return metadata containing a number of branch weights.
   MDNode *createBranchWeights(ArrayRef<uint32_t> Weights);
 
   /// Return metadata specifying that a branch or switch is unpredictable.
   MDNode *createUnpredictable();
 
-  /// Return metadata containing the entry \p Count for a function, and the
+  /// Return metadata containing the entry \p Count for a function, a boolean
+  /// \Synthetic indicating whether the counts were synthetized, and the
   /// GUIDs stored in \p Imports that need to be imported for sample PGO, to
   /// enable the same inlines as the profiled optimized binary
-  MDNode *createFunctionEntryCount(uint64_t Count,
+  MDNode *createFunctionEntryCount(uint64_t Count, bool Synthetic,
                                    const DenseSet<GlobalValue::GUID> *Imports);
 
   /// Return metadata containing the section prefix for a function.
@@ -79,17 +80,17 @@ public:
   // Range metadata.
   //===------------------------------------------------------------------===//
 
-  /// \brief Return metadata describing the range [Lo, Hi).
+  /// Return metadata describing the range [Lo, Hi).
   MDNode *createRange(const APInt &Lo, const APInt &Hi);
 
-  /// \brief Return metadata describing the range [Lo, Hi).
+  /// Return metadata describing the range [Lo, Hi).
   MDNode *createRange(Constant *Lo, Constant *Hi);
 
   //===------------------------------------------------------------------===//
   // Callees metadata.
   //===------------------------------------------------------------------===//
 
-  /// \brief Return metadata indicating the possible callees of indirect
+  /// Return metadata indicating the possible callees of indirect
   /// calls.
   MDNode *createCallees(ArrayRef<Function *> Callees);
 
@@ -98,28 +99,28 @@ public:
   //===------------------------------------------------------------------===//
 
 protected:
-  /// \brief Return metadata appropriate for a AA root node (scope or TBAA).
+  /// Return metadata appropriate for a AA root node (scope or TBAA).
   /// Each returned node is distinct from all other metadata and will never
   /// be identified (uniqued) with anything else.
   MDNode *createAnonymousAARoot(StringRef Name = StringRef(),
                                 MDNode *Extra = nullptr);
 
 public:
-  /// \brief Return metadata appropriate for a TBAA root node. Each returned
+  /// Return metadata appropriate for a TBAA root node. Each returned
   /// node is distinct from all other metadata and will never be identified
   /// (uniqued) with anything else.
   MDNode *createAnonymousTBAARoot() {
     return createAnonymousAARoot();
   }
 
-  /// \brief Return metadata appropriate for an alias scope domain node.
+  /// Return metadata appropriate for an alias scope domain node.
   /// Each returned node is distinct from all other metadata and will never
   /// be identified (uniqued) with anything else.
   MDNode *createAnonymousAliasScopeDomain(StringRef Name = StringRef()) {
     return createAnonymousAARoot(Name);
   }
 
-  /// \brief Return metadata appropriate for an alias scope root node.
+  /// Return metadata appropriate for an alias scope root node.
   /// Each returned node is distinct from all other metadata and will never
   /// be identified (uniqued) with anything else.
   MDNode *createAnonymousAliasScope(MDNode *Domain,
@@ -127,22 +128,22 @@ public:
     return createAnonymousAARoot(Name, Domain);
   }
 
-  /// \brief Return metadata appropriate for a TBAA root node with the given
+  /// Return metadata appropriate for a TBAA root node with the given
   /// name.  This may be identified (uniqued) with other roots with the same
   /// name.
   MDNode *createTBAARoot(StringRef Name);
 
-  /// \brief Return metadata appropriate for an alias scope domain node with
+  /// Return metadata appropriate for an alias scope domain node with
   /// the given name. This may be identified (uniqued) with other roots with
   /// the same name.
   MDNode *createAliasScopeDomain(StringRef Name);
 
-  /// \brief Return metadata appropriate for an alias scope node with
+  /// Return metadata appropriate for an alias scope node with
   /// the given name. This may be identified (uniqued) with other scopes with
   /// the same name and domain.
   MDNode *createAliasScope(StringRef Name, MDNode *Domain);
 
-  /// \brief Return metadata for a non-root TBAA node with the given name,
+  /// Return metadata for a non-root TBAA node with the given name,
   /// parent in the TBAA tree, and value for 'pointsToConstantMemory'.
   MDNode *createTBAANode(StringRef Name, MDNode *Parent,
                          bool isConstant = false);
@@ -155,33 +156,33 @@ public:
       Offset(Offset), Size(Size), Type(Type) {}
   };
 
-  /// \brief Return metadata for a tbaa.struct node with the given
+  /// Return metadata for a tbaa.struct node with the given
   /// struct field descriptions.
   MDNode *createTBAAStructNode(ArrayRef<TBAAStructField> Fields);
 
-  /// \brief Return metadata for a TBAA struct node in the type DAG
+  /// Return metadata for a TBAA struct node in the type DAG
   /// with the given name, a list of pairs (offset, field type in the type DAG).
   MDNode *
   createTBAAStructTypeNode(StringRef Name,
                            ArrayRef<std::pair<MDNode *, uint64_t>> Fields);
 
-  /// \brief Return metadata for a TBAA scalar type node with the
+  /// Return metadata for a TBAA scalar type node with the
   /// given name, an offset and a parent in the TBAA type DAG.
   MDNode *createTBAAScalarTypeNode(StringRef Name, MDNode *Parent,
                                    uint64_t Offset = 0);
 
-  /// \brief Return metadata for a TBAA tag node with the given
+  /// Return metadata for a TBAA tag node with the given
   /// base type, access type and offset relative to the base type.
   MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
                                   uint64_t Offset, bool IsConstant = false);
 
-  /// \brief Return metadata for a TBAA type node in the TBAA type DAG with the
+  /// Return metadata for a TBAA type node in the TBAA type DAG with the
   /// given parent type, size in bytes, type identifier and a list of fields.
   MDNode *createTBAATypeNode(MDNode *Parent, uint64_t Size, Metadata *Id,
                              ArrayRef<TBAAStructField> Fields =
                                  ArrayRef<TBAAStructField>());
 
-  /// \brief Return metadata for a TBAA access tag with the given base type,
+  /// Return metadata for a TBAA access tag with the given base type,
   /// final access type, offset of the access relative to the base type, size of
   /// the access and flag indicating whether the accessed object can be
   /// considered immutable for the purposes of the TBAA analysis.
@@ -189,7 +190,11 @@ public:
                               uint64_t Offset, uint64_t Size,
                               bool IsImmutable = false);
 
-  /// \brief Return metadata containing an irreducible loop header weight.
+  /// Return mutable version of the given mutable or immutable TBAA
+  /// access tag.
+  MDNode *createMutableTBAAAccessTag(MDNode *Tag);
+
+  /// Return metadata containing an irreducible loop header weight.
   MDNode *createIrrLoopHeaderWeight(uint64_t Weight);
 };
 
diff --git a/include/llvm/IR/Mangler.h b/include/llvm/IR/Mangler.h
index 56ee21392ccd..0261c00f524c 100644
--- a/include/llvm/IR/Mangler.h
+++ b/include/llvm/IR/Mangler.h
@@ -50,6 +50,9 @@ public:
 void emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
                                   const Triple &TT, Mangler &Mangler);
 
+void emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
+                                const Triple &T, Mangler &M);
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Metadata.def b/include/llvm/IR/Metadata.def
index 03cdcab7dc47..70a03f28b488 100644
--- a/include/llvm/IR/Metadata.def
+++ b/include/llvm/IR/Metadata.def
@@ -108,6 +108,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DITemplateValueParameter)
 HANDLE_SPECIALIZED_MDNODE_BRANCH(DIVariable)
 HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGlobalVariable)
 HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocalVariable)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILabel)
 HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIObjCProperty)
 HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity)
 HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode)
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
index bc0b87a6c348..9ac97f4224ac 100644
--- a/include/llvm/IR/Metadata.h
+++ b/include/llvm/IR/Metadata.h
@@ -52,20 +52,20 @@ enum LLVMConstants : uint32_t {
   DEBUG_METADATA_VERSION = 3 // Current debug info version number.
 };
 
-/// \brief Root of the metadata hierarchy.
+/// Root of the metadata hierarchy.
 ///
 /// This is a root class for typeless data in the IR.
 class Metadata {
   friend class ReplaceableMetadataImpl;
 
-  /// \brief RTTI.
+  /// RTTI.
   const unsigned char SubclassID;
 
 protected:
-  /// \brief Active type of storage.
+  /// Active type of storage.
   enum StorageType { Uniqued, Distinct, Temporary };
 
-  /// \brief Storage flag for non-uniqued, otherwise unowned, metadata.
+  /// Storage flag for non-uniqued, otherwise unowned, metadata.
   unsigned char Storage;
   // TODO: expose remaining bits to subclasses.
 
@@ -86,7 +86,7 @@ protected:
 
   ~Metadata() = default;
 
-  /// \brief Default handling of a changed operand, which asserts.
+  /// Default handling of a changed operand, which asserts.
   ///
   /// If subclasses pass themselves in as owners to a tracking node reference,
   /// they must provide an implementation of this method.
@@ -97,7 +97,7 @@ protected:
 public:
   unsigned getMetadataID() const { return SubclassID; }
 
-  /// \brief User-friendly dump.
+  /// User-friendly dump.
   ///
   /// If \c M is provided, metadata nodes will be numbered canonically;
   /// otherwise, pointer addresses are substituted.
@@ -110,7 +110,7 @@ public:
   void dump(const Module *M) const;
   /// @}
 
-  /// \brief Print.
+  /// Print.
   ///
   /// Prints definition of \c this.
   ///
@@ -123,7 +123,7 @@ public:
              bool IsForDebug = false) const;
   /// @}
 
-  /// \brief Print as operand.
+  /// Print as operand.
   ///
   /// Prints reference of \c this.
   ///
@@ -162,7 +162,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Metadata &MD) {
   return OS;
 }
 
-/// \brief Metadata wrapper in the Value hierarchy.
+/// Metadata wrapper in the Value hierarchy.
 ///
 /// A member of the \a Value hierarchy to represent a reference to metadata.
 /// This allows, e.g., instrinsics to have metadata as operands.
@@ -177,7 +177,7 @@ class MetadataAsValue : public Value {
 
   MetadataAsValue(Type *Ty, Metadata *MD);
 
-  /// \brief Drop use of metadata (during teardown).
+  /// Drop use of metadata (during teardown).
   void dropUse() { MD = nullptr; }
 
 public:
@@ -198,7 +198,7 @@ private:
   void untrack();
 };
 
-/// \brief API for tracking metadata references through RAUW and deletion.
+/// API for tracking metadata references through RAUW and deletion.
 ///
 /// Shared API for updating \a Metadata pointers in subclasses that support
 /// RAUW.
@@ -207,7 +207,7 @@ private:
 /// user-friendly tracking reference.
 class MetadataTracking {
 public:
-  /// \brief Track the reference to metadata.
+  /// Track the reference to metadata.
   ///
   /// Register \c MD with \c *MD, if the subclass supports tracking.  If \c *MD
   /// gets RAUW'ed, \c MD will be updated to the new address.  If \c *MD gets
@@ -220,7 +220,7 @@ public:
     return track(&MD, *MD, static_cast<Metadata *>(nullptr));
   }
 
-  /// \brief Track the reference to metadata for \a Metadata.
+  /// Track the reference to metadata for \a Metadata.
   ///
   /// As \a track(Metadata*&), but with support for calling back to \c Owner to
   /// tell it that its operand changed.  This could trigger \c Owner being
@@ -229,7 +229,7 @@ public:
     return track(Ref, MD, &Owner);
   }
 
-  /// \brief Track the reference to metadata for \a MetadataAsValue.
+  /// Track the reference to metadata for \a MetadataAsValue.
   ///
   /// As \a track(Metadata*&), but with support for calling back to \c Owner to
   /// tell it that its operand changed.  This could trigger \c Owner being
@@ -238,13 +238,13 @@ public:
     return track(Ref, MD, &Owner);
   }
 
-  /// \brief Stop tracking a reference to metadata.
+  /// Stop tracking a reference to metadata.
   ///
   /// Stops \c *MD from tracking \c MD.
   static void untrack(Metadata *&MD) { untrack(&MD, *MD); }
   static void untrack(void *Ref, Metadata &MD);
 
-  /// \brief Move tracking from one reference to another.
+  /// Move tracking from one reference to another.
   ///
   /// Semantically equivalent to \c untrack(MD) followed by \c track(New),
   /// except that ownership callbacks are maintained.
@@ -257,19 +257,19 @@ public:
   }
   static bool retrack(void *Ref, Metadata &MD, void *New);
 
-  /// \brief Check whether metadata is replaceable.
+  /// Check whether metadata is replaceable.
   static bool isReplaceable(const Metadata &MD);
 
   using OwnerTy = PointerUnion<MetadataAsValue *, Metadata *>;
 
 private:
-  /// \brief Track a reference to metadata for an owner.
+  /// Track a reference to metadata for an owner.
   ///
   /// Generalized version of tracking.
   static bool track(void *Ref, Metadata &MD, OwnerTy Owner);
 };
 
-/// \brief Shared implementation of use-lists for replaceable metadata.
+/// Shared implementation of use-lists for replaceable metadata.
 ///
 /// Most metadata cannot be RAUW'ed.  This is a shared implementation of
 /// use-lists and associated API for the two that support it (\a ValueAsMetadata
@@ -294,12 +294,12 @@ public:
 
   LLVMContext &getContext() const { return Context; }
 
-  /// \brief Replace all uses of this with MD.
+  /// Replace all uses of this with MD.
   ///
   /// Replace all uses of this with \c MD, which is allowed to be null.
   void replaceAllUsesWith(Metadata *MD);
 
-  /// \brief Resolve all uses of this.
+  /// Resolve all uses of this.
   ///
   /// Resolve all uses of this, turning off RAUW permanently.  If \c
   /// ResolveUsers, call \a MDNode::resolve() on any users whose last operand
@@ -326,7 +326,7 @@ private:
   static bool isReplaceable(const Metadata &MD);
 };
 
-/// \brief Value wrapper in the Metadata hierarchy.
+/// Value wrapper in the Metadata hierarchy.
 ///
 /// This is a custom value handle that allows other metadata to refer to
 /// classes in the Value hierarchy.
@@ -340,7 +340,7 @@ class ValueAsMetadata : public Metadata, ReplaceableMetadataImpl {
 
   Value *V;
 
-  /// \brief Drop users without RAUW (during teardown).
+  /// Drop users without RAUW (during teardown).
   void dropUsers() {
     ReplaceableMetadataImpl::resolveAllUses(/* ResolveUsers */ false);
   }
@@ -382,7 +382,7 @@ public:
   static void handleRAUW(Value *From, Value *To);
 
 protected:
-  /// \brief Handle collisions after \a Value::replaceAllUsesWith().
+  /// Handle collisions after \a Value::replaceAllUsesWith().
   ///
   /// RAUW isn't supported directly for \a ValueAsMetadata, but if the wrapped
   /// \a Value gets RAUW'ed and the target already exists, this is used to
@@ -444,7 +444,7 @@ public:
   }
 };
 
-/// \brief Transitional API for extracting constants from Metadata.
+/// Transitional API for extracting constants from Metadata.
 ///
 /// This namespace contains transitional functions for metadata that points to
 /// \a Constants.
@@ -520,7 +520,7 @@ template <class V, class M> struct IsValidReference {
 
 } // end namespace detail
 
-/// \brief Check whether Metadata has a Value.
+/// Check whether Metadata has a Value.
 ///
 /// As an analogue to \a isa(), check whether \c MD has an \a Value inside of
 /// type \c X.
@@ -539,7 +539,7 @@ inline
   return hasa(&MD);
 }
 
-/// \brief Extract a Value from Metadata.
+/// Extract a Value from Metadata.
 ///
 /// As an analogue to \a cast(), extract the \a Value subclass \c X from \c MD.
 template <class X, class Y>
@@ -554,7 +554,7 @@ inline
   return extract(&MD);
 }
 
-/// \brief Extract a Value from Metadata, allowing null.
+/// Extract a Value from Metadata, allowing null.
 ///
 /// As an analogue to \a cast_or_null(), extract the \a Value subclass \c X
 /// from \c MD, allowing \c MD to be null.
@@ -566,7 +566,7 @@ extract_or_null(Y &&MD) {
   return nullptr;
 }
 
-/// \brief Extract a Value from Metadata, if any.
+/// Extract a Value from Metadata, if any.
 ///
 /// As an analogue to \a dyn_cast_or_null(), extract the \a Value subclass \c X
 /// from \c MD, return null if \c MD doesn't contain a \a Value or if the \a
@@ -579,7 +579,7 @@ dyn_extract(Y &&MD) {
   return nullptr;
 }
 
-/// \brief Extract a Value from Metadata, if any, allowing null.
+/// Extract a Value from Metadata, if any, allowing null.
 ///
 /// As an analogue to \a dyn_cast_or_null(), extract the \a Value subclass \c X
 /// from \c MD, return null if \c MD doesn't contain a \a Value or if the \a
@@ -595,7 +595,7 @@ dyn_extract_or_null(Y &&MD) {
 } // end namespace mdconst
 
 //===----------------------------------------------------------------------===//
-/// \brief A single uniqued string.
+/// A single uniqued string.
 ///
 /// These are used to efficiently contain a byte sequence for metadata.
 /// MDString is always unnamed.
@@ -622,22 +622,22 @@ public:
 
   using iterator = StringRef::iterator;
 
-  /// \brief Pointer to the first byte of the string.
+  /// Pointer to the first byte of the string.
   iterator begin() const { return getString().begin(); }
 
-  /// \brief Pointer to one byte past the end of the string.
+  /// Pointer to one byte past the end of the string.
   iterator end() const { return getString().end(); }
 
   const unsigned char *bytes_begin() const { return getString().bytes_begin(); }
   const unsigned char *bytes_end() const { return getString().bytes_end(); }
 
-  /// \brief Methods for support type inquiry through isa, cast, and dyn_cast.
+  /// Methods for support type inquiry through isa, cast, and dyn_cast.
   static bool classof(const Metadata *MD) {
     return MD->getMetadataID() == MDStringKind;
   }
 };
 
-/// \brief A collection of metadata nodes that might be associated with a
+/// A collection of metadata nodes that might be associated with a
 /// memory access used by the alias-analysis infrastructure.
 struct AAMDNodes {
   explicit AAMDNodes(MDNode *T = nullptr, MDNode *S = nullptr,
@@ -652,16 +652,16 @@ struct AAMDNodes {
 
   explicit operator bool() const { return TBAA || Scope || NoAlias; }
 
-  /// \brief The tag for type-based alias analysis.
+  /// The tag for type-based alias analysis.
   MDNode *TBAA;
 
-  /// \brief The tag for alias scope specification (used with noalias).
+  /// The tag for alias scope specification (used with noalias).
   MDNode *Scope;
 
-  /// \brief The tag specifying the noalias scope.
+  /// The tag specifying the noalias scope.
   MDNode *NoAlias;
 
-  /// \brief Given two sets of AAMDNodes that apply to the same pointer,
+  /// Given two sets of AAMDNodes that apply to the same pointer,
   /// give the best AAMDNodes that are compatible with both (i.e. a set of
   /// nodes whose allowable aliasing conclusions are a subset of those
   /// allowable by both of the inputs). However, for efficiency
@@ -699,7 +699,7 @@ struct DenseMapInfo<AAMDNodes> {
   }
 };
 
-/// \brief Tracking metadata reference owned by Metadata.
+/// Tracking metadata reference owned by Metadata.
 ///
 /// Similar to \a TrackingMDRef, but it's expected to be owned by an instance
 /// of \a Metadata, which has the option of registering itself for callbacks to
@@ -761,7 +761,7 @@ template <> struct simplify_type<const MDOperand> {
   static SimpleType getSimplifiedValue(const MDOperand &MD) { return MD.get(); }
 };
 
-/// \brief Pointer to the context, with optional RAUW support.
+/// Pointer to the context, with optional RAUW support.
 ///
 /// Either a raw (non-null) pointer to the \a LLVMContext, or an owned pointer
 /// to \a ReplaceableMetadataImpl (which has a reference to \a LLVMContext).
@@ -785,7 +785,7 @@ public:
 
   operator LLVMContext &() { return getContext(); }
 
-  /// \brief Whether this contains RAUW support.
+  /// Whether this contains RAUW support.
   bool hasReplaceableUses() const {
     return Ptr.is<ReplaceableMetadataImpl *>();
   }
@@ -809,7 +809,7 @@ public:
     return getReplaceableUses();
   }
 
-  /// \brief Assign RAUW support to this.
+  /// Assign RAUW support to this.
   ///
   /// Make this replaceable, taking ownership of \c ReplaceableUses (which must
   /// not be null).
@@ -822,7 +822,7 @@ public:
     Ptr = ReplaceableUses.release();
   }
 
-  /// \brief Drop RAUW support.
+  /// Drop RAUW support.
   ///
   /// Cede ownership of RAUW support, returning it.
   std::unique_ptr<ReplaceableMetadataImpl> takeReplaceableUses() {
@@ -843,7 +843,7 @@ struct TempMDNodeDeleter {
 #define HANDLE_MDNODE_BRANCH(CLASS) HANDLE_MDNODE_LEAF(CLASS)
 #include "llvm/IR/Metadata.def"
 
-/// \brief Metadata node.
+/// Metadata node.
 ///
 /// Metadata nodes can be uniqued, like constants, or distinct.  Temporary
 /// metadata nodes (with full support for RAUW) can be used to delay uniquing
@@ -876,12 +876,12 @@ protected:
   void *operator new(size_t Size, unsigned NumOps);
   void operator delete(void *Mem);
 
-  /// \brief Required by std, but never called.
+  /// Required by std, but never called.
   void operator delete(void *, unsigned) {
     llvm_unreachable("Constructor throws?");
   }
 
-  /// \brief Required by std, but never called.
+  /// Required by std, but never called.
   void operator delete(void *, unsigned, bool) {
     llvm_unreachable("Constructor throws?");
   }
@@ -910,10 +910,10 @@ public:
   static inline TempMDTuple getTemporary(LLVMContext &Context,
                                          ArrayRef<Metadata *> MDs);
 
-  /// \brief Create a (temporary) clone of this.
+  /// Create a (temporary) clone of this.
   TempMDNode clone() const;
 
-  /// \brief Deallocate a node created by getTemporary.
+  /// Deallocate a node created by getTemporary.
   ///
   /// Calls \c replaceAllUsesWith(nullptr) before deleting, so any remaining
   /// references will be reset.
@@ -921,10 +921,10 @@ public:
 
   LLVMContext &getContext() const { return Context.getContext(); }
 
-  /// \brief Replace a specific operand.
+  /// Replace a specific operand.
   void replaceOperandWith(unsigned I, Metadata *New);
 
-  /// \brief Check if node is fully resolved.
+  /// Check if node is fully resolved.
   ///
   /// If \a isTemporary(), this always returns \c false; if \a isDistinct(),
   /// this always returns \c true.
@@ -941,7 +941,7 @@ public:
   bool isDistinct() const { return Storage == Distinct; }
   bool isTemporary() const { return Storage == Temporary; }
 
-  /// \brief RAUW a temporary.
+  /// RAUW a temporary.
   ///
   /// \pre \a isTemporary() must be \c true.
   void replaceAllUsesWith(Metadata *MD) {
@@ -950,7 +950,7 @@ public:
       Context.getReplaceableUses()->replaceAllUsesWith(MD);
   }
 
-  /// \brief Resolve cycles.
+  /// Resolve cycles.
   ///
   /// Once all forward declarations have been resolved, force cycles to be
   /// resolved.
@@ -961,7 +961,7 @@ public:
   /// Resolve a unique, unresolved node.
   void resolve();
 
-  /// \brief Replace a temporary node with a permanent one.
+  /// Replace a temporary node with a permanent one.
   ///
   /// Try to create a uniqued version of \c N -- in place, if possible -- and
   /// return it.  If \c N cannot be uniqued, return a distinct node instead.
@@ -971,7 +971,7 @@ public:
     return cast<T>(N.release()->replaceWithPermanentImpl());
   }
 
-  /// \brief Replace a temporary node with a uniqued one.
+  /// Replace a temporary node with a uniqued one.
   ///
   /// Create a uniqued version of \c N -- in place, if possible -- and return
   /// it.  Takes ownership of the temporary node.
@@ -983,7 +983,7 @@ public:
     return cast<T>(N.release()->replaceWithUniquedImpl());
   }
 
-  /// \brief Replace a temporary node with a distinct one.
+  /// Replace a temporary node with a distinct one.
   ///
   /// Create a distinct version of \c N -- in place, if possible -- and return
   /// it.  Takes ownership of the temporary node.
@@ -999,7 +999,7 @@ private:
   MDNode *replaceWithDistinctImpl();
 
 protected:
-  /// \brief Set an operand.
+  /// Set an operand.
   ///
   /// Sets the operand directly, without worrying about uniquing.
   void setOperand(unsigned I, Metadata *New);
@@ -1019,14 +1019,14 @@ private:
   void decrementUnresolvedOperandCount();
   void countUnresolvedOperands();
 
-  /// \brief Mutate this to be "uniqued".
+  /// Mutate this to be "uniqued".
   ///
   /// Mutate this so that \a isUniqued().
   /// \pre \a isTemporary().
   /// \pre already added to uniquing set.
   void makeUniqued();
 
-  /// \brief Mutate this to be "distinct".
+  /// Mutate this to be "distinct".
   ///
   /// Mutate this so that \a isDistinct().
   /// \pre \a isTemporary().
@@ -1069,10 +1069,10 @@ public:
     return op_begin()[I];
   }
 
-  /// \brief Return number of MDNode operands.
+  /// Return number of MDNode operands.
   unsigned getNumOperands() const { return NumOperands; }
 
-  /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Metadata *MD) {
     switch (MD->getMetadataID()) {
     default:
@@ -1084,10 +1084,10 @@ public:
     }
   }
 
-  /// \brief Check whether MDNode is a vtable access.
+  /// Check whether MDNode is a vtable access.
   bool isTBAAVtableAccess() const;
 
-  /// \brief Methods for metadata merging.
+  /// Methods for metadata merging.
   static MDNode *concatenate(MDNode *A, MDNode *B);
   static MDNode *intersect(MDNode *A, MDNode *B);
   static MDNode *getMostGenericTBAA(MDNode *A, MDNode *B);
@@ -1097,7 +1097,7 @@ public:
   static MDNode *getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B);
 };
 
-/// \brief Tuple of metadata.
+/// Tuple of metadata.
 ///
 /// This is the simple \a MDNode arbitrary tuple.  Nodes are uniqued by
 /// default based on their operands.
@@ -1125,7 +1125,7 @@ class MDTuple : public MDNode {
   }
 
 public:
-  /// \brief Get the hash, if any.
+  /// Get the hash, if any.
   unsigned getHash() const { return SubclassData32; }
 
   static MDTuple *get(LLVMContext &Context, ArrayRef<Metadata *> MDs) {
@@ -1136,14 +1136,14 @@ public:
     return getImpl(Context, MDs, Uniqued, /* ShouldCreate */ false);
   }
 
-  /// \brief Return a distinct node.
+  /// Return a distinct node.
   ///
   /// Return a distinct node -- i.e., a node that is not uniqued.
   static MDTuple *getDistinct(LLVMContext &Context, ArrayRef<Metadata *> MDs) {
     return getImpl(Context, MDs, Distinct);
   }
 
-  /// \brief Return a temporary node.
+  /// Return a temporary node.
   ///
   /// For use in constructing cyclic MDNode structures. A temporary MDNode is
   /// not uniqued, may be RAUW'd, and must be manually deleted with
@@ -1153,7 +1153,7 @@ public:
     return TempMDTuple(getImpl(Context, MDs, Temporary));
   }
 
-  /// \brief Return a (temporary) clone of this.
+  /// Return a (temporary) clone of this.
   TempMDTuple clone() const { return cloneImpl(); }
 
   static bool classof(const Metadata *MD) {
@@ -1182,7 +1182,7 @@ void TempMDNodeDeleter::operator()(MDNode *Node) const {
   MDNode::deleteTemporary(Node);
 }
 
-/// \brief Typed iterator through MDNode operands.
+/// Typed iterator through MDNode operands.
 ///
 /// An iterator that transforms an \a MDNode::iterator into an iterator over a
 /// particular Metadata subclass.
@@ -1213,7 +1213,7 @@ public:
   bool operator!=(const TypedMDOperandIterator &X) const { return I != X.I; }
 };
 
-/// \brief Typed, array-like tuple of metadata.
+/// Typed, array-like tuple of metadata.
 ///
 /// This is a wrapper for \a MDTuple that makes it act like an array holding a
 /// particular type of metadata.
@@ -1314,7 +1314,7 @@ public:
 };
 
 //===----------------------------------------------------------------------===//
-/// \brief A tuple of MDNodes.
+/// A tuple of MDNodes.
 ///
 /// Despite its name, a NamedMDNode isn't itself an MDNode. NamedMDNodes belong
 /// to modules, have names, and contain lists of MDNodes.
@@ -1377,7 +1377,7 @@ public:
   NamedMDNode(const NamedMDNode &) = delete;
   ~NamedMDNode();
 
-  /// \brief Drop all references and remove the node from parent module.
+  /// Drop all references and remove the node from parent module.
   void eraseFromParent();
 
   /// Remove all uses and clear node vector.
@@ -1385,7 +1385,7 @@ public:
   /// Drop all references to this node's operands.
   void clearOperands();
 
-  /// \brief Get the module that holds this named metadata collection.
+  /// Get the module that holds this named metadata collection.
   inline Module *getParent() { return Parent; }
   inline const Module *getParent() const { return Parent; }
 
diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
index 196e32e3615c..a405f7df3efe 100644
--- a/include/llvm/IR/Module.h
+++ b/include/llvm/IR/Module.h
@@ -59,7 +59,7 @@ class StructType;
 /// A module maintains a GlobalValRefMap object that is used to hold all
 /// constant references to global variables in the module.  When a global
 /// variable is destroyed, it should have no entries in the GlobalValueRefMap.
-/// @brief The main container class for the LLVM Intermediate Representation.
+/// The main container class for the LLVM Intermediate Representation.
 class Module {
 /// @name Types And Enumerations
 /// @{
@@ -207,13 +207,18 @@ public:
   /// @returns the module identifier as a string
   const std::string &getModuleIdentifier() const { return ModuleID; }
 
+  /// Returns the number of non-debug IR instructions in the module.
+  /// This is equivalent to the sum of the IR instruction counts of each
+  /// function contained in the module.
+  unsigned getInstructionCount();
+
   /// Get the module's original source file name. When compiling from
   /// bitcode, this is taken from a bitcode record where it was recorded.
   /// For other compiles it is the same as the ModuleID, which would
   /// contain the source file name.
   const std::string &getSourceFileName() const { return SourceFileName; }
 
-  /// \brief Get a short "name" for the module.
+  /// Get a short "name" for the module.
   ///
   /// This is useful for debugging or logging. It is essentially a convenience
   /// wrapper around getModuleIdentifier().
@@ -251,9 +256,16 @@ public:
   /// versions when the pass does not change.
   std::unique_ptr<RandomNumberGenerator> createRNG(const Pass* P) const;
 
-/// @}
-/// @name Module Level Mutators
-/// @{
+  /// Return true if size-info optimization remark is enabled, false
+  /// otherwise.
+  bool shouldEmitInstrCountChangedRemark() {
+    return getContext().getDiagHandlerPtr()->isAnalysisRemarkEnabled(
+        "size-info");
+  }
+
+  /// @}
+  /// @name Module Level Mutators
+  /// @{
 
   /// Set the module identifier.
   void setModuleIdentifier(StringRef ID) { ModuleID = ID; }
@@ -795,14 +807,14 @@ public:
 /// @name Utility functions for querying Debug information.
 /// @{
 
-  /// \brief Returns the Number of Register ParametersDwarf Version by checking
+  /// Returns the Number of Register ParametersDwarf Version by checking
   /// module flags.
   unsigned getNumberRegisterParameters() const;
 
-  /// \brief Returns the Dwarf Version by checking module flags.
+  /// Returns the Dwarf Version by checking module flags.
   unsigned getDwarfVersion() const;
 
-  /// \brief Returns the CodeView Version by checking module flags.
+  /// Returns the CodeView Version by checking module flags.
   /// Returns zero if not present in module.
   unsigned getCodeViewFlag() const;
 
@@ -810,10 +822,10 @@ public:
 /// @name Utility functions for querying and setting PIC level
 /// @{
 
-  /// \brief Returns the PIC level (small or large model)
+  /// Returns the PIC level (small or large model)
   PICLevel::Level getPICLevel() const;
 
-  /// \brief Set the PIC level (small or large model)
+  /// Set the PIC level (small or large model)
   void setPICLevel(PICLevel::Level PL);
 /// @}
 
@@ -821,28 +833,35 @@ public:
 /// @name Utility functions for querying and setting PIE level
 /// @{
 
-  /// \brief Returns the PIE level (small or large model)
+  /// Returns the PIE level (small or large model)
   PIELevel::Level getPIELevel() const;
 
-  /// \brief Set the PIE level (small or large model)
+  /// Set the PIE level (small or large model)
   void setPIELevel(PIELevel::Level PL);
 /// @}
 
   /// @name Utility functions for querying and setting PGO summary
   /// @{
 
-  /// \brief Attach profile summary metadata to this module.
+  /// Attach profile summary metadata to this module.
   void setProfileSummary(Metadata *M);
 
-  /// \brief Returns profile summary metadata
+  /// Returns profile summary metadata
   Metadata *getProfileSummary();
   /// @}
 
+  /// Returns true if PLT should be avoided for RTLib calls.
+  bool getRtLibUseGOT() const;
+
+  /// Set that PLT should be avoid for RTLib calls.
+  void setRtLibUseGOT();
+
+
   /// Take ownership of the given memory buffer.
   void setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB);
 };
 
-/// \brief Given "llvm.used" or "llvm.compiler.used" as a global name, collect
+/// Given "llvm.used" or "llvm.compiler.used" as a global name, collect
 /// the initializer elements of that global in Set and return the global itself.
 GlobalVariable *collectUsedGlobalVariables(const Module &M,
                                            SmallPtrSetImpl<GlobalValue *> &Set,
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index dd7a0db83774..fdf3d4b5f1ce 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -25,6 +25,10 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScaledNumber.h"
+#include "llvm/Support/StringSaver.h"
 #include <algorithm>
 #include <array>
 #include <cassert>
@@ -45,7 +49,7 @@ template <typename T> struct MappingTraits;
 
 } // end namespace yaml
 
-/// \brief Class to accumulate and hold information about a callee.
+/// Class to accumulate and hold information about a callee.
 struct CalleeInfo {
   enum class HotnessType : uint8_t {
     Unknown = 0,
@@ -54,13 +58,44 @@ struct CalleeInfo {
     Hot = 3,
     Critical = 4
   };
-  HotnessType Hotness = HotnessType::Unknown;
 
-  CalleeInfo() = default;
-  explicit CalleeInfo(HotnessType Hotness) : Hotness(Hotness) {}
+  // The size of the bit-field might need to be adjusted if more values are
+  // added to HotnessType enum.
+  uint32_t Hotness : 3;
+
+  /// The value stored in RelBlockFreq has to be interpreted as the digits of
+  /// a scaled number with a scale of \p -ScaleShift.
+  uint32_t RelBlockFreq : 29;
+  static constexpr int32_t ScaleShift = 8;
+  static constexpr uint64_t MaxRelBlockFreq = (1 << 29) - 1;
+
+  CalleeInfo()
+      : Hotness(static_cast<uint32_t>(HotnessType::Unknown)), RelBlockFreq(0) {}
+  explicit CalleeInfo(HotnessType Hotness, uint64_t RelBF)
+      : Hotness(static_cast<uint32_t>(Hotness)), RelBlockFreq(RelBF) {}
 
   void updateHotness(const HotnessType OtherHotness) {
-    Hotness = std::max(Hotness, OtherHotness);
+    Hotness = std::max(Hotness, static_cast<uint32_t>(OtherHotness));
+  }
+
+  HotnessType getHotness() const { return HotnessType(Hotness); }
+
+  /// Update \p RelBlockFreq from \p BlockFreq and \p EntryFreq
+  ///
+  /// BlockFreq is divided by EntryFreq and added to RelBlockFreq. To represent
+  /// fractional values, the result is represented as a fixed point number with
+  /// scale of -ScaleShift.
+  void updateRelBlockFreq(uint64_t BlockFreq, uint64_t EntryFreq) {
+    if (EntryFreq == 0)
+      return;
+    using Scaled64 = ScaledNumber<uint64_t>;
+    Scaled64 Temp(BlockFreq, ScaleShift);
+    Temp /= Scaled64::get(EntryFreq);
+
+    uint64_t Sum =
+        SaturatingAdd<uint64_t>(Temp.toInt<uint64_t>(), RelBlockFreq);
+    Sum = std::min(Sum, uint64_t(MaxRelBlockFreq));
+    RelBlockFreq = static_cast<uint32_t>(Sum);
   }
 };
 
@@ -69,9 +104,29 @@ class GlobalValueSummary;
 using GlobalValueSummaryList = std::vector<std::unique_ptr<GlobalValueSummary>>;
 
 struct GlobalValueSummaryInfo {
-  /// The GlobalValue corresponding to this summary. This is only used in
-  /// per-module summaries.
-  const GlobalValue *GV = nullptr;
+  union NameOrGV {
+    NameOrGV(bool HaveGVs) {
+      if (HaveGVs)
+        GV = nullptr;
+      else
+        Name = "";
+    }
+
+    /// The GlobalValue corresponding to this summary. This is only used in
+    /// per-module summaries and when the IR is available. E.g. when module
+    /// analysis is being run, or when parsing both the IR and the summary
+    /// from assembly.
+    const GlobalValue *GV;
+
+    /// Summary string representation. This StringRef points to BC module
+    /// string table and is valid until module data is stored in memory.
+    /// This is guaranteed to happen until runThinLTOBackend function is
+    /// called, so it is safe to use this field during thin link. This field
+    /// is only valid if summary index was loaded from BC file.
+    StringRef Name;
+  } U;
+
+  GlobalValueSummaryInfo(bool HaveGVs) : U(HaveGVs) {}
 
   /// List of global value summary structures for a particular value held
   /// in the GlobalValueMap. Requires a vector in the case of multiple
@@ -91,44 +146,98 @@ using GlobalValueSummaryMapTy =
 /// Struct that holds a reference to a particular GUID in a global value
 /// summary.
 struct ValueInfo {
-  const GlobalValueSummaryMapTy::value_type *Ref = nullptr;
+  PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 1, bool>
+      RefAndFlag;
 
   ValueInfo() = default;
-  ValueInfo(const GlobalValueSummaryMapTy::value_type *Ref) : Ref(Ref) {}
+  ValueInfo(bool HaveGVs, const GlobalValueSummaryMapTy::value_type *R) {
+    RefAndFlag.setPointer(R);
+    RefAndFlag.setInt(HaveGVs);
+  }
 
-  operator bool() const { return Ref; }
+  operator bool() const { return getRef(); }
 
-  GlobalValue::GUID getGUID() const { return Ref->first; }
-  const GlobalValue *getValue() const { return Ref->second.GV; }
+  GlobalValue::GUID getGUID() const { return getRef()->first; }
+  const GlobalValue *getValue() const {
+    assert(haveGVs());
+    return getRef()->second.U.GV;
+  }
 
   ArrayRef<std::unique_ptr<GlobalValueSummary>> getSummaryList() const {
-    return Ref->second.SummaryList;
+    return getRef()->second.SummaryList;
+  }
+
+  StringRef name() const {
+    return haveGVs() ? getRef()->second.U.GV->getName()
+                     : getRef()->second.U.Name;
+  }
+
+  bool haveGVs() const { return RefAndFlag.getInt(); }
+
+  const GlobalValueSummaryMapTy::value_type *getRef() const {
+    return RefAndFlag.getPointer();
   }
+
+  bool isDSOLocal() const;
 };
 
+inline raw_ostream &operator<<(raw_ostream &OS, const ValueInfo &VI) {
+  OS << VI.getGUID();
+  if (!VI.name().empty())
+    OS << " (" << VI.name() << ")";
+  return OS;
+}
+
+inline bool operator==(const ValueInfo &A, const ValueInfo &B) {
+  assert(A.getRef() && B.getRef() &&
+         "Need ValueInfo with non-null Ref for comparison");
+  return A.getRef() == B.getRef();
+}
+
+inline bool operator!=(const ValueInfo &A, const ValueInfo &B) {
+  assert(A.getRef() && B.getRef() &&
+         "Need ValueInfo with non-null Ref for comparison");
+  return A.getRef() != B.getRef();
+}
+
+inline bool operator<(const ValueInfo &A, const ValueInfo &B) {
+  assert(A.getRef() && B.getRef() &&
+         "Need ValueInfo with non-null Ref to compare GUIDs");
+  return A.getGUID() < B.getGUID();
+}
+
 template <> struct DenseMapInfo<ValueInfo> {
   static inline ValueInfo getEmptyKey() {
-    return ValueInfo((GlobalValueSummaryMapTy::value_type *)-1);
+    return ValueInfo(false, (GlobalValueSummaryMapTy::value_type *)-8);
   }
 
   static inline ValueInfo getTombstoneKey() {
-    return ValueInfo((GlobalValueSummaryMapTy::value_type *)-2);
+    return ValueInfo(false, (GlobalValueSummaryMapTy::value_type *)-16);
+  }
+
+  static inline bool isSpecialKey(ValueInfo V) {
+    return V == getTombstoneKey() || V == getEmptyKey();
   }
 
-  static bool isEqual(ValueInfo L, ValueInfo R) { return L.Ref == R.Ref; }
-  static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.Ref; }
+  static bool isEqual(ValueInfo L, ValueInfo R) {
+    // We are not supposed to mix ValueInfo(s) with different HaveGVs flag
+    // in a same container.
+    assert(isSpecialKey(L) || isSpecialKey(R) || (L.haveGVs() == R.haveGVs()));
+    return L.getRef() == R.getRef();
+  }
+  static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); }
 };
 
-/// \brief Function and variable summary information to aid decisions and
+/// Function and variable summary information to aid decisions and
 /// implementation of importing.
 class GlobalValueSummary {
 public:
-  /// \brief Sububclass discriminator (for dyn_cast<> et al.)
+  /// Sububclass discriminator (for dyn_cast<> et al.)
   enum SummaryKind : unsigned { AliasKind, FunctionKind, GlobalVarKind };
 
   /// Group flags (Linkage, NotEligibleToImport, etc.) as a bitfield.
   struct GVFlags {
-    /// \brief The linkage type of the associated global value.
+    /// The linkage type of the associated global value.
     ///
     /// One use is to flag values that have local linkage types and need to
     /// have module identifier appended before placing into the combined
@@ -170,7 +279,7 @@ private:
   /// GUID includes the module level id in the hash.
   GlobalValue::GUID OriginalName = 0;
 
-  /// \brief Path of module IR containing value's definition, used to locate
+  /// Path of module IR containing value's definition, used to locate
   /// module during importing.
   ///
   /// This is only used during parsing of the combined index, or when
@@ -185,8 +294,6 @@ private:
   /// are listed in the derived FunctionSummary object.
   std::vector<ValueInfo> RefEdgeList;
 
-  bool isLive() const { return Flags.Live; }
-
 protected:
   GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector<ValueInfo> Refs)
       : Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) {
@@ -199,7 +306,7 @@ public:
 
   /// Returns the hash of the original name, it is identical to the GUID for
   /// externally visible symbols, but not for local ones.
-  GlobalValue::GUID getOriginalName() { return OriginalName; }
+  GlobalValue::GUID getOriginalName() const { return OriginalName; }
 
   /// Initialize the original name hash in this summary.
   void setOriginalName(GlobalValue::GUID Name) { OriginalName = Name; }
@@ -215,7 +322,7 @@ public:
   StringRef modulePath() const { return ModulePath; }
 
   /// Get the flags for this GlobalValue (see \p struct GVFlags).
-  GVFlags flags() { return Flags; }
+  GVFlags flags() const { return Flags; }
 
   /// Return linkage type recorded for this global value.
   GlobalValue::LinkageTypes linkage() const {
@@ -231,6 +338,8 @@ public:
   /// Return true if this global value can't be imported.
   bool notEligibleToImport() const { return Flags.NotEligibleToImport; }
 
+  bool isLive() const { return Flags.Live; }
+
   void setLive(bool Live) { Flags.Live = Live; }
 
   void setDSOLocal(bool Local) { Flags.DSOLocal = Local; }
@@ -249,11 +358,9 @@ public:
   const GlobalValueSummary *getBaseObject() const;
 
   friend class ModuleSummaryIndex;
-  friend void computeDeadSymbols(class ModuleSummaryIndex &,
-                                 const DenseSet<GlobalValue::GUID> &);
 };
 
-/// \brief Alias summary information.
+/// Alias summary information.
 class AliasSummary : public GlobalValueSummary {
   GlobalValueSummary *AliaseeSummary;
   // AliaseeGUID is only set and accessed when we are building a combined index
@@ -273,6 +380,8 @@ public:
   void setAliasee(GlobalValueSummary *Aliasee) { AliaseeSummary = Aliasee; }
   void setAliaseeGUID(GlobalValue::GUID GUID) { AliaseeGUID = GUID; }
 
+  bool hasAliasee() const { return !!AliaseeSummary; }
+
   const GlobalValueSummary &getAliasee() const {
     assert(AliaseeSummary && "Unexpected missing aliasee summary");
     return *AliaseeSummary;
@@ -300,13 +409,20 @@ inline GlobalValueSummary *GlobalValueSummary::getBaseObject() {
   return this;
 }
 
-/// \brief Function summary information to aid decisions and implementation of
+/// Function summary information to aid decisions and implementation of
 /// importing.
 class FunctionSummary : public GlobalValueSummary {
 public:
   /// <CalleeValueInfo, CalleeInfo> call edge pair.
   using EdgeTy = std::pair<ValueInfo, CalleeInfo>;
 
+  /// Types for -force-summary-edges-cold debugging option.
+  enum ForceSummaryHotnessType : unsigned {
+    FSHT_None,
+    FSHT_AllNonCritical,
+    FSHT_All
+  };
+
   /// An "identifier" for a virtual function. This contains the type identifier
   /// represented as a GUID and the offset from the address point to the virtual
   /// function pointer, where "address point" is as defined in the Itanium ABI:
@@ -324,6 +440,26 @@ public:
     std::vector<uint64_t> Args;
   };
 
+  /// All type identifier related information. Because these fields are
+  /// relatively uncommon we only allocate space for them if necessary.
+  struct TypeIdInfo {
+    /// List of type identifiers used by this function in llvm.type.test
+    /// intrinsics referenced by something other than an llvm.assume intrinsic,
+    /// represented as GUIDs.
+    std::vector<GlobalValue::GUID> TypeTests;
+
+    /// List of virtual calls made by this function using (respectively)
+    /// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics that do
+    /// not have all constant integer arguments.
+    std::vector<VFuncId> TypeTestAssumeVCalls, TypeCheckedLoadVCalls;
+
+    /// List of virtual calls made by this function using (respectively)
+    /// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics with
+    /// all constant integer arguments.
+    std::vector<ConstVCall> TypeTestAssumeConstVCalls,
+        TypeCheckedLoadConstVCalls;
+  };
+
   /// Function attribute flags. Used to track if a function accesses memory,
   /// recurses or aliases.
   struct FFlags {
@@ -333,6 +469,25 @@ public:
     unsigned ReturnDoesNotAlias : 1;
   };
 
+  /// Create an empty FunctionSummary (with specified call edges).
+  /// Used to represent external nodes and the dummy root node.
+  static FunctionSummary
+  makeDummyFunctionSummary(std::vector<FunctionSummary::EdgeTy> Edges) {
+    return FunctionSummary(
+        FunctionSummary::GVFlags(
+            GlobalValue::LinkageTypes::AvailableExternallyLinkage,
+            /*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false),
+        0, FunctionSummary::FFlags{}, std::vector<ValueInfo>(),
+        std::move(Edges), std::vector<GlobalValue::GUID>(),
+        std::vector<FunctionSummary::VFuncId>(),
+        std::vector<FunctionSummary::VFuncId>(),
+        std::vector<FunctionSummary::ConstVCall>(),
+        std::vector<FunctionSummary::ConstVCall>());
+  }
+
+  /// A dummy node to reference external functions that aren't in the index
+  static FunctionSummary ExternalNode;
+
 private:
   /// Number of instructions (ignoring debug instructions, e.g.) computed
   /// during the initial compile step when the summary index is first built.
@@ -345,25 +500,6 @@ private:
   /// List of <CalleeValueInfo, CalleeInfo> call edge pairs from this function.
   std::vector<EdgeTy> CallGraphEdgeList;
 
-  /// All type identifier related information. Because these fields are
-  /// relatively uncommon we only allocate space for them if necessary.
-  struct TypeIdInfo {
-    /// List of type identifiers used by this function in llvm.type.test
-    /// intrinsics other than by an llvm.assume intrinsic, represented as GUIDs.
-    std::vector<GlobalValue::GUID> TypeTests;
-
-    /// List of virtual calls made by this function using (respectively)
-    /// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics that do
-    /// not have all constant integer arguments.
-    std::vector<VFuncId> TypeTestAssumeVCalls, TypeCheckedLoadVCalls;
-
-    /// List of virtual calls made by this function using (respectively)
-    /// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics with
-    /// all constant integer arguments.
-    std::vector<ConstVCall> TypeTestAssumeConstVCalls,
-        TypeCheckedLoadConstVCalls;
-  };
-
   std::unique_ptr<TypeIdInfo> TIdInfo;
 
 public:
@@ -393,7 +529,7 @@ public:
   }
 
   /// Get function attribute flags.
-  FFlags &fflags() { return FunFlags; }
+  FFlags fflags() const { return FunFlags; }
 
   /// Get the instruction count recorded for this function.
   unsigned instCount() const { return InstCount; }
@@ -452,6 +588,10 @@ public:
       TIdInfo = llvm::make_unique<TypeIdInfo>();
     TIdInfo->TypeTests.push_back(Guid);
   }
+
+  const TypeIdInfo *getTypeIdInfo() const { return TIdInfo.get(); };
+
+  friend struct GraphTraits<ValueInfo>;
 };
 
 template <> struct DenseMapInfo<FunctionSummary::VFuncId> {
@@ -488,7 +628,7 @@ template <> struct DenseMapInfo<FunctionSummary::ConstVCall> {
   }
 };
 
-/// \brief Global variable summary information to aid decisions and
+/// Global variable summary information to aid decisions and
 /// implementation of importing.
 ///
 /// Currently this doesn't add anything to the base \p GlobalValueSummary,
@@ -538,8 +678,11 @@ struct TypeTestResolution {
 
 struct WholeProgramDevirtResolution {
   enum Kind {
-    Indir,      ///< Just do a regular virtual call
-    SingleImpl, ///< Single implementation devirtualization
+    Indir,        ///< Just do a regular virtual call
+    SingleImpl,   ///< Single implementation devirtualization
+    BranchFunnel, ///< When retpoline mitigation is enabled, use a branch funnel
+                  ///< that is defined in the merged module. Otherwise same as
+                  ///< Indir.
   } TheKind = Indir;
 
   std::string SingleImplName;
@@ -607,7 +750,6 @@ private:
 
   /// Mapping from type identifiers to summary information for that type
   /// identifier.
-  // FIXME: Add bitcode read/write support for this field.
   std::map<std::string, TypeIdSummary> TypeIdMap;
 
   /// Mapping from original ID to GUID. If original ID can map to multiple
@@ -619,24 +761,111 @@ private:
   /// considered live.
   bool WithGlobalValueDeadStripping = false;
 
+  /// Indicates that distributed backend should skip compilation of the
+  /// module. Flag is suppose to be set by distributed ThinLTO indexing
+  /// when it detected that the module is not needed during the final
+  /// linking. As result distributed backend should just output a minimal
+  /// valid object file.
+  bool SkipModuleByDistributedBackend = false;
+
+  /// If true then we're performing analysis of IR module, or parsing along with
+  /// the IR from assembly. The value of 'false' means we're reading summary
+  /// from BC or YAML source. Affects the type of value stored in NameOrGV
+  /// union.
+  bool HaveGVs;
+
   std::set<std::string> CfiFunctionDefs;
   std::set<std::string> CfiFunctionDecls;
 
+  // Used in cases where we want to record the name of a global, but
+  // don't have the string owned elsewhere (e.g. the Strtab on a module).
+  StringSaver Saver;
+  BumpPtrAllocator Alloc;
+
   // YAML I/O support.
   friend yaml::MappingTraits<ModuleSummaryIndex>;
 
   GlobalValueSummaryMapTy::value_type *
   getOrInsertValuePtr(GlobalValue::GUID GUID) {
-    return &*GlobalValueMap.emplace(GUID, GlobalValueSummaryInfo{}).first;
+    return &*GlobalValueMap.emplace(GUID, GlobalValueSummaryInfo(HaveGVs))
+                 .first;
   }
 
 public:
+  // See HaveGVs variable comment.
+  ModuleSummaryIndex(bool HaveGVs) : HaveGVs(HaveGVs), Saver(Alloc) {}
+
+  bool haveGVs() const { return HaveGVs; }
+
   gvsummary_iterator begin() { return GlobalValueMap.begin(); }
   const_gvsummary_iterator begin() const { return GlobalValueMap.begin(); }
   gvsummary_iterator end() { return GlobalValueMap.end(); }
   const_gvsummary_iterator end() const { return GlobalValueMap.end(); }
   size_t size() const { return GlobalValueMap.size(); }
 
+  /// Convenience function for doing a DFS on a ValueInfo. Marks the function in
+  /// the FunctionHasParent map.
+  static void discoverNodes(ValueInfo V,
+                            std::map<ValueInfo, bool> &FunctionHasParent) {
+    if (!V.getSummaryList().size())
+      return; // skip external functions that don't have summaries
+
+    // Mark discovered if we haven't yet
+    auto S = FunctionHasParent.emplace(V, false);
+
+    // Stop if we've already discovered this node
+    if (!S.second)
+      return;
+
+    FunctionSummary *F =
+        dyn_cast<FunctionSummary>(V.getSummaryList().front().get());
+    assert(F != nullptr && "Expected FunctionSummary node");
+
+    for (auto &C : F->calls()) {
+      // Insert node if necessary
+      auto S = FunctionHasParent.emplace(C.first, true);
+
+      // Skip nodes that we're sure have parents
+      if (!S.second && S.first->second)
+        continue;
+
+      if (S.second)
+        discoverNodes(C.first, FunctionHasParent);
+      else
+        S.first->second = true;
+    }
+  }
+
+  // Calculate the callgraph root
+  FunctionSummary calculateCallGraphRoot() {
+    // Functions that have a parent will be marked in FunctionHasParent pair.
+    // Once we've marked all functions, the functions in the map that are false
+    // have no parent (so they're the roots)
+    std::map<ValueInfo, bool> FunctionHasParent;
+
+    for (auto &S : *this) {
+      // Skip external functions
+      if (!S.second.SummaryList.size() ||
+          !isa<FunctionSummary>(S.second.SummaryList.front().get()))
+        continue;
+      discoverNodes(ValueInfo(HaveGVs, &S), FunctionHasParent);
+    }
+
+    std::vector<FunctionSummary::EdgeTy> Edges;
+    // create edges to all roots in the Index
+    for (auto &P : FunctionHasParent) {
+      if (P.second)
+        continue; // skip over non-root nodes
+      Edges.push_back(std::make_pair(P.first, CalleeInfo{}));
+    }
+    if (Edges.empty()) {
+      // Failed to find root - return an empty node
+      return FunctionSummary::makeDummyFunctionSummary({});
+    }
+    auto CallGraphRoot = FunctionSummary::makeDummyFunctionSummary(Edges);
+    return CallGraphRoot;
+  }
+
   bool withGlobalValueDeadStripping() const {
     return WithGlobalValueDeadStripping;
   }
@@ -644,27 +873,54 @@ public:
     WithGlobalValueDeadStripping = true;
   }
 
+  bool skipModuleByDistributedBackend() const {
+    return SkipModuleByDistributedBackend;
+  }
+  void setSkipModuleByDistributedBackend() {
+    SkipModuleByDistributedBackend = true;
+  }
+
   bool isGlobalValueLive(const GlobalValueSummary *GVS) const {
     return !WithGlobalValueDeadStripping || GVS->isLive();
   }
   bool isGUIDLive(GlobalValue::GUID GUID) const;
 
+  /// Return a ValueInfo for the index value_type (convenient when iterating
+  /// index).
+  ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const {
+    return ValueInfo(HaveGVs, &R);
+  }
+
   /// Return a ValueInfo for GUID if it exists, otherwise return ValueInfo().
   ValueInfo getValueInfo(GlobalValue::GUID GUID) const {
     auto I = GlobalValueMap.find(GUID);
-    return ValueInfo(I == GlobalValueMap.end() ? nullptr : &*I);
+    return ValueInfo(HaveGVs, I == GlobalValueMap.end() ? nullptr : &*I);
   }
 
   /// Return a ValueInfo for \p GUID.
   ValueInfo getOrInsertValueInfo(GlobalValue::GUID GUID) {
-    return ValueInfo(getOrInsertValuePtr(GUID));
+    return ValueInfo(HaveGVs, getOrInsertValuePtr(GUID));
+  }
+
+  // Save a string in the Index. Use before passing Name to
+  // getOrInsertValueInfo when the string isn't owned elsewhere (e.g. on the
+  // module's Strtab).
+  StringRef saveString(std::string String) { return Saver.save(String); }
+
+  /// Return a ValueInfo for \p GUID setting value \p Name.
+  ValueInfo getOrInsertValueInfo(GlobalValue::GUID GUID, StringRef Name) {
+    assert(!HaveGVs);
+    auto VP = getOrInsertValuePtr(GUID);
+    VP->second.U.Name = Name;
+    return ValueInfo(HaveGVs, VP);
   }
 
   /// Return a ValueInfo for \p GV and mark it as belonging to GV.
   ValueInfo getOrInsertValueInfo(const GlobalValue *GV) {
+    assert(HaveGVs);
     auto VP = getOrInsertValuePtr(GV->getGUID());
-    VP->second.GV = GV;
-    return ValueInfo(VP);
+    VP->second.U.GV = GV;
+    return ValueInfo(HaveGVs, VP);
   }
 
   /// Return the GUID for \p OriginalId in the OidGuidMap.
@@ -679,6 +935,12 @@ public:
   std::set<std::string> &cfiFunctionDecls() { return CfiFunctionDecls; }
   const std::set<std::string> &cfiFunctionDecls() const { return CfiFunctionDecls; }
 
+  /// Add a global value summary for a value.
+  void addGlobalValueSummary(const GlobalValue &GV,
+                             std::unique_ptr<GlobalValueSummary> Summary) {
+    addGlobalValueSummary(getOrInsertValueInfo(&GV), std::move(Summary));
+  }
+
   /// Add a global value summary for a value of the given name.
   void addGlobalValueSummary(StringRef ValueName,
                              std::unique_ptr<GlobalValueSummary> Summary) {
@@ -692,7 +954,7 @@ public:
     addOriginalName(VI.getGUID(), Summary->getOriginalName());
     // Here we have a notionally const VI, but the value it points to is owned
     // by the non-const *this.
-    const_cast<GlobalValueSummaryMapTy::value_type *>(VI.Ref)
+    const_cast<GlobalValueSummaryMapTy::value_type *>(VI.getRef())
         ->second.SummaryList.push_back(std::move(Summary));
   }
 
@@ -730,8 +992,7 @@ public:
   GlobalValueSummary *getGlobalValueSummary(const GlobalValue &GV,
                                             bool PerModuleIndex = true) const {
     assert(GV.hasName() && "Can't get GlobalValueSummary for GV with no name");
-    return getGlobalValueSummary(GlobalValue::getGUID(GV.getName()),
-                                 PerModuleIndex);
+    return getGlobalValueSummary(GV.getGUID(), PerModuleIndex);
   }
 
   /// Returns the first GlobalValueSummary for \p ValueGUID, asserting that
@@ -788,6 +1049,13 @@ public:
     return &*ModulePathStringTable.insert({ModPath, {ModId, Hash}}).first;
   }
 
+  /// Return module entry for module with the given \p ModPath.
+  ModuleInfo *getModule(StringRef ModPath) {
+    auto It = ModulePathStringTable.find(ModPath);
+    assert(It != ModulePathStringTable.end() && "Module not registered");
+    return &*It;
+  }
+
   /// Check if the given Module has any functions available for exporting
   /// in the index. We consider any module present in the ModulePathStringTable
   /// to have exported functions.
@@ -814,7 +1082,7 @@ public:
     return &I->second;
   }
 
-  /// Collect for the given module the list of function it defines
+  /// Collect for the given module the list of functions it defines
   /// (GUID -> Summary).
   void collectDefinedFunctionsForModule(StringRef ModulePath,
                                         GVSummaryMapTy &GVSummaryMap) const;
@@ -823,6 +1091,65 @@ public:
   /// Summary).
   void collectDefinedGVSummariesPerModule(
       StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries) const;
+
+  /// Print to an output stream.
+  void print(raw_ostream &OS, bool IsForDebug = false) const;
+
+  /// Dump to stderr (for debugging).
+  void dump() const;
+
+  /// Export summary to dot file for GraphViz.
+  void exportToDot(raw_ostream& OS) const;
+
+  /// Print out strongly connected components for debugging.
+  void dumpSCCs(raw_ostream &OS);
+};
+
+/// GraphTraits definition to build SCC for the index
+template <> struct GraphTraits<ValueInfo> {
+  typedef ValueInfo NodeRef;
+
+  static NodeRef valueInfoFromEdge(FunctionSummary::EdgeTy &P) {
+    return P.first;
+  }
+  using ChildIteratorType =
+      mapped_iterator<std::vector<FunctionSummary::EdgeTy>::iterator,
+                      decltype(&valueInfoFromEdge)>;
+
+  static NodeRef getEntryNode(ValueInfo V) { return V; }
+
+  static ChildIteratorType child_begin(NodeRef N) {
+    if (!N.getSummaryList().size()) // handle external function
+      return ChildIteratorType(
+          FunctionSummary::ExternalNode.CallGraphEdgeList.begin(),
+          &valueInfoFromEdge);
+    FunctionSummary *F =
+        cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+    return ChildIteratorType(F->CallGraphEdgeList.begin(), &valueInfoFromEdge);
+  }
+
+  static ChildIteratorType child_end(NodeRef N) {
+    if (!N.getSummaryList().size()) // handle external function
+      return ChildIteratorType(
+          FunctionSummary::ExternalNode.CallGraphEdgeList.end(),
+          &valueInfoFromEdge);
+    FunctionSummary *F =
+        cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+    return ChildIteratorType(F->CallGraphEdgeList.end(), &valueInfoFromEdge);
+  }
+};
+
+template <>
+struct GraphTraits<ModuleSummaryIndex *> : public GraphTraits<ValueInfo> {
+  static NodeRef getEntryNode(ModuleSummaryIndex *I) {
+    std::unique_ptr<GlobalValueSummary> Root =
+        make_unique<FunctionSummary>(I->calculateCallGraphRoot());
+    GlobalValueSummaryInfo G(I->haveGVs());
+    G.SummaryList.push_back(std::move(Root));
+    static auto P =
+        GlobalValueSummaryMapTy::value_type(GlobalValue::GUID(0), std::move(G));
+    return ValueInfo(I->haveGVs(), &P);
+  }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h
index 4687f2d53e7e..1b339ab32cf1 100644
--- a/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -98,6 +98,8 @@ template <> struct ScalarEnumerationTraits<WholeProgramDevirtResolution::Kind> {
   static void enumeration(IO &io, WholeProgramDevirtResolution::Kind &value) {
     io.enumCase(value, "Indir", WholeProgramDevirtResolution::Indir);
     io.enumCase(value, "SingleImpl", WholeProgramDevirtResolution::SingleImpl);
+    io.enumCase(value, "BranchFunnel",
+                WholeProgramDevirtResolution::BranchFunnel);
   }
 };
 
@@ -136,6 +138,7 @@ template <> struct MappingTraits<TypeIdSummary> {
 struct FunctionSummaryYaml {
   unsigned Linkage;
   bool NotEligibleToImport, Live, IsLocal;
+  std::vector<uint64_t> Refs;
   std::vector<uint64_t> TypeTests;
   std::vector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
       TypeCheckedLoadVCalls;
@@ -178,6 +181,7 @@ template <> struct MappingTraits<FunctionSummaryYaml> {
     io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport);
     io.mapOptional("Live", summary.Live);
     io.mapOptional("Local", summary.IsLocal);
+    io.mapOptional("Refs", summary.Refs);
     io.mapOptional("TypeTests", summary.TypeTests);
     io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls);
     io.mapOptional("TypeCheckedLoadVCalls", summary.TypeCheckedLoadVCalls);
@@ -207,13 +211,21 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
       io.setError("key not an integer");
       return;
     }
-    auto &Elem = V[KeyInt];
+    if (!V.count(KeyInt))
+      V.emplace(KeyInt, /*IsAnalysis=*/false);
+    auto &Elem = V.find(KeyInt)->second;
     for (auto &FSum : FSums) {
+      std::vector<ValueInfo> Refs;
+      for (auto &RefGUID : FSum.Refs) {
+        if (!V.count(RefGUID))
+          V.emplace(RefGUID, /*IsAnalysis=*/false);
+        Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*V.find(RefGUID)));
+      }
       Elem.SummaryList.push_back(llvm::make_unique<FunctionSummary>(
           GlobalValueSummary::GVFlags(
               static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
               FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal),
-          0, FunctionSummary::FFlags{}, ArrayRef<ValueInfo>{},
+          0, FunctionSummary::FFlags{}, Refs,
           ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests),
           std::move(FSum.TypeTestAssumeVCalls),
           std::move(FSum.TypeCheckedLoadVCalls),
@@ -225,15 +237,20 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
     for (auto &P : V) {
       std::vector<FunctionSummaryYaml> FSums;
       for (auto &Sum : P.second.SummaryList) {
-        if (auto *FSum = dyn_cast<FunctionSummary>(Sum.get()))
+        if (auto *FSum = dyn_cast<FunctionSummary>(Sum.get())) {
+          std::vector<uint64_t> Refs;
+          for (auto &VI : FSum->refs())
+            Refs.push_back(VI.getGUID());
           FSums.push_back(FunctionSummaryYaml{
               FSum->flags().Linkage,
               static_cast<bool>(FSum->flags().NotEligibleToImport),
               static_cast<bool>(FSum->flags().Live),
-              static_cast<bool>(FSum->flags().DSOLocal), FSum->type_tests(),
-              FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(),
+              static_cast<bool>(FSum->flags().DSOLocal), Refs,
+              FSum->type_tests(), FSum->type_test_assume_vcalls(),
+              FSum->type_checked_load_vcalls(),
               FSum->type_test_assume_const_vcalls(),
               FSum->type_checked_load_const_vcalls()});
+          }
       }
       if (!FSums.empty())
         io.mapRequired(llvm::utostr(P.first).c_str(), FSums);
diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h
index 01746e4b6a29..939cec7f4aa4 100644
--- a/include/llvm/IR/Operator.h
+++ b/include/llvm/IR/Operator.h
@@ -207,17 +207,28 @@ public:
   bool isFast() const          { return all(); }
 
   /// Flag setters
-  void setAllowReassoc()    { Flags |= AllowReassoc; }
-  void setNoNaNs()          { Flags |= NoNaNs; }
-  void setNoInfs()          { Flags |= NoInfs; }
-  void setNoSignedZeros()   { Flags |= NoSignedZeros; }
-  void setAllowReciprocal() { Flags |= AllowReciprocal; }
-  // TODO: Change the other set* functions to take a parameter?
-  void setAllowContract(bool B) {
+  void setAllowReassoc(bool B = true) {
+    Flags = (Flags & ~AllowReassoc) | B * AllowReassoc;
+  }
+  void setNoNaNs(bool B = true) {
+    Flags = (Flags & ~NoNaNs) | B * NoNaNs;
+  }
+  void setNoInfs(bool B = true) {
+    Flags = (Flags & ~NoInfs) | B * NoInfs;
+  }
+  void setNoSignedZeros(bool B = true) {
+    Flags = (Flags & ~NoSignedZeros) | B * NoSignedZeros;
+  }
+  void setAllowReciprocal(bool B = true) {
+    Flags = (Flags & ~AllowReciprocal) | B * AllowReciprocal;
+  }
+  void setAllowContract(bool B = true) {
     Flags = (Flags & ~AllowContract) | B * AllowContract;
   }
-  void setApproxFunc()      { Flags |= ApproxFunc; }
-  void setFast()            { set(); }
+  void setApproxFunc(bool B = true) {
+    Flags = (Flags & ~ApproxFunc) | B * ApproxFunc;
+  }
+  void setFast(bool B = true) { B ? set() : clear(); }
 
   void operator&=(const FastMathFlags &OtherFlags) {
     Flags &= OtherFlags.Flags;
@@ -507,7 +518,7 @@ public:
       });
   }
 
-  /// \brief Accumulate the constant address offset of this GEP if possible.
+  /// Accumulate the constant address offset of this GEP if possible.
   ///
   /// This routine accepts an APInt into which it will accumulate the constant
   /// offset of this GEP if the GEP is in fact constant. If the GEP is not
diff --git a/include/llvm/IR/OptBisect.h b/include/llvm/IR/OptBisect.h
index 09e67aa79246..aa24c94c0130 100644
--- a/include/llvm/IR/OptBisect.h
+++ b/include/llvm/IR/OptBisect.h
@@ -20,14 +20,34 @@
 namespace llvm {
 
 class Pass;
+class Module;
+class Function;
+class BasicBlock;
+class Region;
+class Loop;
+class CallGraphSCC;
+
+/// Extensions to this class implement mechanisms to disable passes and
+/// individual optimizations at compile time.
+class OptPassGate {
+public:
+  virtual ~OptPassGate() = default;
+
+  virtual bool shouldRunPass(const Pass *P, const Module &U) { return true; }
+  virtual bool shouldRunPass(const Pass *P, const Function &U)  {return true; }
+  virtual bool shouldRunPass(const Pass *P, const BasicBlock &U)  { return true; }
+  virtual bool shouldRunPass(const Pass *P, const Region &U)  { return true; }
+  virtual bool shouldRunPass(const Pass *P, const Loop &U)  { return true; }
+  virtual bool shouldRunPass(const Pass *P, const CallGraphSCC &U)  { return true; }
+};
 
 /// This class implements a mechanism to disable passes and individual
 /// optimizations at compile time based on a command line option
 /// (-opt-bisect-limit) in order to perform a bisecting search for
 /// optimization-related problems.
-class OptBisect {
+class OptBisect : public OptPassGate {
 public:
-  /// \brief Default constructor, initializes the OptBisect state based on the
+  /// Default constructor, initializes the OptBisect state based on the
   /// -opt-bisect-limit command line argument.
   ///
   /// By default, bisection is disabled.
@@ -36,20 +56,26 @@ public:
   /// through LLVMContext.
   OptBisect();
 
+  virtual ~OptBisect() = default;
+
   /// Checks the bisect limit to determine if the specified pass should run.
   ///
-  /// This function will immediate return true if bisection is disabled. If the
-  /// bisect limit is set to -1, the function will print a message describing
+  /// These functions immediately return true if bisection is disabled. If the
+  /// bisect limit is set to -1, the functions print a message describing
   /// the pass and the bisect number assigned to it and return true.  Otherwise,
-  /// the function will print a message with the bisect number assigned to the
+  /// the functions print a message with the bisect number assigned to the
   /// pass and indicating whether or not the pass will be run and return true if
-  /// the bisect limit has not yet been exceded or false if it has.
+  /// the bisect limit has not yet been exceeded or false if it has.
   ///
-  /// Most passes should not call this routine directly.  Instead, it is called
-  /// through a helper routine provided by the pass base class.  For instance,
-  /// function passes should call FunctionPass::skipFunction().
-  template <class UnitT>
-  bool shouldRunPass(const Pass *P, const UnitT &U);
+  /// Most passes should not call these routines directly. Instead, they are
+  /// called through helper routines provided by the pass base classes.  For
+  /// instance, function passes should call FunctionPass::skipFunction().
+  bool shouldRunPass(const Pass *P, const Module &U) override;
+  bool shouldRunPass(const Pass *P, const Function &U) override;
+  bool shouldRunPass(const Pass *P, const BasicBlock &U) override;
+  bool shouldRunPass(const Pass *P, const Region &U) override;
+  bool shouldRunPass(const Pass *P, const Loop &U) override;
+  bool shouldRunPass(const Pass *P, const CallGraphSCC &U) override;
 
 private:
   bool checkPass(const StringRef PassName, const StringRef TargetDesc);
diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h
index 4f838a719512..a5d4aaf71c0e 100644
--- a/include/llvm/IR/PassManager.h
+++ b/include/llvm/IR/PassManager.h
@@ -152,17 +152,17 @@ private:
 /// ```
 class PreservedAnalyses {
 public:
-  /// \brief Convenience factory function for the empty preserved set.
+  /// Convenience factory function for the empty preserved set.
   static PreservedAnalyses none() { return PreservedAnalyses(); }
 
-  /// \brief Construct a special preserved set that preserves all passes.
+  /// Construct a special preserved set that preserves all passes.
   static PreservedAnalyses all() {
     PreservedAnalyses PA;
     PA.PreservedIDs.insert(&AllAnalysesKey);
     return PA;
   }
 
-  /// \brief Construct a preserved analyses object with a single preserved set.
+  /// Construct a preserved analyses object with a single preserved set.
   template <typename AnalysisSetT>
   static PreservedAnalyses allInSet() {
     PreservedAnalyses PA;
@@ -173,7 +173,7 @@ public:
   /// Mark an analysis as preserved.
   template <typename AnalysisT> void preserve() { preserve(AnalysisT::ID()); }
 
-  /// \brief Given an analysis's ID, mark the analysis as preserved, adding it
+  /// Given an analysis's ID, mark the analysis as preserved, adding it
   /// to the set.
   void preserve(AnalysisKey *ID) {
     // Clear this ID from the explicit not-preserved set if present.
@@ -218,7 +218,7 @@ public:
     NotPreservedAnalysisIDs.insert(ID);
   }
 
-  /// \brief Intersect this set with another in place.
+  /// Intersect this set with another in place.
   ///
   /// This is a mutating operation on this preserved set, removing all
   /// preserved passes which are not also preserved in the argument.
@@ -240,7 +240,7 @@ public:
         PreservedIDs.erase(ID);
   }
 
-  /// \brief Intersect this set with a temporary other set in place.
+  /// Intersect this set with a temporary other set in place.
   ///
   /// This is a mutating operation on this preserved set, removing all
   /// preserved passes which are not also preserved in the argument.
@@ -402,7 +402,7 @@ struct AnalysisInfoMixin : PassInfoMixin<DerivedT> {
   }
 };
 
-/// \brief Manages a sequence of passes over a particular unit of IR.
+/// Manages a sequence of passes over a particular unit of IR.
 ///
 /// A pass manager contains a sequence of passes to run over a particular unit
 /// of IR (e.g. Functions, Modules). It is itself a valid pass over that unit of
@@ -420,7 +420,7 @@ template <typename IRUnitT,
 class PassManager : public PassInfoMixin<
                         PassManager<IRUnitT, AnalysisManagerT, ExtraArgTs...>> {
 public:
-  /// \brief Construct a pass manager.
+  /// Construct a pass manager.
   ///
   /// If \p DebugLogging is true, we'll log our progress to llvm::dbgs().
   explicit PassManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
@@ -439,7 +439,7 @@ public:
     return *this;
   }
 
-  /// \brief Run all of the passes in this manager over the given unit of IR.
+  /// Run all of the passes in this manager over the given unit of IR.
   /// ExtraArgs are passed to each pass.
   PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
                         ExtraArgTs... ExtraArgs) {
@@ -496,21 +496,21 @@ private:
 
   std::vector<std::unique_ptr<PassConceptT>> Passes;
 
-  /// \brief Flag indicating whether we should do debug logging.
+  /// Flag indicating whether we should do debug logging.
   bool DebugLogging;
 };
 
 extern template class PassManager<Module>;
 
-/// \brief Convenience typedef for a pass manager over modules.
+/// Convenience typedef for a pass manager over modules.
 using ModulePassManager = PassManager<Module>;
 
 extern template class PassManager<Function>;
 
-/// \brief Convenience typedef for a pass manager over functions.
+/// Convenience typedef for a pass manager over functions.
 using FunctionPassManager = PassManager<Function>;
 
-/// \brief A container for analyses that lazily runs them and caches their
+/// A container for analyses that lazily runs them and caches their
 /// results.
 ///
 /// This class can manage analyses for any IR unit where the address of the IR
@@ -527,7 +527,7 @@ private:
       detail::AnalysisPassConcept<IRUnitT, PreservedAnalyses, Invalidator,
                                   ExtraArgTs...>;
 
-  /// \brief List of analysis pass IDs and associated concept pointers.
+  /// List of analysis pass IDs and associated concept pointers.
   ///
   /// Requires iterators to be valid across appending new entries and arbitrary
   /// erases. Provides the analysis ID to enable finding iterators to a given
@@ -536,10 +536,10 @@ private:
   using AnalysisResultListT =
       std::list<std::pair<AnalysisKey *, std::unique_ptr<ResultConceptT>>>;
 
-  /// \brief Map type from IRUnitT pointer to our custom list type.
+  /// Map type from IRUnitT pointer to our custom list type.
   using AnalysisResultListMapT = DenseMap<IRUnitT *, AnalysisResultListT>;
 
-  /// \brief Map type from a pair of analysis ID and IRUnitT pointer to an
+  /// Map type from a pair of analysis ID and IRUnitT pointer to an
   /// iterator into a particular result list (which is where the actual analysis
   /// result is stored).
   using AnalysisResultMapT =
@@ -634,14 +634,14 @@ public:
     const AnalysisResultMapT &Results;
   };
 
-  /// \brief Construct an empty analysis manager.
+  /// Construct an empty analysis manager.
   ///
   /// If \p DebugLogging is true, we'll log our progress to llvm::dbgs().
   AnalysisManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
   AnalysisManager(AnalysisManager &&) = default;
   AnalysisManager &operator=(AnalysisManager &&) = default;
 
-  /// \brief Returns true if the analysis manager has an empty results cache.
+  /// Returns true if the analysis manager has an empty results cache.
   bool empty() const {
     assert(AnalysisResults.empty() == AnalysisResultLists.empty() &&
            "The storage and index of analysis results disagree on how many "
@@ -649,7 +649,7 @@ public:
     return AnalysisResults.empty();
   }
 
-  /// \brief Clear any cached analysis results for a single unit of IR.
+  /// Clear any cached analysis results for a single unit of IR.
   ///
   /// This doesn't invalidate, but instead simply deletes, the relevant results.
   /// It is useful when the IR is being removed and we want to clear out all the
@@ -669,7 +669,7 @@ public:
     AnalysisResultLists.erase(ResultsListI);
   }
 
-  /// \brief Clear all analysis results cached by this AnalysisManager.
+  /// Clear all analysis results cached by this AnalysisManager.
   ///
   /// Like \c clear(IRUnitT&), this doesn't invalidate the results; it simply
   /// deletes them.  This lets you clean up the AnalysisManager when the set of
@@ -680,7 +680,7 @@ public:
     AnalysisResultLists.clear();
   }
 
-  /// \brief Get the result of an analysis pass for a given IR unit.
+  /// Get the result of an analysis pass for a given IR unit.
   ///
   /// Runs the analysis if a cached result is not available.
   template <typename PassT>
@@ -697,7 +697,7 @@ public:
     return static_cast<ResultModelT &>(ResultConcept).Result;
   }
 
-  /// \brief Get the cached result of an analysis pass for a given IR unit.
+  /// Get the cached result of an analysis pass for a given IR unit.
   ///
   /// This method never runs the analysis.
   ///
@@ -718,7 +718,7 @@ public:
     return &static_cast<ResultModelT *>(ResultConcept)->Result;
   }
 
-  /// \brief Register an analysis pass with the manager.
+  /// Register an analysis pass with the manager.
   ///
   /// The parameter is a callable whose result is an analysis pass. This allows
   /// passing in a lambda to construct the analysis.
@@ -752,7 +752,7 @@ public:
     return true;
   }
 
-  /// \brief Invalidate a specific analysis pass for an IR module.
+  /// Invalidate a specific analysis pass for an IR module.
   ///
   /// Note that the analysis result can disregard invalidation, if it determines
   /// it is in fact still valid.
@@ -762,7 +762,7 @@ public:
     invalidateImpl(PassT::ID(), IR);
   }
 
-  /// \brief Invalidate cached analyses for an IR unit.
+  /// Invalidate cached analyses for an IR unit.
   ///
   /// Walk through all of the analyses pertaining to this unit of IR and
   /// invalidate them, unless they are preserved by the PreservedAnalyses set.
@@ -829,7 +829,7 @@ public:
   }
 
 private:
-  /// \brief Look up a registered analysis pass.
+  /// Look up a registered analysis pass.
   PassConceptT &lookUpPass(AnalysisKey *ID) {
     typename AnalysisPassMapT::iterator PI = AnalysisPasses.find(ID);
     assert(PI != AnalysisPasses.end() &&
@@ -837,7 +837,7 @@ private:
     return *PI->second;
   }
 
-  /// \brief Look up a registered analysis pass.
+  /// Look up a registered analysis pass.
   const PassConceptT &lookUpPass(AnalysisKey *ID) const {
     typename AnalysisPassMapT::const_iterator PI = AnalysisPasses.find(ID);
     assert(PI != AnalysisPasses.end() &&
@@ -845,7 +845,7 @@ private:
     return *PI->second;
   }
 
-  /// \brief Get an analysis result, running the pass if necessary.
+  /// Get an analysis result, running the pass if necessary.
   ResultConceptT &getResultImpl(AnalysisKey *ID, IRUnitT &IR,
                                 ExtraArgTs... ExtraArgs) {
     typename AnalysisResultMapT::iterator RI;
@@ -874,14 +874,14 @@ private:
     return *RI->second->second;
   }
 
-  /// \brief Get a cached analysis result or return null.
+  /// Get a cached analysis result or return null.
   ResultConceptT *getCachedResultImpl(AnalysisKey *ID, IRUnitT &IR) const {
     typename AnalysisResultMapT::const_iterator RI =
         AnalysisResults.find({ID, &IR});
     return RI == AnalysisResults.end() ? nullptr : &*RI->second->second;
   }
 
-  /// \brief Invalidate a function pass result.
+  /// Invalidate a function pass result.
   void invalidateImpl(AnalysisKey *ID, IRUnitT &IR) {
     typename AnalysisResultMapT::iterator RI =
         AnalysisResults.find({ID, &IR});
@@ -895,38 +895,38 @@ private:
     AnalysisResults.erase(RI);
   }
 
-  /// \brief Map type from module analysis pass ID to pass concept pointer.
+  /// Map type from module analysis pass ID to pass concept pointer.
   using AnalysisPassMapT =
       DenseMap<AnalysisKey *, std::unique_ptr<PassConceptT>>;
 
-  /// \brief Collection of module analysis passes, indexed by ID.
+  /// Collection of module analysis passes, indexed by ID.
   AnalysisPassMapT AnalysisPasses;
 
-  /// \brief Map from function to a list of function analysis results.
+  /// Map from function to a list of function analysis results.
   ///
   /// Provides linear time removal of all analysis results for a function and
   /// the ultimate storage for a particular cached analysis result.
   AnalysisResultListMapT AnalysisResultLists;
 
-  /// \brief Map from an analysis ID and function to a particular cached
+  /// Map from an analysis ID and function to a particular cached
   /// analysis result.
   AnalysisResultMapT AnalysisResults;
 
-  /// \brief Indicates whether we log to \c llvm::dbgs().
+  /// Indicates whether we log to \c llvm::dbgs().
   bool DebugLogging;
 };
 
 extern template class AnalysisManager<Module>;
 
-/// \brief Convenience typedef for the Module analysis manager.
+/// Convenience typedef for the Module analysis manager.
 using ModuleAnalysisManager = AnalysisManager<Module>;
 
 extern template class AnalysisManager<Function>;
 
-/// \brief Convenience typedef for the Function analysis manager.
+/// Convenience typedef for the Function analysis manager.
 using FunctionAnalysisManager = AnalysisManager<Function>;
 
-/// \brief An analysis over an "outer" IR unit that provides access to an
+/// An analysis over an "outer" IR unit that provides access to an
 /// analysis manager over an "inner" IR unit.  The inner unit must be contained
 /// in the outer unit.
 ///
@@ -977,10 +977,10 @@ public:
       return *this;
     }
 
-    /// \brief Accessor for the analysis manager.
+    /// Accessor for the analysis manager.
     AnalysisManagerT &getManager() { return *InnerAM; }
 
-    /// \brief Handler for invalidation of the outer IR unit, \c IRUnitT.
+    /// Handler for invalidation of the outer IR unit, \c IRUnitT.
     ///
     /// If the proxy analysis itself is not preserved, we assume that the set of
     /// inner IR objects contained in IRUnit may have changed.  In this case,
@@ -1001,7 +1001,7 @@ public:
   explicit InnerAnalysisManagerProxy(AnalysisManagerT &InnerAM)
       : InnerAM(&InnerAM) {}
 
-  /// \brief Run the analysis pass and create our proxy result object.
+  /// Run the analysis pass and create our proxy result object.
   ///
   /// This doesn't do any interesting work; it is primarily used to insert our
   /// proxy result object into the outer analysis cache so that we can proxy
@@ -1040,7 +1040,7 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
 extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
                                                 Module>;
 
-/// \brief An analysis over an "inner" IR unit that provides access to an
+/// An analysis over an "inner" IR unit that provides access to an
 /// analysis manager over a "outer" IR unit.  The inner unit must be contained
 /// in the outer unit.
 ///
@@ -1063,7 +1063,7 @@ class OuterAnalysisManagerProxy
     : public AnalysisInfoMixin<
           OuterAnalysisManagerProxy<AnalysisManagerT, IRUnitT, ExtraArgTs...>> {
 public:
-  /// \brief Result proxy object for \c OuterAnalysisManagerProxy.
+  /// Result proxy object for \c OuterAnalysisManagerProxy.
   class Result {
   public:
     explicit Result(const AnalysisManagerT &AM) : AM(&AM) {}
@@ -1130,7 +1130,7 @@ public:
 
   OuterAnalysisManagerProxy(const AnalysisManagerT &AM) : AM(&AM) {}
 
-  /// \brief Run the analysis pass and create our proxy result object.
+  /// Run the analysis pass and create our proxy result object.
   /// Nothing to see here, it just forwards the \c AM reference into the
   /// result.
   Result run(IRUnitT &, AnalysisManager<IRUnitT, ExtraArgTs...> &,
@@ -1157,7 +1157,7 @@ extern template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
 using ModuleAnalysisManagerFunctionProxy =
     OuterAnalysisManagerProxy<ModuleAnalysisManager, Function>;
 
-/// \brief Trivial adaptor that maps from a module to its functions.
+/// Trivial adaptor that maps from a module to its functions.
 ///
 /// Designed to allow composition of a FunctionPass(Manager) and
 /// a ModulePassManager, by running the FunctionPass(Manager) over every
@@ -1187,7 +1187,7 @@ public:
   explicit ModuleToFunctionPassAdaptor(FunctionPassT Pass)
       : Pass(std::move(Pass)) {}
 
-  /// \brief Runs the function pass across every function in the module.
+  /// Runs the function pass across every function in the module.
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
     FunctionAnalysisManager &FAM =
         AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@@ -1223,7 +1223,7 @@ private:
   FunctionPassT Pass;
 };
 
-/// \brief A function to deduce a function pass type and wrap it in the
+/// A function to deduce a function pass type and wrap it in the
 /// templated adaptor.
 template <typename FunctionPassT>
 ModuleToFunctionPassAdaptor<FunctionPassT>
@@ -1231,7 +1231,7 @@ createModuleToFunctionPassAdaptor(FunctionPassT Pass) {
   return ModuleToFunctionPassAdaptor<FunctionPassT>(std::move(Pass));
 }
 
-/// \brief A utility pass template to force an analysis result to be available.
+/// A utility pass template to force an analysis result to be available.
 ///
 /// If there are extra arguments at the pass's run level there may also be
 /// extra arguments to the analysis manager's \c getResult routine. We can't
@@ -1246,7 +1246,7 @@ template <typename AnalysisT, typename IRUnitT,
 struct RequireAnalysisPass
     : PassInfoMixin<RequireAnalysisPass<AnalysisT, IRUnitT, AnalysisManagerT,
                                         ExtraArgTs...>> {
-  /// \brief Run this pass over some unit of IR.
+  /// Run this pass over some unit of IR.
   ///
   /// This pass can be run over any unit of IR and use any analysis manager
   /// provided they satisfy the basic API requirements. When this pass is
@@ -1261,12 +1261,12 @@ struct RequireAnalysisPass
   }
 };
 
-/// \brief A no-op pass template which simply forces a specific analysis result
+/// A no-op pass template which simply forces a specific analysis result
 /// to be invalidated.
 template <typename AnalysisT>
 struct InvalidateAnalysisPass
     : PassInfoMixin<InvalidateAnalysisPass<AnalysisT>> {
-  /// \brief Run this pass over some unit of IR.
+  /// Run this pass over some unit of IR.
   ///
   /// This pass can be run over any unit of IR and use any analysis manager,
   /// provided they satisfy the basic API requirements. When this pass is
@@ -1280,12 +1280,12 @@ struct InvalidateAnalysisPass
   }
 };
 
-/// \brief A utility pass that does nothing, but preserves no analyses.
+/// A utility pass that does nothing, but preserves no analyses.
 ///
 /// Because this preserves no analyses, any analysis passes queried after this
 /// pass runs will recompute fresh results.
 struct InvalidateAllAnalysesPass : PassInfoMixin<InvalidateAllAnalysesPass> {
-  /// \brief Run this pass over some unit of IR.
+  /// Run this pass over some unit of IR.
   template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
   PreservedAnalyses run(IRUnitT &, AnalysisManagerT &, ExtraArgTs &&...) {
     return PreservedAnalyses::none();
diff --git a/include/llvm/IR/PassManagerInternal.h b/include/llvm/IR/PassManagerInternal.h
index 9195d4dfa428..16a3258b4121 100644
--- a/include/llvm/IR/PassManagerInternal.h
+++ b/include/llvm/IR/PassManagerInternal.h
@@ -29,17 +29,17 @@ template <typename IRUnitT> class AllAnalysesOn;
 template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager;
 class PreservedAnalyses;
 
-/// \brief Implementation details of the pass manager interfaces.
+/// Implementation details of the pass manager interfaces.
 namespace detail {
 
-/// \brief Template for the abstract base class used to dispatch
+/// Template for the abstract base class used to dispatch
 /// polymorphically over pass objects.
 template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
 struct PassConcept {
   // Boiler plate necessary for the container of derived classes.
   virtual ~PassConcept() = default;
 
-  /// \brief The polymorphic API which runs the pass over a given IR entity.
+  /// The polymorphic API which runs the pass over a given IR entity.
   ///
   /// Note that actual pass object can omit the analysis manager argument if
   /// desired. Also that the analysis manager may be null if there is no
@@ -47,11 +47,11 @@ struct PassConcept {
   virtual PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
                                 ExtraArgTs... ExtraArgs) = 0;
 
-  /// \brief Polymorphic method to access the name of a pass.
+  /// Polymorphic method to access the name of a pass.
   virtual StringRef name() = 0;
 };
 
-/// \brief A template wrapper used to implement the polymorphic API.
+/// A template wrapper used to implement the polymorphic API.
 ///
 /// Can be instantiated for any object which provides a \c run method accepting
 /// an \c IRUnitT& and an \c AnalysisManager<IRUnit>&. It requires the pass to
@@ -85,7 +85,7 @@ struct PassModel : PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...> {
   PassT Pass;
 };
 
-/// \brief Abstract concept of an analysis result.
+/// Abstract concept of an analysis result.
 ///
 /// This concept is parameterized over the IR unit that this result pertains
 /// to.
@@ -93,7 +93,7 @@ template <typename IRUnitT, typename PreservedAnalysesT, typename InvalidatorT>
 struct AnalysisResultConcept {
   virtual ~AnalysisResultConcept() = default;
 
-  /// \brief Method to try and mark a result as invalid.
+  /// Method to try and mark a result as invalid.
   ///
   /// When the outer analysis manager detects a change in some underlying
   /// unit of the IR, it will call this method on all of the results cached.
@@ -112,7 +112,7 @@ struct AnalysisResultConcept {
                           InvalidatorT &Inv) = 0;
 };
 
-/// \brief SFINAE metafunction for computing whether \c ResultT provides an
+/// SFINAE metafunction for computing whether \c ResultT provides an
 /// \c invalidate member function.
 template <typename IRUnitT, typename ResultT> class ResultHasInvalidateMethod {
   using EnabledType = char;
@@ -148,7 +148,7 @@ public:
   enum { Value = sizeof(check<ResultT>(rank<2>())) == sizeof(EnabledType) };
 };
 
-/// \brief Wrapper to model the analysis result concept.
+/// Wrapper to model the analysis result concept.
 ///
 /// By default, this will implement the invalidate method with a trivial
 /// implementation so that the actual analysis result doesn't need to provide
@@ -160,7 +160,7 @@ template <typename IRUnitT, typename PassT, typename ResultT,
               ResultHasInvalidateMethod<IRUnitT, ResultT>::Value>
 struct AnalysisResultModel;
 
-/// \brief Specialization of \c AnalysisResultModel which provides the default
+/// Specialization of \c AnalysisResultModel which provides the default
 /// invalidate functionality.
 template <typename IRUnitT, typename PassT, typename ResultT,
           typename PreservedAnalysesT, typename InvalidatorT>
@@ -184,7 +184,7 @@ struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT,
     return *this;
   }
 
-  /// \brief The model bases invalidation solely on being in the preserved set.
+  /// The model bases invalidation solely on being in the preserved set.
   //
   // FIXME: We should actually use two different concepts for analysis results
   // rather than two different models, and avoid the indirect function call for
@@ -199,7 +199,7 @@ struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT,
   ResultT Result;
 };
 
-/// \brief Specialization of \c AnalysisResultModel which delegates invalidate
+/// Specialization of \c AnalysisResultModel which delegates invalidate
 /// handling to \c ResultT.
 template <typename IRUnitT, typename PassT, typename ResultT,
           typename PreservedAnalysesT, typename InvalidatorT>
@@ -223,7 +223,7 @@ struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT,
     return *this;
   }
 
-  /// \brief The model delegates to the \c ResultT method.
+  /// The model delegates to the \c ResultT method.
   bool invalidate(IRUnitT &IR, const PreservedAnalysesT &PA,
                   InvalidatorT &Inv) override {
     return Result.invalidate(IR, PA, Inv);
@@ -232,7 +232,7 @@ struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT,
   ResultT Result;
 };
 
-/// \brief Abstract concept of an analysis pass.
+/// Abstract concept of an analysis pass.
 ///
 /// This concept is parameterized over the IR unit that it can run over and
 /// produce an analysis result.
@@ -241,7 +241,7 @@ template <typename IRUnitT, typename PreservedAnalysesT, typename InvalidatorT,
 struct AnalysisPassConcept {
   virtual ~AnalysisPassConcept() = default;
 
-  /// \brief Method to run this analysis over a unit of IR.
+  /// Method to run this analysis over a unit of IR.
   /// \returns A unique_ptr to the analysis result object to be queried by
   /// users.
   virtual std::unique_ptr<
@@ -249,11 +249,11 @@ struct AnalysisPassConcept {
   run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
       ExtraArgTs... ExtraArgs) = 0;
 
-  /// \brief Polymorphic method to access the name of a pass.
+  /// Polymorphic method to access the name of a pass.
   virtual StringRef name() = 0;
 };
 
-/// \brief Wrapper to model the analysis pass concept.
+/// Wrapper to model the analysis pass concept.
 ///
 /// Can wrap any type which implements a suitable \c run method. The method
 /// must accept an \c IRUnitT& and an \c AnalysisManager<IRUnitT>& as arguments
@@ -283,7 +283,7 @@ struct AnalysisPassModel : AnalysisPassConcept<IRUnitT, PreservedAnalysesT,
       AnalysisResultModel<IRUnitT, PassT, typename PassT::Result,
                           PreservedAnalysesT, InvalidatorT>;
 
-  /// \brief The model delegates to the \c PassT::run method.
+  /// The model delegates to the \c PassT::run method.
   ///
   /// The return is wrapped in an \c AnalysisResultModel.
   std::unique_ptr<
@@ -293,7 +293,7 @@ struct AnalysisPassModel : AnalysisPassConcept<IRUnitT, PreservedAnalysesT,
     return llvm::make_unique<ResultModelT>(Pass.run(IR, AM, ExtraArgs...));
   }
 
-  /// \brief The model delegates to a static \c PassT::name method.
+  /// The model delegates to a static \c PassT::name method.
   ///
   /// The returned string ref must point to constant immutable data!
   StringRef name() override { return PassT::name(); }
diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h
index 245d72fbd16e..af0616cd8221 100644
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@@ -8,10 +8,10 @@
 //===----------------------------------------------------------------------===//
 //
 // This file provides a simple and efficient mechanism for performing general
-// tree-based pattern matches on the LLVM IR.  The power of these routines is
+// tree-based pattern matches on the LLVM IR. The power of these routines is
 // that it allows you to write concise patterns that are expressive and easy to
-// understand.  The other major advantage of this is that it allows you to
-// trivially capture/bind elements in the pattern to variables.  For example,
+// understand. The other major advantage of this is that it allows you to
+// trivially capture/bind elements in the pattern to variables. For example,
 // you can do something like this:
 //
 //  Value *Exp = ...
@@ -68,26 +68,26 @@ template <typename Class> struct class_match {
   template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
 };
 
-/// \brief Match an arbitrary value and ignore it.
+/// Match an arbitrary value and ignore it.
 inline class_match<Value> m_Value() { return class_match<Value>(); }
 
-/// \brief Match an arbitrary binary operation and ignore it.
+/// Match an arbitrary binary operation and ignore it.
 inline class_match<BinaryOperator> m_BinOp() {
   return class_match<BinaryOperator>();
 }
 
-/// \brief Matches any compare instruction and ignore it.
+/// Matches any compare instruction and ignore it.
 inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
 
-/// \brief Match an arbitrary ConstantInt and ignore it.
+/// Match an arbitrary ConstantInt and ignore it.
 inline class_match<ConstantInt> m_ConstantInt() {
   return class_match<ConstantInt>();
 }
 
-/// \brief Match an arbitrary undef constant.
+/// Match an arbitrary undef constant.
 inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
 
-/// \brief Match an arbitrary Constant and ignore it.
+/// Match an arbitrary Constant and ignore it.
 inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
 
 /// Matching combinators
@@ -132,89 +132,6 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
   return match_combine_and<LTy, RTy>(L, R);
 }
 
-struct match_zero {
-  template <typename ITy> bool match(ITy *V) {
-    if (const auto *C = dyn_cast<Constant>(V))
-      return C->isNullValue();
-    return false;
-  }
-};
-
-/// \brief Match an arbitrary zero/null constant.  This includes
-/// zero_initializer for vectors and ConstantPointerNull for pointers.
-inline match_zero m_Zero() { return match_zero(); }
-
-struct match_neg_zero {
-  template <typename ITy> bool match(ITy *V) {
-    if (const auto *C = dyn_cast<Constant>(V))
-      return C->isNegativeZeroValue();
-    return false;
-  }
-};
-
-/// \brief Match an arbitrary zero/null constant.  This includes
-/// zero_initializer for vectors and ConstantPointerNull for pointers. For
-/// floating point constants, this will match negative zero but not positive
-/// zero
-inline match_neg_zero m_NegZero() { return match_neg_zero(); }
-
-struct match_any_zero {
-  template <typename ITy> bool match(ITy *V) {
-    if (const auto *C = dyn_cast<Constant>(V))
-      return C->isZeroValue();
-    return false;
-  }
-};
-
-/// \brief - Match an arbitrary zero/null constant.  This includes
-/// zero_initializer for vectors and ConstantPointerNull for pointers. For
-/// floating point constants, this will match negative zero and positive zero
-inline match_any_zero m_AnyZero() { return match_any_zero(); }
-
-struct match_nan {
-  template <typename ITy> bool match(ITy *V) {
-    if (const auto *C = dyn_cast<ConstantFP>(V))
-      return C->isNaN();
-    return false;
-  }
-};
-
-/// Match an arbitrary NaN constant. This includes quiet and signalling nans.
-inline match_nan m_NaN() { return match_nan(); }
-
-struct match_one {
-  template <typename ITy> bool match(ITy *V) {
-    if (const auto *C = dyn_cast<Constant>(V))
-      return C->isOneValue();
-    return false;
-  }
-};
-
-/// \brief Match an integer 1 or a vector with all elements equal to 1.
-inline match_one m_One() { return match_one(); }
-
-struct match_all_ones {
-  template <typename ITy> bool match(ITy *V) {
-    if (const auto *C = dyn_cast<Constant>(V))
-      return C->isAllOnesValue();
-    return false;
-  }
-};
-
-/// \brief Match an integer or vector with all bits set to true.
-inline match_all_ones m_AllOnes() { return match_all_ones(); }
-
-struct match_sign_mask {
-  template <typename ITy> bool match(ITy *V) {
-    if (const auto *C = dyn_cast<Constant>(V))
-      return C->isMinSignedValue();
-    return false;
-  }
-};
-
-/// \brief Match an integer or vector with only the sign bit(s) set.
-inline match_sign_mask m_SignMask() { return match_sign_mask(); }
-
 struct apint_match {
   const APInt *&Res;
 
@@ -255,11 +172,11 @@ struct apfloat_match {
   }
 };
 
-/// \brief Match a ConstantInt or splatted ConstantVector, binding the
+/// Match a ConstantInt or splatted ConstantVector, binding the
 /// specified pointer to the contained APInt.
 inline apint_match m_APInt(const APInt *&Res) { return Res; }
 
-/// \brief Match a ConstantFP or splatted ConstantVector, binding the
+/// Match a ConstantFP or splatted ConstantVector, binding the
 /// specified pointer to the contained APFloat.
 inline apfloat_match m_APFloat(const APFloat *&Res) { return Res; }
 
@@ -278,26 +195,44 @@ template <int64_t Val> struct constantint_match {
   }
 };
 
-/// \brief Match a ConstantInt with a specific value.
+/// Match a ConstantInt with a specific value.
 template <int64_t Val> inline constantint_match<Val> m_ConstantInt() {
   return constantint_match<Val>();
 }
 
-/// \brief This helper class is used to match scalar and vector constants that
+/// This helper class is used to match scalar and vector integer constants that
 /// satisfy a specified predicate.
+/// For vector constants, undefined elements are ignored.
 template <typename Predicate> struct cst_pred_ty : public Predicate {
   template <typename ITy> bool match(ITy *V) {
     if (const auto *CI = dyn_cast<ConstantInt>(V))
       return this->isValue(CI->getValue());
-    if (V->getType()->isVectorTy())
-      if (const auto *C = dyn_cast<Constant>(V))
+    if (V->getType()->isVectorTy()) {
+      if (const auto *C = dyn_cast<Constant>(V)) {
         if (const auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
           return this->isValue(CI->getValue());
+
+        // Non-splat vector constant: check each element for a match.
+        unsigned NumElts = V->getType()->getVectorNumElements();
+        assert(NumElts != 0 && "Constant vector with no elements?");
+        for (unsigned i = 0; i != NumElts; ++i) {
+          Constant *Elt = C->getAggregateElement(i);
+          if (!Elt)
+            return false;
+          if (isa<UndefValue>(Elt))
+            continue;
+          auto *CI = dyn_cast<ConstantInt>(Elt);
+          if (!CI || !this->isValue(CI->getValue()))
+            return false;
+        }
+        return true;
+      }
+    }
     return false;
   }
 };
 
-/// \brief This helper class is used to match scalar and vector constants that
+/// This helper class is used to match scalar and vector constants that
 /// satisfy a specified predicate, and bind them to an APInt.
 template <typename Predicate> struct api_pred_ty : public Predicate {
   const APInt *&Res;
@@ -322,20 +257,202 @@ template <typename Predicate> struct api_pred_ty : public Predicate {
   }
 };
 
-struct is_power2 {
-  bool isValue(const APInt &C) { return C.isPowerOf2(); }
+/// This helper class is used to match scalar and vector floating-point
+/// constants that satisfy a specified predicate.
+/// For vector constants, undefined elements are ignored.
+template <typename Predicate> struct cstfp_pred_ty : public Predicate {
+  template <typename ITy> bool match(ITy *V) {
+    if (const auto *CF = dyn_cast<ConstantFP>(V))
+      return this->isValue(CF->getValueAPF());
+    if (V->getType()->isVectorTy()) {
+      if (const auto *C = dyn_cast<Constant>(V)) {
+        if (const auto *CF = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
+          return this->isValue(CF->getValueAPF());
+
+        // Non-splat vector constant: check each element for a match.
+        unsigned NumElts = V->getType()->getVectorNumElements();
+        assert(NumElts != 0 && "Constant vector with no elements?");
+        for (unsigned i = 0; i != NumElts; ++i) {
+          Constant *Elt = C->getAggregateElement(i);
+          if (!Elt)
+            return false;
+          if (isa<UndefValue>(Elt))
+            continue;
+          auto *CF = dyn_cast<ConstantFP>(Elt);
+          if (!CF || !this->isValue(CF->getValueAPF()))
+            return false;
+        }
+        return true;
+      }
+    }
+    return false;
+  }
 };
 
-/// \brief Match an integer or vector power of 2.
-inline cst_pred_ty<is_power2> m_Power2() { return cst_pred_ty<is_power2>(); }
-inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { return V; }
+///////////////////////////////////////////////////////////////////////////////
+//
+// Encapsulate constant value queries for use in templated predicate matchers.
+// This allows checking if constants match using compound predicates and works
+// with vector constants, possibly with relaxed constraints. For example, ignore
+// undef values.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+struct is_all_ones {
+  bool isValue(const APInt &C) { return C.isAllOnesValue(); }
+};
+/// Match an integer or vector with all bits set.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_all_ones> m_AllOnes() {
+  return cst_pred_ty<is_all_ones>();
+}
 
 struct is_maxsignedvalue {
   bool isValue(const APInt &C) { return C.isMaxSignedValue(); }
 };
+/// Match an integer or vector with values having all bits except for the high
+/// bit set (0x7f...).
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() {
+  return cst_pred_ty<is_maxsignedvalue>();
+}
+inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) {
+  return V;
+}
 
-inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() { return cst_pred_ty<is_maxsignedvalue>(); }
-inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) { return V; }
+struct is_negative {
+  bool isValue(const APInt &C) { return C.isNegative(); }
+};
+/// Match an integer or vector of negative values.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_negative> m_Negative() {
+  return cst_pred_ty<is_negative>();
+}
+inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
+  return V;
+}
+
+struct is_nonnegative {
+  bool isValue(const APInt &C) { return C.isNonNegative(); }
+};
+/// Match an integer or vector of nonnegative values.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_nonnegative> m_NonNegative() {
+  return cst_pred_ty<is_nonnegative>();
+}
+inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
+  return V;
+}
+
+struct is_one {
+  bool isValue(const APInt &C) { return C.isOneValue(); }
+};
+/// Match an integer 1 or a vector with all elements equal to 1.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_one> m_One() {
+  return cst_pred_ty<is_one>();
+}
+
+struct is_zero_int {
+  bool isValue(const APInt &C) { return C.isNullValue(); }
+};
+/// Match an integer 0 or a vector with all elements equal to 0.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_zero_int> m_ZeroInt() {
+  return cst_pred_ty<is_zero_int>();
+}
+
+struct is_zero {
+  template <typename ITy> bool match(ITy *V) {
+    auto *C = dyn_cast<Constant>(V);
+    return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C));
+  }
+};
+/// Match any null constant or a vector with all elements equal to 0.
+/// For vectors, this includes constants with undefined elements.
+inline is_zero m_Zero() {
+  return is_zero();
+}
+
+struct is_power2 {
+  bool isValue(const APInt &C) { return C.isPowerOf2(); }
+};
+/// Match an integer or vector power-of-2.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_power2> m_Power2() {
+  return cst_pred_ty<is_power2>();
+}
+inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
+  return V;
+}
+
+struct is_power2_or_zero {
+  bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
+};
+/// Match an integer or vector of 0 or power-of-2 values.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() {
+  return cst_pred_ty<is_power2_or_zero>();
+}
+inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) {
+  return V;
+}
+
+struct is_sign_mask {
+  bool isValue(const APInt &C) { return C.isSignMask(); }
+};
+/// Match an integer or vector with only the sign bit(s) set.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_sign_mask> m_SignMask() {
+  return cst_pred_ty<is_sign_mask>();
+}
+
+struct is_lowbit_mask {
+  bool isValue(const APInt &C) { return C.isMask(); }
+};
+/// Match an integer or vector with only the low bit(s) set.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
+  return cst_pred_ty<is_lowbit_mask>();
+}
+
+struct is_nan {
+  bool isValue(const APFloat &C) { return C.isNaN(); }
+};
+/// Match an arbitrary NaN constant. This includes quiet and signalling nans.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_nan> m_NaN() {
+  return cstfp_pred_ty<is_nan>();
+}
+
+struct is_any_zero_fp {
+  bool isValue(const APFloat &C) { return C.isZero(); }
+};
+/// Match a floating-point negative zero or positive zero.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() {
+  return cstfp_pred_ty<is_any_zero_fp>();
+}
+
+struct is_pos_zero_fp {
+  bool isValue(const APFloat &C) { return C.isPosZero(); }
+};
+/// Match a floating-point positive zero.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() {
+  return cstfp_pred_ty<is_pos_zero_fp>();
+}
+
+struct is_neg_zero_fp {
+  bool isValue(const APFloat &C) { return C.isNegZero(); }
+};
+/// Match a floating-point negative zero.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() {
+  return cstfp_pred_ty<is_neg_zero_fp>();
+}
+
+///////////////////////////////////////////////////////////////////////////////
 
 template <typename Class> struct bind_ty {
   Class *&VR;
@@ -351,25 +468,25 @@ template <typename Class> struct bind_ty {
   }
 };
 
-/// \brief Match a value, capturing it if we match.
+/// Match a value, capturing it if we match.
 inline bind_ty<Value> m_Value(Value *&V) { return V; }
 inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
 
-/// \brief Match an instruction, capturing it if we match.
+/// Match an instruction, capturing it if we match.
 inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
-/// \brief Match a binary operator, capturing it if we match.
+/// Match a binary operator, capturing it if we match.
 inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
 
-/// \brief Match a ConstantInt, capturing the value if we match.
+/// Match a ConstantInt, capturing the value if we match.
 inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
 
-/// \brief Match a Constant, capturing the value if we match.
+/// Match a Constant, capturing the value if we match.
 inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
 
-/// \brief Match a ConstantFP, capturing the value if we match.
+/// Match a ConstantFP, capturing the value if we match.
 inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
 
-/// \brief Match a specified Value*.
+/// Match a specified Value*.
 struct specificval_ty {
   const Value *Val;
 
@@ -378,10 +495,26 @@ struct specificval_ty {
   template <typename ITy> bool match(ITy *V) { return V == Val; }
 };
 
-/// \brief Match if we have a specific specified value.
+/// Match if we have a specific specified value.
 inline specificval_ty m_Specific(const Value *V) { return V; }
 
-/// \brief Match a specified floating point value or vector of all elements of
+/// Stores a reference to the Value *, not the Value * itself,
+/// thus can be used in commutative matchers.
+template <typename Class> struct deferredval_ty {
+  Class *const &Val;
+
+  deferredval_ty(Class *const &V) : Val(V) {}
+
+  template <typename ITy> bool match(ITy *const V) { return V == Val; }
+};
+
+/// A commutative-friendly version of m_Specific().
+inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; }
+inline deferredval_ty<const Value> m_Deferred(const Value *const &V) {
+  return V;
+}
+
+/// Match a specified floating point value or vector of all elements of
 /// that value.
 struct specific_fpval {
   double Val;
@@ -399,11 +532,11 @@ struct specific_fpval {
   }
 };
 
-/// \brief Match a specific floating point value or vector with all elements
+/// Match a specific floating point value or vector with all elements
 /// equal to the value.
 inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); }
 
-/// \brief Match a float 1.0 or vector with all elements equal to 1.0.
+/// Match a float 1.0 or vector with all elements equal to 1.0.
 inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); }
 
 struct bind_const_intval_ty {
@@ -421,7 +554,7 @@ struct bind_const_intval_ty {
   }
 };
 
-/// \brief Match a specified integer value or vector of all elements of that
+/// Match a specified integer value or vector of all elements of that
 // value.
 struct specific_intval {
   uint64_t Val;
@@ -438,11 +571,11 @@ struct specific_intval {
   }
 };
 
-/// \brief Match a specific integer value or vector with all elements equal to
+/// Match a specific integer value or vector with all elements equal to
 /// the value.
 inline specific_intval m_SpecificInt(uint64_t V) { return specific_intval(V); }
 
-/// \brief Match a ConstantInt and bind to its value.  This does not match
+/// Match a ConstantInt and bind to its value.  This does not match
 /// ConstantInts wider than 64-bits.
 inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
 
@@ -454,13 +587,15 @@ struct AnyBinaryOp_match {
   LHS_t L;
   RHS_t R;
 
+  // The evaluation order is always stable, regardless of Commutability.
+  // The LHS is always matched first.
   AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
 
   template <typename OpTy> bool match(OpTy *V) {
     if (auto *I = dyn_cast<BinaryOperator>(V))
       return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
-             (Commutable && R.match(I->getOperand(0)) &&
-              L.match(I->getOperand(1)));
+             (Commutable && L.match(I->getOperand(1)) &&
+              R.match(I->getOperand(0)));
     return false;
   }
 };
@@ -480,20 +615,22 @@ struct BinaryOp_match {
   LHS_t L;
   RHS_t R;
 
+  // The evaluation order is always stable, regardless of Commutability.
+  // The LHS is always matched first.
   BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
 
   template <typename OpTy> bool match(OpTy *V) {
     if (V->getValueID() == Value::InstructionVal + Opcode) {
       auto *I = cast<BinaryOperator>(V);
       return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
-             (Commutable && R.match(I->getOperand(0)) &&
-              L.match(I->getOperand(1)));
+             (Commutable && L.match(I->getOperand(1)) &&
+              R.match(I->getOperand(0)));
     }
     if (auto *CE = dyn_cast<ConstantExpr>(V))
       return CE->getOpcode() == Opcode &&
              ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
-              (Commutable && R.match(CE->getOperand(0)) &&
-               L.match(CE->getOperand(1))));
+              (Commutable && L.match(CE->getOperand(1)) &&
+               R.match(CE->getOperand(0))));
     return false;
   }
 };
@@ -522,6 +659,13 @@ inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
   return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
 }
 
+/// Match 'fneg X' as 'fsub -0.0, X'.
+template <typename RHS>
+inline BinaryOp_match<cstfp_pred_ty<is_neg_zero_fp>, RHS, Instruction::FSub>
+m_FNeg(const RHS &X) {
+  return m_FSub(m_NegZeroFP(), X);
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
                                                         const RHS &R) {
@@ -746,35 +890,35 @@ struct is_idiv_op {
   }
 };
 
-/// \brief Matches shift operations.
+/// Matches shift operations.
 template <typename LHS, typename RHS>
 inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
                                                       const RHS &R) {
   return BinOpPred_match<LHS, RHS, is_shift_op>(L, R);
 }
 
-/// \brief Matches logical shift operations.
+/// Matches logical shift operations.
 template <typename LHS, typename RHS>
 inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L,
                                                           const RHS &R) {
   return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R);
 }
 
-/// \brief Matches logical shift operations.
+/// Matches logical shift operations.
 template <typename LHS, typename RHS>
 inline BinOpPred_match<LHS, RHS, is_logical_shift_op>
 m_LogicalShift(const LHS &L, const RHS &R) {
   return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R);
 }
 
-/// \brief Matches bitwise logic operations.
+/// Matches bitwise logic operations.
 template <typename LHS, typename RHS>
 inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op>
 m_BitwiseLogic(const LHS &L, const RHS &R) {
   return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R);
 }
 
-/// \brief Matches integer division operations.
+/// Matches integer division operations.
 template <typename LHS, typename RHS>
 inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
                                                     const RHS &R) {
@@ -811,14 +955,16 @@ struct CmpClass_match {
   LHS_t L;
   RHS_t R;
 
+  // The evaluation order is always stable, regardless of Commutability.
+  // The LHS is always matched first.
   CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS)
       : Predicate(Pred), L(LHS), R(RHS) {}
 
   template <typename OpTy> bool match(OpTy *V) {
     if (auto *I = dyn_cast<Class>(V))
       if ((L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
-          (Commutable && R.match(I->getOperand(0)) &&
-           L.match(I->getOperand(1)))) {
+          (Commutable && L.match(I->getOperand(1)) &&
+           R.match(I->getOperand(0)))) {
         Predicate = I->getPredicate();
         return true;
       }
@@ -871,7 +1017,7 @@ inline SelectClass_match<Cond, LHS, RHS> m_Select(const Cond &C, const LHS &L,
   return SelectClass_match<Cond, LHS, RHS>(C, L, R);
 }
 
-/// \brief This matches a select of two constants, e.g.:
+/// This matches a select of two constants, e.g.:
 /// m_SelectCst<-1, 0>(m_Value(V))
 template <int64_t L, int64_t R, typename Cond>
 inline SelectClass_match<Cond, constantint_match<L>, constantint_match<R>>
@@ -880,6 +1026,84 @@ m_SelectCst(const Cond &C) {
 }
 
 //===----------------------------------------------------------------------===//
+// Matchers for InsertElementInst classes
+//
+
+template <typename Val_t, typename Elt_t, typename Idx_t>
+struct InsertElementClass_match {
+  Val_t V;
+  Elt_t E;
+  Idx_t I;
+
+  InsertElementClass_match(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
+      : V(Val), E(Elt), I(Idx) {}
+
+  template <typename OpTy> bool match(OpTy *VV) {
+    if (auto *II = dyn_cast<InsertElementInst>(VV))
+      return V.match(II->getOperand(0)) && E.match(II->getOperand(1)) &&
+             I.match(II->getOperand(2));
+    return false;
+  }
+};
+
+template <typename Val_t, typename Elt_t, typename Idx_t>
+inline InsertElementClass_match<Val_t, Elt_t, Idx_t>
+m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
+  return InsertElementClass_match<Val_t, Elt_t, Idx_t>(Val, Elt, Idx);
+}
+
+//===----------------------------------------------------------------------===//
+// Matchers for ExtractElementInst classes
+//
+
+template <typename Val_t, typename Idx_t> struct ExtractElementClass_match {
+  Val_t V;
+  Idx_t I;
+
+  ExtractElementClass_match(const Val_t &Val, const Idx_t &Idx)
+      : V(Val), I(Idx) {}
+
+  template <typename OpTy> bool match(OpTy *VV) {
+    if (auto *II = dyn_cast<ExtractElementInst>(VV))
+      return V.match(II->getOperand(0)) && I.match(II->getOperand(1));
+    return false;
+  }
+};
+
+template <typename Val_t, typename Idx_t>
+inline ExtractElementClass_match<Val_t, Idx_t>
+m_ExtractElement(const Val_t &Val, const Idx_t &Idx) {
+  return ExtractElementClass_match<Val_t, Idx_t>(Val, Idx);
+}
+
+//===----------------------------------------------------------------------===//
+// Matchers for ShuffleVectorInst classes
+//
+
+template <typename V1_t, typename V2_t, typename Mask_t>
+struct ShuffleVectorClass_match {
+  V1_t V1;
+  V2_t V2;
+  Mask_t M;
+
+  ShuffleVectorClass_match(const V1_t &v1, const V2_t &v2, const Mask_t &m)
+      : V1(v1), V2(v2), M(m) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    if (auto *SI = dyn_cast<ShuffleVectorInst>(V))
+      return V1.match(SI->getOperand(0)) && V2.match(SI->getOperand(1)) &&
+             M.match(SI->getOperand(2));
+    return false;
+  }
+};
+
+template <typename V1_t, typename V2_t, typename Mask_t>
+inline ShuffleVectorClass_match<V1_t, V2_t, Mask_t>
+m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) {
+  return ShuffleVectorClass_match<V1_t, V2_t, Mask_t>(v1, v2, m);
+}
+
+//===----------------------------------------------------------------------===//
 // Matchers for CastInst classes
 //
 
@@ -895,31 +1119,31 @@ template <typename Op_t, unsigned Opcode> struct CastClass_match {
   }
 };
 
-/// \brief Matches BitCast.
+/// Matches BitCast.
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::BitCast>(Op);
 }
 
-/// \brief Matches PtrToInt.
+/// Matches PtrToInt.
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
 }
 
-/// \brief Matches Trunc.
+/// Matches Trunc.
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::Trunc>(Op);
 }
 
-/// \brief Matches SExt.
+/// Matches SExt.
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::SExt>(Op);
 }
 
-/// \brief Matches ZExt.
+/// Matches ZExt.
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::ZExt>(Op);
@@ -932,25 +1156,25 @@ m_ZExtOrSExt(const OpTy &Op) {
   return m_CombineOr(m_ZExt(Op), m_SExt(Op));
 }
 
-/// \brief Matches UIToFP.
+/// Matches UIToFP.
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::UIToFP>(Op);
 }
 
-/// \brief Matches SIToFP.
+/// Matches SIToFP.
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::SIToFP>(Op);
 }
 
-/// \brief Matches FPTrunc
+/// Matches FPTrunc
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
 }
 
-/// \brief Matches FPExt
+/// Matches FPExt
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::FPExt>(Op);
@@ -976,80 +1200,32 @@ template <typename Op_t> struct LoadClass_match {
 template <typename OpTy> inline LoadClass_match<OpTy> m_Load(const OpTy &Op) {
   return LoadClass_match<OpTy>(Op);
 }
+
 //===----------------------------------------------------------------------===//
-// Matchers for unary operators
+// Matcher for StoreInst classes
 //
 
-template <typename LHS_t> struct not_match {
-  LHS_t L;
-
-  not_match(const LHS_t &LHS) : L(LHS) {}
-
-  template <typename OpTy> bool match(OpTy *V) {
-    if (auto *O = dyn_cast<Operator>(V))
-      if (O->getOpcode() == Instruction::Xor) {
-        if (isAllOnes(O->getOperand(1)))
-          return L.match(O->getOperand(0));
-        if (isAllOnes(O->getOperand(0)))
-          return L.match(O->getOperand(1));
-      }
-    return false;
-  }
-
-private:
-  bool isAllOnes(Value *V) {
-    return isa<Constant>(V) && cast<Constant>(V)->isAllOnesValue();
-  }
-};
-
-template <typename LHS> inline not_match<LHS> m_Not(const LHS &L) { return L; }
-
-template <typename LHS_t> struct neg_match {
-  LHS_t L;
-
-  neg_match(const LHS_t &LHS) : L(LHS) {}
-
-  template <typename OpTy> bool match(OpTy *V) {
-    if (auto *O = dyn_cast<Operator>(V))
-      if (O->getOpcode() == Instruction::Sub)
-        return matchIfNeg(O->getOperand(0), O->getOperand(1));
-    return false;
-  }
-
-private:
-  bool matchIfNeg(Value *LHS, Value *RHS) {
-    return ((isa<ConstantInt>(LHS) && cast<ConstantInt>(LHS)->isZero()) ||
-            isa<ConstantAggregateZero>(LHS)) &&
-           L.match(RHS);
-  }
-};
+template <typename ValueOp_t, typename PointerOp_t> struct StoreClass_match {
+  ValueOp_t ValueOp;
+  PointerOp_t PointerOp;
 
-/// \brief Match an integer negate.
-template <typename LHS> inline neg_match<LHS> m_Neg(const LHS &L) { return L; }
-
-template <typename LHS_t> struct fneg_match {
-  LHS_t L;
-
-  fneg_match(const LHS_t &LHS) : L(LHS) {}
+  StoreClass_match(const ValueOp_t &ValueOpMatch,
+                   const PointerOp_t &PointerOpMatch) :
+    ValueOp(ValueOpMatch), PointerOp(PointerOpMatch)  {}
 
   template <typename OpTy> bool match(OpTy *V) {
-    if (auto *O = dyn_cast<Operator>(V))
-      if (O->getOpcode() == Instruction::FSub)
-        return matchIfFNeg(O->getOperand(0), O->getOperand(1));
-    return false;
-  }
-
-private:
-  bool matchIfFNeg(Value *LHS, Value *RHS) {
-    if (const auto *C = dyn_cast<ConstantFP>(LHS))
-      return C->isNegativeZeroValue() && L.match(RHS);
+    if (auto *LI = dyn_cast<StoreInst>(V))
+      return ValueOp.match(LI->getValueOperand()) &&
+             PointerOp.match(LI->getPointerOperand());
     return false;
   }
 };
 
-/// \brief Match a floating point negate.
-template <typename LHS> inline fneg_match<LHS> m_FNeg(const LHS &L) {
-  return L;
+/// Matches StoreInst.
+template <typename ValueOpTy, typename PointerOpTy>
+inline StoreClass_match<ValueOpTy, PointerOpTy>
+m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
+  return StoreClass_match<ValueOpTy, PointerOpTy>(ValueOp, PointerOp);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1106,6 +1282,8 @@ struct MaxMin_match {
   LHS_t L;
   RHS_t R;
 
+  // The evaluation order is always stable, regardless of Commutability.
+  // The LHS is always matched first.
   MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
 
   template <typename OpTy> bool match(OpTy *V) {
@@ -1132,60 +1310,60 @@ struct MaxMin_match {
       return false;
     // It does!  Bind the operands.
     return (L.match(LHS) && R.match(RHS)) ||
-           (Commutable && R.match(LHS) && L.match(RHS));
+           (Commutable && L.match(RHS) && R.match(LHS));
   }
 };
 
-/// \brief Helper class for identifying signed max predicates.
+/// Helper class for identifying signed max predicates.
 struct smax_pred_ty {
   static bool match(ICmpInst::Predicate Pred) {
     return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
   }
 };
 
-/// \brief Helper class for identifying signed min predicates.
+/// Helper class for identifying signed min predicates.
 struct smin_pred_ty {
   static bool match(ICmpInst::Predicate Pred) {
     return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE;
   }
 };
 
-/// \brief Helper class for identifying unsigned max predicates.
+/// Helper class for identifying unsigned max predicates.
 struct umax_pred_ty {
   static bool match(ICmpInst::Predicate Pred) {
     return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE;
   }
 };
 
-/// \brief Helper class for identifying unsigned min predicates.
+/// Helper class for identifying unsigned min predicates.
 struct umin_pred_ty {
   static bool match(ICmpInst::Predicate Pred) {
     return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE;
   }
 };
 
-/// \brief Helper class for identifying ordered max predicates.
+/// Helper class for identifying ordered max predicates.
 struct ofmax_pred_ty {
   static bool match(FCmpInst::Predicate Pred) {
     return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE;
   }
 };
 
-/// \brief Helper class for identifying ordered min predicates.
+/// Helper class for identifying ordered min predicates.
 struct ofmin_pred_ty {
   static bool match(FCmpInst::Predicate Pred) {
     return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE;
   }
 };
 
-/// \brief Helper class for identifying unordered max predicates.
+/// Helper class for identifying unordered max predicates.
 struct ufmax_pred_ty {
   static bool match(FCmpInst::Predicate Pred) {
     return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE;
   }
 };
 
-/// \brief Helper class for identifying unordered min predicates.
+/// Helper class for identifying unordered min predicates.
 struct ufmin_pred_ty {
   static bool match(FCmpInst::Predicate Pred) {
     return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE;
@@ -1216,7 +1394,7 @@ inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L,
   return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R);
 }
 
-/// \brief Match an 'ordered' floating point maximum function.
+/// Match an 'ordered' floating point maximum function.
 /// Floating point has one special value 'NaN'. Therefore, there is no total
 /// order. However, if we can ignore the 'NaN' value (for example, because of a
 /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
@@ -1231,7 +1409,7 @@ inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L,
   return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R);
 }
 
-/// \brief Match an 'ordered' floating point minimum function.
+/// Match an 'ordered' floating point minimum function.
 /// Floating point has one special value 'NaN'. Therefore, there is no total
 /// order. However, if we can ignore the 'NaN' value (for example, because of a
 /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
@@ -1246,7 +1424,7 @@ inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L,
   return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R);
 }
 
-/// \brief Match an 'unordered' floating point maximum function.
+/// Match an 'unordered' floating point maximum function.
 /// Floating point has one special value 'NaN'. Therefore, there is no total
 /// order. However, if we can ignore the 'NaN' value (for example, because of a
 /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
@@ -1261,7 +1439,7 @@ m_UnordFMax(const LHS &L, const RHS &R) {
   return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
 }
 
-/// \brief Match an 'unordered' floating point minimum function.
+/// Match an 'unordered' floating point minimum function.
 /// Floating point has one special value 'NaN'. Therefore, there is no total
 /// order. However, if we can ignore the 'NaN' value (for example, because of a
 /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
@@ -1312,7 +1490,7 @@ struct UAddWithOverflow_match {
   }
 };
 
-/// \brief Match an icmp instruction checking for unsigned overflow on addition.
+/// Match an icmp instruction checking for unsigned overflow on addition.
 ///
 /// S is matched to the addition whose result is being checked for overflow, and
 /// L and R are matched to the LHS and RHS of S.
@@ -1334,13 +1512,13 @@ template <typename Opnd_t> struct Argument_match {
   }
 };
 
-/// \brief Match an argument.
+/// Match an argument.
 template <unsigned OpI, typename Opnd_t>
 inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
   return Argument_match<Opnd_t>(OpI, Op);
 }
 
-/// \brief Intrinsic matchers.
+/// Intrinsic matchers.
 struct IntrinsicID_match {
   unsigned ID;
 
@@ -1383,7 +1561,7 @@ struct m_Intrinsic_Ty<T0, T1, T2, T3> {
                         Argument_match<T3>>;
 };
 
-/// \brief Match intrinsic calls like this:
+/// Match intrinsic calls like this:
 /// m_Intrinsic<Intrinsic::fabs>(m_Value(X))
 template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
   return IntrinsicID_match(IntrID);
@@ -1424,6 +1602,16 @@ inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
   return m_Intrinsic<Intrinsic::bswap>(Op0);
 }
 
+template <typename Opnd0>
+inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
+  return m_Intrinsic<Intrinsic::fabs>(Op0);
+}
+
+template <typename Opnd0>
+inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) {
+  return m_Intrinsic<Intrinsic::canonicalize>(Op0);
+}
+
 template <typename Opnd0, typename Opnd1>
 inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0,
                                                         const Opnd1 &Op1) {
@@ -1436,57 +1624,17 @@ inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0,
   return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1);
 }
 
-template <typename Opnd_t> struct Signum_match {
-  Opnd_t Val;
-  Signum_match(const Opnd_t &V) : Val(V) {}
-
-  template <typename OpTy> bool match(OpTy *V) {
-    unsigned TypeSize = V->getType()->getScalarSizeInBits();
-    if (TypeSize == 0)
-      return false;
-
-    unsigned ShiftWidth = TypeSize - 1;
-    Value *OpL = nullptr, *OpR = nullptr;
-
-    // This is the representation of signum we match:
-    //
-    //  signum(x) == (x >> 63) | (-x >>u 63)
-    //
-    // An i1 value is its own signum, so it's correct to match
-    //
-    //  signum(x) == (x >> 0)  | (-x >>u 0)
-    //
-    // for i1 values.
-
-    auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth));
-    auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth));
-    auto Signum = m_Or(LHS, RHS);
-
-    return Signum.match(V) && OpL == OpR && Val.match(OpL);
-  }
-};
-
-/// \brief Matches a signum pattern.
-///
-/// signum(x) =
-///      x >  0  ->  1
-///      x == 0  ->  0
-///      x <  0  -> -1
-template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
-  return Signum_match<Val_t>(V);
-}
-
 //===----------------------------------------------------------------------===//
 // Matchers for two-operands operators with the operators in either order
 //
 
-/// \brief Matches a BinaryOperator with LHS and RHS in either order.
+/// Matches a BinaryOperator with LHS and RHS in either order.
 template <typename LHS, typename RHS>
 inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) {
   return AnyBinaryOp_match<LHS, RHS, true>(L, R);
 }
 
-/// \brief Matches an ICmp with a predicate over LHS and RHS in either order.
+/// Matches an ICmp with a predicate over LHS and RHS in either order.
 /// Does not swap the predicate.
 template <typename LHS, typename RHS>
 inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>
@@ -1495,41 +1643,55 @@ m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
                                                                        R);
 }
 
-/// \brief Matches a Add with LHS and RHS in either order.
+/// Matches a Add with LHS and RHS in either order.
 template <typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L,
                                                                 const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R);
 }
 
-/// \brief Matches a Mul with LHS and RHS in either order.
+/// Matches a Mul with LHS and RHS in either order.
 template <typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L,
                                                                 const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R);
 }
 
-/// \brief Matches an And with LHS and RHS in either order.
+/// Matches an And with LHS and RHS in either order.
 template <typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L,
                                                                 const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R);
 }
 
-/// \brief Matches an Or with LHS and RHS in either order.
+/// Matches an Or with LHS and RHS in either order.
 template <typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L,
                                                               const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R);
 }
 
-/// \brief Matches an Xor with LHS and RHS in either order.
+/// Matches an Xor with LHS and RHS in either order.
 template <typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L,
                                                                 const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R);
 }
 
+/// Matches a 'Neg' as 'sub 0, V'.
+template <typename ValTy>
+inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub>
+m_Neg(const ValTy &V) {
+  return m_Sub(m_ZeroInt(), V);
+}
+
+/// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
+template <typename ValTy>
+inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true>
+m_Not(const ValTy &V) {
+  return m_c_Xor(V, m_AllOnes());
+}
+
 /// Matches an SMin with LHS and RHS in either order.
 template <typename LHS, typename RHS>
 inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>
@@ -1555,6 +1717,60 @@ m_c_UMax(const LHS &L, const RHS &R) {
   return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R);
 }
 
+/// Matches FAdd with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true>
+m_c_FAdd(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R);
+}
+
+/// Matches FMul with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FMul, true>
+m_c_FMul(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R);
+}
+
+template <typename Opnd_t> struct Signum_match {
+  Opnd_t Val;
+  Signum_match(const Opnd_t &V) : Val(V) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    unsigned TypeSize = V->getType()->getScalarSizeInBits();
+    if (TypeSize == 0)
+      return false;
+
+    unsigned ShiftWidth = TypeSize - 1;
+    Value *OpL = nullptr, *OpR = nullptr;
+
+    // This is the representation of signum we match:
+    //
+    //  signum(x) == (x >> 63) | (-x >>u 63)
+    //
+    // An i1 value is its own signum, so it's correct to match
+    //
+    //  signum(x) == (x >> 0)  | (-x >>u 0)
+    //
+    // for i1 values.
+
+    auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth));
+    auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth));
+    auto Signum = m_Or(LHS, RHS);
+
+    return Signum.match(V) && OpL == OpR && Val.match(OpL);
+  }
+};
+
+/// Matches a signum pattern.
+///
+/// signum(x) =
+///      x >  0  ->  1
+///      x == 0  ->  0
+///      x <  0  -> -1
+template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
+  return Signum_match<Val_t>(V);
+}
+
 } // end namespace PatternMatch
 } // end namespace llvm
 
diff --git a/include/llvm/IR/ProfileSummary.h b/include/llvm/IR/ProfileSummary.h
index d85ce8c443ec..e38663770a13 100644
--- a/include/llvm/IR/ProfileSummary.h
+++ b/include/llvm/IR/ProfileSummary.h
@@ -51,7 +51,7 @@ private:
   SummaryEntryVector DetailedSummary;
   uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount;
   uint32_t NumCounts, NumFunctions;
-  /// \brief Return detailed summary as metadata.
+  /// Return detailed summary as metadata.
   Metadata *getDetailedSummaryMD(LLVMContext &Context);
 
 public:
@@ -67,9 +67,9 @@ public:
         NumCounts(NumCounts), NumFunctions(NumFunctions) {}
 
   Kind getKind() const { return PSK; }
-  /// \brief Return summary information as metadata.
+  /// Return summary information as metadata.
   Metadata *getMD(LLVMContext &Context);
-  /// \brief Construct profile summary from metdata.
+  /// Construct profile summary from metdata.
   static ProfileSummary *getFromMD(Metadata *MD);
   SummaryEntryVector &getDetailedSummary() { return DetailedSummary; }
   uint32_t getNumFunctions() { return NumFunctions; }
diff --git a/include/llvm/IR/RuntimeLibcalls.def b/include/llvm/IR/RuntimeLibcalls.def
new file mode 100644
index 000000000000..7ed90d959f01
--- /dev/null
+++ b/include/llvm/IR/RuntimeLibcalls.def
@@ -0,0 +1,527 @@
+//===-- llvm/RuntimeLibcalls.def - File that describes libcalls -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the runtime library calls the backend can emit.
+// The various long double types cannot be merged, because 80-bit library
+// functions use "xf" and 128-bit use "tf".
+//
+// When adding PPCF128 functions here, note that their names generally need
+// to be overridden for Darwin with the xxx$LDBL128 form.  See
+// PPCISelLowering.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+// Provide definitions of macros so that users of this file do not have to
+// define everything to use it...
+
+// Declare the enumerator for each libcall, along with its default name. Some
+// libcalls have different names on particular OSes or architectures. These
+// are set in InitLibcallNames() in TargetLoweringBase.cpp and/or by targets
+// using TargetLoweringBase::setLibcallName()
+#ifndef HANDLE_LIBCALL
+#error "HANDLE_LIBCALL must be defined"
+#endif
+
+// Integer
+HANDLE_LIBCALL(SHL_I16, "__ashlhi3")
+HANDLE_LIBCALL(SHL_I32, "__ashlsi3")
+HANDLE_LIBCALL(SHL_I64, "__ashldi3")
+HANDLE_LIBCALL(SHL_I128, "__ashlti3")
+HANDLE_LIBCALL(SRL_I16, "__lshrhi3")
+HANDLE_LIBCALL(SRL_I32, "__lshrsi3")
+HANDLE_LIBCALL(SRL_I64, "__lshrdi3")
+HANDLE_LIBCALL(SRL_I128, "__lshrti3")
+HANDLE_LIBCALL(SRA_I16, "__ashrhi3")
+HANDLE_LIBCALL(SRA_I32, "__ashrsi3")
+HANDLE_LIBCALL(SRA_I64, "__ashrdi3")
+HANDLE_LIBCALL(SRA_I128, "__ashrti3")
+HANDLE_LIBCALL(MUL_I8, "__mulqi3")
+HANDLE_LIBCALL(MUL_I16, "__mulhi3")
+HANDLE_LIBCALL(MUL_I32, "__mulsi3")
+HANDLE_LIBCALL(MUL_I64, "__muldi3")
+HANDLE_LIBCALL(MUL_I128, "__multi3")
+HANDLE_LIBCALL(MULO_I32, "__mulosi4")
+HANDLE_LIBCALL(MULO_I64, "__mulodi4")
+HANDLE_LIBCALL(MULO_I128, "__muloti4")
+HANDLE_LIBCALL(SDIV_I8, "__divqi3")
+HANDLE_LIBCALL(SDIV_I16, "__divhi3")
+HANDLE_LIBCALL(SDIV_I32, "__divsi3")
+HANDLE_LIBCALL(SDIV_I64, "__divdi3")
+HANDLE_LIBCALL(SDIV_I128, "__divti3")
+HANDLE_LIBCALL(UDIV_I8, "__udivqi3")
+HANDLE_LIBCALL(UDIV_I16, "__udivhi3")
+HANDLE_LIBCALL(UDIV_I32, "__udivsi3")
+HANDLE_LIBCALL(UDIV_I64, "__udivdi3")
+HANDLE_LIBCALL(UDIV_I128, "__udivti3")
+HANDLE_LIBCALL(SREM_I8, "__modqi3")
+HANDLE_LIBCALL(SREM_I16, "__modhi3")
+HANDLE_LIBCALL(SREM_I32, "__modsi3")
+HANDLE_LIBCALL(SREM_I64, "__moddi3")
+HANDLE_LIBCALL(SREM_I128, "__modti3")
+HANDLE_LIBCALL(UREM_I8, "__umodqi3")
+HANDLE_LIBCALL(UREM_I16, "__umodhi3")
+HANDLE_LIBCALL(UREM_I32, "__umodsi3")
+HANDLE_LIBCALL(UREM_I64, "__umoddi3")
+HANDLE_LIBCALL(UREM_I128, "__umodti3")
+HANDLE_LIBCALL(SDIVREM_I8, nullptr)
+HANDLE_LIBCALL(SDIVREM_I16, nullptr)
+HANDLE_LIBCALL(SDIVREM_I32, nullptr)
+HANDLE_LIBCALL(SDIVREM_I64, nullptr)
+HANDLE_LIBCALL(SDIVREM_I128, nullptr)
+HANDLE_LIBCALL(UDIVREM_I8, nullptr)
+HANDLE_LIBCALL(UDIVREM_I16, nullptr)
+HANDLE_LIBCALL(UDIVREM_I32, nullptr)
+HANDLE_LIBCALL(UDIVREM_I64, nullptr)
+HANDLE_LIBCALL(UDIVREM_I128, nullptr)
+HANDLE_LIBCALL(NEG_I32, "__negsi2")
+HANDLE_LIBCALL(NEG_I64, "__negdi2")
+
+// Floating-point
+HANDLE_LIBCALL(ADD_F32, "__addsf3")
+HANDLE_LIBCALL(ADD_F64, "__adddf3")
+HANDLE_LIBCALL(ADD_F80, "__addxf3")
+HANDLE_LIBCALL(ADD_F128, "__addtf3")
+HANDLE_LIBCALL(ADD_PPCF128, "__gcc_qadd")
+HANDLE_LIBCALL(SUB_F32, "__subsf3")
+HANDLE_LIBCALL(SUB_F64, "__subdf3")
+HANDLE_LIBCALL(SUB_F80, "__subxf3")
+HANDLE_LIBCALL(SUB_F128, "__subtf3")
+HANDLE_LIBCALL(SUB_PPCF128, "__gcc_qsub")
+HANDLE_LIBCALL(MUL_F32, "__mulsf3")
+HANDLE_LIBCALL(MUL_F64, "__muldf3")
+HANDLE_LIBCALL(MUL_F80, "__mulxf3")
+HANDLE_LIBCALL(MUL_F128, "__multf3")
+HANDLE_LIBCALL(MUL_PPCF128, "__gcc_qmul")
+HANDLE_LIBCALL(DIV_F32, "__divsf3")
+HANDLE_LIBCALL(DIV_F64, "__divdf3")
+HANDLE_LIBCALL(DIV_F80, "__divxf3")
+HANDLE_LIBCALL(DIV_F128, "__divtf3")
+HANDLE_LIBCALL(DIV_PPCF128, "__gcc_qdiv")
+HANDLE_LIBCALL(REM_F32, "fmodf")
+HANDLE_LIBCALL(REM_F64, "fmod")
+HANDLE_LIBCALL(REM_F80, "fmodl")
+HANDLE_LIBCALL(REM_F128, "fmodl")
+HANDLE_LIBCALL(REM_PPCF128, "fmodl")
+HANDLE_LIBCALL(FMA_F32, "fmaf")
+HANDLE_LIBCALL(FMA_F64, "fma")
+HANDLE_LIBCALL(FMA_F80, "fmal")
+HANDLE_LIBCALL(FMA_F128, "fmal")
+HANDLE_LIBCALL(FMA_PPCF128, "fmal")
+HANDLE_LIBCALL(POWI_F32, "__powisf2")
+HANDLE_LIBCALL(POWI_F64, "__powidf2")
+HANDLE_LIBCALL(POWI_F80, "__powixf2")
+HANDLE_LIBCALL(POWI_F128, "__powitf2")
+HANDLE_LIBCALL(POWI_PPCF128, "__powitf2")
+HANDLE_LIBCALL(SQRT_F32, "sqrtf")
+HANDLE_LIBCALL(SQRT_F64, "sqrt")
+HANDLE_LIBCALL(SQRT_F80, "sqrtl")
+HANDLE_LIBCALL(SQRT_F128, "sqrtl")
+HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl")
+HANDLE_LIBCALL(LOG_F32, "logf")
+HANDLE_LIBCALL(LOG_F64, "log")
+HANDLE_LIBCALL(LOG_F80, "logl")
+HANDLE_LIBCALL(LOG_F128, "logl")
+HANDLE_LIBCALL(LOG_PPCF128, "logl")
+HANDLE_LIBCALL(LOG_FINITE_F32, "__logf_finite")
+HANDLE_LIBCALL(LOG_FINITE_F64, "__log_finite")
+HANDLE_LIBCALL(LOG_FINITE_F80, "__logl_finite")
+HANDLE_LIBCALL(LOG_FINITE_F128, "__logl_finite")
+HANDLE_LIBCALL(LOG_FINITE_PPCF128, "__logl_finite")
+HANDLE_LIBCALL(LOG2_F32, "log2f")
+HANDLE_LIBCALL(LOG2_F64, "log2")
+HANDLE_LIBCALL(LOG2_F80, "log2l")
+HANDLE_LIBCALL(LOG2_F128, "log2l")
+HANDLE_LIBCALL(LOG2_PPCF128, "log2l")
+HANDLE_LIBCALL(LOG2_FINITE_F32, "__log2f_finite")
+HANDLE_LIBCALL(LOG2_FINITE_F64, "__log2_finite")
+HANDLE_LIBCALL(LOG2_FINITE_F80, "__log2l_finite")
+HANDLE_LIBCALL(LOG2_FINITE_F128, "__log2l_finite")
+HANDLE_LIBCALL(LOG2_FINITE_PPCF128, "__log2l_finite")
+HANDLE_LIBCALL(LOG10_F32, "log10f")
+HANDLE_LIBCALL(LOG10_F64, "log10")
+HANDLE_LIBCALL(LOG10_F80, "log10l")
+HANDLE_LIBCALL(LOG10_F128, "log10l")
+HANDLE_LIBCALL(LOG10_PPCF128, "log10l")
+HANDLE_LIBCALL(LOG10_FINITE_F32, "__log10f_finite")
+HANDLE_LIBCALL(LOG10_FINITE_F64, "__log10_finite")
+HANDLE_LIBCALL(LOG10_FINITE_F80, "__log10l_finite")
+HANDLE_LIBCALL(LOG10_FINITE_F128, "__log10l_finite")
+HANDLE_LIBCALL(LOG10_FINITE_PPCF128, "__log10l_finite")
+HANDLE_LIBCALL(EXP_F32, "expf")
+HANDLE_LIBCALL(EXP_F64, "exp")
+HANDLE_LIBCALL(EXP_F80, "expl")
+HANDLE_LIBCALL(EXP_F128, "expl")
+HANDLE_LIBCALL(EXP_PPCF128, "expl")
+HANDLE_LIBCALL(EXP_FINITE_F32, "__expf_finite")
+HANDLE_LIBCALL(EXP_FINITE_F64, "__exp_finite")
+HANDLE_LIBCALL(EXP_FINITE_F80, "__expl_finite")
+HANDLE_LIBCALL(EXP_FINITE_F128, "__expl_finite")
+HANDLE_LIBCALL(EXP_FINITE_PPCF128, "__expl_finite")
+HANDLE_LIBCALL(EXP2_F32, "exp2f")
+HANDLE_LIBCALL(EXP2_F64, "exp2")
+HANDLE_LIBCALL(EXP2_F80, "exp2l")
+HANDLE_LIBCALL(EXP2_F128, "exp2l")
+HANDLE_LIBCALL(EXP2_PPCF128, "exp2l")
+HANDLE_LIBCALL(EXP2_FINITE_F32, "__exp2f_finite")
+HANDLE_LIBCALL(EXP2_FINITE_F64, "__exp2_finite")
+HANDLE_LIBCALL(EXP2_FINITE_F80, "__exp2l_finite")
+HANDLE_LIBCALL(EXP2_FINITE_F128, "__exp2l_finite")
+HANDLE_LIBCALL(EXP2_FINITE_PPCF128, "__exp2l_finite")
+HANDLE_LIBCALL(SIN_F32, "sinf")
+HANDLE_LIBCALL(SIN_F64, "sin")
+HANDLE_LIBCALL(SIN_F80, "sinl")
+HANDLE_LIBCALL(SIN_F128, "sinl")
+HANDLE_LIBCALL(SIN_PPCF128, "sinl")
+HANDLE_LIBCALL(COS_F32, "cosf")
+HANDLE_LIBCALL(COS_F64, "cos")
+HANDLE_LIBCALL(COS_F80, "cosl")
+HANDLE_LIBCALL(COS_F128, "cosl")
+HANDLE_LIBCALL(COS_PPCF128, "cosl")
+HANDLE_LIBCALL(SINCOS_F32, nullptr)
+HANDLE_LIBCALL(SINCOS_F64, nullptr)
+HANDLE_LIBCALL(SINCOS_F80, nullptr)
+HANDLE_LIBCALL(SINCOS_F128, nullptr)
+HANDLE_LIBCALL(SINCOS_PPCF128, nullptr)
+HANDLE_LIBCALL(SINCOS_STRET_F32, nullptr)
+HANDLE_LIBCALL(SINCOS_STRET_F64, nullptr)
+HANDLE_LIBCALL(POW_F32, "powf")
+HANDLE_LIBCALL(POW_F64, "pow")
+HANDLE_LIBCALL(POW_F80, "powl")
+HANDLE_LIBCALL(POW_F128, "powl")
+HANDLE_LIBCALL(POW_PPCF128, "powl")
+HANDLE_LIBCALL(POW_FINITE_F32, "__powf_finite")
+HANDLE_LIBCALL(POW_FINITE_F64, "__pow_finite")
+HANDLE_LIBCALL(POW_FINITE_F80, "__powl_finite")
+HANDLE_LIBCALL(POW_FINITE_F128, "__powl_finite")
+HANDLE_LIBCALL(POW_FINITE_PPCF128, "__powl_finite")
+HANDLE_LIBCALL(CEIL_F32, "ceilf")
+HANDLE_LIBCALL(CEIL_F64, "ceil")
+HANDLE_LIBCALL(CEIL_F80, "ceill")
+HANDLE_LIBCALL(CEIL_F128, "ceill")
+HANDLE_LIBCALL(CEIL_PPCF128, "ceill")
+HANDLE_LIBCALL(TRUNC_F32, "truncf")
+HANDLE_LIBCALL(TRUNC_F64, "trunc")
+HANDLE_LIBCALL(TRUNC_F80, "truncl")
+HANDLE_LIBCALL(TRUNC_F128, "truncl")
+HANDLE_LIBCALL(TRUNC_PPCF128, "truncl")
+HANDLE_LIBCALL(RINT_F32, "rintf")
+HANDLE_LIBCALL(RINT_F64, "rint")
+HANDLE_LIBCALL(RINT_F80, "rintl")
+HANDLE_LIBCALL(RINT_F128, "rintl")
+HANDLE_LIBCALL(RINT_PPCF128, "rintl")
+HANDLE_LIBCALL(NEARBYINT_F32, "nearbyintf")
+HANDLE_LIBCALL(NEARBYINT_F64, "nearbyint")
+HANDLE_LIBCALL(NEARBYINT_F80, "nearbyintl")
+HANDLE_LIBCALL(NEARBYINT_F128, "nearbyintl")
+HANDLE_LIBCALL(NEARBYINT_PPCF128, "nearbyintl")
+HANDLE_LIBCALL(ROUND_F32, "roundf")
+HANDLE_LIBCALL(ROUND_F64, "round")
+HANDLE_LIBCALL(ROUND_F80, "roundl")
+HANDLE_LIBCALL(ROUND_F128, "roundl")
+HANDLE_LIBCALL(ROUND_PPCF128, "roundl")
+HANDLE_LIBCALL(FLOOR_F32, "floorf")
+HANDLE_LIBCALL(FLOOR_F64, "floor")
+HANDLE_LIBCALL(FLOOR_F80, "floorl")
+HANDLE_LIBCALL(FLOOR_F128, "floorl")
+HANDLE_LIBCALL(FLOOR_PPCF128, "floorl")
+HANDLE_LIBCALL(COPYSIGN_F32, "copysignf")
+HANDLE_LIBCALL(COPYSIGN_F64, "copysign")
+HANDLE_LIBCALL(COPYSIGN_F80, "copysignl")
+HANDLE_LIBCALL(COPYSIGN_F128, "copysignl")
+HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl")
+HANDLE_LIBCALL(FMIN_F32, "fminf")
+HANDLE_LIBCALL(FMIN_F64, "fmin")
+HANDLE_LIBCALL(FMIN_F80, "fminl")
+HANDLE_LIBCALL(FMIN_F128, "fminl")
+HANDLE_LIBCALL(FMIN_PPCF128, "fminl")
+HANDLE_LIBCALL(FMAX_F32, "fmaxf")
+HANDLE_LIBCALL(FMAX_F64, "fmax")
+HANDLE_LIBCALL(FMAX_F80, "fmaxl")
+HANDLE_LIBCALL(FMAX_F128, "fmaxl")
+HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl")
+
+// Conversion
+HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq")
+HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq")
+HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")
+HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2")
+HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2")
+HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2")
+HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee")
+HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee")
+HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2")
+HANDLE_LIBCALL(FPROUND_F80_F16, "__truncxfhf2")
+HANDLE_LIBCALL(FPROUND_F128_F16, "__trunctfhf2")
+HANDLE_LIBCALL(FPROUND_PPCF128_F16, "__trunctfhf2")
+HANDLE_LIBCALL(FPROUND_F64_F32, "__truncdfsf2")
+HANDLE_LIBCALL(FPROUND_F80_F32, "__truncxfsf2")
+HANDLE_LIBCALL(FPROUND_F128_F32, "__trunctfsf2")
+HANDLE_LIBCALL(FPROUND_PPCF128_F32, "__gcc_qtos")
+HANDLE_LIBCALL(FPROUND_F80_F64, "__truncxfdf2")
+HANDLE_LIBCALL(FPROUND_F128_F64, "__trunctfdf2")
+HANDLE_LIBCALL(FPROUND_PPCF128_F64, "__gcc_qtod")
+HANDLE_LIBCALL(FPROUND_F128_F80, "__trunctfxf2")
+HANDLE_LIBCALL(FPTOSINT_F32_I32, "__fixsfsi")
+HANDLE_LIBCALL(FPTOSINT_F32_I64, "__fixsfdi")
+HANDLE_LIBCALL(FPTOSINT_F32_I128, "__fixsfti")
+HANDLE_LIBCALL(FPTOSINT_F64_I32, "__fixdfsi")
+HANDLE_LIBCALL(FPTOSINT_F64_I64, "__fixdfdi")
+HANDLE_LIBCALL(FPTOSINT_F64_I128, "__fixdfti")
+HANDLE_LIBCALL(FPTOSINT_F80_I32, "__fixxfsi")
+HANDLE_LIBCALL(FPTOSINT_F80_I64, "__fixxfdi")
+HANDLE_LIBCALL(FPTOSINT_F80_I128, "__fixxfti")
+HANDLE_LIBCALL(FPTOSINT_F128_I32, "__fixtfsi")
+HANDLE_LIBCALL(FPTOSINT_F128_I64, "__fixtfdi")
+HANDLE_LIBCALL(FPTOSINT_F128_I128, "__fixtfti")
+HANDLE_LIBCALL(FPTOSINT_PPCF128_I32, "__gcc_qtou")
+HANDLE_LIBCALL(FPTOSINT_PPCF128_I64, "__fixtfdi")
+HANDLE_LIBCALL(FPTOSINT_PPCF128_I128, "__fixtfti")
+HANDLE_LIBCALL(FPTOUINT_F32_I32, "__fixunssfsi")
+HANDLE_LIBCALL(FPTOUINT_F32_I64, "__fixunssfdi")
+HANDLE_LIBCALL(FPTOUINT_F32_I128, "__fixunssfti")
+HANDLE_LIBCALL(FPTOUINT_F64_I32, "__fixunsdfsi")
+HANDLE_LIBCALL(FPTOUINT_F64_I64, "__fixunsdfdi")
+HANDLE_LIBCALL(FPTOUINT_F64_I128, "__fixunsdfti")
+HANDLE_LIBCALL(FPTOUINT_F80_I32, "__fixunsxfsi")
+HANDLE_LIBCALL(FPTOUINT_F80_I64, "__fixunsxfdi")
+HANDLE_LIBCALL(FPTOUINT_F80_I128, "__fixunsxfti")
+HANDLE_LIBCALL(FPTOUINT_F128_I32, "__fixunstfsi")
+HANDLE_LIBCALL(FPTOUINT_F128_I64, "__fixunstfdi")
+HANDLE_LIBCALL(FPTOUINT_F128_I128, "__fixunstfti")
+HANDLE_LIBCALL(FPTOUINT_PPCF128_I32, "__fixunstfsi")
+HANDLE_LIBCALL(FPTOUINT_PPCF128_I64, "__fixunstfdi")
+HANDLE_LIBCALL(FPTOUINT_PPCF128_I128, "__fixunstfti")
+HANDLE_LIBCALL(SINTTOFP_I32_F32, "__floatsisf")
+HANDLE_LIBCALL(SINTTOFP_I32_F64, "__floatsidf")
+HANDLE_LIBCALL(SINTTOFP_I32_F80, "__floatsixf")
+HANDLE_LIBCALL(SINTTOFP_I32_F128, "__floatsitf")
+HANDLE_LIBCALL(SINTTOFP_I32_PPCF128, "__gcc_itoq")
+HANDLE_LIBCALL(SINTTOFP_I64_F32, "__floatdisf")
+HANDLE_LIBCALL(SINTTOFP_I64_F64, "__floatdidf")
+HANDLE_LIBCALL(SINTTOFP_I64_F80, "__floatdixf")
+HANDLE_LIBCALL(SINTTOFP_I64_F128, "__floatditf")
+HANDLE_LIBCALL(SINTTOFP_I64_PPCF128, "__floatditf")
+HANDLE_LIBCALL(SINTTOFP_I128_F32, "__floattisf")
+HANDLE_LIBCALL(SINTTOFP_I128_F64, "__floattidf")
+HANDLE_LIBCALL(SINTTOFP_I128_F80, "__floattixf")
+HANDLE_LIBCALL(SINTTOFP_I128_F128, "__floattitf")
+HANDLE_LIBCALL(SINTTOFP_I128_PPCF128, "__floattitf")
+HANDLE_LIBCALL(UINTTOFP_I32_F32, "__floatunsisf")
+HANDLE_LIBCALL(UINTTOFP_I32_F64, "__floatunsidf")
+HANDLE_LIBCALL(UINTTOFP_I32_F80, "__floatunsixf")
+HANDLE_LIBCALL(UINTTOFP_I32_F128, "__floatunsitf")
+HANDLE_LIBCALL(UINTTOFP_I32_PPCF128, "__gcc_utoq")
+HANDLE_LIBCALL(UINTTOFP_I64_F32, "__floatundisf")
+HANDLE_LIBCALL(UINTTOFP_I64_F64, "__floatundidf")
+HANDLE_LIBCALL(UINTTOFP_I64_F80, "__floatundixf")
+HANDLE_LIBCALL(UINTTOFP_I64_F128, "__floatunditf")
+HANDLE_LIBCALL(UINTTOFP_I64_PPCF128, "__floatunditf")
+HANDLE_LIBCALL(UINTTOFP_I128_F32, "__floatuntisf")
+HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf")
+HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf")
+HANDLE_LIBCALL(UINTTOFP_I128_F128, "__floatuntitf")
+HANDLE_LIBCALL(UINTTOFP_I128_PPCF128, "__floatuntitf")
+
+// Comparison
+HANDLE_LIBCALL(OEQ_F32, "__eqsf2")
+HANDLE_LIBCALL(OEQ_F64, "__eqdf2")
+HANDLE_LIBCALL(OEQ_F128, "__eqtf2")
+HANDLE_LIBCALL(OEQ_PPCF128, "__gcc_qeq")
+HANDLE_LIBCALL(UNE_F32, "__nesf2")
+HANDLE_LIBCALL(UNE_F64, "__nedf2")
+HANDLE_LIBCALL(UNE_F128, "__netf2")
+HANDLE_LIBCALL(UNE_PPCF128, "__gcc_qne")
+HANDLE_LIBCALL(OGE_F32, "__gesf2")
+HANDLE_LIBCALL(OGE_F64, "__gedf2")
+HANDLE_LIBCALL(OGE_F128, "__getf2")
+HANDLE_LIBCALL(OGE_PPCF128, "__gcc_qge")
+HANDLE_LIBCALL(OLT_F32, "__ltsf2")
+HANDLE_LIBCALL(OLT_F64, "__ltdf2")
+HANDLE_LIBCALL(OLT_F128, "__lttf2")
+HANDLE_LIBCALL(OLT_PPCF128, "__gcc_qlt")
+HANDLE_LIBCALL(OLE_F32, "__lesf2")
+HANDLE_LIBCALL(OLE_F64, "__ledf2")
+HANDLE_LIBCALL(OLE_F128, "__letf2")
+HANDLE_LIBCALL(OLE_PPCF128, "__gcc_qle")
+HANDLE_LIBCALL(OGT_F32, "__gtsf2")
+HANDLE_LIBCALL(OGT_F64, "__gtdf2")
+HANDLE_LIBCALL(OGT_F128, "__gttf2")
+HANDLE_LIBCALL(OGT_PPCF128, "__gcc_qgt")
+HANDLE_LIBCALL(UO_F32, "__unordsf2")
+HANDLE_LIBCALL(UO_F64, "__unorddf2")
+HANDLE_LIBCALL(UO_F128, "__unordtf2")
+HANDLE_LIBCALL(UO_PPCF128, "__gcc_qunord")
+HANDLE_LIBCALL(O_F32, "__unordsf2")
+HANDLE_LIBCALL(O_F64, "__unorddf2")
+HANDLE_LIBCALL(O_F128, "__unordtf2")
+HANDLE_LIBCALL(O_PPCF128, "__gcc_qunord")
+
+// Memory
+HANDLE_LIBCALL(MEMCPY, "memcpy")
+HANDLE_LIBCALL(MEMMOVE, "memmove")
+HANDLE_LIBCALL(MEMSET, "memset")
+HANDLE_LIBCALL(BZERO, nullptr)
+
+// Element-wise unordered-atomic memory of different sizes
+HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memcpy_element_unordered_atomic_1")
+HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memcpy_element_unordered_atomic_2")
+HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memcpy_element_unordered_atomic_4")
+HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memcpy_element_unordered_atomic_8")
+HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memcpy_element_unordered_atomic_16")
+HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memmove_element_unordered_atomic_1")
+HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memmove_element_unordered_atomic_2")
+HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memmove_element_unordered_atomic_4")
+HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memmove_element_unordered_atomic_8")
+HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memmove_element_unordered_atomic_16")
+HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memset_element_unordered_atomic_1")
+HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memset_element_unordered_atomic_2")
+HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memset_element_unordered_atomic_4")
+HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memset_element_unordered_atomic_8")
+HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memset_element_unordered_atomic_16")
+
+// Exception handling
+HANDLE_LIBCALL(UNWIND_RESUME, "_Unwind_Resume")
+
+// Note: there are two sets of atomics libcalls; see
+// <https://llvm.org/docs/Atomics.html> for more info on the
+// difference between them.
+
+// Atomic '__sync_*' libcalls.
+HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_1, "__sync_val_compare_and_swap_1")
+HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_2, "__sync_val_compare_and_swap_2")
+HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_4, "__sync_val_compare_and_swap_4")
+HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_8, "__sync_val_compare_and_swap_8")
+HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_16, "__sync_val_compare_and_swap_16")
+HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_1, "__sync_lock_test_and_set_1")
+HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_2, "__sync_lock_test_and_set_2")
+HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_4, "__sync_lock_test_and_set_4")
+HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_8, "__sync_lock_test_and_set_8")
+HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_16, "__sync_lock_test_and_set_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_1, "__sync_fetch_and_add_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_2, "__sync_fetch_and_add_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_4, "__sync_fetch_and_add_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_8, "__sync_fetch_and_add_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_16, "__sync_fetch_and_add_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_1, "__sync_fetch_and_sub_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_2, "__sync_fetch_and_sub_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_4, "__sync_fetch_and_sub_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_8, "__sync_fetch_and_sub_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_16, "__sync_fetch_and_sub_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_AND_1, "__sync_fetch_and_and_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_AND_2, "__sync_fetch_and_and_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_AND_4, "__sync_fetch_and_and_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_AND_8, "__sync_fetch_and_and_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_AND_16, "__sync_fetch_and_and_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_OR_1, "__sync_fetch_and_or_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_OR_2, "__sync_fetch_and_or_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_OR_4, "__sync_fetch_and_or_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_OR_8, "__sync_fetch_and_or_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_OR_16, "__sync_fetch_and_or_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_1, "__sync_fetch_and_xor_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_2, "__sync_fetch_and_xor_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_4, "__sync_fetch_and_xor_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_8, "__sync_fetch_and_xor_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_16, "__sync_fetch_and_xor_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_1, "__sync_fetch_and_nand_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_2, "__sync_fetch_and_nand_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_4, "__sync_fetch_and_nand_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_8, "__sync_fetch_and_nand_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_16, "__sync_fetch_and_nand_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_1, "__sync_fetch_and_max_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_2, "__sync_fetch_and_max_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_4, "__sync_fetch_and_max_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_8, "__sync_fetch_and_max_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_16, "__sync_fetch_and_max_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_1, "__sync_fetch_and_umax_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_2, "__sync_fetch_and_umax_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_4, "__sync_fetch_and_umax_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_8, "__sync_fetch_and_umax_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_16, "__sync_fetch_and_umax_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_1, "__sync_fetch_and_min_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_2, "__sync_fetch_and_min_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_4, "__sync_fetch_and_min_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_8, "__sync_fetch_and_min_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_16, "__sync_fetch_and_min_16")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_1, "__sync_fetch_and_umin_1")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_2, "__sync_fetch_and_umin_2")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_4, "__sync_fetch_and_umin_4")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_8, "__sync_fetch_and_umin_8")
+HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_16, "__sync_fetch_and_umin_16")
+
+// Atomic `__atomic_*' libcalls.
+HANDLE_LIBCALL(ATOMIC_LOAD, "__atomic_load")
+HANDLE_LIBCALL(ATOMIC_LOAD_1, "__atomic_load_1")
+HANDLE_LIBCALL(ATOMIC_LOAD_2, "__atomic_load_2")
+HANDLE_LIBCALL(ATOMIC_LOAD_4, "__atomic_load_4")
+HANDLE_LIBCALL(ATOMIC_LOAD_8, "__atomic_load_8")
+HANDLE_LIBCALL(ATOMIC_LOAD_16, "__atomic_load_16")
+
+HANDLE_LIBCALL(ATOMIC_STORE, "__atomic_store")
+HANDLE_LIBCALL(ATOMIC_STORE_1, "__atomic_store_1")
+HANDLE_LIBCALL(ATOMIC_STORE_2, "__atomic_store_2")
+HANDLE_LIBCALL(ATOMIC_STORE_4, "__atomic_store_4")
+HANDLE_LIBCALL(ATOMIC_STORE_8, "__atomic_store_8")
+HANDLE_LIBCALL(ATOMIC_STORE_16, "__atomic_store_16")
+
+HANDLE_LIBCALL(ATOMIC_EXCHANGE, "__atomic_exchange")
+HANDLE_LIBCALL(ATOMIC_EXCHANGE_1, "__atomic_exchange_1")
+HANDLE_LIBCALL(ATOMIC_EXCHANGE_2, "__atomic_exchange_2")
+HANDLE_LIBCALL(ATOMIC_EXCHANGE_4, "__atomic_exchange_4")
+HANDLE_LIBCALL(ATOMIC_EXCHANGE_8, "__atomic_exchange_8")
+HANDLE_LIBCALL(ATOMIC_EXCHANGE_16, "__atomic_exchange_16")
+
+HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE, "__atomic_compare_exchange")
+HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_1, "__atomic_compare_exchange_1")
+HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_2, "__atomic_compare_exchange_2")
+HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_4, "__atomic_compare_exchange_4")
+HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_8, "__atomic_compare_exchange_8")
+HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_16, "__atomic_compare_exchange_16")
+
+HANDLE_LIBCALL(ATOMIC_FETCH_ADD_1, "__atomic_fetch_add_1")
+HANDLE_LIBCALL(ATOMIC_FETCH_ADD_2, "__atomic_fetch_add_2")
+HANDLE_LIBCALL(ATOMIC_FETCH_ADD_4, "__atomic_fetch_add_4")
+HANDLE_LIBCALL(ATOMIC_FETCH_ADD_8, "__atomic_fetch_add_8")
+HANDLE_LIBCALL(ATOMIC_FETCH_ADD_16, "__atomic_fetch_add_16")
+HANDLE_LIBCALL(ATOMIC_FETCH_SUB_1, "__atomic_fetch_sub_1")
+HANDLE_LIBCALL(ATOMIC_FETCH_SUB_2, "__atomic_fetch_sub_2")
+HANDLE_LIBCALL(ATOMIC_FETCH_SUB_4, "__atomic_fetch_sub_4")
+HANDLE_LIBCALL(ATOMIC_FETCH_SUB_8, "__atomic_fetch_sub_8")
+HANDLE_LIBCALL(ATOMIC_FETCH_SUB_16, "__atomic_fetch_sub_16")
+HANDLE_LIBCALL(ATOMIC_FETCH_AND_1, "__atomic_fetch_and_1")
+HANDLE_LIBCALL(ATOMIC_FETCH_AND_2, "__atomic_fetch_and_2")
+HANDLE_LIBCALL(ATOMIC_FETCH_AND_4, "__atomic_fetch_and_4")
+HANDLE_LIBCALL(ATOMIC_FETCH_AND_8, "__atomic_fetch_and_8")
+HANDLE_LIBCALL(ATOMIC_FETCH_AND_16, "__atomic_fetch_and_16")
+HANDLE_LIBCALL(ATOMIC_FETCH_OR_1, "__atomic_fetch_or_1")
+HANDLE_LIBCALL(ATOMIC_FETCH_OR_2, "__atomic_fetch_or_2")
+HANDLE_LIBCALL(ATOMIC_FETCH_OR_4, "__atomic_fetch_or_4")
+HANDLE_LIBCALL(ATOMIC_FETCH_OR_8, "__atomic_fetch_or_8")
+HANDLE_LIBCALL(ATOMIC_FETCH_OR_16, "__atomic_fetch_or_16")
+HANDLE_LIBCALL(ATOMIC_FETCH_XOR_1, "__atomic_fetch_xor_1")
+HANDLE_LIBCALL(ATOMIC_FETCH_XOR_2, "__atomic_fetch_xor_2")
+HANDLE_LIBCALL(ATOMIC_FETCH_XOR_4, "__atomic_fetch_xor_4")
+HANDLE_LIBCALL(ATOMIC_FETCH_XOR_8, "__atomic_fetch_xor_8")
+HANDLE_LIBCALL(ATOMIC_FETCH_XOR_16, "__atomic_fetch_xor_16")
+HANDLE_LIBCALL(ATOMIC_FETCH_NAND_1, "__atomic_fetch_nand_1")
+HANDLE_LIBCALL(ATOMIC_FETCH_NAND_2, "__atomic_fetch_nand_2")
+HANDLE_LIBCALL(ATOMIC_FETCH_NAND_4, "__atomic_fetch_nand_4")
+HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8")
+HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16")
+
+// Stack Protector Fail
+HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail")
+
+// Deoptimization
+HANDLE_LIBCALL(DEOPTIMIZE, "__llvm_deoptimize")
+
+HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr)
+
+#undef HANDLE_LIBCALL
diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h
index ad9537e9762e..c8e905b21a30 100644
--- a/include/llvm/IR/Statepoint.h
+++ b/include/llvm/IR/Statepoint.h
@@ -196,7 +196,7 @@ public:
     return make_range(arg_begin(), arg_end());
   }
 
-  /// \brief Return true if the call or the callee has the given attribute.
+  /// Return true if the call or the callee has the given attribute.
   bool paramHasAttr(unsigned i, Attribute::AttrKind A) const {
     Function *F = getCalledFunction();
     return getCallSite().paramHasAttr(i + CallArgsBeginPos, A) ||
@@ -465,7 +465,7 @@ struct StatepointDirectives {
 /// AS.
 StatepointDirectives parseStatepointDirectivesFromAttrs(AttributeList AS);
 
-/// Return \c true if the the \p Attr is an attribute that is a statepoint
+/// Return \c true if the \p Attr is an attribute that is a statepoint
 /// directive.
 bool isStatepointDirectiveAttr(Attribute Attr);
 
diff --git a/include/llvm/IR/TrackingMDRef.h b/include/llvm/IR/TrackingMDRef.h
index bdec904ad1e1..084efada221f 100644
--- a/include/llvm/IR/TrackingMDRef.h
+++ b/include/llvm/IR/TrackingMDRef.h
@@ -20,7 +20,7 @@
 
 namespace llvm {
 
-/// \brief Tracking metadata reference.
+/// Tracking metadata reference.
 ///
 /// This class behaves like \a TrackingVH, but for metadata.
 class TrackingMDRef {
@@ -70,7 +70,7 @@ public:
     track();
   }
 
-  /// \brief Check whether this has a trivial destructor.
+  /// Check whether this has a trivial destructor.
   ///
   /// If \c MD isn't replaceable, the destructor will be a no-op.
   bool hasTrivialDestructor() const {
@@ -100,7 +100,7 @@ private:
   }
 };
 
-/// \brief Typed tracking ref.
+/// Typed tracking ref.
 ///
 /// Track refererences of a particular type.  It's useful to use this for \a
 /// MDNode and \a ValueAsMetadata.
@@ -135,7 +135,7 @@ public:
   void reset() { Ref.reset(); }
   void reset(T *MD) { Ref.reset(static_cast<Metadata *>(MD)); }
 
-  /// \brief Check whether this has a trivial destructor.
+  /// Check whether this has a trivial destructor.
   bool hasTrivialDestructor() const { return Ref.hasTrivialDestructor(); }
 };
 
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
index 1574fc334ffc..9c1f99d1b3a2 100644
--- a/include/llvm/IR/Type.h
+++ b/include/llvm/IR/Type.h
@@ -208,6 +208,9 @@ public:
     return getScalarType()->isIntegerTy(BitWidth);
   }
 
+  /// Return true if this is an integer type or a pointer type.
+  bool isIntOrPtrTy() const { return isIntegerTy() || isPointerTy(); }
+
   /// True if this is an instance of FunctionType.
   bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
 
@@ -229,7 +232,7 @@ public:
   /// Return true if this type could be converted with a lossless BitCast to
   /// type 'Ty'. For example, i8* to i32*. BitCasts are valid for types of the
   /// same size only where no re-interpretation of the bits is done.
-  /// @brief Determine if this type could be losslessly bitcast to Ty
+  /// Determine if this type could be losslessly bitcast to Ty
   bool canLosslesslyBitCastTo(Type *Ty) const;
 
   /// Return true if this type is empty, that is, it has no elements or all of
@@ -407,6 +410,20 @@ public:
   static IntegerType *getInt32Ty(LLVMContext &C);
   static IntegerType *getInt64Ty(LLVMContext &C);
   static IntegerType *getInt128Ty(LLVMContext &C);
+  template <typename ScalarTy> static Type *getScalarTy(LLVMContext &C) {
+    int noOfBits = sizeof(ScalarTy) * CHAR_BIT;
+    if (std::is_integral<ScalarTy>::value) {
+      return (Type*) Type::getIntNTy(C, noOfBits);
+    } else if (std::is_floating_point<ScalarTy>::value) {
+      switch (noOfBits) {
+      case 32:
+        return Type::getFloatTy(C);
+      case 64:
+        return Type::getDoubleTy(C);
+      }
+    }
+    llvm_unreachable("Unsupported type in Type::getScalarTy");
+  }
 
   //===--------------------------------------------------------------------===//
   // Convenience methods for getting pointer types with one of the above builtin
diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h
index 0ac13935c7ce..25c44e0871a9 100644
--- a/include/llvm/IR/Use.h
+++ b/include/llvm/IR/Use.h
@@ -36,7 +36,7 @@ template <typename> struct simplify_type;
 class User;
 class Value;
 
-/// \brief A Use represents the edge between a Value definition and its users.
+/// A Use represents the edge between a Value definition and its users.
 ///
 /// This is notionally a two-dimensional linked list. It supports traversing
 /// all of the uses for a particular value definition. It also supports jumping
@@ -57,7 +57,7 @@ class Use {
 public:
   Use(const Use &U) = delete;
 
-  /// \brief Provide a fast substitute to std::swap<Use>
+  /// Provide a fast substitute to std::swap<Use>
   /// that also works with less standard-compliant compilers
   void swap(Use &RHS);
 
@@ -107,7 +107,7 @@ public:
   operator Value *() const { return Val; }
   Value *get() const { return Val; }
 
-  /// \brief Returns the User that contains this Use.
+  /// Returns the User that contains this Use.
   ///
   /// For an instruction operand, for example, this will return the
   /// instruction.
@@ -123,16 +123,16 @@ public:
 
   Use *getNext() const { return Next; }
 
-  /// \brief Return the operand # of this use in its User.
+  /// Return the operand # of this use in its User.
   unsigned getOperandNo() const;
 
-  /// \brief Initializes the waymarking tags on an array of Uses.
+  /// Initializes the waymarking tags on an array of Uses.
   ///
   /// This sets up the array of Uses such that getUser() can find the User from
   /// any of those Uses.
   static Use *initTags(Use *Start, Use *Stop);
 
-  /// \brief Destroys Use operands when the number of operands of
+  /// Destroys Use operands when the number of operands of
   /// a User changes.
   static void zap(Use *Start, const Use *Stop, bool del = false);
 
@@ -161,7 +161,7 @@ private:
   }
 };
 
-/// \brief Allow clients to treat uses just like values when using
+/// Allow clients to treat uses just like values when using
 /// casting operators.
 template <> struct simplify_type<Use> {
   using SimpleType = Value *;
diff --git a/include/llvm/IR/UseListOrder.h b/include/llvm/IR/UseListOrder.h
index a8b394fc6302..b6bb0f19a0aa 100644
--- a/include/llvm/IR/UseListOrder.h
+++ b/include/llvm/IR/UseListOrder.h
@@ -23,7 +23,7 @@ namespace llvm {
 class Function;
 class Value;
 
-/// \brief Structure to hold a use-list order.
+/// Structure to hold a use-list order.
 struct UseListOrder {
   const Value *V = nullptr;
   const Function *F = nullptr;
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index 4dfa19cf241f..d6a603ce845d 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -36,7 +36,7 @@ namespace llvm {
 template <typename T> class ArrayRef;
 template <typename T> class MutableArrayRef;
 
-/// \brief Compile-time customization of User operands.
+/// Compile-time customization of User operands.
 ///
 /// Customizes operand-related allocators and accessors.
 template <class>
@@ -81,13 +81,13 @@ protected:
            "Error in initializing hung off uses for User");
   }
 
-  /// \brief Allocate the array of Uses, followed by a pointer
+  /// Allocate the array of Uses, followed by a pointer
   /// (with bottom bit set) to the User.
   /// \param IsPhi identifies callers which are phi nodes and which need
   /// N BasicBlock* allocated along with N
   void allocHungoffUses(unsigned N, bool IsPhi = false);
 
-  /// \brief Grow the number of hung off uses.  Note that allocHungoffUses
+  /// Grow the number of hung off uses.  Note that allocHungoffUses
   /// should be called if there are no uses.
   void growHungoffUses(unsigned N, bool IsPhi = false);
 
@@ -97,15 +97,31 @@ protected:
 public:
   User(const User &) = delete;
 
-  /// \brief Free memory allocated for User and Use objects.
+  /// Free memory allocated for User and Use objects.
   void operator delete(void *Usr);
-  /// \brief Placement delete - required by std, but never called.
-  void operator delete(void*, unsigned) {
+  /// Placement delete - required by std, called if the ctor throws.
+  void operator delete(void *Usr, unsigned) {
+    // Note: If a subclass manipulates the information which is required to calculate the 
+    // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has 
+    // to restore the changed information to the original value, since the dtor of that class
+    // is not called if the ctor fails.  
+    User::operator delete(Usr);
+
+#ifndef LLVM_ENABLE_EXCEPTIONS
     llvm_unreachable("Constructor throws?");
+#endif
   }
-  /// \brief Placement delete - required by std, but never called.
-  void operator delete(void*, unsigned, bool) {
+  /// Placement delete - required by std, called if the ctor throws.
+  void operator delete(void *Usr, unsigned, bool) {
+    // Note: If a subclass manipulates the information which is required to calculate the 
+    // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has 
+    // to restore the changed information to the original value, since the dtor of that class
+    // is not called if the ctor fails.  
+    User::operator delete(Usr);
+
+#ifndef LLVM_ENABLE_EXCEPTIONS
     llvm_unreachable("Constructor throws?");
+#endif
   }
 
 protected:
@@ -194,7 +210,7 @@ public:
     NumUserOperands = NumOps;
   }
 
-  /// \brief Subclasses with hung off uses need to manage the operand count
+  /// Subclasses with hung off uses need to manage the operand count
   /// themselves.  In these instances, the operand count isn't used to find the
   /// OperandList, so there's no issue in having the operand count change.
   void setNumHungOffUseOperands(unsigned NumOps) {
@@ -226,7 +242,7 @@ public:
     return const_op_range(op_begin(), op_end());
   }
 
-  /// \brief Iterator for directly iterating over the operand Values.
+  /// Iterator for directly iterating over the operand Values.
   struct value_op_iterator
       : iterator_adaptor_base<value_op_iterator, op_iterator,
                               std::random_access_iterator_tag, Value *,
@@ -268,7 +284,7 @@ public:
     return make_range(value_op_begin(), value_op_end());
   }
 
-  /// \brief Drop all references to operands.
+  /// Drop all references to operands.
   ///
   /// This function is in charge of "letting go" of all objects that this User
   /// refers to.  This allows one to 'delete' a whole class at a time, even
@@ -281,7 +297,7 @@ public:
       U.set(nullptr);
   }
 
-  /// \brief Replace uses of one Value with another.
+  /// Replace uses of one Value with another.
   ///
   /// Replaces all references to the "From" definition with references to the
   /// "To" definition.
diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h
index d848fe921868..f396db995ab0 100644
--- a/include/llvm/IR/Value.h
+++ b/include/llvm/IR/Value.h
@@ -57,7 +57,7 @@ using ValueName = StringMapEntry<Value *>;
 //                                 Value Class
 //===----------------------------------------------------------------------===//
 
-/// \brief LLVM Value Representation
+/// LLVM Value Representation
 ///
 /// This is a very important LLVM class. It is the base class of all values
 /// computed by a program that may be used as operands to other values. Value is
@@ -83,7 +83,7 @@ class Value {
   unsigned char HasValueHandle : 1; // Has a ValueHandle pointing to this?
 
 protected:
-  /// \brief Hold subclass data that can be dropped.
+  /// Hold subclass data that can be dropped.
   ///
   /// This member is similar to SubclassData, however it is for holding
   /// information which may be used to aid optimization, but which may be
@@ -91,7 +91,7 @@ protected:
   unsigned char SubclassOptionalData : 7;
 
 private:
-  /// \brief Hold arbitrary subclass data.
+  /// Hold arbitrary subclass data.
   ///
   /// This member is defined by this class, but is not used for anything.
   /// Subclasses can use it to hold whatever state they find useful.  This
@@ -99,7 +99,7 @@ private:
   unsigned short SubclassData;
 
 protected:
-  /// \brief The number of operands in the subclass.
+  /// The number of operands in the subclass.
   ///
   /// This member is defined by this class, but not used for anything.
   /// Subclasses can use it to store their number of operands, if they have
@@ -173,7 +173,7 @@ private:
     bool operator==(const user_iterator_impl &x) const { return UI == x.UI; }
     bool operator!=(const user_iterator_impl &x) const { return !operator==(x); }
 
-    /// \brief Returns true if this iterator is equal to user_end() on the value.
+    /// Returns true if this iterator is equal to user_end() on the value.
     bool atEnd() const { return *this == user_iterator_impl(); }
 
     user_iterator_impl &operator++() { // Preincrement
@@ -218,17 +218,17 @@ public:
   /// Delete a pointer to a generic Value.
   void deleteValue();
 
-  /// \brief Support for debugging, callable in GDB: V->dump()
+  /// Support for debugging, callable in GDB: V->dump()
   void dump() const;
 
-  /// \brief Implement operator<< on Value.
+  /// Implement operator<< on Value.
   /// @{
   void print(raw_ostream &O, bool IsForDebug = false) const;
   void print(raw_ostream &O, ModuleSlotTracker &MST,
              bool IsForDebug = false) const;
   /// @}
 
-  /// \brief Print the name of this Value out to the specified raw_ostream.
+  /// Print the name of this Value out to the specified raw_ostream.
   ///
   /// This is useful when you just want to print 'int %reg126', not the
   /// instruction that generated it. If you specify a Module for context, then
@@ -241,13 +241,13 @@ public:
                       ModuleSlotTracker &MST) const;
   /// @}
 
-  /// \brief All values are typed, get the type of this value.
+  /// All values are typed, get the type of this value.
   Type *getType() const { return VTy; }
 
-  /// \brief All values hold a context through their type.
+  /// All values hold a context through their type.
   LLVMContext &getContext() const;
 
-  // \brief All values can potentially be named.
+  // All values can potentially be named.
   bool hasName() const { return HasName; }
   ValueName *getValueName() const;
   void setValueName(ValueName *VN);
@@ -258,35 +258,35 @@ private:
   void setNameImpl(const Twine &Name);
 
 public:
-  /// \brief Return a constant reference to the value's name.
+  /// Return a constant reference to the value's name.
   ///
   /// This guaranteed to return the same reference as long as the value is not
   /// modified.  If the value has a name, this does a hashtable lookup, so it's
   /// not free.
   StringRef getName() const;
 
-  /// \brief Change the name of the value.
+  /// Change the name of the value.
   ///
   /// Choose a new unique name if the provided name is taken.
   ///
   /// \param Name The new name; or "" if the value's name should be removed.
   void setName(const Twine &Name);
 
-  /// \brief Transfer the name from V to this value.
+  /// Transfer the name from V to this value.
   ///
   /// After taking V's name, sets V's name to empty.
   ///
   /// \note It is an error to call V->takeName(V).
   void takeName(Value *V);
 
-  /// \brief Change all uses of this to point to a new Value.
+  /// Change all uses of this to point to a new Value.
   ///
   /// Go through the uses list for this definition and make each use point to
   /// "V" instead of "this".  After this completes, 'this's use list is
   /// guaranteed to be empty.
   void replaceAllUsesWith(Value *V);
 
-  /// \brief Change non-metadata uses of this to point to a new Value.
+  /// Change non-metadata uses of this to point to a new Value.
   ///
   /// Go through the uses list for this definition and make each use point to
   /// "V" instead of "this". This function skips metadata entries in the list.
@@ -299,12 +299,6 @@ public:
   /// values or constant users.
   void replaceUsesOutsideBlock(Value *V, BasicBlock *BB);
 
-  /// replaceUsesExceptBlockAddr - Go through the uses list for this definition
-  /// and make each use point to "V" instead of "this" when the use is outside
-  /// the block. 'This's use list is expected to have at least one element.
-  /// Unlike replaceAllUsesWith this function skips blockaddr uses.
-  void replaceUsesExceptBlockAddr(Value *New);
-
   //----------------------------------------------------------------------
   // Methods for handling the chain of uses of this Value.
   //
@@ -411,7 +405,7 @@ public:
     return materialized_users();
   }
 
-  /// \brief Return true if there is exactly one user of this value.
+  /// Return true if there is exactly one user of this value.
   ///
   /// This is specialized because it is a common request and does not require
   /// traversing the whole use list.
@@ -421,27 +415,27 @@ public:
     return ++I == E;
   }
 
-  /// \brief Return true if this Value has exactly N users.
+  /// Return true if this Value has exactly N users.
   bool hasNUses(unsigned N) const;
 
-  /// \brief Return true if this value has N users or more.
+  /// Return true if this value has N users or more.
   ///
   /// This is logically equivalent to getNumUses() >= N.
   bool hasNUsesOrMore(unsigned N) const;
 
-  /// \brief Check if this value is used in the specified basic block.
+  /// Check if this value is used in the specified basic block.
   bool isUsedInBasicBlock(const BasicBlock *BB) const;
 
-  /// \brief This method computes the number of uses of this Value.
+  /// This method computes the number of uses of this Value.
   ///
   /// This is a linear time operation.  Use hasOneUse, hasNUses, or
   /// hasNUsesOrMore to check for specific values.
   unsigned getNumUses() const;
 
-  /// \brief This method should only be used by the Use class.
+  /// This method should only be used by the Use class.
   void addUse(Use &U) { U.addToList(&UseList); }
 
-  /// \brief Concrete subclass of this.
+  /// Concrete subclass of this.
   ///
   /// An enumeration for keeping track of the concrete subclass of Value that
   /// is actually instantiated. Values of this enumeration are kept in the
@@ -456,7 +450,7 @@ public:
 #include "llvm/IR/Value.def"
   };
 
-  /// \brief Return an ID for the concrete type of this object.
+  /// Return an ID for the concrete type of this object.
   ///
   /// This is used to implement the classof checks.  This should not be used
   /// for any other purpose, as the values may change as LLVM evolves.  Also,
@@ -470,36 +464,36 @@ public:
     return SubclassID;
   }
 
-  /// \brief Return the raw optional flags value contained in this value.
+  /// Return the raw optional flags value contained in this value.
   ///
   /// This should only be used when testing two Values for equivalence.
   unsigned getRawSubclassOptionalData() const {
     return SubclassOptionalData;
   }
 
-  /// \brief Clear the optional flags contained in this value.
+  /// Clear the optional flags contained in this value.
   void clearSubclassOptionalData() {
     SubclassOptionalData = 0;
   }
 
-  /// \brief Check the optional flags for equality.
+  /// Check the optional flags for equality.
   bool hasSameSubclassOptionalData(const Value *V) const {
     return SubclassOptionalData == V->SubclassOptionalData;
   }
 
-  /// \brief Return true if there is a value handle associated with this value.
+  /// Return true if there is a value handle associated with this value.
   bool hasValueHandle() const { return HasValueHandle; }
 
-  /// \brief Return true if there is metadata referencing this value.
+  /// Return true if there is metadata referencing this value.
   bool isUsedByMetadata() const { return IsUsedByMD; }
 
-  /// \brief Return true if this value is a swifterror value.
+  /// Return true if this value is a swifterror value.
   ///
   /// swifterror values can be either a function argument or an alloca with a
   /// swifterror attribute.
   bool isSwiftError() const;
 
-  /// \brief Strip off pointer casts, all-zero GEPs, and aliases.
+  /// Strip off pointer casts, all-zero GEPs, and aliases.
   ///
   /// Returns the original uncasted value.  If this is called on a non-pointer
   /// value, it returns 'this'.
@@ -509,18 +503,19 @@ public:
                          static_cast<const Value *>(this)->stripPointerCasts());
   }
 
-  /// \brief Strip off pointer casts, all-zero GEPs, aliases and barriers.
+  /// Strip off pointer casts, all-zero GEPs, aliases and invariant group
+  /// info.
   ///
   /// Returns the original uncasted value.  If this is called on a non-pointer
   /// value, it returns 'this'. This function should be used only in
   /// Alias analysis.
-  const Value *stripPointerCastsAndBarriers() const;
-  Value *stripPointerCastsAndBarriers() {
+  const Value *stripPointerCastsAndInvariantGroups() const;
+  Value *stripPointerCastsAndInvariantGroups() {
     return const_cast<Value *>(
-        static_cast<const Value *>(this)->stripPointerCastsAndBarriers());
+        static_cast<const Value *>(this)->stripPointerCastsAndInvariantGroups());
   }
 
-  /// \brief Strip off pointer casts and all-zero GEPs.
+  /// Strip off pointer casts and all-zero GEPs.
   ///
   /// Returns the original uncasted value.  If this is called on a non-pointer
   /// value, it returns 'this'.
@@ -530,7 +525,7 @@ public:
           static_cast<const Value *>(this)->stripPointerCastsNoFollowAliases());
   }
 
-  /// \brief Strip off pointer casts and all-constant inbounds GEPs.
+  /// Strip off pointer casts and all-constant inbounds GEPs.
   ///
   /// Returns the original pointer value.  If this is called on a non-pointer
   /// value, it returns 'this'.
@@ -540,7 +535,7 @@ public:
               static_cast<const Value *>(this)->stripInBoundsConstantOffsets());
   }
 
-  /// \brief Accumulate offsets from \a stripInBoundsConstantOffsets().
+  /// Accumulate offsets from \a stripInBoundsConstantOffsets().
   ///
   /// Stores the resulting constant offset stripped into the APInt provided.
   /// The provided APInt will be extended or truncated as needed to be the
@@ -555,7 +550,7 @@ public:
         ->stripAndAccumulateInBoundsConstantOffsets(DL, Offset));
   }
 
-  /// \brief Strip off pointer casts and inbounds GEPs.
+  /// Strip off pointer casts and inbounds GEPs.
   ///
   /// Returns the original pointer value.  If this is called on a non-pointer
   /// value, it returns 'this'.
@@ -565,7 +560,7 @@ public:
                       static_cast<const Value *>(this)->stripInBoundsOffsets());
   }
 
-  /// \brief Returns the number of bytes known to be dereferenceable for the
+  /// Returns the number of bytes known to be dereferenceable for the
   /// pointer value.
   ///
   /// If CanBeNull is set by this function the pointer can either be null or be
@@ -573,13 +568,13 @@ public:
   uint64_t getPointerDereferenceableBytes(const DataLayout &DL,
                                           bool &CanBeNull) const;
 
-  /// \brief Returns an alignment of the pointer value.
+  /// Returns an alignment of the pointer value.
   ///
   /// Returns an alignment which is either specified explicitly, e.g. via
   /// align attribute of a function argument, or guaranteed by DataLayout.
   unsigned getPointerAlignment(const DataLayout &DL) const;
 
-  /// \brief Translate PHI node to its predecessor from the given basic block.
+  /// Translate PHI node to its predecessor from the given basic block.
   ///
   /// If this value is a PHI node with CurBB as its parent, return the value in
   /// the PHI node corresponding to PredBB.  If not, return ourself.  This is
@@ -592,14 +587,14 @@ public:
              static_cast<const Value *>(this)->DoPHITranslation(CurBB, PredBB));
   }
 
-  /// \brief The maximum alignment for instructions.
+  /// The maximum alignment for instructions.
   ///
   /// This is the greatest alignment value supported by load, store, and alloca
   /// instructions, and global values.
   static const unsigned MaxAlignmentExponent = 29;
   static const unsigned MaximumAlignment = 1u << MaxAlignmentExponent;
 
-  /// \brief Mutate the type of this Value to be of the specified type.
+  /// Mutate the type of this Value to be of the specified type.
   ///
   /// Note that this is an extremely dangerous operation which can create
   /// completely invalid IR very easily.  It is strongly recommended that you
@@ -609,17 +604,17 @@ public:
     VTy = Ty;
   }
 
-  /// \brief Sort the use-list.
+  /// Sort the use-list.
   ///
   /// Sorts the Value's use-list by Cmp using a stable mergesort.  Cmp is
   /// expected to compare two \a Use references.
   template <class Compare> void sortUseList(Compare Cmp);
 
-  /// \brief Reverse the use-list.
+  /// Reverse the use-list.
   void reverseUseList();
 
 private:
-  /// \brief Merge two lists together.
+  /// Merge two lists together.
   ///
   /// Merges \c L and \c R using \c Cmp.  To enable stable sorts, always pushes
   /// "equal" items from L before items from R.
diff --git a/include/llvm/IR/ValueHandle.h b/include/llvm/IR/ValueHandle.h
index b45cc7b6dc02..d94472ce1be1 100644
--- a/include/llvm/IR/ValueHandle.h
+++ b/include/llvm/IR/ValueHandle.h
@@ -22,7 +22,7 @@
 
 namespace llvm {
 
-/// \brief This is the common base class of value handles.
+/// This is the common base class of value handles.
 ///
 /// ValueHandle's are smart pointers to Value's that have special behavior when
 /// the value is deleted or ReplaceAllUsesWith'd.  See the specific handles
@@ -31,7 +31,7 @@ class ValueHandleBase {
   friend class Value;
 
 protected:
-  /// \brief This indicates what sub class the handle actually is.
+  /// This indicates what sub class the handle actually is.
   ///
   /// This is to avoid having a vtable for the light-weight handle pointers. The
   /// fully general Callback version does have a vtable.
@@ -101,10 +101,10 @@ protected:
            V != DenseMapInfo<Value *>::getTombstoneKey();
   }
 
-  /// \brief Remove this ValueHandle from its current use list.
+  /// Remove this ValueHandle from its current use list.
   void RemoveFromUseList();
 
-  /// \brief Clear the underlying pointer without clearing the use list.
+  /// Clear the underlying pointer without clearing the use list.
   ///
   /// This should only be used if a derived class has manually removed the
   /// handle from the use list.
@@ -121,20 +121,20 @@ private:
   HandleBaseKind getKind() const { return PrevPair.getInt(); }
   void setPrevPtr(ValueHandleBase **Ptr) { PrevPair.setPointer(Ptr); }
 
-  /// \brief Add this ValueHandle to the use list for V.
+  /// Add this ValueHandle to the use list for V.
   ///
   /// List is the address of either the head of the list or a Next node within
   /// the existing use list.
   void AddToExistingUseList(ValueHandleBase **List);
 
-  /// \brief Add this ValueHandle to the use list after Node.
+  /// Add this ValueHandle to the use list after Node.
   void AddToExistingUseListAfter(ValueHandleBase *Node);
 
-  /// \brief Add this ValueHandle to the use list for V.
+  /// Add this ValueHandle to the use list for V.
   void AddToUseList();
 };
 
-/// \brief A nullable Value handle that is nullable.
+/// A nullable Value handle that is nullable.
 ///
 /// This is a value handle that points to a value, and nulls itself
 /// out if that value is deleted.
@@ -172,7 +172,7 @@ template <> struct simplify_type<const WeakVH> {
   static SimpleType getSimplifiedValue(const WeakVH &WVH) { return WVH; }
 };
 
-/// \brief Value handle that is nullable, but tries to track the Value.
+/// Value handle that is nullable, but tries to track the Value.
 ///
 /// This is a value handle that tries hard to point to a Value, even across
 /// RAUW operations, but will null itself out if the value is destroyed.  this
@@ -219,7 +219,7 @@ template <> struct simplify_type<const WeakTrackingVH> {
   }
 };
 
-/// \brief Value handle that asserts if the Value is deleted.
+/// Value handle that asserts if the Value is deleted.
 ///
 /// This is a Value Handle that points to a value and asserts out if the value
 /// is destroyed while the handle is still live.  This is very useful for
@@ -318,7 +318,7 @@ struct isPodLike<AssertingVH<T>> {
 #endif
 };
 
-/// \brief Value handle that tracks a Value across RAUW.
+/// Value handle that tracks a Value across RAUW.
 ///
 /// TrackingVH is designed for situations where a client needs to hold a handle
 /// to a Value (or subclass) across some operations which may move that value,
@@ -379,7 +379,7 @@ public:
   ValueTy &operator*() const { return *getValPtr(); }
 };
 
-/// \brief Value handle with callbacks on RAUW and destruction.
+/// Value handle with callbacks on RAUW and destruction.
 ///
 /// This is a value handle that allows subclasses to define callbacks that run
 /// when the underlying Value has RAUW called on it or is destroyed.  This
@@ -405,7 +405,7 @@ public:
     return getValPtr();
   }
 
-  /// \brief Callback for Value destruction.
+  /// Callback for Value destruction.
   ///
   /// Called when this->getValPtr() is destroyed, inside ~Value(), so you
   /// may call any non-virtual Value method on getValPtr(), but no subclass
@@ -418,7 +418,7 @@ public:
   /// Value that's being destroyed.
   virtual void deleted() { setValPtr(nullptr); }
 
-  /// \brief Callback for Value RAUW.
+  /// Callback for Value RAUW.
   ///
   /// Called when this->getValPtr()->replaceAllUsesWith(new_value) is called,
   /// _before_ any of the uses have actually been replaced.  If WeakTrackingVH
diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h
index 11d5823ee479..e7e33918a613 100644
--- a/include/llvm/IR/ValueMap.h
+++ b/include/llvm/IR/ValueMap.h
@@ -106,8 +106,12 @@ public:
       : Map(NumInitBuckets), Data() {}
   explicit ValueMap(const ExtraData &Data, unsigned NumInitBuckets = 64)
       : Map(NumInitBuckets), Data(Data) {}
+  // ValueMap can't be copied nor moved, beucase the callbacks store pointer
+  // to it.
   ValueMap(const ValueMap &) = delete;
+  ValueMap(ValueMap &&) = delete;
   ValueMap &operator=(const ValueMap &) = delete;
+  ValueMap &operator=(ValueMap &&) = delete;
 
   bool hasMD() const { return bool(MDMap); }
   MDMapT &MD() {
diff --git a/include/llvm/IR/ValueSymbolTable.h b/include/llvm/IR/ValueSymbolTable.h
index 26cbbfabfc0c..012e717c7470 100644
--- a/include/llvm/IR/ValueSymbolTable.h
+++ b/include/llvm/IR/ValueSymbolTable.h
@@ -48,13 +48,13 @@ class ValueSymbolTable {
 /// @name Types
 /// @{
 public:
-  /// @brief A mapping of names to values.
+  /// A mapping of names to values.
   using ValueMap = StringMap<Value*>;
 
-  /// @brief An iterator over a ValueMap.
+  /// An iterator over a ValueMap.
   using iterator = ValueMap::iterator;
 
-  /// @brief A const_iterator over a ValueMap.
+  /// A const_iterator over a ValueMap.
   using const_iterator = ValueMap::const_iterator;
 
 /// @}
@@ -71,35 +71,35 @@ public:
   /// This method finds the value with the given \p Name in the
   /// the symbol table.
   /// @returns the value associated with the \p Name
-  /// @brief Lookup a named Value.
+  /// Lookup a named Value.
   Value *lookup(StringRef Name) const { return vmap.lookup(Name); }
 
   /// @returns true iff the symbol table is empty
-  /// @brief Determine if the symbol table is empty
+  /// Determine if the symbol table is empty
   inline bool empty() const { return vmap.empty(); }
 
-  /// @brief The number of name/type pairs is returned.
+  /// The number of name/type pairs is returned.
   inline unsigned size() const { return unsigned(vmap.size()); }
 
   /// This function can be used from the debugger to display the
   /// content of the symbol table while debugging.
-  /// @brief Print out symbol table on stderr
+  /// Print out symbol table on stderr
   void dump() const;
 
 /// @}
 /// @name Iteration
 /// @{
 
-  /// @brief Get an iterator that from the beginning of the symbol table.
+  /// Get an iterator that from the beginning of the symbol table.
   inline iterator begin() { return vmap.begin(); }
 
-  /// @brief Get a const_iterator that from the beginning of the symbol table.
+  /// Get a const_iterator that from the beginning of the symbol table.
   inline const_iterator begin() const { return vmap.begin(); }
 
-  /// @brief Get an iterator to the end of the symbol table.
+  /// Get an iterator to the end of the symbol table.
   inline iterator end() { return vmap.end(); }
 
-  /// @brief Get a const_iterator to the end of the symbol table.
+  /// Get a const_iterator to the end of the symbol table.
   inline const_iterator end() const { return vmap.end(); }
 
   /// @}
@@ -111,7 +111,7 @@ private:
   /// This method adds the provided value \p N to the symbol table.  The Value
   /// must have a name which is used to place the value in the symbol table.
   /// If the inserted name conflicts, this renames the value.
-  /// @brief Add a named value to the symbol table
+  /// Add a named value to the symbol table
   void reinsertValue(Value *V);
 
   /// createValueName - This method attempts to create a value name and insert
diff --git a/include/llvm/IR/Verifier.h b/include/llvm/IR/Verifier.h
index bc10f330bc8a..7255132e1e65 100644
--- a/include/llvm/IR/Verifier.h
+++ b/include/llvm/IR/Verifier.h
@@ -80,7 +80,7 @@ public:
   bool visitTBAAMetadata(Instruction &I, const MDNode *MD);
 };
 
-/// \brief Check a function for errors, useful for use when debugging a
+/// Check a function for errors, useful for use when debugging a
 /// pass.
 ///
 /// If there are no errors, the function returns false. If an error is found,
@@ -88,7 +88,7 @@ public:
 /// returned.
 bool verifyFunction(const Function &F, raw_ostream *OS = nullptr);
 
-/// \brief Check a module for errors.
+/// Check a module for errors.
 ///
 /// If there are no errors, the function returns false. If an error is
 /// found, a message describing the error is written to OS (if
@@ -124,7 +124,7 @@ public:
 /// "recovered" from by stripping the debug info.
 bool verifyModule(bool &BrokenDebugInfo, const Module &M, raw_ostream *OS);
 
-/// \brief Create a verifier pass.
+/// Create a verifier pass.
 ///
 /// Check a module or function for validity. This is essentially a pass wrapped
 /// around the above verifyFunction and verifyModule routines and