63 files changed, 3199 insertions, 2415 deletions
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index b791ba09adaf..c4b94ede4f55 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -22,7 +22,7 @@
 #include "llvm/IR/Instructions.h"
 
 namespace llvm {
-class GlobalVariable;
+class GlobalValue;
 class TargetLoweringBase;
 class TargetLowering;
 class TargetMachine;
@@ -31,10 +31,21 @@ class SDValue;
 class SelectionDAG;
 struct EVT;
 
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
-/// of insertvalue or extractvalue indices that identify a member, return
-/// the linearized index of the start of the member.
+/// \brief Compute the linearized index of a member in a nested
+/// aggregate/struct/array.
 ///
+/// Given an LLVM IR aggregate type and a sequence of insertvalue or
+/// extractvalue indices that identify a member, return the linearized index of
+/// the start of the member, i.e the number of element in memory before the
+/// seeked one. This is disconnected from the number of bytes.
+///
+/// \param Ty is the type indexed by \p Indices.
+/// \param Indices is an optional pointer in the indices list to the current
+/// index.
+/// \param IndicesEnd is the end of the indices list.
+/// \param CurIndex is the current index in the recursion.
+///
+/// \returns \p CurIndex plus the linear index in \p Ty  the indices list.
 unsigned ComputeLinearIndex(Type *Ty,
                             const unsigned *Indices,
                             const unsigned *IndicesEnd,
@@ -59,7 +70,7 @@ void ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
                      uint64_t StartingOffset = 0);
 
 /// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-GlobalVariable *ExtractTypeInfo(Value *V);
+GlobalValue *ExtractTypeInfo(Value *V);
 
 /// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
 /// processed uses a memory 'm' constraint.
@@ -97,6 +108,13 @@ bool returnTypeIsEligibleForTailCall(const Function *F,
                                      const ReturnInst *Ret,
                                      const TargetLoweringBase &TLI);
 
+// True if GV can be left out of the object symbol table. This is the case
+// for linkonce_odr values whose address is not significant. While legal, it is
+// not normally profitable to omit them from the .o symbol table. Using this
+// analysis makes sense when the information can be passed down to the linker
+// or we are in LTO.
+bool canBeOmittedFromSymbolTable(const GlobalValue *GV);
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index e1c9a14c9009..e3ce57ad1850 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -44,6 +44,7 @@ class MachineModuleInfo;
 class MCAsmInfo;
 class MCCFIInstruction;
 class MCContext;
+class MCExpr;
 class MCInst;
 class MCInstrInfo;
 class MCSection;
@@ -132,6 +133,7 @@ public:
   virtual ~AsmPrinter();
 
   DwarfDebug *getDwarfDebug() { return DD; }
+  DwarfDebug *getDwarfDebug() const { return DD; }
 
   /// Return true if assembly output should contain comments.
   ///
@@ -203,6 +205,8 @@ public:
 
   void emitCFIInstruction(const MachineInstr &MI);
 
+  void emitFrameAlloc(const MachineInstr &MI);
+
   enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
   CFIMoveType needsCFIMoves();
 
@@ -238,6 +242,9 @@ public:
   /// alignment (if present) and a comment describing it if appropriate.
   void EmitBasicBlockStart(const MachineBasicBlock &MBB) const;
 
+  /// Lower the specified LLVM Constant to an MCExpr.
+  const MCExpr *lowerConstant(const Constant *CV);
+
   /// \brief Print a general LLVM constant to the .s file.
   void EmitGlobalConstant(const Constant *CV);
 
@@ -264,6 +271,9 @@ public:
   /// function.
   virtual void EmitFunctionBodyEnd() {}
 
+  /// Targets can override this to emit stuff at the end of a basic block.
+  virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB) {}
+
   /// Targets should implement this to emit instructions.
   virtual void EmitInstruction(const MachineInstr *) {
     llvm_unreachable("EmitInstruction not implemented");
@@ -346,12 +356,6 @@ public:
   void EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
                            unsigned Size) const;
 
-  /// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the
-  /// directive is specified by Size and Hi/Lo specify the labels.  This
-  /// implicitly uses .set if it is available.
-  void EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
-                                 const MCSymbol *Lo, unsigned Size) const;
-
   /// Emit something like ".long Label+Offset" where the size in bytes of the
   /// directive is specified by Size and Label specifies the label.  This
   /// implicitly uses .set if it is available.
@@ -402,6 +406,13 @@ public:
   /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
   virtual unsigned getISAEncoding() { return 0; }
 
+  /// Emit a dwarf register operation for describing
+  /// - a small value occupying only part of a register or
+  /// - a register representing only part of a value.
+  void EmitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits,
+                        unsigned OffsetInBits = 0) const;
+
+
   /// \brief Emit a partial DWARF register operation.
   /// \param MLoc             the register
   /// \param PieceSize        size and
@@ -418,7 +429,7 @@ public:
                            unsigned PieceSize = 0,
                            unsigned PieceOffset = 0) const;
 
-  /// Emit dwarf register operation.
+  /// EmitDwarfRegOp - Emit a dwarf register operation.
   /// \param Indirect   whether this is a register-indirect address
   virtual void EmitDwarfRegOp(ByteStreamer &BS, const MachineLocation &MLoc,
                               bool Indirect) const;
@@ -461,6 +472,10 @@ public:
                                      unsigned AsmVariant, const char *ExtraCode,
                                      raw_ostream &OS);
 
+  /// Let the target do anything it needs to do before emitting inlineasm.
+  /// \p StartInfo - the subtarget info before parsing inline asm
+  virtual void emitInlineAsmStart(const MCSubtargetInfo &StartInfo) const;
+
   /// Let the target do anything it needs to do after emitting inlineasm.
   /// This callback can be used restore the original mode in case the
   /// inlineasm contains directives to switch modes.
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
index 0d79b1d41bdb..91fb0a9d7e77 100644
--- a/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -30,8 +30,10 @@ namespace llvm {
   /// @param UseDefFreq Expected number of executed use and def instructions
   ///                   per function call. Derived from block frequencies.
   /// @param Size       Size of live interval as returnexd by getSize()
+  /// @param NumInstr   Number of instructions using this live interval
   ///
-  static inline float normalizeSpillWeight(float UseDefFreq, unsigned Size) {
+  static inline float normalizeSpillWeight(float UseDefFreq, unsigned Size,
+                                           unsigned NumInstr) {
     // The constant 25 instructions is added to avoid depending too much on
     // accidental SlotIndex gaps for small intervals. The effect is that small
     // intervals have a spill weight that is mostly proportional to the number
@@ -44,7 +46,7 @@ namespace llvm {
   /// spill weight and allocation hint.
   class VirtRegAuxInfo {
   public:
-    typedef float (*NormalizingFn)(float, unsigned);
+    typedef float (*NormalizingFn)(float, unsigned, unsigned);
 
   private:
     MachineFunction &MF;
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index abe00a167fd6..dd7703b1dbf7 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -35,18 +35,18 @@ public:
     SExt,      // The value is sign extended in the location.
     ZExt,      // The value is zero extended in the location.
     AExt,      // The value is extended with undefined upper bits.
-    BCvt,      // The value is bit-converted in the location.
-    VExt,      // The value is vector-widened in the location.
-               // FIXME: Not implemented yet. Code that uses AExt to mean
-               // vector-widen should be fixed to use VExt instead.
-    FPExt,     // The floating-point value is fp-extended in the location.
-    Indirect,  // The location contains pointer to the value.
     SExtUpper, // The value is in the upper bits of the location and should be
                // sign extended when retrieved.
     ZExtUpper, // The value is in the upper bits of the location and should be
                // zero extended when retrieved.
-    AExtUpper  // The value is in the upper bits of the location and should be
+    AExtUpper, // The value is in the upper bits of the location and should be
                // extended with undefined upper bits when retrieved.
+    BCvt,      // The value is bit-converted in the location.
+    VExt,      // The value is vector-widened in the location.
+               // FIXME: Not implemented yet. Code that uses AExt to mean
+               // vector-widen should be fixed to use VExt instead.
+    FPExt,     // The floating-point value is fp-extended in the location.
+    Indirect   // The location contains pointer to the value.
     // TODO: a subset of the value is in the location.
   };
 
@@ -158,6 +158,16 @@ public:
   }
 };
 
+/// Describes a register that needs to be forwarded from the prologue to a
+/// musttail call.
+struct ForwardedRegister {
+  ForwardedRegister(unsigned VReg, MCPhysReg PReg, MVT VT)
+      : VReg(VReg), PReg(PReg), VT(VT) {}
+  unsigned VReg;
+  MCPhysReg PReg;
+  MVT VT;
+};
+
 /// CCAssignFn - This function assigns a location for Val, updating State to
 /// reflect the change.  It returns 'true' if it failed to handle Val.
 typedef bool CCAssignFn(unsigned ValNo, MVT ValVT,
@@ -184,7 +194,6 @@ private:
   CallingConv::ID CallingConv;
   bool IsVarArg;
   MachineFunction &MF;
-  const TargetMachine &TM;
   const TargetRegisterInfo &TRI;
   SmallVectorImpl<CCValAssign> &Locs;
   LLVMContext &Context;
@@ -248,15 +257,13 @@ protected:
 
 public:
   CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
-          const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
-          LLVMContext &C);
+          SmallVectorImpl<CCValAssign> &locs, LLVMContext &C);
 
   void addLoc(const CCValAssign &V) {
     Locs.push_back(V);
   }
 
   LLVMContext &getContext() const { return Context; }
-  const TargetMachine &getTarget() const { return TM; }
   MachineFunction &getMachineFunction() const { return MF; }
   CallingConv::ID getCallingConv() const { return CallingConv; }
   bool isVarArg() const { return IsVarArg; }
@@ -348,8 +355,12 @@ public:
   /// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive
   /// registers. If this is not possible, return zero. Otherwise, return the first
   /// register of the block that were allocated, marking the entire block as allocated.
-  unsigned AllocateRegBlock(const uint16_t *Regs, unsigned NumRegs, unsigned RegsRequired) {
-    for (unsigned StartIdx = 0; StartIdx <= NumRegs - RegsRequired; ++StartIdx) {
+  unsigned AllocateRegBlock(ArrayRef<uint16_t> Regs, unsigned RegsRequired) {
+    if (RegsRequired > Regs.size())
+      return 0;
+
+    for (unsigned StartIdx = 0; StartIdx <= Regs.size() - RegsRequired;
+         ++StartIdx) {
       bool BlockAvailable = true;
       // Check for already-allocated regs in this block
       for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) {
@@ -387,8 +398,8 @@ public:
   /// AllocateStack - Allocate a chunk of stack space with the specified size
   /// and alignment.
   unsigned AllocateStack(unsigned Size, unsigned Align) {
-    assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
-    StackOffset = ((StackOffset + Align-1) & ~(Align-1));
+    assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2.
+    StackOffset = ((StackOffset + Align - 1) & ~(Align - 1));
     unsigned Result = StackOffset;
     StackOffset += Size;
     MF.getFrameInfo()->ensureMaxAlignment(Align);
@@ -469,6 +480,19 @@ public:
     return PendingLocs;
   }
 
+  /// Compute the remaining unused register parameters that would be used for
+  /// the given value type. This is useful when varargs are passed in the
+  /// registers that normal prototyped parameters would be passed in, or for
+  /// implementing perfect forwarding.
+  void getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, MVT VT,
+                                   CCAssignFn Fn);
+
+  /// Compute the set of registers that need to be preserved and forwarded to
+  /// any musttail calls.
+  void analyzeMustTailForwardedRegisters(
+      SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes,
+      CCAssignFn Fn);
+
 private:
   /// MarkAllocated - Mark a register and all of its aliases as allocated.
   void MarkAllocated(unsigned Reg);
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
index 449d93418a4c..973c5954f9ad 100644
--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -54,6 +54,16 @@ RelocModel("relocation-model",
                       "Relocatable external references, non-relocatable code"),
               clEnumValEnd));
 
+cl::opt<ThreadModel::Model>
+TMModel("thread-model",
+        cl::desc("Choose threading model"),
+        cl::init(ThreadModel::POSIX),
+        cl::values(clEnumValN(ThreadModel::POSIX, "posix",
+                              "POSIX thread model"),
+                   clEnumValN(ThreadModel::Single, "single",
+                              "Single thread model"),
+                   clEnumValEnd));
+
 cl::opt<llvm::CodeModel::Model>
 CMModel("code-model",
         cl::desc("Choose code model"),
@@ -83,11 +93,6 @@ FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
              clEnumValEnd));
 
 cl::opt<bool>
-DisableRedZone("disable-red-zone",
-               cl::desc("Do not emit code that uses the red zone."),
-               cl::init(false));
-
-cl::opt<bool>
 EnableFPMAD("enable-fp-mad",
             cl::desc("Enable less precise MAD instructions to be generated"),
             cl::init(false));
@@ -180,8 +185,8 @@ EnablePIE("enable-pie",
           cl::init(false));
 
 cl::opt<bool>
-UseInitArray("use-init-array",
-             cl::desc("Use .init_array instead of .ctors."),
+UseCtors("use-ctors",
+             cl::desc("Use .ctors instead of .init_array."),
              cl::init(false));
 
 cl::opt<std::string> StopAfter("stop-after",
@@ -217,6 +222,44 @@ JTableType("jump-table-type",
                          "Create one table per unique function type."),
               clEnumValEnd));
 
+cl::opt<bool>
+FCFI("fcfi",
+     cl::desc("Apply forward-edge control-flow integrity"),
+     cl::init(false));
+
+cl::opt<llvm::CFIntegrity>
+CFIType("cfi-type",
+        cl::desc("Choose the type of Control-Flow Integrity check to add"),
+        cl::init(CFIntegrity::Sub),
+        cl::values(
+            clEnumValN(CFIntegrity::Sub, "sub",
+                       "Subtract the pointer from the table base, then mask."),
+            clEnumValN(CFIntegrity::Ror, "ror",
+                       "Use rotate to check the offset from a table base."),
+            clEnumValN(CFIntegrity::Add, "add",
+                       "Mask out the high bits and add to an aligned base."),
+            clEnumValEnd));
+
+cl::opt<bool>
+CFIEnforcing("cfi-enforcing",
+             cl::desc("Enforce CFI or pass the violation to a function."),
+             cl::init(false));
+
+// Note that this option is linked to the cfi-enforcing option above: if
+// cfi-enforcing is set, then the cfi-func-name option is entirely ignored. If
+// cfi-enforcing is false and no cfi-func-name is set, then a default function
+// will be generated that ignores all CFI violations. The expected signature for
+// functions called with CFI violations is
+//
+// void (i8*, i8*)
+//
+// The first pointer is a C string containing the name of the function in which
+// the violation occurs, and the second pointer is the pointer that violated
+// CFI.
+cl::opt<std::string>
+CFIFuncName("cfi-func-name", cl::desc("The name of the CFI function to call"),
+            cl::init(""));
+
 // Common utility function tightly tied to the options listed here. Initializes
 // a TargetOptions object with CodeGen flags and returns it.
 static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
@@ -238,12 +281,18 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
   Options.StackAlignmentOverride = OverrideStackAlignment;
   Options.TrapFuncName = TrapFuncName;
   Options.PositionIndependentExecutable = EnablePIE;
-  Options.UseInitArray = UseInitArray;
+  Options.UseInitArray = !UseCtors;
   Options.DataSections = DataSections;
   Options.FunctionSections = FunctionSections;
 
   Options.MCOptions = InitMCTargetOptionsFromFlags();
   Options.JTType = JTableType;
+  Options.FCFI = FCFI;
+  Options.CFIType = CFIType;
+  Options.CFIEnforcing = CFIEnforcing;
+  Options.CFIFuncName = CFIFuncName;
+
+  Options.ThreadModel = TMModel;
 
   return Options;
 }
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index 9d25fd377b7e..f9cdc2a469ff 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -91,7 +91,6 @@ public:
 // API call is made to prune the dependence.
 class VLIWPacketizerList {
 protected:
-  const TargetMachine &TM;
   const MachineFunction &MF;
   const TargetInstrInfo *TII;
 
@@ -107,9 +106,7 @@ protected:
   std::map<MachineInstr*, SUnit*> MIToSUnit;
 
 public:
-  VLIWPacketizerList(
-    MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
-    bool IsPostRA);
+  VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, bool IsPostRA);
 
   virtual ~VLIWPacketizerList();
 
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
new file mode 100644
index 000000000000..e310aef3dcbb
--- /dev/null
+++ b/include/llvm/CodeGen/DIE.h
@@ -0,0 +1,587 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Dwarf.h"
+#include <vector>
+
+namespace llvm {
+class AsmPrinter;
+class MCExpr;
+class MCSymbol;
+class raw_ostream;
+class DwarfTypeUnit;
+
+//===--------------------------------------------------------------------===//
+/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
+/// Dwarf abbreviation.
+class DIEAbbrevData {
+  /// Attribute - Dwarf attribute code.
+  ///
+  dwarf::Attribute Attribute;
+
+  /// Form - Dwarf form code.
+  ///
+  dwarf::Form Form;
+
+public:
+  DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {}
+
+  // Accessors.
+  dwarf::Attribute getAttribute() const { return Attribute; }
+  dwarf::Form getForm() const { return Form; }
+
+  /// Profile - Used to gather unique data for the abbreviation folding set.
+  ///
+  void Profile(FoldingSetNodeID &ID) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+/// information object.
+class DIEAbbrev : public FoldingSetNode {
+  /// Unique number for node.
+  ///
+  unsigned Number;
+
+  /// Tag - Dwarf tag code.
+  ///
+  dwarf::Tag Tag;
+
+  /// Children - Whether or not this node has children.
+  ///
+  // This cheats a bit in all of the uses since the values in the standard
+  // are 0 and 1 for no children and children respectively.
+  bool Children;
+
+  /// Data - Raw data bytes for abbreviation.
+  ///
+  SmallVector<DIEAbbrevData, 12> Data;
+
+public:
+  DIEAbbrev(dwarf::Tag T, bool C) : Tag(T), Children(C), Data() {}
+
+  // Accessors.
+  dwarf::Tag getTag() const { return Tag; }
+  unsigned getNumber() const { return Number; }
+  bool hasChildren() const { return Children; }
+  const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
+  void setChildrenFlag(bool hasChild) { Children = hasChild; }
+  void setNumber(unsigned N) { Number = N; }
+
+  /// AddAttribute - Adds another set of attribute information to the
+  /// abbreviation.
+  void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) {
+    Data.push_back(DIEAbbrevData(Attribute, Form));
+  }
+
+  /// Profile - Used to gather unique data for the abbreviation folding set.
+  ///
+  void Profile(FoldingSetNodeID &ID) const;
+
+  /// Emit - Print the abbreviation using the specified asm printer.
+  ///
+  void Emit(AsmPrinter *AP) const;
+
+#ifndef NDEBUG
+  void print(raw_ostream &O);
+  void dump();
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIE - A structured debug information entry.  Has an abbreviation which
+/// describes its organization.
+class DIEValue;
+
+class DIE {
+protected:
+  /// Offset - Offset in debug info section.
+  ///
+  unsigned Offset;
+
+  /// Size - Size of instance + children.
+  ///
+  unsigned Size;
+
+  /// Abbrev - Buffer for constructing abbreviation.
+  ///
+  DIEAbbrev Abbrev;
+
+  /// Children DIEs.
+  ///
+  // This can't be a vector<DIE> because pointer validity is requirent for the
+  // Parent pointer and DIEEntry.
+  // It can't be a list<DIE> because some clients need pointer validity before
+  // the object has been added to any child list
+  // (eg: DwarfUnit::constructVariableDIE). These aren't insurmountable, but may
+  // be more convoluted than beneficial.
+  std::vector<std::unique_ptr<DIE>> Children;
+
+  DIE *Parent;
+
+  /// Attribute values.
+  ///
+  SmallVector<DIEValue *, 12> Values;
+
+protected:
+  DIE()
+      : Offset(0), Size(0), Abbrev((dwarf::Tag)0, dwarf::DW_CHILDREN_no),
+        Parent(nullptr) {}
+
+public:
+  explicit DIE(dwarf::Tag Tag)
+      : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
+        Parent(nullptr) {}
+
+  // Accessors.
+  DIEAbbrev &getAbbrev() { return Abbrev; }
+  const DIEAbbrev &getAbbrev() const { return Abbrev; }
+  unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+  dwarf::Tag getTag() const { return Abbrev.getTag(); }
+  unsigned getOffset() const { return Offset; }
+  unsigned getSize() const { return Size; }
+  const std::vector<std::unique_ptr<DIE>> &getChildren() const {
+    return Children;
+  }
+  const SmallVectorImpl<DIEValue *> &getValues() const { return Values; }
+  DIE *getParent() const { return Parent; }
+  /// Climb up the parent chain to get the compile or type unit DIE this DIE
+  /// belongs to.
+  const DIE *getUnit() const;
+  /// Similar to getUnit, returns null when DIE is not added to an
+  /// owner yet.
+  const DIE *getUnitOrNull() const;
+  void setOffset(unsigned O) { Offset = O; }
+  void setSize(unsigned S) { Size = S; }
+
+  /// addValue - Add a value and attributes to a DIE.
+  ///
+  void addValue(dwarf::Attribute Attribute, dwarf::Form Form, DIEValue *Value) {
+    Abbrev.AddAttribute(Attribute, Form);
+    Values.push_back(Value);
+  }
+
+  /// addChild - Add a child to the DIE.
+  ///
+  void addChild(std::unique_ptr<DIE> Child) {
+    assert(!Child->getParent());
+    Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+    Child->Parent = this;
+    Children.push_back(std::move(Child));
+  }
+
+  /// findAttribute - Find a value in the DIE with the attribute given,
+  /// returns NULL if no such attribute exists.
+  DIEValue *findAttribute(dwarf::Attribute Attribute) const;
+
+#ifndef NDEBUG
+  void print(raw_ostream &O, unsigned IndentCount = 0) const;
+  void dump();
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEValue - A debug information entry value. Some of these roughly correlate
+/// to DWARF attribute classes.
+///
+class DIEValue {
+  virtual void anchor();
+
+public:
+  enum Type {
+    isInteger,
+    isString,
+    isExpr,
+    isLabel,
+    isDelta,
+    isEntry,
+    isTypeSignature,
+    isBlock,
+    isLoc,
+    isLocList,
+  };
+
+protected:
+  /// Ty - Type of data stored in the value.
+  ///
+  Type Ty;
+
+  explicit DIEValue(Type T) : Ty(T) {}
+  virtual ~DIEValue() {}
+
+public:
+  // Accessors
+  Type getType() const { return Ty; }
+
+  /// EmitValue - Emit value via the Dwarf writer.
+  ///
+  virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0;
+
+  /// SizeOf - Return the size of a value in bytes.
+  ///
+  virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0;
+
+#ifndef NDEBUG
+  virtual void print(raw_ostream &O) const = 0;
+  void dump() const;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEInteger - An integer value DIE.
+///
+class DIEInteger : public DIEValue {
+  uint64_t Integer;
+
+public:
+  explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+  /// BestForm - Choose the best form for integer.
+  ///
+  static dwarf::Form BestForm(bool IsSigned, uint64_t Int) {
+    if (IsSigned) {
+      const int64_t SignedInt = Int;
+      if ((char)Int == SignedInt)
+        return dwarf::DW_FORM_data1;
+      if ((short)Int == SignedInt)
+        return dwarf::DW_FORM_data2;
+      if ((int)Int == SignedInt)
+        return dwarf::DW_FORM_data4;
+    } else {
+      if ((unsigned char)Int == Int)
+        return dwarf::DW_FORM_data1;
+      if ((unsigned short)Int == Int)
+        return dwarf::DW_FORM_data2;
+      if ((unsigned int)Int == Int)
+        return dwarf::DW_FORM_data4;
+    }
+    return dwarf::DW_FORM_data8;
+  }
+
+  /// EmitValue - Emit integer of appropriate size.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  uint64_t getValue() const { return Integer; }
+
+  /// SizeOf - Determine size of integer value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEExpr - An expression DIE.
+//
+class DIEExpr : public DIEValue {
+  const MCExpr *Expr;
+
+public:
+  explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {}
+
+  /// EmitValue - Emit expression value.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// getValue - Get MCExpr.
+  ///
+  const MCExpr *getValue() const { return Expr; }
+
+  /// SizeOf - Determine size of expression value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isExpr; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIELabel - A label DIE.
+//
+class DIELabel : public DIEValue {
+  const MCSymbol *Label;
+
+public:
+  explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
+
+  /// EmitValue - Emit label value.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// getValue - Get MCSymbol.
+  ///
+  const MCSymbol *getValue() const { return Label; }
+
+  /// SizeOf - Determine size of label value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEDelta - A simple label difference DIE.
+///
+class DIEDelta : public DIEValue {
+  const MCSymbol *LabelHi;
+  const MCSymbol *LabelLo;
+
+public:
+  DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
+      : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+  /// EmitValue - Emit delta value.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of delta value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEString - A container for string values.
+///
+class DIEString : public DIEValue {
+  const DIEValue *Access;
+  StringRef Str;
+
+public:
+  DIEString(const DIEValue *Acc, StringRef S)
+      : DIEValue(isString), Access(Acc), Str(S) {}
+
+  /// getString - Grab the string out of the object.
+  StringRef getString() const { return Str; }
+
+  /// EmitValue - Emit delta value.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of delta value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *D) { return D->getType() == isString; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEEntry - A pointer to another debug information entry.  An instance of
+/// this class can also be used as a proxy for a debug information entry not
+/// yet defined (ie. types.)
+class DIEEntry : public DIEValue {
+  DIE &Entry;
+
+public:
+  explicit DIEEntry(DIE &E) : DIEValue(isEntry), Entry(E) {
+  }
+
+  DIE &getEntry() const { return Entry; }
+
+  /// EmitValue - Emit debug information entry offset.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of debug information entry in bytes.
+  ///
+   unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override {
+    return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP)
+                                           : sizeof(int32_t);
+  }
+
+  /// Returns size of a ref_addr entry.
+  static unsigned getRefAddrSize(AsmPrinter *AP);
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// \brief A signature reference to a type unit.
+class DIETypeSignature : public DIEValue {
+  const DwarfTypeUnit &Unit;
+
+public:
+  explicit DIETypeSignature(const DwarfTypeUnit &Unit)
+      : DIEValue(isTypeSignature), Unit(Unit) {}
+
+  /// \brief Emit type unit signature.
+  void EmitValue(AsmPrinter *Asm, dwarf::Form Form) const override;
+
+  /// Returns size of a ref_sig8 entry.
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override {
+    assert(Form == dwarf::DW_FORM_ref_sig8);
+    return 8;
+  }
+
+  // \brief Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) {
+    return E->getType() == isTypeSignature;
+  }
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+  void dump() const;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIELoc - Represents an expression location.
+//
+class DIELoc : public DIEValue, public DIE {
+  mutable unsigned Size; // Size in bytes excluding size header.
+public:
+  DIELoc() : DIEValue(isLoc), Size(0) {}
+
+  /// ComputeSize - Calculate the size of the location expression.
+  ///
+  unsigned ComputeSize(AsmPrinter *AP) const;
+
+  /// BestForm - Choose the best form for data.
+  ///
+  dwarf::Form BestForm(unsigned DwarfVersion) const {
+    if (DwarfVersion > 3)
+      return dwarf::DW_FORM_exprloc;
+    // Pre-DWARF4 location expressions were blocks and not exprloc.
+    if ((unsigned char)Size == Size)
+      return dwarf::DW_FORM_block1;
+    if ((unsigned short)Size == Size)
+      return dwarf::DW_FORM_block2;
+    if ((unsigned int)Size == Size)
+      return dwarf::DW_FORM_block4;
+    return dwarf::DW_FORM_block;
+  }
+
+  /// EmitValue - Emit location data.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of location data in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isLoc; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEBlock - Represents a block of values.
+//
+class DIEBlock : public DIEValue, public DIE {
+  mutable unsigned Size; // Size in bytes excluding size header.
+public:
+  DIEBlock() : DIEValue(isBlock), Size(0) {}
+
+  /// ComputeSize - Calculate the size of the location expression.
+  ///
+  unsigned ComputeSize(AsmPrinter *AP) const;
+
+  /// BestForm - Choose the best form for data.
+  ///
+  dwarf::Form BestForm() const {
+    if ((unsigned char)Size == Size)
+      return dwarf::DW_FORM_block1;
+    if ((unsigned short)Size == Size)
+      return dwarf::DW_FORM_block2;
+    if ((unsigned int)Size == Size)
+      return dwarf::DW_FORM_block4;
+    return dwarf::DW_FORM_block;
+  }
+
+  /// EmitValue - Emit location data.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of location data in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIELocList - Represents a pointer to a location list in the debug_loc
+/// section.
+//
+class DIELocList : public DIEValue {
+  // Index into the .debug_loc vector.
+  size_t Index;
+
+public:
+  DIELocList(size_t I) : DIEValue(isLocList), Index(I) {}
+
+  /// getValue - Grab the current index out.
+  size_t getValue() const { return Index; }
+
+  /// EmitValue - Emit location data.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of location data in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isLocList; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 0d1b1dc09560..1dca2ce1ab22 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -18,72 +18,52 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/IR/CallingConv.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
 
-class AllocaInst;
-class Constant;
-class ConstantFP;
-class CallInst;
-class DataLayout;
-class FunctionLoweringInfo;
-class Instruction;
-class IntrinsicInst;
-class LoadInst;
-class MVT;
-class MachineConstantPool;
-class MachineFrameInfo;
-class MachineFunction;
-class MachineInstr;
-class MachineRegisterInfo;
-class TargetInstrInfo;
-class TargetLibraryInfo;
-class TargetLowering;
-class TargetMachine;
-class TargetRegisterClass;
-class TargetRegisterInfo;
-class User;
-class Value;
-
-/// This is a fast-path instruction selection class that generates poor code and
-/// doesn't support illegal types or non-trivial lowering, but runs quickly.
+/// \brief This is a fast-path instruction selection class that generates poor
+/// code and doesn't support illegal types or non-trivial lowering, but runs
+/// quickly.
 class FastISel {
-  public:
+public:
   struct ArgListEntry {
     Value *Val;
     Type *Ty;
-    bool isSExt     : 1;
-    bool isZExt     : 1;
-    bool isInReg    : 1;
-    bool isSRet     : 1;
-    bool isNest     : 1;
-    bool isByVal    : 1;
-    bool isInAlloca : 1;
-    bool isReturned : 1;
+    bool IsSExt : 1;
+    bool IsZExt : 1;
+    bool IsInReg : 1;
+    bool IsSRet : 1;
+    bool IsNest : 1;
+    bool IsByVal : 1;
+    bool IsInAlloca : 1;
+    bool IsReturned : 1;
     uint16_t Alignment;
 
     ArgListEntry()
-      : Val(nullptr), Ty(nullptr), isSExt(false), isZExt(false), isInReg(false),
-        isSRet(false), isNest(false), isByVal(false), isInAlloca(false),
-        isReturned(false), Alignment(0) { }
+        : Val(nullptr), Ty(nullptr), IsSExt(false), IsZExt(false),
+          IsInReg(false), IsSRet(false), IsNest(false), IsByVal(false),
+          IsInAlloca(false), IsReturned(false), Alignment(0) {}
 
+    /// \brief Set CallLoweringInfo attribute flags based on a call instruction
+    /// and called function attributes.
     void setAttributes(ImmutableCallSite *CS, unsigned AttrIdx);
   };
   typedef std::vector<ArgListEntry> ArgListTy;
 
   struct CallLoweringInfo {
     Type *RetTy;
-    bool RetSExt           : 1;
-    bool RetZExt           : 1;
-    bool IsVarArg          : 1;
-    bool IsInReg           : 1;
-    bool DoesNotReturn     : 1;
+    bool RetSExt : 1;
+    bool RetZExt : 1;
+    bool IsVarArg : 1;
+    bool IsInReg : 1;
+    bool DoesNotReturn : 1;
     bool IsReturnValueUsed : 1;
 
-    // IsTailCall should be modified by implementations of
-    // FastLowerCall that perform tail call conversions.
+    // \brief IsTailCall Should be modified by implementations of FastLowerCall
+    // that perform tail call conversions.
     bool IsTailCall;
 
     unsigned NumFixedArgs;
@@ -96,6 +76,8 @@ class FastISel {
     unsigned ResultReg;
     unsigned NumResultRegs;
 
+    bool IsPatchPoint;
+
     SmallVector<Value *, 16> OutVals;
     SmallVector<ISD::ArgFlagsTy, 16> OutFlags;
     SmallVector<unsigned, 16> OutRegs;
@@ -103,12 +85,11 @@ class FastISel {
     SmallVector<unsigned, 4> InRegs;
 
     CallLoweringInfo()
-      : RetTy(nullptr), RetSExt(false), RetZExt(false), IsVarArg(false),
-        IsInReg(false), DoesNotReturn(false), IsReturnValueUsed(true),
-        IsTailCall(false), NumFixedArgs(-1), CallConv(CallingConv::C),
-        Callee(nullptr), SymName(nullptr), CS(nullptr), Call(nullptr),
-        ResultReg(0), NumResultRegs(0)
-    {}
+        : RetTy(nullptr), RetSExt(false), RetZExt(false), IsVarArg(false),
+          IsInReg(false), DoesNotReturn(false), IsReturnValueUsed(true),
+          IsTailCall(false), NumFixedArgs(-1), CallConv(CallingConv::C),
+          Callee(nullptr), SymName(nullptr), CS(nullptr), Call(nullptr),
+          ResultReg(0), NumResultRegs(0), IsPatchPoint(false) {}
 
     CallLoweringInfo &setCallee(Type *ResultTy, FunctionType *FuncTy,
                                 const Value *Target, ArgListTy &&ArgsList,
@@ -124,8 +105,8 @@ class FastISel {
       RetZExt = Call.paramHasAttr(0, Attribute::ZExt);
 
       CallConv = Call.getCallingConv();
-      NumFixedArgs = FuncTy->getNumParams();
       Args = std::move(ArgsList);
+      NumFixedArgs = FuncTy->getNumParams();
 
       CS = &Call;
 
@@ -148,8 +129,8 @@ class FastISel {
       RetZExt = Call.paramHasAttr(0, Attribute::ZExt);
 
       CallConv = Call.getCallingConv();
-      NumFixedArgs = (FixedArgs == ~0U) ? FuncTy->getNumParams() : FixedArgs;
       Args = std::move(ArgsList);
+      NumFixedArgs = (FixedArgs == ~0U) ? FuncTy->getNumParams() : FixedArgs;
 
       CS = &Call;
 
@@ -162,8 +143,19 @@ class FastISel {
       RetTy = ResultTy;
       Callee = Target;
       CallConv = CC;
+      Args = std::move(ArgsList);
       NumFixedArgs = (FixedArgs == ~0U) ? Args.size() : FixedArgs;
+      return *this;
+    }
+
+    CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultTy,
+                                const char *Target, ArgListTy &&ArgsList,
+                                unsigned FixedArgs = ~0U) {
+      RetTy = ResultTy;
+      SymName = Target;
+      CallConv = CC;
       Args = std::move(ArgsList);
+      NumFixedArgs = (FixedArgs == ~0U) ? Args.size() : FixedArgs;
       return *this;
     }
 
@@ -172,10 +164,13 @@ class FastISel {
       return *this;
     }
 
-    ArgListTy &getArgs() {
-      return Args;
+    CallLoweringInfo &setIsPatchPoint(bool Value = true) {
+      IsPatchPoint = Value;
+      return *this;
     }
 
+    ArgListTy &getArgs() { return Args; }
+
     void clearOuts() {
       OutVals.clear();
       OutFlags.clear();
@@ -202,61 +197,64 @@ protected:
   const TargetLowering &TLI;
   const TargetRegisterInfo &TRI;
   const TargetLibraryInfo *LibInfo;
+  bool SkipTargetIndependentISel;
 
-  /// The position of the last instruction for materializing constants for use
-  /// in the current block. It resets to EmitStartPt when it makes sense (for
-  /// example, it's usually profitable to avoid function calls between the
+  /// \brief The position of the last instruction for materializing constants
+  /// for use in the current block. It resets to EmitStartPt when it makes sense
+  /// (for example, it's usually profitable to avoid function calls between the
   /// definition and the use)
   MachineInstr *LastLocalValue;
 
-  /// The top most instruction in the current block that is allowed for emitting
-  /// local variables. LastLocalValue resets to EmitStartPt when it makes sense
-  /// (for example, on function calls)
+  /// \brief The top most instruction in the current block that is allowed for
+  /// emitting local variables. LastLocalValue resets to EmitStartPt when it
+  /// makes sense (for example, on function calls)
   MachineInstr *EmitStartPt;
 
 public:
-  /// Return the position of the last instruction emitted for materializing
-  /// constants for use in the current block.
+  /// \brief Return the position of the last instruction emitted for
+  /// materializing constants for use in the current block.
   MachineInstr *getLastLocalValue() { return LastLocalValue; }
 
-  /// Update the position of the last instruction emitted for materializing
-  /// constants for use in the current block.
+  /// \brief Update the position of the last instruction emitted for
+  /// materializing constants for use in the current block.
   void setLastLocalValue(MachineInstr *I) {
     EmitStartPt = I;
     LastLocalValue = I;
   }
 
-  /// Set the current block to which generated machine instructions will be
-  /// appended, and clear the local CSE map.
+  /// \brief Set the current block to which generated machine instructions will
+  /// be appended, and clear the local CSE map.
   void startNewBlock();
 
-  /// Return current debug location information.
+  /// \brief Return current debug location information.
   DebugLoc getCurDebugLoc() const { return DbgLoc; }
-  
-  /// Do "fast" instruction selection for function arguments and append machine
-  /// instructions to the current block. Return true if it is successful.
-  bool LowerArguments();
 
-  /// Do "fast" instruction selection for the given LLVM IR instruction, and
-  /// append generated machine instructions to the current block. Return true if
-  /// selection was successful.
-  bool SelectInstruction(const Instruction *I);
+  /// \brief Do "fast" instruction selection for function arguments and append
+  /// the machine instructions to the current block. Returns true when
+  /// successful.
+  bool lowerArguments();
 
-  /// Do "fast" instruction selection for the given LLVM IR operator
+  /// \brief Do "fast" instruction selection for the given LLVM IR instruction
+  /// and append the generated machine instructions to the current block.
+  /// Returns true if selection was successful.
+  bool selectInstruction(const Instruction *I);
+
+  /// \brief Do "fast" instruction selection for the given LLVM IR operator
   /// (Instruction or ConstantExpr), and append generated machine instructions
   /// to the current block. Return true if selection was successful.
-  bool SelectOperator(const User *I, unsigned Opcode);
+  bool selectOperator(const User *I, unsigned Opcode);
 
-  /// Create a virtual register and arrange for it to be assigned the value for
-  /// the given LLVM value.
+  /// \brief Create a virtual register and arrange for it to be assigned the
+  /// value for the given LLVM value.
   unsigned getRegForValue(const Value *V);
 
-  /// Look up the value to see if its value is already cached in a register. It
-  /// may be defined by instructions across blocks or defined locally.
+  /// \brief Look up the value to see if its value is already cached in a
+  /// register. It may be defined by instructions across blocks or defined
+  /// locally.
   unsigned lookUpRegForValue(const Value *V);
 
-  /// This is a wrapper around getRegForValue that also takes care of truncating
-  /// or sign-extending the given getelementptr index value.
+  /// \brief This is a wrapper around getRegForValue that also takes care of
+  /// truncating or sign-extending the given getelementptr index value.
   std::pair<unsigned, bool> getRegForGEPIndex(const Value *V);
 
   /// \brief We're checking to see if we can fold \p LI into \p FoldInst. Note
@@ -284,11 +282,11 @@ public:
     return false;
   }
 
-  /// Reset InsertPt to prepare for inserting instructions into the current
-  /// block.
+  /// \brief Reset InsertPt to prepare for inserting instructions into the
+  /// current block.
   void recomputeInsertPt();
 
-  /// Remove all dead instructions between the I and E.
+  /// \brief Remove all dead instructions between the I and E.
   void removeDeadCode(MachineBasicBlock::iterator I,
                       MachineBasicBlock::iterator E);
 
@@ -297,221 +295,195 @@ public:
     DebugLoc DL;
   };
 
-  /// Prepare InsertPt to begin inserting instructions into the local value area
-  /// and return the old insert position.
+  /// \brief Prepare InsertPt to begin inserting instructions into the local
+  /// value area and return the old insert position.
   SavePoint enterLocalValueArea();
 
-  /// Reset InsertPt to the given old insert position.
+  /// \brief Reset InsertPt to the given old insert position.
   void leaveLocalValueArea(SavePoint Old);
 
   virtual ~FastISel();
 
 protected:
-  explicit FastISel(FunctionLoweringInfo &funcInfo,
-                    const TargetLibraryInfo *libInfo);
-
-  /// This method is called by target-independent code when the normal FastISel
-  /// process fails to select an instruction.  This gives targets a chance to
-  /// emit code for anything that doesn't fit into FastISel's framework. It
-  /// returns true if it was successful.
-  virtual bool TargetSelectInstruction(const Instruction *I) = 0;
-  
-  /// This method is called by target-independent code to do target specific
-  /// argument lowering. It returns true if it was successful.
-  virtual bool FastLowerArguments();
-
-  /// \brief This method is called by target-independent code to do target
+  explicit FastISel(FunctionLoweringInfo &FuncInfo,
+                    const TargetLibraryInfo *LibInfo,
+                    bool SkipTargetIndependentISel = false);
+
+  /// \brief This method is called by target-independent code when the normal
+  /// FastISel process fails to select an instruction. This gives targets a
+  /// chance to emit code for anything that doesn't fit into FastISel's
+  /// framework. It returns true if it was successful.
+  virtual bool fastSelectInstruction(const Instruction *I) = 0;
+
+  /// \brief This method is called by target-independent code to do target-
+  /// specific argument lowering. It returns true if it was successful.
+  virtual bool fastLowerArguments();
+
+  /// \brief This method is called by target-independent code to do target-
   /// specific call lowering. It returns true if it was successful.
-  virtual bool FastLowerCall(CallLoweringInfo &CLI);
+  virtual bool fastLowerCall(CallLoweringInfo &CLI);
 
-  /// \brief This method is called by target-independent code to do target
+  /// \brief This method is called by target-independent code to do target-
   /// specific intrinsic lowering. It returns true if it was successful.
-  virtual bool FastLowerIntrinsicCall(const IntrinsicInst *II);
+  virtual bool fastLowerIntrinsicCall(const IntrinsicInst *II);
 
-  /// This method is called by target-independent code to request that an
+  /// \brief This method is called by target-independent code to request that an
   /// instruction with the given type and opcode be emitted.
-  virtual unsigned FastEmit_(MVT VT,
-                             MVT RetVT,
-                             unsigned Opcode);
+  virtual unsigned fastEmit_(MVT VT, MVT RetVT, unsigned Opcode);
 
-  /// This method is called by target-independent code to request that an
+  /// \brief This method is called by target-independent code to request that an
   /// instruction with the given type, opcode, and register operand be emitted.
-  virtual unsigned FastEmit_r(MVT VT,
-                              MVT RetVT,
-                              unsigned Opcode,
-                              unsigned Op0, bool Op0IsKill);
+  virtual unsigned fastEmit_r(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0,
+                              bool Op0IsKill);
 
-  /// This method is called by target-independent code to request that an
+  /// \brief This method is called by target-independent code to request that an
   /// instruction with the given type, opcode, and register operands be emitted.
-  virtual unsigned FastEmit_rr(MVT VT,
-                               MVT RetVT,
-                               unsigned Opcode,
-                               unsigned Op0, bool Op0IsKill,
-                               unsigned Op1, bool Op1IsKill);
+  virtual unsigned fastEmit_rr(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0,
+                               bool Op0IsKill, unsigned Op1, bool Op1IsKill);
 
-  /// This method is called by target-independent code to request that an
+  /// \brief This method is called by target-independent code to request that an
   /// instruction with the given type, opcode, and register and immediate
-  /// operands be emitted.
-  virtual unsigned FastEmit_ri(MVT VT,
-                               MVT RetVT,
-                               unsigned Opcode,
-                               unsigned Op0, bool Op0IsKill,
-                               uint64_t Imm);
+  // operands be emitted.
+  virtual unsigned fastEmit_ri(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0,
+                               bool Op0IsKill, uint64_t Imm);
 
-  /// This method is called by target-independent code to request that an
+  /// \brief This method is called by target-independent code to request that an
   /// instruction with the given type, opcode, and register and floating-point
   /// immediate operands be emitted.
-  virtual unsigned FastEmit_rf(MVT VT,
-                               MVT RetVT,
-                               unsigned Opcode,
-                               unsigned Op0, bool Op0IsKill,
-                               const ConstantFP *FPImm);
+  virtual unsigned fastEmit_rf(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0,
+                               bool Op0IsKill, const ConstantFP *FPImm);
 
-  /// This method is called by target-independent code to request that an
+  /// \brief This method is called by target-independent code to request that an
   /// instruction with the given type, opcode, and register and immediate
   /// operands be emitted.
-  virtual unsigned FastEmit_rri(MVT VT,
-                                MVT RetVT,
-                                unsigned Opcode,
-                                unsigned Op0, bool Op0IsKill,
-                                unsigned Op1, bool Op1IsKill,
-                                uint64_t Imm);
-
-  /// \brief This method is a wrapper of FastEmit_ri.
-  /// 
+  virtual unsigned fastEmit_rri(MVT VT, MVT RetVT, unsigned Opcode,
+                                unsigned Op0, bool Op0IsKill, unsigned Op1,
+                                bool Op1IsKill, uint64_t Imm);
+
+  /// \brief This method is a wrapper of fastEmit_ri.
+  ///
   /// It first tries to emit an instruction with an immediate operand using
-  /// FastEmit_ri.  If that fails, it materializes the immediate into a register
-  /// and try FastEmit_rr instead.
-  unsigned FastEmit_ri_(MVT VT,
-                        unsigned Opcode,
-                        unsigned Op0, bool Op0IsKill,
+  /// fastEmit_ri.  If that fails, it materializes the immediate into a register
+  /// and try fastEmit_rr instead.
+  unsigned fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill,
                         uint64_t Imm, MVT ImmType);
 
-  /// This method is called by target-independent code to request that an
+  /// \brief This method is called by target-independent code to request that an
   /// instruction with the given type, opcode, and immediate operand be emitted.
-  virtual unsigned FastEmit_i(MVT VT,
-                              MVT RetVT,
-                              unsigned Opcode,
-                              uint64_t Imm);
+  virtual unsigned fastEmit_i(MVT VT, MVT RetVT, unsigned Opcode, uint64_t Imm);
 
-  /// This method is called by target-independent code to request that an
+  /// \brief This method is called by target-independent code to request that an
   /// instruction with the given type, opcode, and floating-point immediate
   /// operand be emitted.
-  virtual unsigned FastEmit_f(MVT VT,
-                              MVT RetVT,
-                              unsigned Opcode,
+  virtual unsigned fastEmit_f(MVT VT, MVT RetVT, unsigned Opcode,
                               const ConstantFP *FPImm);
 
-  /// Emit a MachineInstr with no operands and a result register in the given
-  /// register class.
-  unsigned FastEmitInst_(unsigned MachineInstOpcode,
+  /// \brief Emit a MachineInstr with no operands and a result register in the
+  /// given register class.
+  unsigned fastEmitInst_(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC);
 
-  /// Emit a MachineInstr with one register operand and a result register in the
-  /// given register class.
-  unsigned FastEmitInst_r(unsigned MachineInstOpcode,
-                          const TargetRegisterClass *RC,
-                          unsigned Op0, bool Op0IsKill);
-
-  /// Emit a MachineInstr with two register operands and a result register in
-  /// the given register class.
-  unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
-                           const TargetRegisterClass *RC,
-                           unsigned Op0, bool Op0IsKill,
-                           unsigned Op1, bool Op1IsKill);
-
-  /// Emit a MachineInstr with three register operands and a result register in
-  /// the given register class.
-  unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
-                           const TargetRegisterClass *RC,
-                           unsigned Op0, bool Op0IsKill,
-                           unsigned Op1, bool Op1IsKill,
-                           unsigned Op2, bool Op2IsKill);
-
-  /// Emit a MachineInstr with a register operand, an immediate, and a result
+  /// \brief Emit a MachineInstr with one register operand and a result register
+  /// in the given register class.
+  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
+                          const TargetRegisterClass *RC, unsigned Op0,
+                          bool Op0IsKill);
+
+  /// \brief Emit a MachineInstr with two register operands and a result
+  /// register in the given register class.
+  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
+                           const TargetRegisterClass *RC, unsigned Op0,
+                           bool Op0IsKill, unsigned Op1, bool Op1IsKill);
+
+  /// \brief Emit a MachineInstr with three register operands and a result
   /// register in the given register class.
-  unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
-                           const TargetRegisterClass *RC,
-                           unsigned Op0, bool Op0IsKill,
-                           uint64_t Imm);
-
-  /// Emit a MachineInstr with one register operand and two immediate operands.
-  unsigned FastEmitInst_rii(unsigned MachineInstOpcode,
-                           const TargetRegisterClass *RC,
-                           unsigned Op0, bool Op0IsKill,
-                           uint64_t Imm1, uint64_t Imm2);
-
-  /// Emit a MachineInstr with two register operands and a result register in
-  /// the given register class.
-  unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
-                           const TargetRegisterClass *RC,
-                           unsigned Op0, bool Op0IsKill,
-                           const ConstantFP *FPImm);
-
-  /// Emit a MachineInstr with two register operands, an immediate, and a result
+  unsigned fastEmitInst_rrr(unsigned MachineInstOpcode,
+                            const TargetRegisterClass *RC, unsigned Op0,
+                            bool Op0IsKill, unsigned Op1, bool Op1IsKill,
+                            unsigned Op2, bool Op2IsKill);
+
+  /// \brief Emit a MachineInstr with a register operand, an immediate, and a
+  /// result register in the given register class.
+  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
+                           const TargetRegisterClass *RC, unsigned Op0,
+                           bool Op0IsKill, uint64_t Imm);
+
+  /// \brief Emit a MachineInstr with one register operand and two immediate
+  /// operands.
+  unsigned fastEmitInst_rii(unsigned MachineInstOpcode,
+                            const TargetRegisterClass *RC, unsigned Op0,
+                            bool Op0IsKill, uint64_t Imm1, uint64_t Imm2);
+
+  /// \brief Emit a MachineInstr with two register operands and a result
   /// register in the given register class.
-  unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
-                            const TargetRegisterClass *RC,
-                            unsigned Op0, bool Op0IsKill,
-                            unsigned Op1, bool Op1IsKill,
+  unsigned fastEmitInst_rf(unsigned MachineInstOpcode,
+                           const TargetRegisterClass *RC, unsigned Op0,
+                           bool Op0IsKill, const ConstantFP *FPImm);
+
+  /// \brief Emit a MachineInstr with two register operands, an immediate, and a
+  /// result register in the given register class.
+  unsigned fastEmitInst_rri(unsigned MachineInstOpcode,
+                            const TargetRegisterClass *RC, unsigned Op0,
+                            bool Op0IsKill, unsigned Op1, bool Op1IsKill,
                             uint64_t Imm);
 
-  /// Emit a MachineInstr with two register operands, two immediates operands,
-  /// and a result register in the given register class.
-  unsigned FastEmitInst_rrii(unsigned MachineInstOpcode,
-                             const TargetRegisterClass *RC,
-                             unsigned Op0, bool Op0IsKill,
-                             unsigned Op1, bool Op1IsKill,
+  /// \brief Emit a MachineInstr with two register operands, two immediates
+  /// operands, and a result register in the given register class.
+  unsigned fastEmitInst_rrii(unsigned MachineInstOpcode,
+                             const TargetRegisterClass *RC, unsigned Op0,
+                             bool Op0IsKill, unsigned Op1, bool Op1IsKill,
                              uint64_t Imm1, uint64_t Imm2);
 
-  /// Emit a MachineInstr with a single immediate operand, and a result register
-  /// in the given register class.
-  unsigned FastEmitInst_i(unsigned MachineInstrOpcode,
-                          const TargetRegisterClass *RC,
-                          uint64_t Imm);
-
-  /// Emit a MachineInstr with a two immediate operands.
-  unsigned FastEmitInst_ii(unsigned MachineInstrOpcode,
-                          const TargetRegisterClass *RC,
-                          uint64_t Imm1, uint64_t Imm2);
-
-  /// Emit a MachineInstr for an extract_subreg from a specified index of a
-  /// superregister to a specified type.
-  unsigned FastEmitInst_extractsubreg(MVT RetVT,
-                                      unsigned Op0, bool Op0IsKill,
+  /// \brief Emit a MachineInstr with a single immediate operand, and a result
+  /// register in the given register class.
+  unsigned fastEmitInst_i(unsigned MachineInstrOpcode,
+                          const TargetRegisterClass *RC, uint64_t Imm);
+
+  /// \brief Emit a MachineInstr with a two immediate operands.
+  unsigned fastEmitInst_ii(unsigned MachineInstrOpcode,
+                           const TargetRegisterClass *RC, uint64_t Imm1,
+                           uint64_t Imm2);
+
+  /// \brief Emit a MachineInstr for an extract_subreg from a specified index of
+  /// a superregister to a specified type.
+  unsigned fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                       uint32_t Idx);
 
-  /// Emit MachineInstrs to compute the value of Op with all but the least
-  /// significant bit set to zero.
-  unsigned FastEmitZExtFromI1(MVT VT,
-                              unsigned Op0, bool Op0IsKill);
+  /// \brief Emit MachineInstrs to compute the value of Op with all but the
+  /// least significant bit set to zero.
+  unsigned fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill);
 
-  /// Emit an unconditional branch to the given block, unless it is the
+  /// \brief Emit an unconditional branch to the given block, unless it is the
   /// immediate (fall-through) successor, and update the CFG.
-  void FastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL);
+  void fastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL);
 
-  void UpdateValueMap(const Value* I, unsigned Reg, unsigned NumRegs = 1);
+  /// \brief Update the value map to include the new mapping for this
+  /// instruction, or insert an extra copy to get the result in a previous
+  /// determined register.
+  ///
+  /// NOTE: This is only necessary because we might select a block that uses a
+  /// value before we select the block that defines the value. It might be
+  /// possible to fix this by selecting blocks in reverse postorder.
+  void updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs = 1);
 
   unsigned createResultReg(const TargetRegisterClass *RC);
 
-  /// Try to constrain Op so that it is usable by argument OpNum of the provided
-  /// MCInstrDesc. If this fails, create a new virtual register in the correct
-  /// class and COPY the value there.
+  /// \brief Try to constrain Op so that it is usable by argument OpNum of the
+  /// provided MCInstrDesc. If this fails, create a new virtual register in the
+  /// correct class and COPY the value there.
   unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
                                     unsigned OpNum);
 
-  /// Emit a constant in a register using target-specific logic, such as
+  /// \brief Emit a constant in a register using target-specific logic, such as
   /// constant pool loads.
-  virtual unsigned TargetMaterializeConstant(const Constant* C) {
-    return 0;
-  }
+  virtual unsigned fastMaterializeConstant(const Constant *C) { return 0; }
 
-  /// Emit an alloca address in a register using target-specific logic.
-  virtual unsigned TargetMaterializeAlloca(const AllocaInst* C) {
-    return 0;
-  }
+  /// \brief Emit an alloca address in a register using target-specific logic.
+  virtual unsigned fastMaterializeAlloca(const AllocaInst *C) { return 0; }
 
-  virtual unsigned TargetMaterializeFloatZero(const ConstantFP* CF) {
+  /// \brief Emit the floating-point constant +0.0 in a register using target-
+  /// specific logic.
+  virtual unsigned fastMaterializeFloatZero(const ConstantFP *CF) {
     return 0;
   }
 
@@ -524,36 +496,46 @@ protected:
   /// - \c Add has a constant operand.
   bool canFoldAddIntoGEP(const User *GEP, const Value *Add);
 
-  /// Test whether the given value has exactly one use.
-  bool hasTrivialKill(const Value *V) const;
+  /// \brief Test whether the given value has exactly one use.
+  bool hasTrivialKill(const Value *V);
 
   /// \brief Create a machine mem operand from the given instruction.
   MachineMemOperand *createMachineMemOperandFor(const Instruction *I) const;
 
-  bool LowerCallTo(const CallInst *CI, const char *SymName, unsigned NumArgs);
-  bool LowerCallTo(CallLoweringInfo &CLI);
-
-private:
-  bool SelectBinaryOp(const User *I, unsigned ISDOpcode);
-
-  bool SelectFNeg(const User *I);
-
-  bool SelectGetElementPtr(const User *I);
+  CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) const;
 
-  bool SelectStackmap(const CallInst *I);
-  bool SelectPatchpoint(const CallInst *I);
-  bool LowerCall(const CallInst *I);
-  bool SelectCall(const User *Call);
-  bool SelectIntrinsicCall(const IntrinsicInst *II);
+  bool lowerCallTo(const CallInst *CI, const char *SymName, unsigned NumArgs);
+  bool lowerCallTo(CallLoweringInfo &CLI);
 
-  bool SelectBitCast(const User *I);
-
-  bool SelectCast(const User *I, unsigned Opcode);
+  bool isCommutativeIntrinsic(IntrinsicInst const *II) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::sadd_with_overflow:
+    case Intrinsic::uadd_with_overflow:
+    case Intrinsic::smul_with_overflow:
+    case Intrinsic::umul_with_overflow:
+      return true;
+    default:
+      return false;
+    }
+  }
 
-  bool SelectExtractValue(const User *I);
 
-  bool SelectInsertValue(const User *I);
+  bool lowerCall(const CallInst *I);
+  /// \brief Select and emit code for a binary operator instruction, which has
+  /// an opcode which directly corresponds to the given ISD opcode.
+  bool selectBinaryOp(const User *I, unsigned ISDOpcode);
+  bool selectFNeg(const User *I);
+  bool selectGetElementPtr(const User *I);
+  bool selectStackmap(const CallInst *I);
+  bool selectPatchpoint(const CallInst *I);
+  bool selectCall(const User *Call);
+  bool selectIntrinsicCall(const IntrinsicInst *II);
+  bool selectBitCast(const User *I);
+  bool selectCast(const User *I, unsigned Opcode);
+  bool selectExtractValue(const User *I);
+  bool selectInsertValue(const User *I);
 
+private:
   /// \brief Handle PHI nodes in successor blocks.
   ///
   /// Emit code to ensure constants are copied into registers when needed.
@@ -561,18 +543,27 @@ private:
   /// nodes as input.  We cannot just directly add them, because expansion might
   /// result in multiple MBB's for one BB.  As such, the start of the BB might
   /// correspond to a different MBB than the end.
-  bool HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+  bool handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+  /// \brief Helper for materializeRegForValue to materialize a constant in a
+  /// target-independent way.
+  unsigned materializeConstant(const Value *V, MVT VT);
 
-  /// Helper for getRegForVale. This function is called when the value isn't
-  /// already available in a register and must be materialized with new
+  /// \brief Helper for getRegForVale. This function is called when the value
+  /// isn't already available in a register and must be materialized with new
   /// instructions.
   unsigned materializeRegForValue(const Value *V, MVT VT);
 
-  /// Clears LocalValueMap and moves the area for the new local variables to the
-  /// beginning of the block. It helps to avoid spilling cached variables across
-  /// heavy instructions like calls.
+  /// \brief Clears LocalValueMap and moves the area for the new local variables
+  /// to the beginning of the block. It helps to avoid spilling cached variables
+  /// across heavy instructions like calls.
   void flushLocalValueMap();
 
+  /// \brief Insertion point before trying to select the current instruction.
+  MachineBasicBlock::iterator SavedInsertPt;
+
+  /// \brief Add a stackmap or patchpoint intrinsic call's live variable
+  /// operands to a stackmap or patchpoint machine instruction.
   bool addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
                            const CallInst *CI, unsigned StartIdx);
   bool lowerCallOperands(const CallInst *CI, unsigned ArgIdx, unsigned NumArgs,
@@ -580,6 +571,6 @@ private:
                          CallLoweringInfo &CLI);
 };
 
-}
+} // end namespace llvm
 
 #endif
diff --git a/include/llvm/CodeGen/ForwardControlFlowIntegrity.h b/include/llvm/CodeGen/ForwardControlFlowIntegrity.h
new file mode 100644
index 000000000000..ec8e2ef243b7
--- /dev/null
+++ b/include/llvm/CodeGen/ForwardControlFlowIntegrity.h
@@ -0,0 +1,122 @@
+//===-- ForwardControlFlowIntegrity.h: Forward-Edge CFI ---------*- C++ -*-===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments indirect calls with checks to ensure that these calls
+// pass through the appropriate jump-instruction table generated by
+// JumpInstrTables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_FORWARDCONTROLFLOWINTEGRITY_H
+#define LLVM_CODEGEN_FORWARDCONTROLFLOWINTEGRITY_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetOptions.h"
+#include <string>
+
+namespace llvm {
+
+class AnalysisUsage;
+class BasicBlock;
+class Constant;
+class Function;
+class Instruction;
+class Module;
+class Value;
+
+/// ForwardControlFlowIntegrity uses the information from JumpInstrTableInfo to
+/// prepend checks to indirect calls to make sure that these calls target valid
+/// locations.
+class ForwardControlFlowIntegrity : public ModulePass {
+public:
+  static char ID;
+
+  ForwardControlFlowIntegrity();
+  ForwardControlFlowIntegrity(JumpTable::JumpTableType JTT,
+                              CFIntegrity CFIType,
+                              bool CFIEnforcing, std::string CFIFuncName);
+  ~ForwardControlFlowIntegrity() override;
+
+  /// Runs the CFI pass on a given module. This works best if the module in
+  /// question is the result of link-time optimization (see lib/LTO).
+  bool runOnModule(Module &M) override;
+  const char *getPassName() const override {
+    return "Forward Control-Flow Integrity";
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+  typedef SmallVector<Instruction *, 64> CallSet;
+
+  /// A structure that is used to keep track of constant table information.
+  struct CFIConstants {
+    Constant *StartValue;
+    Constant *MaskValue;
+    Constant *Size;
+  };
+
+  /// A map from function type to the base of the table for this type and a mask
+  /// for the table
+  typedef DenseMap<FunctionType *, CFIConstants> CFITables;
+
+  CallSet IndirectCalls;
+
+  /// The type of jumptable implementation.
+  JumpTable::JumpTableType JTType;
+
+  /// The type of CFI check to add before each indirect call.
+  CFIntegrity CFIType;
+
+  /// A value that controls whether or not CFI violations cause a halt.
+  bool CFIEnforcing;
+
+  /// The name of the function to call in case of a CFI violation when
+  /// CFIEnforcing is false. There is a default function that ignores
+  /// violations.
+  std::string CFIFuncName;
+
+  /// The alignment of each entry in the table, from JumpInstrTableInfo. The
+  /// JumpInstrTableInfo class always makes this a power of two.
+  uint64_t ByteAlignment;
+
+  /// The base-2 logarithm of ByteAlignment, needed for some of the transforms
+  /// (like CFIntegrity::Ror)
+  unsigned LogByteAlignment;
+
+  /// Adds checks to each indirect call site to make sure that it is calling a
+  /// function in our jump table.
+  void updateIndirectCalls(Module &M, CFITables &CFIT);
+
+  /// Walks the instructions to find all the indirect calls.
+  void getIndirectCalls(Module &M);
+
+  /// Adds a function that handles violations in non-enforcing mode
+  /// (!CFIEnforcing). The default warning function simply returns, since the
+  /// exact details of how to handle CFI violations depend on the application.
+  void addWarningFunction(Module &M);
+
+  /// Rewrites a function pointer in a call/invoke instruction to force it into
+  /// a table.
+  void rewriteFunctionPointer(Module &M, Instruction *I, Value *FunPtr,
+                              Constant *JumpTableStart, Constant *JumpTableMask,
+                              Constant *JumpTableSize);
+
+  /// Inserts a check and a call to a warning function at a given instruction
+  /// that must be an indirect call.
+  void insertWarning(Module &M, BasicBlock *Block, Instruction *I,
+                     Value *FunPtr);
+};
+
+ModulePass *
+createForwardControlFlowIntegrityPass(JumpTable::JumpTableType JTT,
+                                      CFIntegrity CFIType,
+                                      bool CFIEnforcing, StringRef CFIFuncName);
+}
+
+#endif // LLVM_CODEGEN_FORWARDCONTROLFLOWINTEGRITY_H
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index 9636b51e303d..7c574df4ba41 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
@@ -50,10 +51,10 @@ class Value;
 /// function that is used when lowering a region of the function.
 ///
 class FunctionLoweringInfo {
-  const TargetMachine &TM;
 public:
   const Function *Fn;
   MachineFunction *MF;
+  const TargetLowering *TLI;
   MachineRegisterInfo *RegInfo;
   BranchProbabilityInfo *BPI;
   /// CanLowerReturn - true iff the function's return value can be lowered to
@@ -87,6 +88,12 @@ public:
   /// RegFixups - Registers which need to be replaced after isel is done.
   DenseMap<unsigned, unsigned> RegFixups;
 
+  /// StatepointStackSlots - A list of temporary stack slots (frame indices) 
+  /// used to spill values at a statepoint.  We store them here to enable
+  /// reuse of the same stack slots across different statepoints in different
+  /// basic blocks.
+  SmallVector<unsigned, 50> StatepointStackSlots;
+
   /// MBB - The current block.
   MachineBasicBlock *MBB;
 
@@ -106,6 +113,10 @@ public:
                     KnownZero(1, 0) {}
   };
 
+  /// Record the preferred extend type (ISD::SIGN_EXTEND or ISD::ZERO_EXTEND)
+  /// for a value.
+  DenseMap<const Value *, ISD::NodeType> PreferredExtendType;
+
   /// VisitedBBs - The set of basic blocks visited thus far by instruction
   /// selection.
   SmallPtrSet<const BasicBlock*, 4> VisitedBBs;
@@ -115,14 +126,13 @@ public:
   /// TODO: This isn't per-function state, it's per-basic-block state. But
   /// there's no other convenient place for it to live right now.
   std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+  unsigned OrigNumPHINodesToUpdate;
 
   /// If the current MBB is a landing pad, the exception pointer and exception
   /// selector registers are copied into these virtual registers by
   /// SelectionDAGISel::PrepareEHLandingPad().
   unsigned ExceptionPointerVirtReg, ExceptionSelectorVirtReg;
 
-  explicit FunctionLoweringInfo(const TargetMachine &TM) : TM(TM) {}
-
   /// set - Initialize this FunctionLoweringInfo with the given Function
   /// and its associated MachineFunction.
   ///
@@ -196,6 +206,9 @@ public:
       return;
 
     unsigned Reg = It->second;
+    if (Reg == 0)
+      return;
+
     LiveOutRegInfo.grow(Reg);
     LiveOutRegInfo[Reg].IsValid = false;
   }
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index ddcc823ecd9e..c7f1ab87fcb1 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -37,7 +37,6 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Pass.h"
-
 #include <memory>
 
 namespace llvm {
@@ -80,8 +79,8 @@ namespace llvm {
   };
 
 
-  /// GCFunctionInfo - Garbage collection metadata for a single function.
-  ///
+  /// Garbage collection metadata for a single function.  Currently, this
+  /// information only applies to GCStrategies which use GCRoot.
   class GCFunctionInfo {
   public:
     typedef std::vector<GCPoint>::iterator iterator;
@@ -160,21 +159,37 @@ namespace llvm {
     size_t live_size(const iterator &p) const { return roots_size(); }
   };
 
-
-  /// GCModuleInfo - Garbage collection metadata for a whole module.
-  ///
+  /// An analysis pass which caches information about the entire Module.
+  /// Records both the function level information used by GCRoots and a
+  /// cache of the 'active' gc strategy objects for the current Module.
   class GCModuleInfo : public ImmutablePass {
     typedef StringMap<GCStrategy*> strategy_map_type;
     typedef std::vector<std::unique_ptr<GCStrategy>> list_type;
-    typedef DenseMap<const Function*,GCFunctionInfo*> finfo_map_type;
 
     strategy_map_type StrategyMap;
     list_type StrategyList;
-    finfo_map_type FInfoMap;
 
     GCStrategy *getOrCreateStrategy(const Module *M, const std::string &Name);
 
   public:
+    /// List of per function info objects.  In theory, Each of these
+    /// may be associated with a different GC.
+    typedef std::vector<std::unique_ptr<GCFunctionInfo>> FuncInfoVec;
+
+    FuncInfoVec::iterator funcinfo_begin() { return Functions.begin(); }
+    FuncInfoVec::iterator funcinfo_end()   { return Functions.end();   }
+    
+
+  private:
+    /// Owning list of all GCFunctionInfos associated with this Module
+    FuncInfoVec Functions;
+
+    /// Non-owning map to bypass linear search when finding the GCFunctionInfo
+    /// associated with a particular Function.
+    typedef DenseMap<const Function*,GCFunctionInfo*> finfo_map_type;
+    finfo_map_type FInfoMap;
+  public:
+
     typedef list_type::const_iterator iterator;
 
     static char ID;
@@ -191,8 +206,9 @@ namespace llvm {
     iterator begin() const { return StrategyList.begin(); }
     iterator end()   const { return StrategyList.end();   }
 
-    /// get - Look up function metadata.
-    ///
+    /// get - Look up function metadata.  This is currently assumed
+    /// have the side effect of initializing the associated GCStrategy.  That
+    /// will soon change.
     GCFunctionInfo &getFunctionInfo(const Function &F);
   };
 
diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h
index 4a6b5ac19c36..25fafba93f8b 100644
--- a/include/llvm/CodeGen/GCMetadataPrinter.h
+++ b/include/llvm/CodeGen/GCMetadataPrinter.h
@@ -32,16 +32,11 @@ namespace llvm {
   /// defaults from Registry.
   typedef Registry<GCMetadataPrinter> GCMetadataPrinterRegistry;
 
-  /// GCMetadataPrinter - Emits GC metadata as assembly code.
-  ///
+  /// GCMetadataPrinter - Emits GC metadata as assembly code.  Instances are
+  /// created, managed, and owned by the AsmPrinter.
   class GCMetadataPrinter {
-  public:
-    typedef GCStrategy::list_type list_type;
-    typedef GCStrategy::iterator iterator;
-
   private:
     GCStrategy *S;
-
     friend class AsmPrinter;
 
   protected:
@@ -55,16 +50,15 @@ namespace llvm {
 
   public:
     GCStrategy &getStrategy() { return *S; }
-    const Module &getModule() const { return S->getModule(); }
-
-    /// begin/end - Iterate over the collected function metadata.
-    iterator begin() { return S->begin(); }
-    iterator end()   { return S->end();   }
-
-    /// beginAssembly/finishAssembly - Emit module metadata as assembly code.
-    virtual void beginAssembly(AsmPrinter &AP);
 
-    virtual void finishAssembly(AsmPrinter &AP);
+    /// Called before the assembly for the module is generated by
+    /// the AsmPrinter (but after target specific hooks.)
+    virtual void beginAssembly(Module &M, GCModuleInfo &Info,
+                               AsmPrinter &AP) {}
+    /// Called after the assembly for the module is generated by
+    /// the AsmPrinter (but before target specific hooks)
+    virtual void finishAssembly(Module &M, GCModuleInfo &Info,
+                                AsmPrinter &AP) {}
 
     virtual ~GCMetadataPrinter();
   };
diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h
index 81e1f85286e1..0b0c3124c537 100644
--- a/include/llvm/CodeGen/GCStrategy.h
+++ b/include/llvm/CodeGen/GCStrategy.h
@@ -12,9 +12,14 @@
 // specified in a function's 'gc' attribute. Algorithms are enabled by setting
 // flags in a subclass's constructor, and some virtual methods can be
 // overridden.
+//
+// GCStrategy is relevant for implementations using either gc.root or
+// gc.statepoint based lowering strategies, but is currently focused mostly on
+// options for gc.root.  This will change over time.
 // 
-// When requested, the GCStrategy will be populated with data about each
-// function which uses it. Specifically:
+// When requested by a subclass of GCStrategy, the gc.root implementation will
+// populate GCModuleInfo and GCFunctionInfo with that about each Function in
+// the Module that opts in to garbage collection.  Specifically:
 // 
 // - Safe points
 //   Garbage collection is generally only possible at certain points in code.
@@ -31,40 +36,42 @@
 // This information can used to emit the metadata tables which are required by
 // the target garbage collector runtime.
 //
+// When used with gc.statepoint, information about safepoint and roots can be
+// found in the binary StackMap section after code generation.  Safepoint
+// placement is currently the responsibility of the frontend, though late
+// insertion support is planned.  gc.statepoint does not currently support
+// custom stack map formats; such can be generated by parsing the standard
+// stack map section if desired.
+// 
+// The read and write barrier support can be used with either implementation.
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CODEGEN_GCSTRATEGY_H
 #define LLVM_CODEGEN_GCSTRATEGY_H
 
+#include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Support/Registry.h"
 #include <string>
 
 namespace llvm {
-  
-  class GCStrategy;
-  
-  /// The GC strategy registry uses all the defaults from Registry.
-  /// 
-  typedef Registry<GCStrategy> GCRegistry;
-  
   /// GCStrategy describes a garbage collector algorithm's code generation
   /// requirements, and provides overridable hooks for those needs which cannot
-  /// be abstractly described.
+  /// be abstractly described.  GCStrategy objects currently must be looked up
+  /// through the GCModuleInfo analysis pass.  They are owned by the analysis
+  /// pass and recreated every time that pass is invalidated.
   class GCStrategy {
-  public:
-    typedef std::vector<std::unique_ptr<GCFunctionInfo>> list_type;
-    typedef list_type::iterator iterator;
-    
   private:
-    friend class GCModuleInfo;
-    const Module *M;
     std::string Name;
-    
-    list_type Functions;
+    friend class GCModuleInfo;
     
   protected:
+    bool UseStatepoints;       /// Uses gc.statepoints as opposed to gc.roots,
+                               /// if set, none of the other options can be
+                               /// anything but their default values.
+
     unsigned NeededSafePoints; ///< Bitmask of required safe points.
     bool CustomReadBarriers;   ///< Default is to insert loads.
     bool CustomWriteBarriers;  ///< Default is to insert stores.
@@ -76,78 +83,114 @@ namespace llvm {
     
   public:
     GCStrategy();
-    
     virtual ~GCStrategy() {}
     
-    
-    /// getName - The name of the GC strategy, for debugging.
-    /// 
+    /// Return the name of the GC strategy.  This is the value of the collector
+    /// name string specified on functions which use this strategy.
     const std::string &getName() const { return Name; }
 
-    /// getModule - The module within which the GC strategy is operating.
-    /// 
-    const Module &getModule() const { return *M; }
+    /// By default, write barriers are replaced with simple store
+    /// instructions. If true, then performCustomLowering must instead lower
+    /// them. 
+    bool customWriteBarrier() const { return CustomWriteBarriers; }
+    
+    /// By default, read barriers are replaced with simple load
+    /// instructions. If true, then performCustomLowering must instead lower
+    /// them. 
+    bool customReadBarrier() const { return CustomReadBarriers; }
+
+    /// Returns true if this strategy is expecting the use of gc.statepoints,
+    /// and false otherwise.
+    bool useStatepoints() const { return UseStatepoints; }
+
+    /** @name Statepoint Specific Properties */
+    ///@{
 
-    /// needsSafePoitns - True if safe points of any kind are required. By
-    //                    default, none are recorded.
+    /// If the value specified can be reliably distinguished, returns true for
+    /// pointers to GC managed locations and false for pointers to non-GC 
+    /// managed locations.  Note a GCStrategy can always return 'None' (i.e. an
+    /// empty optional indicating it can't reliably distinguish.   
+    virtual Optional<bool> isGCManagedPointer(const Value *V) const {
+      return None;
+    }
+    ///@}
+
+    /** @name GCRoot Specific Properties
+     * These properties and overrides only apply to collector strategies using
+     * GCRoot. 
+     */
+    ///@{
+
+    /// True if safe points of any kind are required. By default, none are
+    /// recorded. 
     bool needsSafePoints() const {
       return CustomSafePoints || NeededSafePoints != 0;
     }
     
-    /// needsSafePoint(Kind) - True if the given kind of safe point is
-    //                          required. By default, none are recorded.
+    /// True if the given kind of safe point is required. By default, none are
+    /// recorded. 
     bool needsSafePoint(GC::PointKind Kind) const {
       return (NeededSafePoints & 1 << Kind) != 0;
     }
-    
-    /// customWriteBarrier - By default, write barriers are replaced with simple
-    ///                      store instructions. If true, then
-    ///                      performCustomLowering must instead lower them.
-    bool customWriteBarrier() const { return CustomWriteBarriers; }
-    
-    /// customReadBarrier - By default, read barriers are replaced with simple
-    ///                     load instructions. If true, then
-    ///                     performCustomLowering must instead lower them.
-    bool customReadBarrier() const { return CustomReadBarriers; }
-    
-    /// customRoots - By default, roots are left for the code generator so it
-    ///               can generate a stack map. If true, then
-    //                performCustomLowering must delete them.
+        
+    /// By default, roots are left for the code generator so it can generate a
+    /// stack map. If true, then performCustomLowering must delete them.
     bool customRoots() const { return CustomRoots; }
 
-    /// customSafePoints - By default, the GC analysis will find safe
-    ///                    points according to NeededSafePoints. If true,
-    ///                    then findCustomSafePoints must create them.
+    /// By default, the GC analysis will find safe points according to
+    /// NeededSafePoints. If true, then findCustomSafePoints must create them.
     bool customSafePoints() const { return CustomSafePoints; }
     
-    /// initializeRoots - If set, gcroot intrinsics should initialize their
-    //                    allocas to null before the first use. This is
-    //                    necessary for most GCs and is enabled by default.
+    /// If set, gcroot intrinsics should initialize their allocas to null
+    /// before the first use. This is necessary for most GCs and is enabled by
+    /// default. 
     bool initializeRoots() const { return InitRoots; }
     
-    /// usesMetadata - If set, appropriate metadata tables must be emitted by
-    ///                the back-end (assembler, JIT, or otherwise).
+    /// If set, appropriate metadata tables must be emitted by the back-end
+    /// (assembler, JIT, or otherwise). For statepoint, this method is
+    /// currently unsupported.  The stackmap information can be found in the
+    /// StackMap section as described in the documentation.
     bool usesMetadata() const { return UsesMetadata; }
-    
-    /// begin/end - Iterators for function metadata.
-    /// 
-    iterator begin() { return Functions.begin(); }
-    iterator end()   { return Functions.end();   }
-
-    /// insertFunctionMetadata - Creates metadata for a function.
-    /// 
-    GCFunctionInfo *insertFunctionInfo(const Function &F);
 
+    ///@}
+    
     /// initializeCustomLowering/performCustomLowering - If any of the actions
     /// are set to custom, performCustomLowering must be overriden to transform
     /// the corresponding actions to LLVM IR. initializeCustomLowering is
     /// optional to override. These are the only GCStrategy methods through
-    /// which the LLVM IR can be modified.
-    virtual bool initializeCustomLowering(Module &F);
-    virtual bool performCustomLowering(Function &F);
-    virtual bool findCustomSafePoints(GCFunctionInfo& FI, MachineFunction& MF);
+    /// which the LLVM IR can be modified.  These methods apply mostly to
+    /// gc.root based implementations, but can be overriden to provide custom
+    /// barrier lowerings with gc.statepoint as well.
+    ///@{
+    virtual bool initializeCustomLowering(Module &F) {
+      // No changes made
+      return false;
+    }
+    virtual bool performCustomLowering(Function &F) {
+      llvm_unreachable("GCStrategy subclass specified a configuration which"
+                       "requires a custom lowering without providing one");
+    }
+    ///@}
+    /// Called if customSafepoints returns true, used only by gc.root
+    /// implementations. 
+    virtual bool findCustomSafePoints(GCFunctionInfo& FI, MachineFunction& MF) {
+      llvm_unreachable("GCStrategy subclass specified a configuration which"
+                       "requests custom safepoint identification without"
+                       "providing an implementation for such");
+    }
   };
-  
+
+  /// Subclasses of GCStrategy are made available for use during compilation by
+  /// adding them to the global GCRegistry.  This can done either within the
+  /// LLVM source tree or via a loadable plugin.  An example registeration
+  /// would be:
+  /// static GCRegistry::Add<CustomGC> X("custom-name",
+  ///        "my custom supper fancy gc strategy");
+  /// 
+  /// Note that to use a custom GCMetadataPrinter w/gc.roots, you must also
+  /// register your GCMetadataPrinter subclass with the
+  /// GCMetadataPrinterRegistery as well. 
+  typedef Registry<GCStrategy> GCRegistry;
 }
 
 #endif
diff --git a/include/llvm/CodeGen/GCs.h b/include/llvm/CodeGen/GCs.h
index bb170c85cbf8..51a31842a5c0 100644
--- a/include/llvm/CodeGen/GCs.h
+++ b/include/llvm/CodeGen/GCs.h
@@ -36,6 +36,8 @@ namespace llvm {
   /// Creates a shadow stack garbage collector. This collector requires no code
   /// generator support.
   void linkShadowStackGC();
+
+  void linkStatepointExampleGC();
 }
 
 #endif
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 84447616c989..952362ed6ce3 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -72,6 +72,11 @@ namespace ISD {
     /// the parent's frame or return address, and so on.
     FRAMEADDR, RETURNADDR,
 
+    /// FRAME_ALLOC_RECOVER - Represents the llvm.framerecover
+    /// intrinsic. Materializes the offset from the frame pointer of another
+    /// function to the result of llvm.frameallocate.
+    FRAME_ALLOC_RECOVER,
+
     /// READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on
     /// the DAG, which implements the named register global variables extension.
     READ_REGISTER,
@@ -485,7 +490,8 @@ namespace ISD {
     FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
     FLOG, FLOG2, FLOG10, FEXP, FEXP2,
     FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
-    
+    FMINNUM, FMAXNUM,
+
     /// FSINCOS - Compute both fsin and fcos as a single operation.
     FSINCOS,
 
@@ -674,6 +680,9 @@ namespace ISD {
     ATOMIC_LOAD_UMIN,
     ATOMIC_LOAD_UMAX,
 
+    // Masked load and store
+    MLOAD, MSTORE,
+
     /// This corresponds to the llvm.lifetime.* intrinsics. The first operand
     /// is the chain and the second operand is the alloca pointer.
     LIFETIME_START, LIFETIME_END,
@@ -744,7 +753,7 @@ namespace ISD {
     LAST_LOADEXT_TYPE
   };
 
-  NodeType getExtForLoadExtType(LoadExtType);
+  NodeType getExtForLoadExtType(bool IsFP, LoadExtType);
 
   //===--------------------------------------------------------------------===//
   /// ISD::CondCode enum - These are ordered carefully to make the bitfields
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
deleted file mode 100644
index dc2a0272db4e..000000000000
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ /dev/null
@@ -1,344 +0,0 @@
-//===-- llvm/CodeGen/JITCodeEmitter.h - Code emission ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an abstract interface that is used by the machine code
-// emission framework to output the code.  This allows machine code emission to
-// be separated from concerns such as resolution of call targets, and where the
-// machine code will be written (memory or disk, f.e.).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_JITCODEEMITTER_H
-#define LLVM_CODEGEN_JITCODEEMITTER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/MathExtras.h"
-#include <string>
-
-namespace llvm {
-
-class MachineBasicBlock;
-class MachineConstantPool;
-class MachineJumpTableInfo;
-class MachineFunction;
-class MachineModuleInfo;
-class MachineRelocation;
-class Value;
-class GlobalValue;
-class Function;
-  
-/// JITCodeEmitter - This class defines two sorts of methods: those for
-/// emitting the actual bytes of machine code, and those for emitting auxiliary
-/// structures, such as jump tables, relocations, etc.
-///
-/// Emission of machine code is complicated by the fact that we don't (in
-/// general) know the size of the machine code that we're about to emit before
-/// we emit it.  As such, we preallocate a certain amount of memory, and set the
-/// BufferBegin/BufferEnd pointers to the start and end of the buffer.  As we
-/// emit machine instructions, we advance the CurBufferPtr to indicate the
-/// location of the next byte to emit.  In the case of a buffer overflow (we
-/// need to emit more machine code than we have allocated space for), the
-/// CurBufferPtr will saturate to BufferEnd and ignore stores.  Once the entire
-/// function has been emitted, the overflow condition is checked, and if it has
-/// occurred, more memory is allocated, and we reemit the code into it.
-/// 
-class JITCodeEmitter : public MachineCodeEmitter {
-  void anchor() override;
-public:
-  virtual ~JITCodeEmitter() {}
-
-  /// startFunction - This callback is invoked when the specified function is
-  /// about to be code generated.  This initializes the BufferBegin/End/Ptr
-  /// fields.
-  ///
-  void startFunction(MachineFunction &F) override = 0;
-
-  /// finishFunction - This callback is invoked when the specified function has
-  /// finished code generation.  If a buffer overflow has occurred, this method
-  /// returns true (the callee is required to try again), otherwise it returns
-  /// false.
-  ///
-  bool finishFunction(MachineFunction &F) override = 0;
-
-  /// allocIndirectGV - Allocates and fills storage for an indirect
-  /// GlobalValue, and returns the address.
-  virtual void *allocIndirectGV(const GlobalValue *GV,
-                                const uint8_t *Buffer, size_t Size,
-                                unsigned Alignment) = 0;
-
-  /// emitByte - This callback is invoked when a byte needs to be written to the
-  /// output stream.
-  ///
-  void emitByte(uint8_t B) {
-    if (CurBufferPtr != BufferEnd)
-      *CurBufferPtr++ = B;
-  }
-
-  /// emitWordLE - This callback is invoked when a 32-bit word needs to be
-  /// written to the output stream in little-endian format.
-  ///
-  void emitWordLE(uint32_t W) {
-    if (4 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (uint8_t)(W >>  0);
-      *CurBufferPtr++ = (uint8_t)(W >>  8);
-      *CurBufferPtr++ = (uint8_t)(W >> 16);
-      *CurBufferPtr++ = (uint8_t)(W >> 24);
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-  
-  /// emitWordBE - This callback is invoked when a 32-bit word needs to be
-  /// written to the output stream in big-endian format.
-  ///
-  void emitWordBE(uint32_t W) {
-    if (4 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (uint8_t)(W >> 24);
-      *CurBufferPtr++ = (uint8_t)(W >> 16);
-      *CurBufferPtr++ = (uint8_t)(W >>  8);
-      *CurBufferPtr++ = (uint8_t)(W >>  0);
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-
-  /// emitDWordLE - This callback is invoked when a 64-bit word needs to be
-  /// written to the output stream in little-endian format.
-  ///
-  void emitDWordLE(uint64_t W) {
-    if (8 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (uint8_t)(W >>  0);
-      *CurBufferPtr++ = (uint8_t)(W >>  8);
-      *CurBufferPtr++ = (uint8_t)(W >> 16);
-      *CurBufferPtr++ = (uint8_t)(W >> 24);
-      *CurBufferPtr++ = (uint8_t)(W >> 32);
-      *CurBufferPtr++ = (uint8_t)(W >> 40);
-      *CurBufferPtr++ = (uint8_t)(W >> 48);
-      *CurBufferPtr++ = (uint8_t)(W >> 56);
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-  
-  /// emitDWordBE - This callback is invoked when a 64-bit word needs to be
-  /// written to the output stream in big-endian format.
-  ///
-  void emitDWordBE(uint64_t W) {
-    if (8 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (uint8_t)(W >> 56);
-      *CurBufferPtr++ = (uint8_t)(W >> 48);
-      *CurBufferPtr++ = (uint8_t)(W >> 40);
-      *CurBufferPtr++ = (uint8_t)(W >> 32);
-      *CurBufferPtr++ = (uint8_t)(W >> 24);
-      *CurBufferPtr++ = (uint8_t)(W >> 16);
-      *CurBufferPtr++ = (uint8_t)(W >>  8);
-      *CurBufferPtr++ = (uint8_t)(W >>  0);
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-
-  /// emitAlignment - Move the CurBufferPtr pointer up to the specified
-  /// alignment (saturated to BufferEnd of course).
-  void emitAlignment(unsigned Alignment) {
-    if (Alignment == 0) Alignment = 1;
-    uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr,
-                                                   Alignment);
-    CurBufferPtr = std::min(NewPtr, BufferEnd);
-  }
-
-  /// emitAlignmentWithFill - Similar to emitAlignment, except that the
-  /// extra bytes are filled with the provided byte.
-  void emitAlignmentWithFill(unsigned Alignment, uint8_t Fill) {
-    if (Alignment == 0) Alignment = 1;
-    uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr,
-                                                   Alignment);
-    // Fail if we don't have room.
-    if (NewPtr > BufferEnd) {
-      CurBufferPtr = BufferEnd;
-      return;
-    }
-    while (CurBufferPtr < NewPtr) {
-      *CurBufferPtr++ = Fill;
-    }
-  }
-
-  /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
-  /// written to the output stream.
-  void emitULEB128Bytes(uint64_t Value, unsigned PadTo = 0) {
-    do {
-      uint8_t Byte = Value & 0x7f;
-      Value >>= 7;
-      if (Value || PadTo != 0) Byte |= 0x80;
-      emitByte(Byte);
-    } while (Value);
-
-    if (PadTo) {
-      do {
-        uint8_t Byte = (PadTo > 1) ? 0x80 : 0x0;
-        emitByte(Byte);
-      } while (--PadTo);
-    }
-  }
-  
-  /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
-  /// written to the output stream.
-  void emitSLEB128Bytes(int64_t Value) {
-    int32_t Sign = Value >> (8 * sizeof(Value) - 1);
-    bool IsMore;
-  
-    do {
-      uint8_t Byte = Value & 0x7f;
-      Value >>= 7;
-      IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
-      if (IsMore) Byte |= 0x80;
-      emitByte(Byte);
-    } while (IsMore);
-  }
-
-  /// emitString - This callback is invoked when a String needs to be
-  /// written to the output stream.
-  void emitString(const std::string &String) {
-    for (size_t i = 0, N = String.size(); i < N; ++i) {
-      uint8_t C = String[i];
-      emitByte(C);
-    }
-    emitByte(0);
-  }
-  
-  /// emitInt32 - Emit a int32 directive.
-  void emitInt32(uint32_t Value) {
-    if (4 <= BufferEnd-CurBufferPtr) {
-      *((uint32_t*)CurBufferPtr) = Value;
-      CurBufferPtr += 4;
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-
-  /// emitInt64 - Emit a int64 directive.
-  void emitInt64(uint64_t Value) {
-    if (8 <= BufferEnd-CurBufferPtr) {
-      *((uint64_t*)CurBufferPtr) = Value;
-      CurBufferPtr += 8;
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-  
-  /// emitInt32At - Emit the Int32 Value in Addr.
-  void emitInt32At(uintptr_t *Addr, uintptr_t Value) {
-    if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd)
-      (*(uint32_t*)Addr) = (uint32_t)Value;
-  }
-  
-  /// emitInt64At - Emit the Int64 Value in Addr.
-  void emitInt64At(uintptr_t *Addr, uintptr_t Value) {
-    if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd)
-      (*(uint64_t*)Addr) = (uint64_t)Value;
-  }
-  
-  
-  /// emitLabel - Emits a label
-  void emitLabel(MCSymbol *Label) override = 0;
-
-  /// allocateSpace - Allocate a block of space in the current output buffer,
-  /// returning null (and setting conditions to indicate buffer overflow) on
-  /// failure.  Alignment is the alignment in bytes of the buffer desired.
-  void *allocateSpace(uintptr_t Size, unsigned Alignment) override {
-    emitAlignment(Alignment);
-    void *Result;
-    
-    // Check for buffer overflow.
-    if (Size >= (uintptr_t)(BufferEnd-CurBufferPtr)) {
-      CurBufferPtr = BufferEnd;
-      Result = nullptr;
-    } else {
-      // Allocate the space.
-      Result = CurBufferPtr;
-      CurBufferPtr += Size;
-    }
-    
-    return Result;
-  }
-
-  /// allocateGlobal - Allocate memory for a global.  Unlike allocateSpace,
-  /// this method does not allocate memory in the current output buffer,
-  /// because a global may live longer than the current function.
-  virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0;
-
-  /// StartMachineBasicBlock - This should be called by the target when a new
-  /// basic block is about to be emitted.  This way the MCE knows where the
-  /// start of the block is, and can implement getMachineBasicBlockAddress.
-  void StartMachineBasicBlock(MachineBasicBlock *MBB) override = 0;
-
-  /// getCurrentPCValue - This returns the address that the next emitted byte
-  /// will be output to.
-  ///
-  uintptr_t getCurrentPCValue() const override {
-    return (uintptr_t)CurBufferPtr;
-  }
-
-  /// getCurrentPCOffset - Return the offset from the start of the emitted
-  /// buffer that we are currently writing to.
-  uintptr_t getCurrentPCOffset() const override {
-    return CurBufferPtr-BufferBegin;
-  }
-
-  /// earlyResolveAddresses - True if the code emitter can use symbol addresses 
-  /// during code emission time. The JIT is capable of doing this because it
-  /// creates jump tables or constant pools in memory on the fly while the
-  /// object code emitters rely on a linker to have real addresses and should
-  /// use relocations instead.
-  bool earlyResolveAddresses() const override { return true; }
-
-  /// addRelocation - Whenever a relocatable address is needed, it should be
-  /// noted with this interface.
-  void addRelocation(const MachineRelocation &MR) override = 0;
-
-  /// FIXME: These should all be handled with relocations!
-  
-  /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
-  /// the constant pool that was last emitted with the emitConstantPool method.
-  ///
-  uintptr_t getConstantPoolEntryAddress(unsigned Index) const override = 0;
-
-  /// getJumpTableEntryAddress - Return the address of the jump table with index
-  /// 'Index' in the function that last called initJumpTableInfo.
-  ///
-  uintptr_t getJumpTableEntryAddress(unsigned Index) const override = 0;
-
-  /// getMachineBasicBlockAddress - Return the address of the specified
-  /// MachineBasicBlock, only usable after the label for the MBB has been
-  /// emitted.
-  ///
-  uintptr_t
-    getMachineBasicBlockAddress(MachineBasicBlock *MBB) const override = 0;
-
-  /// getLabelAddress - Return the address of the specified Label, only usable
-  /// after the Label has been emitted.
-  ///
-  uintptr_t getLabelAddress(MCSymbol *Label) const override = 0;
-
-  /// Specifies the MachineModuleInfo object. This is used for exception handling
-  /// purposes.
-  void setModuleInfo(MachineModuleInfo* Info) override = 0;
-
-  /// getLabelLocations - Return the label locations map of the label IDs to
-  /// their address.
-  virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() {
-    return nullptr;
-  }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/CodeGen/JumpInstrTables.h b/include/llvm/CodeGen/JumpInstrTables.h
index 6ca3d7d1765f..005bc1eb2b2d 100644
--- a/include/llvm/CodeGen/JumpInstrTables.h
+++ b/include/llvm/CodeGen/JumpInstrTables.h
@@ -39,13 +39,14 @@ class Module;
 ///   jmp f_orig@PLT
 /// \endverbatim
 ///
-/// Support for an architecture depends on two functions in TargetInstrInfo:
-/// getUnconditionalBranch, and getTrap. AsmPrinter uses these to generate the
-/// appropriate instructions for the jump statement (an unconditional branch)
-/// and for padding to make the table have a size that is a power of two. This
-/// padding uses a trap instruction to ensure that calls to this area halt the
-/// program. The default implementations of these functions call
-/// llvm_unreachable.
+/// Support for an architecture depends on three functions in TargetInstrInfo:
+/// getUnconditionalBranch, getTrap, and getJumpInstrTableEntryBound. AsmPrinter
+/// uses these to generate the appropriate instructions for the jump statement
+/// (an unconditional branch) and for padding to make the table have a size that
+/// is a power of two. This padding uses a trap instruction to ensure that calls
+/// to this area halt the program. The default implementations of these
+/// functions call llvm_unreachable, except for getJumpInstrTableEntryBound,
+/// which returns 0 by default.
 class JumpInstrTables : public ModulePass {
 public:
   static char ID;
@@ -64,6 +65,14 @@ public:
   /// Checks to see if there is already a table for the given FunctionType.
   bool hasTable(FunctionType *FunTy);
 
+  /// Maps the function into a subset of function types, depending on the
+  /// jump-instruction table style selected from JumpTableTypes in
+  /// JumpInstrTables.cpp. The choice of mapping determines the number of
+  /// jump-instruction tables generated by this pass. E.g., the simplest mapping
+  /// converts every function type into void f(); so, all functions end up in a
+  /// single table.
+  static FunctionType *transformType(JumpTable::JumpTableType JTT,
+                                     FunctionType *FunTy);
 private:
   /// The metadata used while a jump table is being built
   struct TableMeta {
@@ -76,14 +85,6 @@ private:
 
   typedef DenseMap<FunctionType *, struct TableMeta> JumpMap;
 
-  /// Maps the function into a subset of function types, depending on the
-  /// jump-instruction table style selected from JumpTableTypes in
-  /// JumpInstrTables.cpp. The choice of mapping determines the number of
-  /// jump-instruction tables generated by this pass. E.g., the simplest mapping
-  /// converts every function type into void f(); so, all functions end up in a
-  /// single table.
-  FunctionType *transformType(FunctionType *FunTy);
-
   /// The current state of functions and jump entries in the table(s).
   JumpMap Metadata;
 
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index 036aea30a510..11a360a491a7 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -19,14 +19,14 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/ValueHandle.h"
-#include <utility>
 #include <unordered_map>
+#include <utility>
 namespace llvm {
 
 class MachineInstr;
@@ -48,6 +48,8 @@ public:
   LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A)
       : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A),
         LastInsn(nullptr), FirstInsn(nullptr), DFSIn(0), DFSOut(0) {
+    assert((!D || D->isResolved()) && "Expected resolved node");
+    assert((!I || I->isResolved()) && "Expected resolved node");
     if (Parent)
       Parent->addChild(this);
   }
@@ -116,8 +118,8 @@ public:
 
 private:
   LexicalScope *Parent;                        // Parent to this scope.
-  AssertingVH<const MDNode> Desc;              // Debug info descriptor.
-  AssertingVH<const MDNode> InlinedAtLocation; // Location at which this
+  const MDNode *Desc;                          // Debug info descriptor.
+  const MDNode *InlinedAtLocation;             // Location at which this
                                                // scope is inlined.
   bool AbstractScope;                          // Abstract Scope
   SmallVector<LexicalScope *, 4> Children;     // Scopes defined in scope.
@@ -148,12 +150,6 @@ public:
   /// empty - Return true if there is any lexical scope information available.
   bool empty() { return CurrentFnLexicalScope == nullptr; }
 
-  /// isCurrentFunctionScope - Return true if given lexical scope represents
-  /// current function.
-  bool isCurrentFunctionScope(const LexicalScope *LS) {
-    return LS == CurrentFnLexicalScope;
-  }
-
   /// getCurrentFunctionScope - Return lexical scope for the current function.
   LexicalScope *getCurrentFunctionScope() const {
     return CurrentFnLexicalScope;
@@ -163,7 +159,7 @@ public:
   /// which have machine instructions that belong to lexical scope identified by
   /// DebugLoc.
   void getMachineBasicBlocks(DebugLoc DL,
-                             SmallPtrSet<const MachineBasicBlock *, 4> &MBBs);
+                             SmallPtrSetImpl<const MachineBasicBlock *> &MBBs);
 
   /// dominates - Return true if DebugLoc's lexical scope dominates at least one
   /// machine instruction's lexical scope in a given machine basic block.
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 372c294da306..e7ccbfa617e5 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -39,6 +39,7 @@ namespace {
       llvm::linkOcamlGC();
       llvm::linkErlangGC();
       llvm::linkShadowStackGC();
+      llvm::linkStatepointExampleGC();
 
       (void) llvm::createBURRListDAGScheduler(nullptr,
                                               llvm::CodeGenOpt::Default);
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 6629e6046532..ce9845ee1673 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -119,6 +119,12 @@ namespace llvm {
       return isDeadDef() ? nullptr : LateVal;
     }
 
+    /// Returns the value alive at the end of the instruction, if any. This can
+    /// be a live-through value, a live def or a dead def.
+    VNInfo *valueOutOrDead() const {
+      return LateVal;
+    }
+
     /// Return the value defined by this instruction, if any. This includes
     /// dead defs, it is the value created by the instruction's def operands.
     VNInfo *valueDefined() const {
@@ -204,6 +210,23 @@ namespace llvm {
     const_vni_iterator vni_begin() const { return valnos.begin(); }
     const_vni_iterator vni_end() const   { return valnos.end(); }
 
+    /// Constructs a new LiveRange object.
+    LiveRange() {
+    }
+
+    /// Constructs a new LiveRange object by copying segments and valnos from
+    /// another LiveRange.
+    LiveRange(const LiveRange &Other, BumpPtrAllocator &Allocator) {
+      // Duplicate valnos.
+      for (const VNInfo *VNI : Other.valnos) {
+        createValueCopy(VNI, Allocator);
+      }
+      // Now we can copy segments and remap their valnos.
+      for (const Segment &S : Other.segments) {
+        segments.push_back(Segment(S.start, S.end, valnos[S.valno->id]));
+      }
+    }
+
     /// advanceTo - Advance the specified iterator to point to the Segment
     /// containing the specified position, or end() if the position is past the
     /// end of the range.  If no Segment contains this position, but the
@@ -217,6 +240,14 @@ namespace llvm {
       return I;
     }
 
+    const_iterator advanceTo(const_iterator I, SlotIndex Pos) const {
+      assert(I != end());
+      if (Pos >= endIndex())
+        return end();
+      while (I->end <= Pos) ++I;
+      return I;
+    }
+
     /// find - Return an iterator pointing to the first segment that ends after
     /// Pos, or end(). This is the same as advanceTo(begin(), Pos), but faster
     /// when searching large ranges.
@@ -397,6 +428,12 @@ namespace llvm {
     /// scanning the Other range starting at I.
     bool overlapsFrom(const LiveRange &Other, const_iterator I) const;
 
+    /// Returns true if all segments of the @p Other live range are completely
+    /// covered by this live range.
+    /// Adjacent live ranges do not affect the covering:the liverange
+    /// [1,5](5,10] covers (3,7].
+    bool covers(const LiveRange &Other) const;
+
     /// Add the specified Segment to this range, merging segments as
     /// appropriate.  This returns an iterator to the inserted segment (which
     /// may have grown since it was inserted).
@@ -435,6 +472,12 @@ namespace llvm {
       removeSegment(S.start, S.end, RemoveDeadValNo);
     }
 
+    /// Remove segment pointed to by iterator @p I from this range.  This does
+    /// not remove dead value numbers.
+    iterator removeSegment(iterator I) {
+      return segments.erase(I);
+    }
+
     /// Query Liveness at Idx.
     /// The sub-instruction slot of Idx doesn't matter, only the instruction
     /// it refers to is considered.
@@ -484,9 +527,9 @@ namespace llvm {
     /// Returns true if the live range is zero length, i.e. no live segments
     /// span instructions. It doesn't pay to spill such a range.
     bool isZeroLength(SlotIndexes *Indexes) const {
-      for (const_iterator i = begin(), e = end(); i != e; ++i)
-        if (Indexes->getNextNonNullIndex(i->start).getBaseIndex() <
-            i->end.getBaseIndex())
+      for (const Segment &S : segments)
+        if (Indexes->getNextNonNullIndex(S.start).getBaseIndex() <
+            S.end.getBaseIndex())
           return false;
       return true;
     }
@@ -509,6 +552,10 @@ namespace llvm {
     void verify() const;
 #endif
 
+  protected:
+    /// Append a segment to the list of segments.
+    void append(const LiveRange::Segment S);
+
   private:
 
     iterator addSegmentFrom(Segment S, iterator From);
@@ -529,11 +576,122 @@ namespace llvm {
   public:
     typedef LiveRange super;
 
+    /// A live range for subregisters. The LaneMask specifies which parts of the
+    /// super register are covered by the interval.
+    /// (@sa TargetRegisterInfo::getSubRegIndexLaneMask()).
+    class SubRange : public LiveRange {
+    public:
+      SubRange *Next;
+      unsigned LaneMask;
+
+      /// Constructs a new SubRange object.
+      SubRange(unsigned LaneMask)
+        : Next(nullptr), LaneMask(LaneMask) {
+      }
+
+      /// Constructs a new SubRange object by copying liveness from @p Other.
+      SubRange(unsigned LaneMask, const LiveRange &Other,
+               BumpPtrAllocator &Allocator)
+        : LiveRange(Other, Allocator), Next(nullptr), LaneMask(LaneMask) {
+      }
+    };
+
+  private:
+    SubRange *SubRanges; ///< Single linked list of subregister live ranges.
+
+  public:
     const unsigned reg;  // the register or stack slot of this interval.
     float weight;        // weight of this interval
 
     LiveInterval(unsigned Reg, float Weight)
-      : reg(Reg), weight(Weight) {}
+      : SubRanges(nullptr), reg(Reg), weight(Weight) {}
+
+    template<typename T>
+    class SingleLinkedListIterator {
+      T *P;
+    public:
+      SingleLinkedListIterator<T>(T *P) : P(P) {}
+      SingleLinkedListIterator<T> &operator++() {
+        P = P->Next;
+        return *this;
+      }
+      SingleLinkedListIterator<T> &operator++(int) {
+        SingleLinkedListIterator res = *this;
+        ++*this;
+        return res;
+      }
+      bool operator!=(const SingleLinkedListIterator<T> &Other) {
+        return P != Other.operator->();
+      }
+      bool operator==(const SingleLinkedListIterator<T> &Other) {
+        return P == Other.operator->();
+      }
+      T &operator*() const {
+        return *P;
+      }
+      T *operator->() const {
+        return P;
+      }
+    };
+
+    typedef SingleLinkedListIterator<SubRange> subrange_iterator;
+    subrange_iterator subrange_begin() {
+      return subrange_iterator(SubRanges);
+    }
+    subrange_iterator subrange_end() {
+      return subrange_iterator(nullptr);
+    }
+
+    typedef SingleLinkedListIterator<const SubRange> const_subrange_iterator;
+    const_subrange_iterator subrange_begin() const {
+      return const_subrange_iterator(SubRanges);
+    }
+    const_subrange_iterator subrange_end() const {
+      return const_subrange_iterator(nullptr);
+    }
+
+    iterator_range<subrange_iterator> subranges() {
+      return make_range(subrange_begin(), subrange_end());
+    }
+
+    iterator_range<const_subrange_iterator> subranges() const {
+      return make_range(subrange_begin(), subrange_end());
+    }
+
+    /// Creates a new empty subregister live range. The range is added at the
+    /// beginning of the subrange list; subrange iterators stay valid.
+    SubRange *createSubRange(BumpPtrAllocator &Allocator, unsigned LaneMask) {
+      SubRange *Range = new (Allocator) SubRange(LaneMask);
+      appendSubRange(Range);
+      return Range;
+    }
+
+    /// Like createSubRange() but the new range is filled with a copy of the
+    /// liveness information in @p CopyFrom.
+    SubRange *createSubRangeFrom(BumpPtrAllocator &Allocator, unsigned LaneMask,
+                                 const LiveRange &CopyFrom) {
+      SubRange *Range = new (Allocator) SubRange(LaneMask, CopyFrom, Allocator);
+      appendSubRange(Range);
+      return Range;
+    }
+
+    /// Returns true if subregister liveness information is available.
+    bool hasSubRanges() const {
+      return SubRanges != nullptr;
+    }
+
+    /// Removes all subregister liveness information.
+    void clearSubRanges() {
+      SubRanges = nullptr;
+    }
+
+    /// Removes all subranges without any segments (subranges without segments
+    /// are not considered valid and should only exist temporarily).
+    void removeEmptySubRanges();
+
+    /// Construct main live range by merging the SubRanges of @p LI.
+    void constructMainRangeFromSubranges(const SlotIndexes &Indexes,
+                                         VNInfo::Allocator &VNIAllocator);
 
     /// getSize - Returns the sum of sizes of all the LiveRange's.
     ///
@@ -558,9 +716,23 @@ namespace llvm {
     void print(raw_ostream &OS) const;
     void dump() const;
 
+    /// \brief Walks the interval and assert if any invariants fail to hold.
+    ///
+    /// Note that this is a no-op when asserts are disabled.
+#ifdef NDEBUG
+    void verify(const MachineRegisterInfo *MRI = nullptr) const {}
+#else
+    void verify(const MachineRegisterInfo *MRI = nullptr) const;
+#endif
+
   private:
     LiveInterval& operator=(const LiveInterval& rhs) LLVM_DELETED_FUNCTION;
 
+    /// Appends @p Range to SubRanges list.
+    void appendSubRange(SubRange *Range) {
+      Range->Next = SubRanges;
+      SubRanges = Range;
+    }
   };
 
   inline raw_ostream &operator<<(raw_ostream &OS, const LiveInterval &LI) {
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 176665bc2566..d8c921fce313 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -17,8 +17,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
-#define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
+#ifndef LLVM_CODEGEN_LIVEINTERVALANALYSIS_H
+#define LLVM_CODEGEN_LIVEINTERVALANALYSIS_H
 
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/SmallVector.h"
@@ -50,7 +50,6 @@ namespace llvm {
   class LiveIntervals : public MachineFunctionPass {
     MachineFunction* MF;
     MachineRegisterInfo* MRI;
-    const TargetMachine* TM;
     const TargetRegisterInfo* TRI;
     const TargetInstrInfo* TII;
     AliasAnalysis *AA;
@@ -155,16 +154,11 @@ namespace llvm {
     bool shrinkToUses(LiveInterval *li,
                       SmallVectorImpl<MachineInstr*> *dead = nullptr);
 
-    /// \brief Walk the values in the given interval and compute which ones
-    /// are dead.  Dead values are not deleted, however:
-    /// - Dead PHIDef values are marked as unused.
-    /// - New dead machine instructions are added to the dead vector.
-    /// - CanSeparate is set to true if the interval may have been separated
-    ///   into multiple connected components.
-    void computeDeadValues(LiveInterval *li,
-                           LiveRange &LR,
-                           bool *CanSeparate,
-                           SmallVectorImpl<MachineInstr*> *dead);
+    /// Specialized version of
+    /// shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead)
+    /// that works on a subregister live range and only looks at uses matching
+    /// the lane mask of the subregister range.
+    void shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg);
 
     /// extendToIndices - Extend the live range of LI to reach all points in
     /// Indices. The points in the Indices array must be jointly dominated by
@@ -176,14 +170,21 @@ namespace llvm {
     /// See also LiveRangeCalc::extend().
     void extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices);
 
-    /// pruneValue - If an LI value is live at Kill, prune its live range by
-    /// removing any liveness reachable from Kill. Add live range end points to
+
+    /// If @p LR has a live value at @p Kill, prune its live range by removing
+    /// any liveness reachable from Kill. Add live range end points to
     /// EndPoints such that extendToIndices(LI, EndPoints) will reconstruct the
     /// value's live range.
     ///
     /// Calling pruneValue() and extendToIndices() can be used to reconstruct
     /// SSA form after adding defs to a virtual register.
-    void pruneValue(LiveInterval *LI, SlotIndex Kill,
+    void pruneValue(LiveRange &LR, SlotIndex Kill,
+                    SmallVectorImpl<SlotIndex> *EndPoints);
+
+    /// Subregister aware variant of pruneValue(LiveRange &LR, SlotIndex Kill,
+    /// SmallVectorImpl<SlotIndex> &EndPoints). Prunes the value in the main
+    /// range and all sub ranges.
+    void pruneValue(LiveInterval &LI, SlotIndex Kill,
                     SmallVectorImpl<SlotIndex> *EndPoints);
 
     SlotIndexes *getSlotIndexes() const {
@@ -405,6 +406,16 @@ namespace llvm {
     /// Compute RegMaskSlots and RegMaskBits.
     void computeRegMasks();
 
+    /// Walk the values in @p LI and check for dead values:
+    /// - Dead PHIDef values are marked as unused.
+    /// - Dead operands are marked as such.
+    /// - Completely dead machine instructions are added to the @p dead vector
+    ///   if it is not nullptr.
+    /// Returns true if any PHI value numbers have been removed which may
+    /// have separated the interval into multiple connected components.
+    bool computeDeadValues(LiveInterval &LI,
+                           SmallVectorImpl<MachineInstr*> *dead);
+
     static LiveInterval* createInterval(unsigned Reg);
 
     void printInstrs(raw_ostream &O) const;
@@ -414,6 +425,16 @@ namespace llvm {
     void computeRegUnitRange(LiveRange&, unsigned Unit);
     void computeVirtRegInterval(LiveInterval&);
 
+
+    /// Helper function for repairIntervalsInRange(), walks backwards and
+    /// creates/modifies live segments in @p LR to match the operands found.
+    /// Only full operands or operands with subregisters matching @p LaneMask
+    /// are considered.
+    void repairOldRegInRange(MachineBasicBlock::iterator Begin,
+                             MachineBasicBlock::iterator End,
+                             const SlotIndex endIdx, LiveRange &LR,
+                             unsigned Reg, unsigned LaneMask = ~0u);
+
     class HMEditor;
   };
 } // End llvm namespace
diff --git a/include/llvm/CodeGen/LiveIntervalUnion.h b/include/llvm/CodeGen/LiveIntervalUnion.h
index 2f40509a1111..1381c46a2750 100644
--- a/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -84,10 +84,16 @@ public:
   bool changedSince(unsigned tag) const { return tag != Tag; }
 
   // Add a live virtual register to this union and merge its segments.
-  void unify(LiveInterval &VirtReg);
+  void unify(LiveInterval &VirtReg, const LiveRange &Range);
+  void unify(LiveInterval &VirtReg) {
+    unify(VirtReg, VirtReg);
+  }
 
   // Remove a live virtual register's segments from this union.
-  void extract(LiveInterval &VirtReg);
+  void extract(LiveInterval &VirtReg, const LiveRange &Range);
+  void extract(LiveInterval &VirtReg) {
+    extract(VirtReg, VirtReg);
+  }
 
   // Remove all inserted virtual registers.
   void clear() { Segments.clear(); ++Tag; }
diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h
index 847092b1d824..91e4ddcde170 100644
--- a/include/llvm/CodeGen/LivePhysRegs.h
+++ b/include/llvm/CodeGen/LivePhysRegs.h
@@ -26,8 +26,8 @@
 // %XMM0<def> = ..., %YMM0<imp-use> (%YMM0 and all its sub-registers are alive)
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGEN_LIVE_PHYS_REGS_H
-#define LLVM_CODEGEN_LIVE_PHYS_REGS_H
+#ifndef LLVM_CODEGEN_LIVEPHYSREGS_H
+#define LLVM_CODEGEN_LIVEPHYSREGS_H
 
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -143,4 +143,4 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) {
 
 } // namespace llvm
 
-#endif // LLVM_CODEGEN_LIVE_PHYS_REGS_H
+#endif
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
index 5767cab1a4db..44c3c4eaf7b1 100644
--- a/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 namespace llvm {
 
@@ -111,18 +112,15 @@ public:
   /// @param vrm Map of virtual registers to physical registers for this
   ///            function.  If NULL, no virtual register map updates will
   ///            be done.  This could be the case if called before Regalloc.
-  LiveRangeEdit(LiveInterval *parent,
-                SmallVectorImpl<unsigned> &newRegs,
-                MachineFunction &MF,
-                LiveIntervals &lis,
-                VirtRegMap *vrm,
+  LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<unsigned> &newRegs,
+                MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm,
                 Delegate *delegate = nullptr)
-    : Parent(parent), NewRegs(newRegs),
-      MRI(MF.getRegInfo()), LIS(lis), VRM(vrm),
-      TII(*MF.getTarget().getInstrInfo()),
-      TheDelegate(delegate),
-      FirstNew(newRegs.size()),
-      ScannedRemattable(false) { MRI.setDelegate(this); }
+      : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis),
+        VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()),
+        TheDelegate(delegate), FirstNew(newRegs.size()),
+        ScannedRemattable(false) {
+    MRI.setDelegate(this);
+  }
 
   ~LiveRangeEdit() { MRI.resetDelegate(this); }
 
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index a4a5fcc31e12..55b97dc3e71d 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -134,14 +134,14 @@ private:   // Intermediate data structures
   // PhysRegInfo - Keep track of which instruction was the last def of a
   // physical register. This is a purely local property, because all physical
   // register references are presumed dead across basic blocks.
-  MachineInstr **PhysRegDef;
+  std::vector<MachineInstr *> PhysRegDef;
 
   // PhysRegInfo - Keep track of which instruction was the last use of a
   // physical register. This is a purely local property, because all physical
   // register references are presumed dead across basic blocks.
-  MachineInstr **PhysRegUse;
+  std::vector<MachineInstr *> PhysRegUse;
 
-  SmallVector<unsigned, 4> *PHIVarInfo;
+  std::vector<SmallVector<unsigned, 4>> PHIVarInfo;
 
   // DistanceMap - Keep track the distance of a MI from the start of the
   // current basic block.
@@ -175,6 +175,10 @@ private:   // Intermediate data structures
   /// register which is used in a PHI node. We map that to the BB the vreg
   /// is coming from.
   void analyzePHINodes(const MachineFunction& Fn);
+
+  void runOnInstr(MachineInstr *MI, SmallVectorImpl<unsigned> &Defs);
+
+  void runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs);
 public:
 
   bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index a08cc2eb508a..1440b967aea2 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -486,11 +486,15 @@ public:
   /// Insert a range of instructions into the instruction list before I.
   template<typename IT>
   void insert(iterator I, IT S, IT E) {
+    assert((I == end() || I->getParent() == this) &&
+           "iterator points outside of basic block");
     Insts.insert(I.getInstrIterator(), S, E);
   }
 
   /// Insert MI into the instruction list before I.
   iterator insert(iterator I, MachineInstr *MI) {
+    assert((I == end() || I->getParent() == this) &&
+           "iterator points outside of basic block");
     assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
            "Cannot insert instruction with bundle flags");
     return Insts.insert(I.getInstrIterator(), MI);
@@ -498,6 +502,8 @@ public:
 
   /// Insert MI into the instruction list after I.
   iterator insertAfter(iterator I, MachineInstr *MI) {
+    assert((I == end() || I->getParent() == this) &&
+           "iterator points outside of basic block");
     assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
            "Cannot insert instruction with bundle flags");
     return Insts.insertAfter(I.getInstrIterator(), MI);
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
deleted file mode 100644
index 81b0ba1e7c71..000000000000
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ /dev/null
@@ -1,334 +0,0 @@
-//===-- llvm/CodeGen/MachineCodeEmitter.h - Code emission -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an abstract interface that is used by the machine code
-// emission framework to output the code.  This allows machine code emission to
-// be separated from concerns such as resolution of call targets, and where the
-// machine code will be written (memory or disk, f.e.).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_MACHINECODEEMITTER_H
-#define LLVM_CODEGEN_MACHINECODEEMITTER_H
-
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/Support/DataTypes.h"
-#include <string>
-
-namespace llvm {
-
-class MachineBasicBlock;
-class MachineConstantPool;
-class MachineJumpTableInfo;
-class MachineFunction;
-class MachineModuleInfo;
-class MachineRelocation;
-class Value;
-class GlobalValue;
-class Function;
-class MCSymbol;
-
-/// MachineCodeEmitter - This class defines two sorts of methods: those for
-/// emitting the actual bytes of machine code, and those for emitting auxiliary
-/// structures, such as jump tables, relocations, etc.
-///
-/// Emission of machine code is complicated by the fact that we don't (in
-/// general) know the size of the machine code that we're about to emit before
-/// we emit it.  As such, we preallocate a certain amount of memory, and set the
-/// BufferBegin/BufferEnd pointers to the start and end of the buffer.  As we
-/// emit machine instructions, we advance the CurBufferPtr to indicate the
-/// location of the next byte to emit.  In the case of a buffer overflow (we
-/// need to emit more machine code than we have allocated space for), the
-/// CurBufferPtr will saturate to BufferEnd and ignore stores.  Once the entire
-/// function has been emitted, the overflow condition is checked, and if it has
-/// occurred, more memory is allocated, and we reemit the code into it.
-/// 
-class MachineCodeEmitter {
-  virtual void anchor();
-protected:
-  /// BufferBegin/BufferEnd - Pointers to the start and end of the memory
-  /// allocated for this code buffer.
-  uint8_t *BufferBegin, *BufferEnd;
-  /// CurBufferPtr - Pointer to the next byte of memory to fill when emitting
-  /// code.  This is guaranteed to be in the range [BufferBegin,BufferEnd].  If
-  /// this pointer is at BufferEnd, it will never move due to code emission, and
-  /// all code emission requests will be ignored (this is the buffer overflow
-  /// condition).
-  uint8_t *CurBufferPtr;
-
-public:
-  virtual ~MachineCodeEmitter() {}
-
-  /// startFunction - This callback is invoked when the specified function is
-  /// about to be code generated.  This initializes the BufferBegin/End/Ptr
-  /// fields.
-  ///
-  virtual void startFunction(MachineFunction &F) = 0;
-
-  /// finishFunction - This callback is invoked when the specified function has
-  /// finished code generation.  If a buffer overflow has occurred, this method
-  /// returns true (the callee is required to try again), otherwise it returns
-  /// false.
-  ///
-  virtual bool finishFunction(MachineFunction &F) = 0;
-
-  /// emitByte - This callback is invoked when a byte needs to be written to the
-  /// output stream.
-  ///
-  void emitByte(uint8_t B) {
-    if (CurBufferPtr != BufferEnd)
-      *CurBufferPtr++ = B;
-  }
-
-  /// emitWordLE - This callback is invoked when a 32-bit word needs to be
-  /// written to the output stream in little-endian format.
-  ///
-  void emitWordLE(uint32_t W) {
-    if (4 <= BufferEnd-CurBufferPtr) {
-      emitWordLEInto(CurBufferPtr, W);
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-
-  /// emitWordLEInto - This callback is invoked when a 32-bit word needs to be
-  /// written to an arbitrary buffer in little-endian format.  Buf must have at
-  /// least 4 bytes of available space.
-  ///
-  static void emitWordLEInto(uint8_t *&Buf, uint32_t W) {
-    *Buf++ = (uint8_t)(W >>  0);
-    *Buf++ = (uint8_t)(W >>  8);
-    *Buf++ = (uint8_t)(W >> 16);
-    *Buf++ = (uint8_t)(W >> 24);
-  }
-
-  /// emitWordBE - This callback is invoked when a 32-bit word needs to be
-  /// written to the output stream in big-endian format.
-  ///
-  void emitWordBE(uint32_t W) {
-    if (4 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (uint8_t)(W >> 24);
-      *CurBufferPtr++ = (uint8_t)(W >> 16);
-      *CurBufferPtr++ = (uint8_t)(W >>  8);
-      *CurBufferPtr++ = (uint8_t)(W >>  0);
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-
-  /// emitDWordLE - This callback is invoked when a 64-bit word needs to be
-  /// written to the output stream in little-endian format.
-  ///
-  void emitDWordLE(uint64_t W) {
-    if (8 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (uint8_t)(W >>  0);
-      *CurBufferPtr++ = (uint8_t)(W >>  8);
-      *CurBufferPtr++ = (uint8_t)(W >> 16);
-      *CurBufferPtr++ = (uint8_t)(W >> 24);
-      *CurBufferPtr++ = (uint8_t)(W >> 32);
-      *CurBufferPtr++ = (uint8_t)(W >> 40);
-      *CurBufferPtr++ = (uint8_t)(W >> 48);
-      *CurBufferPtr++ = (uint8_t)(W >> 56);
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-  
-  /// emitDWordBE - This callback is invoked when a 64-bit word needs to be
-  /// written to the output stream in big-endian format.
-  ///
-  void emitDWordBE(uint64_t W) {
-    if (8 <= BufferEnd-CurBufferPtr) {
-      *CurBufferPtr++ = (uint8_t)(W >> 56);
-      *CurBufferPtr++ = (uint8_t)(W >> 48);
-      *CurBufferPtr++ = (uint8_t)(W >> 40);
-      *CurBufferPtr++ = (uint8_t)(W >> 32);
-      *CurBufferPtr++ = (uint8_t)(W >> 24);
-      *CurBufferPtr++ = (uint8_t)(W >> 16);
-      *CurBufferPtr++ = (uint8_t)(W >>  8);
-      *CurBufferPtr++ = (uint8_t)(W >>  0);
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-
-  /// emitAlignment - Move the CurBufferPtr pointer up to the specified
-  /// alignment (saturated to BufferEnd of course).
-  void emitAlignment(unsigned Alignment) {
-    if (Alignment == 0) Alignment = 1;
-
-    if(Alignment <= (uintptr_t)(BufferEnd-CurBufferPtr)) {
-      // Move the current buffer ptr up to the specified alignment.
-      CurBufferPtr =
-        (uint8_t*)(((uintptr_t)CurBufferPtr+Alignment-1) &
-                   ~(uintptr_t)(Alignment-1));
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-  
-
-  /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
-  /// written to the output stream.
-  void emitULEB128Bytes(uint64_t Value) {
-    do {
-      uint8_t Byte = Value & 0x7f;
-      Value >>= 7;
-      if (Value) Byte |= 0x80;
-      emitByte(Byte);
-    } while (Value);
-  }
-  
-  /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
-  /// written to the output stream.
-  void emitSLEB128Bytes(uint64_t Value) {
-    uint64_t Sign = Value >> (8 * sizeof(Value) - 1);
-    bool IsMore;
-  
-    do {
-      uint8_t Byte = Value & 0x7f;
-      Value >>= 7;
-      IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
-      if (IsMore) Byte |= 0x80;
-      emitByte(Byte);
-    } while (IsMore);
-  }
-
-  /// emitString - This callback is invoked when a String needs to be
-  /// written to the output stream.
-  void emitString(const std::string &String) {
-    for (unsigned i = 0, N = static_cast<unsigned>(String.size());
-         i < N; ++i) {
-      uint8_t C = String[i];
-      emitByte(C);
-    }
-    emitByte(0);
-  }
-  
-  /// emitInt32 - Emit a int32 directive.
-  void emitInt32(int32_t Value) {
-    if (4 <= BufferEnd-CurBufferPtr) {
-      *((uint32_t*)CurBufferPtr) = Value;
-      CurBufferPtr += 4;
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-
-  /// emitInt64 - Emit a int64 directive.
-  void emitInt64(uint64_t Value) {
-    if (8 <= BufferEnd-CurBufferPtr) {
-      *((uint64_t*)CurBufferPtr) = Value;
-      CurBufferPtr += 8;
-    } else {
-      CurBufferPtr = BufferEnd;
-    }
-  }
-  
-  /// emitInt32At - Emit the Int32 Value in Addr.
-  void emitInt32At(uintptr_t *Addr, uintptr_t Value) {
-    if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd)
-      (*(uint32_t*)Addr) = (uint32_t)Value;
-  }
-  
-  /// emitInt64At - Emit the Int64 Value in Addr.
-  void emitInt64At(uintptr_t *Addr, uintptr_t Value) {
-    if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd)
-      (*(uint64_t*)Addr) = (uint64_t)Value;
-  }
-  
-  /// processDebugLoc - Records debug location information about a
-  /// MachineInstruction.  This is called before emitting any bytes associated
-  /// with the instruction.  Even if successive instructions have the same debug
-  /// location, this method will be called for each one.
-  virtual void processDebugLoc(DebugLoc DL, bool BeforePrintintInsn) {}
-
-  /// emitLabel - Emits a label
-  virtual void emitLabel(MCSymbol *Label) = 0;
-
-  /// allocateSpace - Allocate a block of space in the current output buffer,
-  /// returning null (and setting conditions to indicate buffer overflow) on
-  /// failure.  Alignment is the alignment in bytes of the buffer desired.
-  virtual void *allocateSpace(uintptr_t Size, unsigned Alignment) {
-    emitAlignment(Alignment);
-    void *Result;
-    
-    // Check for buffer overflow.
-    if (Size >= (uintptr_t)(BufferEnd-CurBufferPtr)) {
-      CurBufferPtr = BufferEnd;
-      Result = nullptr;
-    } else {
-      // Allocate the space.
-      Result = CurBufferPtr;
-      CurBufferPtr += Size;
-    }
-    
-    return Result;
-  }
-
-  /// StartMachineBasicBlock - This should be called by the target when a new
-  /// basic block is about to be emitted.  This way the MCE knows where the
-  /// start of the block is, and can implement getMachineBasicBlockAddress.
-  virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) = 0;
-  
-  /// getCurrentPCValue - This returns the address that the next emitted byte
-  /// will be output to.
-  ///
-  virtual uintptr_t getCurrentPCValue() const {
-    return (uintptr_t)CurBufferPtr;
-  }
-
-  /// getCurrentPCOffset - Return the offset from the start of the emitted
-  /// buffer that we are currently writing to.
-  virtual uintptr_t getCurrentPCOffset() const {
-    return CurBufferPtr-BufferBegin;
-  }
-
-  /// earlyResolveAddresses - True if the code emitter can use symbol addresses 
-  /// during code emission time. The JIT is capable of doing this because it
-  /// creates jump tables or constant pools in memory on the fly while the
-  /// object code emitters rely on a linker to have real addresses and should
-  /// use relocations instead.
-  virtual bool earlyResolveAddresses() const = 0;
-
-  /// addRelocation - Whenever a relocatable address is needed, it should be
-  /// noted with this interface.
-  virtual void addRelocation(const MachineRelocation &MR) = 0;
-  
-  /// FIXME: These should all be handled with relocations!
-  
-  /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
-  /// the constant pool that was last emitted with the emitConstantPool method.
-  ///
-  virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const = 0;
-
-  /// getJumpTableEntryAddress - Return the address of the jump table with index
-  /// 'Index' in the function that last called initJumpTableInfo.
-  ///
-  virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const = 0;
-  
-  /// getMachineBasicBlockAddress - Return the address of the specified
-  /// MachineBasicBlock, only usable after the label for the MBB has been
-  /// emitted.
-  ///
-  virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const= 0;
-
-  /// getLabelAddress - Return the address of the specified Label, only usable
-  /// after the LabelID has been emitted.
-  ///
-  virtual uintptr_t getLabelAddress(MCSymbol *Label) const = 0;
-  
-  /// Specifies the MachineModuleInfo object. This is used for exception handling
-  /// purposes.
-  virtual void setModuleInfo(MachineModuleInfo* Info) = 0;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/CodeGen/MachineCodeInfo.h b/include/llvm/CodeGen/MachineCodeInfo.h
deleted file mode 100644
index 820bc87425b9..000000000000
--- a/include/llvm/CodeGen/MachineCodeInfo.h
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- MachineCodeInfo.h - Class used to report JIT info -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines MachineCodeInfo, a class used by the JIT ExecutionEngine
-// to report information about the generated machine code.
-//
-// See JIT::runJITOnFunction for usage.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_MACHINECODEINFO_H
-#define LLVM_CODEGEN_MACHINECODEINFO_H
-
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-class MachineCodeInfo {
-private:
-  size_t Size;   // Number of bytes in memory used
-  void *Address; // The address of the function in memory
-
-public:
-  MachineCodeInfo() : Size(0), Address(nullptr) {}
-
-  void setSize(size_t s) {
-    Size = s;
-  }
-
-  void setAddress(void *a) {
-    Address = a;
-  }
-
-  size_t size() const {
-    return Size;
-  }
-
-  void *address() const {
-    return Address;
-  }
-
-};
-
-}
-
-#endif
-
diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h
new file mode 100644
index 000000000000..176af14dc317
--- /dev/null
+++ b/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -0,0 +1,29 @@
+//===-- llvm/CodeGen/MachineCombinerPattern.h - Instruction pattern supported by
+// combiner  ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines instruction pattern supported by combiner
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINECOMBINERPATTERN_H
+#define LLVM_CODEGEN_MACHINECOMBINERPATTERN_H
+
+namespace llvm {
+
+/// Enumeration of instruction pattern supported by machine combiner
+///
+///
+namespace MachineCombinerPattern {
+// Forward declaration
+enum MC_PATTERN : int;
+} // end namespace MachineCombinerPattern
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index f1ae0bf5f9cf..a6980a6daeac 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_CODEGEN_MACHINEDOMINATORS_H
 #define LLVM_CODEGEN_MACHINEDOMINATORS_H
 
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -38,6 +39,103 @@ typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
 /// compute a normal dominator tree.
 ///
 class MachineDominatorTree : public MachineFunctionPass {
+  /// \brief Helper structure used to hold all the basic blocks
+  /// involved in the split of a critical edge.
+  struct CriticalEdge {
+    MachineBasicBlock *FromBB;
+    MachineBasicBlock *ToBB;
+    MachineBasicBlock *NewBB;
+    CriticalEdge(MachineBasicBlock *FromBB, MachineBasicBlock *ToBB,
+                 MachineBasicBlock *NewBB)
+        : FromBB(FromBB), ToBB(ToBB), NewBB(NewBB) {}
+  };
+
+  /// \brief Pile up all the critical edges to be split.
+  /// The splitting of a critical edge is local and thus, it is possible
+  /// to apply several of those changes at the same time.
+  mutable SmallVector<CriticalEdge, 32> CriticalEdgesToSplit;
+  /// \brief Remember all the basic blocks that are inserted during
+  /// edge splitting.
+  /// Invariant: NewBBs == all the basic blocks contained in the NewBB
+  /// field of all the elements of CriticalEdgesToSplit.
+  /// I.e., forall elt in CriticalEdgesToSplit, it exists BB in NewBBs
+  /// such as BB == elt.NewBB.
+  mutable SmallSet<MachineBasicBlock *, 32> NewBBs;
+
+  /// \brief Apply all the recorded critical edges to the DT.
+  /// This updates the underlying DT information in a way that uses
+  /// the fast query path of DT as much as possible.
+  ///
+  /// \post CriticalEdgesToSplit.empty().
+  void applySplitCriticalEdges() const {
+    // Bail out early if there is nothing to do.
+    if (CriticalEdgesToSplit.empty())
+      return;
+
+    // For each element in CriticalEdgesToSplit, remember whether or
+    // not element is the new immediate domminator of its successor.
+    // The mapping is done by index, i.e., the information for the ith
+    // element of CriticalEdgesToSplit is the ith element of IsNewIDom.
+    SmallVector<bool, 32> IsNewIDom;
+    IsNewIDom.resize(CriticalEdgesToSplit.size());
+    size_t Idx = 0;
+
+    // Collect all the dominance properties info, before invalidating
+    // the underlying DT.
+    for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+      // Update dominator information.
+      MachineBasicBlock *Succ = Edge.ToBB;
+      MachineDomTreeNode *SucccDTNode = DT->getNode(Succ);
+
+      IsNewIDom[Idx] = true;
+      for (MachineBasicBlock *PredBB : Succ->predecessors()) {
+        if (PredBB == Edge.NewBB)
+          continue;
+        // If we are in this situation:
+        // FromBB1        FromBB2
+        //    +              +
+        //   + +            + +
+        //  +   +          +   +
+        // ...  Split1  Split2 ...
+        //           +   +
+        //            + +
+        //             +
+        //            Succ
+        // Instead of checking the domiance property with Split2, we
+        // check it with FromBB2 since Split2 is still unknown of the
+        // underlying DT structure.
+        if (NewBBs.count(PredBB)) {
+          assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
+                                             "critical edge split has more "
+                                             "than one predecessor!");
+          PredBB = *PredBB->pred_begin();
+        }
+        if (!DT->dominates(SucccDTNode, DT->getNode(PredBB))) {
+          IsNewIDom[Idx] = false;
+          break;
+        }
+      }
+      ++Idx;
+    }
+
+    // Now, update DT with the collected dominance properties info.
+    Idx = 0;
+    for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+      // We know FromBB dominates NewBB.
+      MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+      MachineDomTreeNode *SucccDTNode = DT->getNode(Edge.ToBB);
+
+      // If all the other predecessors of "Succ" are dominated by "Succ" itself
+      // then the new block is the new immediate dominator of "Succ". Otherwise,
+      // the new block doesn't dominate anything.
+      if (IsNewIDom[Idx])
+        DT->changeImmediateDominator(SucccDTNode, NewDTNode);
+      ++Idx;
+    }
+    NewBBs.clear();
+    CriticalEdgesToSplit.clear();
+  }
+
 public:
   static char ID; // Pass ID, replacement for typeid
   DominatorTreeBase<MachineBasicBlock>* DT;
@@ -46,7 +144,10 @@ public:
 
   ~MachineDominatorTree();
 
-  DominatorTreeBase<MachineBasicBlock>& getBase() { return *DT; }
+  DominatorTreeBase<MachineBasicBlock> &getBase() {
+    applySplitCriticalEdges();
+    return *DT;
+  }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
@@ -55,14 +156,17 @@ public:
   /// dominators, this will always be a single block (the entry node).
   ///
   inline const std::vector<MachineBasicBlock*> &getRoots() const {
+    applySplitCriticalEdges();
     return DT->getRoots();
   }
 
   inline MachineBasicBlock *getRoot() const {
+    applySplitCriticalEdges();
     return DT->getRoot();
   }
 
   inline MachineDomTreeNode *getRootNode() const {
+    applySplitCriticalEdges();
     return DT->getRootNode();
   }
 
@@ -70,17 +174,20 @@ public:
 
   inline bool dominates(const MachineDomTreeNode* A,
                         const MachineDomTreeNode* B) const {
+    applySplitCriticalEdges();
     return DT->dominates(A, B);
   }
 
   inline bool dominates(const MachineBasicBlock* A,
                         const MachineBasicBlock* B) const {
+    applySplitCriticalEdges();
     return DT->dominates(A, B);
   }
 
   // dominates - Return true if A dominates B. This performs the
   // special checks necessary if A and B are in the same basic block.
   bool dominates(const MachineInstr *A, const MachineInstr *B) const {
+    applySplitCriticalEdges();
     const MachineBasicBlock *BBA = A->getParent(), *BBB = B->getParent();
     if (BBA != BBB) return DT->dominates(BBA, BBB);
 
@@ -100,11 +207,13 @@ public:
 
   inline bool properlyDominates(const MachineDomTreeNode* A,
                                 const MachineDomTreeNode* B) const {
+    applySplitCriticalEdges();
     return DT->properlyDominates(A, B);
   }
 
   inline bool properlyDominates(const MachineBasicBlock* A,
                                 const MachineBasicBlock* B) const {
+    applySplitCriticalEdges();
     return DT->properlyDominates(A, B);
   }
 
@@ -112,10 +221,12 @@ public:
   /// for basic block A and B. If there is no such block then return NULL.
   inline MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
                                                        MachineBasicBlock *B) {
+    applySplitCriticalEdges();
     return DT->findNearestCommonDominator(A, B);
   }
 
   inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+    applySplitCriticalEdges();
     return DT->getNode(BB);
   }
 
@@ -123,6 +234,7 @@ public:
   /// block.  This is the same as using operator[] on this class.
   ///
   inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+    applySplitCriticalEdges();
     return DT->getNode(BB);
   }
 
@@ -131,6 +243,7 @@ public:
   /// the children list of the immediate dominator.
   inline MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB,
                                          MachineBasicBlock *DomBB) {
+    applySplitCriticalEdges();
     return DT->addNewBlock(BB, DomBB);
   }
 
@@ -139,11 +252,13 @@ public:
   ///
   inline void changeImmediateDominator(MachineBasicBlock *N,
                                        MachineBasicBlock* NewIDom) {
+    applySplitCriticalEdges();
     DT->changeImmediateDominator(N, NewIDom);
   }
 
   inline void changeImmediateDominator(MachineDomTreeNode *N,
                                        MachineDomTreeNode* NewIDom) {
+    applySplitCriticalEdges();
     DT->changeImmediateDominator(N, NewIDom);
   }
 
@@ -151,24 +266,49 @@ public:
   /// dominate any other blocks. Removes node from its immediate dominator's
   /// children list. Deletes dominator node associated with basic block BB.
   inline void eraseNode(MachineBasicBlock *BB) {
+    applySplitCriticalEdges();
     DT->eraseNode(BB);
   }
 
   /// splitBlock - BB is split and now it has one successor. Update dominator
   /// tree to reflect this change.
   inline void splitBlock(MachineBasicBlock* NewBB) {
+    applySplitCriticalEdges();
     DT->splitBlock(NewBB);
   }
 
   /// isReachableFromEntry - Return true if A is dominated by the entry
   /// block of the function containing it.
   bool isReachableFromEntry(const MachineBasicBlock *A) {
+    applySplitCriticalEdges();
     return DT->isReachableFromEntry(A);
   }
 
   void releaseMemory() override;
 
   void print(raw_ostream &OS, const Module*) const override;
+
+  /// \brief Record that the critical edge (FromBB, ToBB) has been
+  /// split with NewBB.
+  /// This is best to use this method instead of directly update the
+  /// underlying information, because this helps mitigating the
+  /// number of time the DT information is invalidated.
+  ///
+  /// \note Do not use this method with regular edges.
+  ///
+  /// \note To benefit from the compile time improvement incurred by this
+  /// method, the users of this method have to limit the queries to the DT
+  /// interface between two edges splitting. In other words, they have to
+  /// pack the splitting of critical edges as much as possible.
+  void recordSplitCriticalEdge(MachineBasicBlock *FromBB,
+                              MachineBasicBlock *ToBB,
+                              MachineBasicBlock *NewBB) {
+    bool Inserted = NewBBs.insert(NewBB).second;
+    (void)Inserted;
+    assert(Inserted &&
+           "A basic block inserted via edge splitting cannot appear twice");
+    CriticalEdgesToSplit.push_back(CriticalEdge(FromBB, ToBB, NewBB));
+  }
 };
 
 //===-------------------------------------
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index c51f8fe03bbf..667736021f92 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -109,13 +109,23 @@ class MachineFrameInfo {
     // block and doesn't need additional handling for allocation beyond that.
     bool PreAllocated;
 
+    // If true, an LLVM IR value might point to this object.
+    // Normally, spill slots and fixed-offset objects don't alias IR-accessible
+    // objects, but there are exceptions (on PowerPC, for example, some byval
+    // arguments have ABI-prescribed offsets).
+    bool isAliased;
+
     StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
-                bool isSS, const AllocaInst *Val)
+                bool isSS, const AllocaInst *Val, bool A)
       : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
-        isSpillSlot(isSS), Alloca(Val), PreAllocated(false) {}
+        isSpillSlot(isSS), Alloca(Val), PreAllocated(false), isAliased(A) {}
   };
 
-  const TargetMachine &TM;
+  /// StackAlignment - The alignment of the stack.
+  unsigned StackAlignment;
+
+  /// StackRealignable - Can the stack be realigned.
+  bool StackRealignable;
 
   /// Objects - The list of stack objects allocated...
   ///
@@ -230,10 +240,17 @@ class MachineFrameInfo {
   /// pointer.
   bool HasInlineAsmWithSPAdjust;
 
-  const TargetFrameLowering *getFrameLowering() const;
+  /// True if the function contains a call to the llvm.vastart intrinsic.
+  bool HasVAStart;
+
+  /// True if this is a varargs function that contains a musttail call.
+  bool HasMustTailInVarArgFunc;
+
 public:
-    explicit MachineFrameInfo(const TargetMachine &TM, bool RealignOpt)
-    : TM(TM), RealignOption(RealignOpt) {
+  explicit MachineFrameInfo(unsigned StackAlign, bool isStackRealign,
+                            bool RealignOpt)
+      : StackAlignment(StackAlign), StackRealignable(isStackRealign),
+        RealignOption(RealignOpt) {
     StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
     HasVarSizedObjects = false;
     FrameAddressTaken = false;
@@ -250,6 +267,8 @@ public:
     LocalFrameMaxAlign = 0;
     UseLocalStackAllocationBlock = false;
     HasInlineAsmWithSPAdjust = false;
+    HasVAStart = false;
+    HasMustTailInVarArgFunc = false;
   }
 
   /// hasStackObjects - Return true if there are any stack objects in this
@@ -469,6 +488,14 @@ public:
   bool hasInlineAsmWithSPAdjust() const { return HasInlineAsmWithSPAdjust; }
   void setHasInlineAsmWithSPAdjust(bool B) { HasInlineAsmWithSPAdjust = B; }
 
+  /// Returns true if the function calls the llvm.va_start intrinsic.
+  bool hasVAStart() const { return HasVAStart; }
+  void setHasVAStart(bool B) { HasVAStart = B; }
+
+  /// Returns true if the function is variadic and contains a musttail call.
+  bool hasMustTailInVarArgFunc() const { return HasMustTailInVarArgFunc; }
+  void setHasMustTailInVarArgFunc(bool B) { HasMustTailInVarArgFunc = B; }
+
   /// getMaxCallFrameSize - Return the maximum size of a call frame that must be
   /// allocated for an outgoing function call.  This is only available if
   /// CallFrameSetup/Destroy pseudo instructions are used by the target, and
@@ -479,21 +506,34 @@ public:
 
   /// CreateFixedObject - Create a new object at a fixed location on the stack.
   /// All fixed objects should be created before other objects are created for
-  /// efficiency. By default, fixed objects are immutable. This returns an
-  /// index with a negative value.
+  /// efficiency. By default, fixed objects are not pointed to by LLVM IR
+  /// values. This returns an index with a negative value.
   ///
-  int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable);
+  int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable,
+                        bool isAliased = false);
 
   /// CreateFixedSpillStackObject - Create a spill slot at a fixed location
   /// on the stack.  Returns an index with a negative value.
   int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset);
 
+  /// Allocates memory at a fixed, target-specific offset from the frame
+  /// pointer. Marks the function as having its frame address taken.
+  int CreateFrameAllocation(uint64_t Size);
+
   /// isFixedObjectIndex - Returns true if the specified index corresponds to a
   /// fixed stack object.
   bool isFixedObjectIndex(int ObjectIdx) const {
     return ObjectIdx < 0 && (ObjectIdx >= -(int)NumFixedObjects);
   }
 
+  /// isAliasedObjectIndex - Returns true if the specified index corresponds
+  /// to an object that might be pointed to by an LLVM IR value.
+  bool isAliasedObjectIndex(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx+NumFixedObjects].isAliased;
+  }
+
   /// isImmutableObjectIndex - Returns true if the specified index corresponds
   /// to an immutable object.
   bool isImmutableObjectIndex(int ObjectIdx) const {
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 042c62b4a887..4e9ff9ebb4fe 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -21,6 +21,7 @@
 #include "llvm/ADT/ilist.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ArrayRecycler.h"
 #include "llvm/Support/Recycler.h"
@@ -38,6 +39,7 @@ class MachineModuleInfo;
 class MCContext;
 class Pass;
 class TargetMachine;
+class TargetSubtargetInfo;
 class TargetRegisterClass;
 struct MachinePointerInfo;
 
@@ -70,15 +72,24 @@ private:
 /// MachineFunction is destroyed.
 struct MachineFunctionInfo {
   virtual ~MachineFunctionInfo();
+
+  /// \brief Factory function: default behavior is to call new using the
+  /// supplied allocator.
+  ///
+  /// This function can be overridden in a derive class.
+  template<typename Ty>
+  static Ty *create(BumpPtrAllocator &Allocator, MachineFunction &MF) {
+    return new (Allocator.Allocate<Ty>()) Ty(MF);
+  }
 };
 
 class MachineFunction {
   const Function *Fn;
   const TargetMachine &Target;
+  const TargetSubtargetInfo *STI;
   MCContext &Ctx;
   MachineModuleInfo &MMI;
-  GCModuleInfo *GMI;
-  
+
   // RegInfo - Information about each register in use in the function.
   MachineRegisterInfo *RegInfo;
 
@@ -138,12 +149,10 @@ class MachineFunction {
   void operator=(const MachineFunction&) LLVM_DELETED_FUNCTION;
 public:
   MachineFunction(const Function *Fn, const TargetMachine &TM,
-                  unsigned FunctionNum, MachineModuleInfo &MMI,
-                  GCModuleInfo* GMI);
+                  unsigned FunctionNum, MachineModuleInfo &MMI);
   ~MachineFunction();
 
   MachineModuleInfo &getMMI() const { return MMI; }
-  GCModuleInfo *getGMI() const { return GMI; }
   MCContext &getContext() const { return Ctx; }
 
   /// getFunction - Return the LLVM function that this machine code represents
@@ -162,6 +171,11 @@ public:
   ///
   const TargetMachine &getTarget() const { return Target; }
 
+  /// getSubtarget - Return the subtarget for which this machine code is being
+  /// compiled.
+  const TargetSubtargetInfo &getSubtarget() const { return *STI; }
+  void setSubtarget(const TargetSubtargetInfo *ST) { STI = ST; }
+
   /// getRegInfo - Return information about the registers currently in use.
   ///
   MachineRegisterInfo &getRegInfo() { return *RegInfo; }
@@ -234,7 +248,7 @@ public:
   template<typename Ty>
   Ty *getInfo() {
     if (!MFInfo)
-      MFInfo = new (Allocator.Allocate<Ty>()) Ty(*this);
+      MFInfo = Ty::template create<Ty>(Allocator, *this);
     return static_cast<Ty*>(MFInfo);
   }
 
@@ -399,7 +413,7 @@ public:
   MachineMemOperand *getMachineMemOperand(MachinePointerInfo PtrInfo,
                                           unsigned f, uint64_t s,
                                           unsigned base_alignment,
-                                          const MDNode *TBAAInfo = nullptr,
+                                          const AAMDNodes &AAInfo = AAMDNodes(),
                                           const MDNode *Ranges = nullptr);
   
   /// getMachineMemOperand - Allocate a new MachineMemOperand by copying
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 3c828116411e..bcf1f5caaa8c 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -244,12 +244,22 @@ public:
   ///
   DebugLoc getDebugLoc() const { return debugLoc; }
 
-  /// getDebugVariable() - Return the debug variable referenced by
+  /// \brief Return the debug variable referenced by
   /// this DBG_VALUE instruction.
   DIVariable getDebugVariable() const {
     assert(isDebugValue() && "not a DBG_VALUE");
-    const MDNode *Var = getOperand(getNumOperands() - 1).getMetadata();
-    return DIVariable(Var);
+    DIVariable Var(getOperand(2).getMetadata());
+    assert(Var.Verify() && "not a DIVariable");
+    return Var;
+  }
+
+  /// \brief Return the complex address expression referenced by
+  /// this DBG_VALUE instruction.
+  DIExpression getDebugExpression() const {
+    assert(isDebugValue() && "not a DBG_VALUE");
+    DIExpression Expr(getOperand(3).getMetadata());
+    assert(Expr.Verify() && "not a DIExpression");
+    return Expr;
   }
 
   /// emitError - Emit an error referring to the source location of this
@@ -510,6 +520,49 @@ public:
     return hasProperty(MCID::FoldableAsLoad, Type);
   }
 
+  /// \brief Return true if this instruction behaves
+  /// the same way as the generic REG_SEQUENCE instructions.
+  /// E.g., on ARM,
+  /// dX VMOVDRR rY, rZ
+  /// is equivalent to
+  /// dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1.
+  ///
+  /// Note that for the optimizers to be able to take advantage of
+  /// this property, TargetInstrInfo::getRegSequenceLikeInputs has to be
+  /// override accordingly.
+  bool isRegSequenceLike(QueryType Type = IgnoreBundle) const {
+    return hasProperty(MCID::RegSequence, Type);
+  }
+
+  /// \brief Return true if this instruction behaves
+  /// the same way as the generic EXTRACT_SUBREG instructions.
+  /// E.g., on ARM,
+  /// rX, rY VMOVRRD dZ
+  /// is equivalent to two EXTRACT_SUBREG:
+  /// rX = EXTRACT_SUBREG dZ, ssub_0
+  /// rY = EXTRACT_SUBREG dZ, ssub_1
+  ///
+  /// Note that for the optimizers to be able to take advantage of
+  /// this property, TargetInstrInfo::getExtractSubregLikeInputs has to be
+  /// override accordingly.
+  bool isExtractSubregLike(QueryType Type = IgnoreBundle) const {
+    return hasProperty(MCID::ExtractSubreg, Type);
+  }
+
+  /// \brief Return true if this instruction behaves
+  /// the same way as the generic INSERT_SUBREG instructions.
+  /// E.g., on ARM,
+  /// dX = VSETLNi32 dY, rZ, Imm
+  /// is equivalent to a INSERT_SUBREG:
+  /// dX = INSERT_SUBREG dY, rZ, translateImmToSubIdx(Imm)
+  ///
+  /// Note that for the optimizers to be able to take advantage of
+  /// this property, TargetInstrInfo::getInsertSubregLikeInputs has to be
+  /// override accordingly.
+  bool isInsertSubregLike(QueryType Type = IgnoreBundle) const {
+    return hasProperty(MCID::InsertSubreg, Type);
+  }
+
   //===--------------------------------------------------------------------===//
   // Side Effect Analysis
   //===--------------------------------------------------------------------===//
@@ -614,7 +667,6 @@ public:
   /// are not marking copies from and to the same register class with this flag.
   bool isAsCheapAsAMove(QueryType Type = AllInBundle) const {
     // Only returns true for a bundle if all bundled instructions are cheap.
-    // FIXME: This probably requires a target hook.
     return hasProperty(MCID::CheapAsAMove, Type);
   }
 
@@ -672,6 +724,12 @@ public:
   /// eraseFromBundle() to erase individual bundled instructions.
   void eraseFromParent();
 
+  /// Unlink 'this' from the containing basic block and delete it.
+  ///
+  /// For all definitions mark their uses in DBG_VALUE nodes
+  /// as undefined. Otherwise like eraseFromParent().
+  void eraseFromParentAndMarkDBGValuesForRemoval();
+
   /// Unlink 'this' form its basic block and delete it.
   ///
   /// If the instruction is part of a bundle, the other instructions in the
@@ -1081,7 +1139,10 @@ public:
   /// setDebugLoc - Replace current source information with new such.
   /// Avoid using this, the constructor argument is preferable.
   ///
-  void setDebugLoc(const DebugLoc dl) { debugLoc = dl; }
+  void setDebugLoc(const DebugLoc dl) {
+    debugLoc = dl;
+    assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+  }
 
   /// RemoveOperand - Erase an operand  from an instruction, leaving it with one
   /// fewer operand than it started with.
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 21a482cdbd4c..8859b6a019ea 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -58,6 +58,10 @@ public:
   MachineInstr *operator->() const { return MI; }
   operator MachineBasicBlock::iterator() const { return MI; }
 
+  /// If conversion operators fail, use this method to get the MachineInstr
+  /// explicitly.
+  MachineInstr *getInstr() const { return MI; }
+
   /// addReg - Add a new virtual register operand...
   ///
   const
@@ -170,6 +174,8 @@ public:
 
   const MachineInstrBuilder &addMetadata(const MDNode *MD) const {
     MI->addOperand(*MF, MachineOperand::CreateMetadata(MD));
+    assert((MI->isDebugValue() ? MI->getDebugVariable().Verify() : true) &&
+           "first MDNode argument of a DBG_VALUE not a DIVariable");
     return *this;
   }
 
@@ -345,24 +351,25 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
 /// address.  The convention is that a DBG_VALUE is indirect iff the
 /// second operand is an immediate.
 ///
-inline MachineInstrBuilder BuildMI(MachineFunction &MF,
-                                   DebugLoc DL,
-                                   const MCInstrDesc &MCID,
-                                   bool IsIndirect,
-                                   unsigned Reg,
-                                   unsigned Offset,
-                                   const MDNode *MD) {
+inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL,
+                                   const MCInstrDesc &MCID, bool IsIndirect,
+                                   unsigned Reg, unsigned Offset,
+                                   const MDNode *Variable, const MDNode *Expr) {
+  assert(DIVariable(Variable).Verify() && "not a DIVariable");
+  assert(DIExpression(Expr).Verify() && "not a DIExpression");
   if (IsIndirect)
     return BuildMI(MF, DL, MCID)
-      .addReg(Reg, RegState::Debug)
-      .addImm(Offset)
-      .addMetadata(MD);
+        .addReg(Reg, RegState::Debug)
+        .addImm(Offset)
+        .addMetadata(Variable)
+        .addMetadata(Expr);
   else {
     assert(Offset == 0 && "A direct address cannot have an offset.");
     return BuildMI(MF, DL, MCID)
-      .addReg(Reg, RegState::Debug)
-      .addReg(0U, RegState::Debug)
-      .addMetadata(MD);
+        .addReg(Reg, RegState::Debug)
+        .addReg(0U, RegState::Debug)
+        .addMetadata(Variable)
+        .addMetadata(Expr);
   }
 }
 
@@ -371,15 +378,15 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF,
 /// address and inserts it at position I.
 ///
 inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
-                                   MachineBasicBlock::iterator I,
-                                   DebugLoc DL,
-                                   const MCInstrDesc &MCID,
-                                   bool IsIndirect,
-                                   unsigned Reg,
-                                   unsigned Offset,
-                                   const MDNode *MD) {
+                                   MachineBasicBlock::iterator I, DebugLoc DL,
+                                   const MCInstrDesc &MCID, bool IsIndirect,
+                                   unsigned Reg, unsigned Offset,
+                                   const MDNode *Variable, const MDNode *Expr) {
+  assert(DIVariable(Variable).Verify() && "not a DIVariable");
+  assert(DIExpression(Expr).Verify() && "not a DIExpression");
   MachineFunction &MF = *BB.getParent();
-  MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, MD);
+  MachineInstr *MI =
+      BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, Variable, Expr);
   BB.insert(I, MI);
   return MachineInstrBuilder(MF, MI);
 }
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 2532c16271f0..eb5086cbe5a5 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -18,6 +18,7 @@
 
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Value.h"  // PointerLikeTypeTraits<Value*>
 #include "llvm/Support/DataTypes.h"
 
@@ -91,7 +92,7 @@ class MachineMemOperand {
   MachinePointerInfo PtrInfo;
   uint64_t Size;
   unsigned Flags;
-  const MDNode *TBAAInfo;
+  AAMDNodes AAInfo;
   const MDNode *Ranges;
 
 public:
@@ -117,7 +118,8 @@ public:
   /// MachineMemOperand - Construct an MachineMemOperand object with the
   /// specified PtrInfo, flags, size, and base alignment.
   MachineMemOperand(MachinePointerInfo PtrInfo, unsigned flags, uint64_t s,
-                    unsigned base_alignment, const MDNode *TBAAInfo = nullptr,
+                    unsigned base_alignment,
+                    const AAMDNodes &AAInfo = AAMDNodes(),
                     const MDNode *Ranges = nullptr);
 
   const MachinePointerInfo &getPointerInfo() const { return PtrInfo; }
@@ -161,8 +163,8 @@ public:
   /// base address, without the offset.
   uint64_t getBaseAlignment() const { return (1u << (Flags >> MOMaxBits)) >> 1; }
 
-  /// getTBAAInfo - Return the TBAA tag for the memory reference.
-  const MDNode *getTBAAInfo() const { return TBAAInfo; }
+  /// getAAInfo - Return the AA tags for the memory reference.
+  AAMDNodes getAAInfo() const { return AAInfo; }
 
   /// getRanges - Return the range tag for the memory reference.
   const MDNode *getRanges() const { return Ranges; }
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 6d8d05684c56..f0d0b2dbcdbc 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -66,6 +66,7 @@ struct LandingPadInfo {
   MachineBasicBlock *LandingPadBlock;    // Landing pad block.
   SmallVector<MCSymbol*, 1> BeginLabels; // Labels prior to invoke.
   SmallVector<MCSymbol*, 1> EndLabels;   // Labels after invoke.
+  SmallVector<MCSymbol*, 1> ClauseLabels; // Labels for each clause.
   MCSymbol *LandingPadLabel;             // Label at beginning of landing pad.
   const Function *Personality;           // Personality function.
   std::vector<int> TypeIds;              // List of type ids (filters negative)
@@ -110,10 +111,6 @@ class MachineModuleInfo : public ImmutablePass {
   /// by debug and exception handling consumers.
   std::vector<MCCFIInstruction> FrameInstructions;
 
-  /// CompactUnwindEncoding - If the target supports it, this is the compact
-  /// unwind encoding. It replaces a function's CIE and FDE.
-  uint32_t CompactUnwindEncoding;
-
   /// LandingPads - List of LandingPadInfo describing the landing pad
   /// information in the current function.
   std::vector<LandingPadInfo> LandingPads;
@@ -131,7 +128,7 @@ class MachineModuleInfo : public ImmutablePass {
   unsigned CurCallSite;
 
   /// TypeInfos - List of C++ TypeInfo used in the current function.
-  std::vector<const GlobalVariable *> TypeInfos;
+  std::vector<const GlobalValue *> TypeInfos;
 
   /// FilterIds - List of typeids encoding filters used in the current function.
   std::vector<unsigned> FilterIds;
@@ -165,13 +162,24 @@ class MachineModuleInfo : public ImmutablePass {
   /// to _fltused on Windows targets.
   bool UsesVAFloatArgument;
 
+  /// UsesMorestackAddr - True if the module calls the __morestack function
+  /// indirectly, as is required under the large code model on x86. This is used
+  /// to emit a definition of a symbol, __morestack_addr, containing the
+  /// address. See comments in lib/Target/X86/X86FrameLowering.cpp for more
+  /// details.
+  bool UsesMorestackAddr;
+
 public:
   static char ID; // Pass identification, replacement for typeid
 
   struct VariableDbgInfo {
-    TrackingVH<MDNode> Var;
+    TrackingMDNodeRef Var;
+    TrackingMDNodeRef Expr;
     unsigned Slot;
     DebugLoc Loc;
+
+    VariableDbgInfo(MDNode *Var, MDNode *Expr, unsigned Slot, DebugLoc Loc)
+        : Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {}
   };
   typedef SmallVector<VariableDbgInfo, 4> VariableDbgInfoMapTy;
   VariableDbgInfoMapTy VariableDbgInfos;
@@ -234,6 +242,14 @@ public:
     UsesVAFloatArgument = b;
   }
 
+  bool usesMorestackAddr() const {
+    return UsesMorestackAddr;
+  }
+
+  void setUsesMorestackAddr(bool b) {
+    UsesMorestackAddr = b;
+  }
+
   /// \brief Returns a reference to a list of cfi instructions in the current
   /// function's prologue.  Used to construct frame maps for debug and exception
   /// handling comsumers.
@@ -247,15 +263,6 @@ public:
     return FrameInstructions.size() - 1;
   }
 
-  /// getCompactUnwindEncoding - Returns the compact unwind encoding for a
-  /// function if the target supports the encoding. This encoding replaces a
-  /// function's CIE and FDE.
-  uint32_t getCompactUnwindEncoding() const { return CompactUnwindEncoding; }
-
-  /// setCompactUnwindEncoding - Set the compact unwind encoding for a function
-  /// if the target supports the encoding.
-  void setCompactUnwindEncoding(uint32_t Enc) { CompactUnwindEncoding = Enc; }
-
   /// getAddrLabelSymbol - Return the symbol to be used for the specified basic
   /// block when its address is taken.  This cannot be its normal LBB label
   /// because the block may be accessed outside its containing function.
@@ -313,20 +320,25 @@ public:
   /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
   ///
   void addCatchTypeInfo(MachineBasicBlock *LandingPad,
-                        ArrayRef<const GlobalVariable *> TyInfo);
+                        ArrayRef<const GlobalValue *> TyInfo);
 
   /// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
   ///
   void addFilterTypeInfo(MachineBasicBlock *LandingPad,
-                         ArrayRef<const GlobalVariable *> TyInfo);
+                         ArrayRef<const GlobalValue *> TyInfo);
 
   /// addCleanup - Add a cleanup action for a landing pad.
   ///
   void addCleanup(MachineBasicBlock *LandingPad);
 
+  /// Add a clause for a landing pad. Returns a new label for the clause. This
+  /// is used by EH schemes that have more than one landing pad. In this case,
+  /// each clause gets its own basic block.
+  MCSymbol *addClauseForLandingPad(MachineBasicBlock *LandingPad);
+
   /// getTypeIDFor - Return the type id for the specified typeinfo.  This is
   /// function wide.
-  unsigned getTypeIDFor(const GlobalVariable *TI);
+  unsigned getTypeIDFor(const GlobalValue *TI);
 
   /// getFilterIDFor - Return the id of the filter encoded by TyIds.  This is
   /// function wide.
@@ -387,7 +399,7 @@ public:
 
   /// getTypeInfos - Return a reference to the C++ typeinfo for the current
   /// function.
-  const std::vector<const GlobalVariable *> &getTypeInfos() const {
+  const std::vector<const GlobalValue *> &getTypeInfos() const {
     return TypeInfos;
   }
 
@@ -403,9 +415,9 @@ public:
 
   /// setVariableDbgInfo - Collect information used to emit debugging
   /// information of a variable.
-  void setVariableDbgInfo(MDNode *N, unsigned Slot, DebugLoc Loc) {
-    VariableDbgInfo Info = { N, Slot, Loc };
-    VariableDbgInfos.push_back(std::move(Info));
+  void setVariableDbgInfo(MDNode *Var, MDNode *Expr, unsigned Slot,
+                          DebugLoc Loc) {
+    VariableDbgInfos.emplace_back(Var, Expr, Slot, Loc);
   }
 
   VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfos; }
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 22969bc80776..eed1e575f93b 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -506,6 +506,11 @@ public:
     Contents.ImmVal = immVal;
   }
 
+  void setFPImm(const ConstantFP *CFP) {
+    assert(isFPImm() && "Wrong MachineOperand mutator");
+    Contents.CFP = CFP;
+  }
+
   void setOffset(int64_t Offset) {
     assert((isGlobal() || isSymbol() || isCPI() || isTargetIndex() ||
             isBlockAddress()) && "Wrong MachineOperand accessor");
@@ -544,6 +549,11 @@ public:
   /// the setImm method should be used.
   void ChangeToImmediate(int64_t ImmVal);
 
+  /// ChangeToFPImmediate - Replace this operand with a new FP immediate operand
+  /// of the specified value.  If an operand is known to be an FP immediate
+  /// already, the setFPImm method should be used.
+  void ChangeToFPImmediate(const ConstantFP *FPImm);
+
   /// ChangeToRegister - Replace this operand with a new register operand of
   /// the specified value.  If an operand is known to be an register already,
   /// the setReg method should be used.
@@ -702,6 +712,8 @@ public:
   friend class MachineInstr;
   friend class MachineRegisterInfo;
 private:
+  void removeRegFromUses();
+
   //===--------------------------------------------------------------------===//
   // Methods for handling register use/def lists.
   //===--------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h
index beb2c4f0c5c0..aab5c407629f 100644
--- a/include/llvm/CodeGen/MachinePostDominators.h
+++ b/include/llvm/CodeGen/MachinePostDominators.h
@@ -22,7 +22,7 @@ namespace llvm {
 
 ///
 /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
-/// to compute the a post-dominator tree.
+/// to compute the post-dominator tree.
 ///
 struct MachinePostDominatorTree : public MachineFunctionPass {
 private:
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 51139f72ba22..caa48a5cf0cf 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -17,9 +17,10 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <vector>
 
 namespace llvm {
@@ -39,7 +40,7 @@ public:
   };
 
 private:
-  const TargetMachine &TM;
+  const MachineFunction *MF;
   Delegate *TheDelegate;
 
   /// IsSSA - True when the machine function is in SSA form and virtual
@@ -51,6 +52,9 @@ private:
   /// accurate when after this flag is cleared.
   bool TracksLiveness;
 
+  /// True if subregister liveness is tracked.
+  bool TracksSubRegLiveness;
+
   /// VRegInfo - Information we keep for each virtual register.
   ///
   /// Each element in this list contains the register class of the vreg and the
@@ -69,7 +73,7 @@ private:
 
   /// PhysRegUseDefLists - This is an array of the head of the use/def list for
   /// physical registers.
-  MachineOperand **PhysRegUseDefLists;
+  std::vector<MachineOperand *> PhysRegUseDefLists;
 
   /// getRegUseDefListHead - Return the head pointer for the register use/def
   /// list for the specified virtual or physical register.
@@ -122,11 +126,10 @@ private:
   MachineRegisterInfo(const MachineRegisterInfo&) LLVM_DELETED_FUNCTION;
   void operator=(const MachineRegisterInfo&) LLVM_DELETED_FUNCTION;
 public:
-  explicit MachineRegisterInfo(const TargetMachine &TM);
-  ~MachineRegisterInfo();
+  explicit MachineRegisterInfo(const MachineFunction *MF);
 
   const TargetRegisterInfo *getTargetRegisterInfo() const {
-    return TM.getRegisterInfo();
+    return MF->getSubtarget().getRegisterInfo();
   }
 
   void resetDelegate(Delegate *delegate) {
@@ -179,6 +182,12 @@ public:
   /// information.
   void invalidateLiveness() { TracksLiveness = false; }
 
+  bool tracksSubRegLiveness() const { return TracksSubRegLiveness; }
+
+  void enableSubRegLiveness(bool Enable = true) {
+    TracksSubRegLiveness = Enable;
+  }
+
   //===--------------------------------------------------------------------===//
   // Register Info
   //===--------------------------------------------------------------------===//
@@ -515,8 +524,12 @@ public:
   ///
   /// That function will return NULL if the virtual registers have incompatible
   /// constraints.
+  ///
+  /// Note that if ToReg is a physical register the function will replace and
+  /// apply sub registers to ToReg in order to obtain a final/proper physical
+  /// register.
   void replaceRegWith(unsigned FromReg, unsigned ToReg);
-
+  
   /// getVRegDef - Return the machine instr that defines the specified virtual
   /// register or null if none is found.  This assumes that the code is in SSA
   /// form, so there should only be one definition.
@@ -764,6 +777,10 @@ public:
                         const TargetRegisterInfo &TRI,
                         const TargetInstrInfo &TII);
 
+  /// Returns a mask covering all bits that can appear in lane masks of
+  /// subregisters of the virtual register @p Reg.
+  unsigned getMaxLaneMaskForVReg(unsigned Reg) const;
+
   /// defusechain_iterator - This class provides iterator support for machine
   /// operands in the function that use or define a specific register.  If
   /// ReturnUses is true it returns uses of registers, if ReturnDefs is true it
diff --git a/include/llvm/CodeGen/MachineRelocation.h b/include/llvm/CodeGen/MachineRelocation.h
deleted file mode 100644
index e77845745165..000000000000
--- a/include/llvm/CodeGen/MachineRelocation.h
+++ /dev/null
@@ -1,342 +0,0 @@
-//===-- llvm/CodeGen/MachineRelocation.h - Target Relocation ----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the MachineRelocation class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_MACHINERELOCATION_H
-#define LLVM_CODEGEN_MACHINERELOCATION_H
-
-#include "llvm/Support/DataTypes.h"
-#include <cassert>
-
-namespace llvm {
-class GlobalValue;
-class MachineBasicBlock;
-
-/// MachineRelocation - This represents a target-specific relocation value,
-/// produced by the code emitter.  This relocation is resolved after the has
-/// been emitted, either to an object file or to memory, when the target of the
-/// relocation can be resolved.
-///
-/// A relocation is made up of the following logical portions:
-///   1. An offset in the machine code buffer, the location to modify.
-///   2. A target specific relocation type (a number from 0 to 63).
-///   3. A symbol being referenced, either as a GlobalValue* or as a string.
-///   4. An optional constant value to be added to the reference.
-///   5. A bit, CanRewrite, which indicates to the JIT that a function stub is
-///      not needed for the relocation.
-///   6. An index into the GOT, if the target uses a GOT
-///
-class MachineRelocation {
-  enum AddressType {
-    isResult,         // Relocation has be transformed into its result pointer.
-    isGV,             // The Target.GV field is valid.
-    isIndirectSym,    // Relocation of an indirect symbol.
-    isBB,             // Relocation of BB address.
-    isExtSym,         // The Target.ExtSym field is valid.
-    isConstPool,      // Relocation of constant pool address.
-    isJumpTable,      // Relocation of jump table address.
-    isGOTIndex        // The Target.GOTIndex field is valid.
-  };
-  
-  /// Offset - This is the offset from the start of the code buffer of the
-  /// relocation to perform.
-  uintptr_t Offset;
-  
-  /// ConstantVal - A field that may be used by the target relocation type.
-  intptr_t ConstantVal;
-
-  union {
-    void *Result;           // If this has been resolved to a resolved pointer
-    GlobalValue *GV;        // If this is a pointer to a GV or an indirect ref.
-    MachineBasicBlock *MBB; // If this is a pointer to an LLVM BB
-    const char *ExtSym;     // If this is a pointer to a named symbol
-    unsigned Index;         // Constant pool / jump table index
-    unsigned GOTIndex;      // Index in the GOT of this symbol/global
-  } Target;
-
-  unsigned TargetReloType : 6; // The target relocation ID
-  AddressType AddrType    : 4; // The field of Target to use
-  bool MayNeedFarStub     : 1; // True if this relocation may require a far-stub
-  bool GOTRelative        : 1; // Should this relocation be relative to the GOT?
-  bool TargetResolve      : 1; // True if target should resolve the address
-
-public:
- // Relocation types used in a generic implementation.  Currently, relocation
- // entries for all things use the generic VANILLA type until they are refined
- // into target relocation types.
-  enum RelocationType {
-    VANILLA
-  };
-  
-  /// MachineRelocation::getGV - Return a relocation entry for a GlobalValue.
-  ///
-  static MachineRelocation getGV(uintptr_t offset, unsigned RelocationType, 
-                                 GlobalValue *GV, intptr_t cst = 0,
-                                 bool MayNeedFarStub = 0,
-                                 bool GOTrelative = 0) {
-    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
-    MachineRelocation Result;
-    Result.Offset = offset;
-    Result.ConstantVal = cst;
-    Result.TargetReloType = RelocationType;
-    Result.AddrType = isGV;
-    Result.MayNeedFarStub = MayNeedFarStub;
-    Result.GOTRelative = GOTrelative;
-    Result.TargetResolve = false;
-    Result.Target.GV = GV;
-    return Result;
-  }
-
-  /// MachineRelocation::getIndirectSymbol - Return a relocation entry for an
-  /// indirect symbol.
-  static MachineRelocation getIndirectSymbol(uintptr_t offset,
-                                             unsigned RelocationType, 
-                                             GlobalValue *GV, intptr_t cst = 0,
-                                             bool MayNeedFarStub = 0,
-                                             bool GOTrelative = 0) {
-    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
-    MachineRelocation Result;
-    Result.Offset = offset;
-    Result.ConstantVal = cst;
-    Result.TargetReloType = RelocationType;
-    Result.AddrType = isIndirectSym;
-    Result.MayNeedFarStub = MayNeedFarStub;
-    Result.GOTRelative = GOTrelative;
-    Result.TargetResolve = false;
-    Result.Target.GV = GV;
-    return Result;
-  }
-
-  /// MachineRelocation::getBB - Return a relocation entry for a BB.
-  ///
-  static MachineRelocation getBB(uintptr_t offset,unsigned RelocationType,
-                                 MachineBasicBlock *MBB, intptr_t cst = 0) {
-    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
-    MachineRelocation Result;
-    Result.Offset = offset;
-    Result.ConstantVal = cst;
-    Result.TargetReloType = RelocationType;
-    Result.AddrType = isBB;
-    Result.MayNeedFarStub = false;
-    Result.GOTRelative = false;
-    Result.TargetResolve = false;
-    Result.Target.MBB = MBB;
-    return Result;
-  }
-
-  /// MachineRelocation::getExtSym - Return a relocation entry for an external
-  /// symbol, like "free".
-  ///
-  static MachineRelocation getExtSym(uintptr_t offset, unsigned RelocationType, 
-                                     const char *ES, intptr_t cst = 0,
-                                     bool GOTrelative = 0,
-                                     bool NeedStub = true) {
-    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
-    MachineRelocation Result;
-    Result.Offset = offset;
-    Result.ConstantVal = cst;
-    Result.TargetReloType = RelocationType;
-    Result.AddrType = isExtSym;
-    Result.MayNeedFarStub = NeedStub;
-    Result.GOTRelative = GOTrelative;
-    Result.TargetResolve = false;
-    Result.Target.ExtSym = ES;
-    return Result;
-  }
-
-  /// MachineRelocation::getConstPool - Return a relocation entry for a constant
-  /// pool entry.
-  ///
-  static MachineRelocation getConstPool(uintptr_t offset,unsigned RelocationType,
-                                        unsigned CPI, intptr_t cst = 0,
-                                        bool letTargetResolve = false) {
-    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
-    MachineRelocation Result;
-    Result.Offset = offset;
-    Result.ConstantVal = cst;
-    Result.TargetReloType = RelocationType;
-    Result.AddrType = isConstPool;
-    Result.MayNeedFarStub = false;
-    Result.GOTRelative = false;
-    Result.TargetResolve = letTargetResolve;
-    Result.Target.Index = CPI;
-    return Result;
-  }
-
-  /// MachineRelocation::getJumpTable - Return a relocation entry for a jump
-  /// table entry.
-  ///
-  static MachineRelocation getJumpTable(uintptr_t offset,unsigned RelocationType,
-                                        unsigned JTI, intptr_t cst = 0,
-                                        bool letTargetResolve = false) {
-    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
-    MachineRelocation Result;
-    Result.Offset = offset;
-    Result.ConstantVal = cst;
-    Result.TargetReloType = RelocationType;
-    Result.AddrType = isJumpTable;
-    Result.MayNeedFarStub = false;
-    Result.GOTRelative = false;
-    Result.TargetResolve = letTargetResolve;
-    Result.Target.Index = JTI;
-    return Result;
-  }
-
-  /// getMachineCodeOffset - Return the offset into the code buffer that the
-  /// relocation should be performed.
-  intptr_t getMachineCodeOffset() const {
-    return Offset;
-  }
-
-  /// getRelocationType - Return the target-specific relocation ID for this
-  /// relocation.
-  unsigned getRelocationType() const {
-    return TargetReloType;
-  }
-
-  /// getConstantVal - Get the constant value associated with this relocation.
-  /// This is often an offset from the symbol.
-  ///
-  intptr_t getConstantVal() const {
-    return ConstantVal;
-  }
-
-  /// setConstantVal - Set the constant value associated with this relocation.
-  /// This is often an offset from the symbol.
-  ///
-  void setConstantVal(intptr_t val) {
-    ConstantVal = val;
-  }
-
-  /// isGlobalValue - Return true if this relocation is a GlobalValue, as
-  /// opposed to a constant string.
-  bool isGlobalValue() const {
-    return AddrType == isGV;
-  }
-
-  /// isIndirectSymbol - Return true if this relocation is the address an
-  /// indirect symbol
-  bool isIndirectSymbol() const {
-    return AddrType == isIndirectSym;
-  }
-
-  /// isBasicBlock - Return true if this relocation is a basic block reference.
-  ///
-  bool isBasicBlock() const {
-    return AddrType == isBB;
-  }
-
-  /// isExternalSymbol - Return true if this is a constant string.
-  ///
-  bool isExternalSymbol() const {
-    return AddrType == isExtSym;
-  }
-
-  /// isConstantPoolIndex - Return true if this is a constant pool reference.
-  ///
-  bool isConstantPoolIndex() const {
-    return AddrType == isConstPool;
-  }
-
-  /// isJumpTableIndex - Return true if this is a jump table reference.
-  ///
-  bool isJumpTableIndex() const {
-    return AddrType == isJumpTable;
-  }
-
-  /// isGOTRelative - Return true the target wants the index into the GOT of
-  /// the symbol rather than the address of the symbol.
-  bool isGOTRelative() const {
-    return GOTRelative;
-  }
-
-  /// mayNeedFarStub - This function returns true if the JIT for this target may
-  /// need either a stub function or an indirect global-variable load to handle
-  /// the relocated GlobalValue reference.  For example, the x86-64 call
-  /// instruction can only call functions within +/-2GB of the call site.
-  /// Anything farther away needs a longer mov+call sequence, which can't just
-  /// be written on top of the existing call.
-  bool mayNeedFarStub() const {
-    return MayNeedFarStub;
-  }
-
-  /// letTargetResolve - Return true if the target JITInfo is usually
-  /// responsible for resolving the address of this relocation.
-  bool letTargetResolve() const {
-    return TargetResolve;
-  }
-
-  /// getGlobalValue - If this is a global value reference, return the
-  /// referenced global.
-  GlobalValue *getGlobalValue() const {
-    assert((isGlobalValue() || isIndirectSymbol()) &&
-           "This is not a global value reference!");
-    return Target.GV;
-  }
-
-  MachineBasicBlock *getBasicBlock() const {
-    assert(isBasicBlock() && "This is not a basic block reference!");
-    return Target.MBB;
-  }
-
-  /// getString - If this is a string value, return the string reference.
-  ///
-  const char *getExternalSymbol() const {
-    assert(isExternalSymbol() && "This is not an external symbol reference!");
-    return Target.ExtSym;
-  }
-
-  /// getConstantPoolIndex - If this is a const pool reference, return
-  /// the index into the constant pool.
-  unsigned getConstantPoolIndex() const {
-    assert(isConstantPoolIndex() && "This is not a constant pool reference!");
-    return Target.Index;
-  }
-
-  /// getJumpTableIndex - If this is a jump table reference, return
-  /// the index into the jump table.
-  unsigned getJumpTableIndex() const {
-    assert(isJumpTableIndex() && "This is not a jump table reference!");
-    return Target.Index;
-  }
-
-  /// getResultPointer - Once this has been resolved to point to an actual
-  /// address, this returns the pointer.
-  void *getResultPointer() const {
-    assert(AddrType == isResult && "Result pointer isn't set yet!");
-    return Target.Result;
-  }
-
-  /// setResultPointer - Set the result to the specified pointer value.
-  ///
-  void setResultPointer(void *Ptr) {
-    Target.Result = Ptr;
-    AddrType = isResult;
-  }
-
-  /// setGOTIndex - Set the GOT index to a specific value.
-  void setGOTIndex(unsigned idx) {
-    AddrType = isGOTIndex;
-    Target.GOTIndex = idx;
-  }
-
-  /// getGOTIndex - Once this has been resolved to an entry in the GOT,
-  /// this returns that index.  The index is from the lowest address entry
-  /// in the GOT.
-  unsigned getGOTIndex() const {
-    assert(AddrType == isGOTIndex);
-    return Target.GOTIndex;
-  }
-};
-}
-
-#endif
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 7d85432101b5..a31940161ca5 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -80,7 +80,6 @@
 #include "llvm/CodeGen/MachinePassRegistry.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
-
 #include <memory>
 
 namespace llvm {
@@ -250,7 +249,7 @@ protected:
 public:
   ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
                 bool IsPostRA)
-      : ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, IsPostRA,
+      : ScheduleDAGInstrs(*C->MF, C->MLI, IsPostRA,
                           /*RemoveKillFlags=*/IsPostRA, C->LIS),
         AA(C->AA), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), CurrentTop(),
         CurrentBottom(), NextClusterPred(nullptr), NextClusterSucc(nullptr) {
diff --git a/include/llvm/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h
index 323b694f3933..bfe6e945b6da 100644
--- a/include/llvm/CodeGen/MachineTraceMetrics.h
+++ b/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -44,8 +44,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
-#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+#ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
+#define LLVM_CODEGEN_MACHINETRACEMETRICS_H
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -264,8 +264,9 @@ public:
     /// classes are included. For the caller to account for extra machine
     /// instructions, it must first resolve each instruction's scheduling class.
     unsigned getResourceLength(
-                ArrayRef<const MachineBasicBlock*> Extrablocks = None,
-                ArrayRef<const MCSchedClassDesc*> ExtraInstrs = None) const;
+        ArrayRef<const MachineBasicBlock *> Extrablocks = None,
+        ArrayRef<const MCSchedClassDesc *> ExtraInstrs = None,
+        ArrayRef<const MCSchedClassDesc *> RemoveInstrs = None) const;
 
     /// Return the length of the (data dependency) critical path through the
     /// trace.
@@ -286,6 +287,12 @@ public:
     /// Return the Depth of a PHI instruction in a trace center block successor.
     /// The PHI does not have to be part of the trace.
     unsigned getPHIDepth(const MachineInstr *PHI) const;
+
+    /// A dependence is useful if the basic block of the defining instruction
+    /// is part of the trace of the user instruction. It is assumed that DefMI
+    /// dominates UseMI (see also isUsefulDominator).
+    bool isDepInTrace(const MachineInstr *DefMI,
+                      const MachineInstr *UseMI) const;
   };
 
   /// A trace ensemble is a collection of traces selected using the same
diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h
index ad215ec09843..e3fbfe89c203 100644
--- a/include/llvm/CodeGen/MachineValueType.h
+++ b/include/llvm/CodeGen/MachineValueType.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_CODEGEN_MACHINEVALUETYPE_H
 #define LLVM_CODEGEN_MACHINEVALUETYPE_H
 
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 
@@ -118,6 +119,7 @@ namespace llvm {
                               // unspecified type.  The register class
                               // will be determined by the opcode.
 
+      FIRST_VALUETYPE = 0,    // This is always the beginning of the list.
       LAST_VALUETYPE =  58,   // This always remains at the end of the list.
 
       // This is the current maximum for LAST_VALUETYPE.
@@ -165,6 +167,12 @@ namespace llvm {
     bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; }
     bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; }
 
+    /// isValid - Return true if this is a valid simple valuetype.
+    bool isValid() const {
+      return (SimpleTy >= MVT::FIRST_VALUETYPE &&
+              SimpleTy < MVT::LAST_VALUETYPE);
+    }
+
     /// isFloatingPoint - Return true if this is a FP, or a vector FP type.
     bool isFloatingPoint() const {
       return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE &&
@@ -196,21 +204,24 @@ namespace llvm {
     /// is32BitVector - Return true if this is a 32-bit vector type.
     bool is32BitVector() const {
       return (SimpleTy == MVT::v4i8  || SimpleTy == MVT::v2i16 ||
-              SimpleTy == MVT::v1i32);
+              SimpleTy == MVT::v1i32 || SimpleTy == MVT::v2f16 ||
+              SimpleTy == MVT::v1f32);
     }
 
     /// is64BitVector - Return true if this is a 64-bit vector type.
     bool is64BitVector() const {
       return (SimpleTy == MVT::v8i8  || SimpleTy == MVT::v4i16 ||
               SimpleTy == MVT::v2i32 || SimpleTy == MVT::v1i64 ||
-              SimpleTy == MVT::v1f64 || SimpleTy == MVT::v2f32);
+              SimpleTy == MVT::v4f16 || SimpleTy == MVT::v2f32 ||
+              SimpleTy == MVT::v1f64);
     }
 
     /// is128BitVector - Return true if this is a 128-bit vector type.
     bool is128BitVector() const {
       return (SimpleTy == MVT::v16i8 || SimpleTy == MVT::v8i16 ||
               SimpleTy == MVT::v4i32 || SimpleTy == MVT::v2i64 ||
-              SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64);
+              SimpleTy == MVT::v8f16 || SimpleTy == MVT::v4f32 ||
+              SimpleTy == MVT::v2f64);
     }
 
     /// is256BitVector - Return true if this is a 256-bit vector type.
@@ -572,6 +583,52 @@ namespace llvm {
     /// returned as Other, otherwise they are invalid.
     static MVT getVT(Type *Ty, bool HandleUnknown = false);
 
+  private:
+    /// A simple iterator over the MVT::SimpleValueType enum.
+    struct mvt_iterator {
+      SimpleValueType VT;
+      mvt_iterator(SimpleValueType VT) : VT(VT) {}
+      MVT operator*() const { return VT; }
+      bool operator!=(const mvt_iterator &LHS) const { return VT != LHS.VT; }
+      mvt_iterator& operator++() {
+        VT = (MVT::SimpleValueType)((int)VT + 1);
+        assert((int)VT <= MVT::MAX_ALLOWED_VALUETYPE &&
+               "MVT iterator overflowed.");
+        return *this;
+      }
+    };
+    /// A range of the MVT::SimpleValueType enum.
+    typedef iterator_range<mvt_iterator> mvt_range;
+
+  public:
+    /// SimpleValueType Iteration
+    /// @{
+    static mvt_range all_valuetypes() {
+      return mvt_range(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE);
+    }
+    static mvt_range integer_valuetypes() {
+      return mvt_range(MVT::FIRST_INTEGER_VALUETYPE,
+                       (MVT::SimpleValueType)(MVT::LAST_INTEGER_VALUETYPE + 1));
+    }
+    static mvt_range fp_valuetypes() {
+      return mvt_range(MVT::FIRST_FP_VALUETYPE,
+                       (MVT::SimpleValueType)(MVT::LAST_FP_VALUETYPE + 1));
+    }
+    static mvt_range vector_valuetypes() {
+      return mvt_range(MVT::FIRST_VECTOR_VALUETYPE,
+                       (MVT::SimpleValueType)(MVT::LAST_VECTOR_VALUETYPE + 1));
+    }
+    static mvt_range integer_vector_valuetypes() {
+      return mvt_range(
+          MVT::FIRST_INTEGER_VECTOR_VALUETYPE,
+          (MVT::SimpleValueType)(MVT::LAST_INTEGER_VECTOR_VALUETYPE + 1));
+    }
+    static mvt_range fp_vector_valuetypes() {
+      return mvt_range(
+          MVT::FIRST_FP_VECTOR_VALUETYPE,
+          (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1));
+    }
+    /// @}
   };
 
 } // End llvm namespace
diff --git a/include/llvm/CodeGen/PBQP/CostAllocator.h b/include/llvm/CodeGen/PBQP/CostAllocator.h
index ff62c0959344..02d39fe383f1 100644
--- a/include/llvm/CodeGen/PBQP/CostAllocator.h
+++ b/include/llvm/CodeGen/PBQP/CostAllocator.h
@@ -15,117 +15,101 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_COSTALLOCATOR_H
-#define LLVM_COSTALLOCATOR_H
+#ifndef LLVM_CODEGEN_PBQP_COSTALLOCATOR_H
+#define LLVM_CODEGEN_PBQP_COSTALLOCATOR_H
 
-#include <set>
+#include "llvm/ADT/DenseSet.h"
+#include <memory>
 #include <type_traits>
 
+namespace llvm {
 namespace PBQP {
 
-template <typename CostT,
-          typename CostKeyTComparator>
-class CostPool {
+template <typename ValueT>
+class ValuePool {
 public:
+  typedef std::shared_ptr<const ValueT> PoolRef;
 
-  class PoolEntry {
+private:
+
+  class PoolEntry : public std::enable_shared_from_this<PoolEntry> {
   public:
-    template <typename CostKeyT>
-    PoolEntry(CostPool &pool, CostKeyT cost)
-      : pool(pool), cost(std::move(cost)), refCount(0) {}
-    ~PoolEntry() { pool.removeEntry(this); }
-    void incRef() { ++refCount; }
-    bool decRef() { --refCount; return (refCount == 0); }
-    CostT& getCost() { return cost; }
-    const CostT& getCost() const { return cost; }
+    template <typename ValueKeyT>
+    PoolEntry(ValuePool &Pool, ValueKeyT Value)
+        : Pool(Pool), Value(std::move(Value)) {}
+    ~PoolEntry() { Pool.removeEntry(this); }
+    const ValueT& getValue() const { return Value; }
   private:
-    CostPool &pool;
-    CostT cost;
-    std::size_t refCount;
+    ValuePool &Pool;
+    ValueT Value;
   };
 
-  class PoolRef {
+  class PoolEntryDSInfo {
   public:
-    PoolRef(PoolEntry *entry) : entry(entry) {
-      this->entry->incRef();
+    static inline PoolEntry* getEmptyKey() { return nullptr; }
+
+    static inline PoolEntry* getTombstoneKey() {
+      return reinterpret_cast<PoolEntry*>(static_cast<uintptr_t>(1));
     }
-    PoolRef(const PoolRef &r) {
-      entry = r.entry;
-      entry->incRef();
+
+    template <typename ValueKeyT>
+    static unsigned getHashValue(const ValueKeyT &C) {
+      return hash_value(C);
     }
-    PoolRef& operator=(const PoolRef &r) {
-      assert(entry != nullptr && "entry should not be null.");
-      PoolEntry *temp = r.entry;
-      temp->incRef();
-      entry->decRef();
-      entry = temp;
-      return *this;
+
+    static unsigned getHashValue(PoolEntry *P) {
+      return getHashValue(P->getValue());
     }
 
-    ~PoolRef() {
-      if (entry->decRef())
-        delete entry;
+    static unsigned getHashValue(const PoolEntry *P) {
+      return getHashValue(P->getValue());
     }
-    void reset(PoolEntry *entry) {
-      entry->incRef();
-      this->entry->decRef();
-      this->entry = entry;
+
+    template <typename ValueKeyT1, typename ValueKeyT2>
+    static
+    bool isEqual(const ValueKeyT1 &C1, const ValueKeyT2 &C2) {
+      return C1 == C2;
     }
-    CostT& operator*() { return entry->getCost(); }
-    const CostT& operator*() const { return entry->getCost(); }
-    CostT* operator->() { return &entry->getCost(); }
-    const CostT* operator->() const { return &entry->getCost(); }
-  private:
-    PoolEntry *entry;
-  };
 
-private:
-  class EntryComparator {
-  public:
-    template <typename CostKeyT>
-    typename std::enable_if<
-               !std::is_same<PoolEntry*,
-                             typename std::remove_const<CostKeyT>::type>::value,
-               bool>::type
-    operator()(const PoolEntry* a, const CostKeyT &b) {
-      return compare(a->getCost(), b);
+    template <typename ValueKeyT>
+    static bool isEqual(const ValueKeyT &C, PoolEntry *P) {
+      if (P == getEmptyKey() || P == getTombstoneKey())
+        return false;
+      return isEqual(C, P->getValue());
     }
-    bool operator()(const PoolEntry* a, const PoolEntry* b) {
-      return compare(a->getCost(), b->getCost());
+
+    static bool isEqual(PoolEntry *P1, PoolEntry *P2) {
+      if (P1 == getEmptyKey() || P1 == getTombstoneKey())
+        return P1 == P2;
+      return isEqual(P1->getValue(), P2);
     }
-  private:
-    CostKeyTComparator compare;
+
   };
 
-  typedef std::set<PoolEntry*, EntryComparator> EntrySet;
+  typedef DenseSet<PoolEntry*, PoolEntryDSInfo> EntrySetT;
 
-  EntrySet entrySet;
+  EntrySetT EntrySet;
 
-  void removeEntry(PoolEntry *p) { entrySet.erase(p); }
+  void removeEntry(PoolEntry *P) { EntrySet.erase(P); }
 
 public:
+  template <typename ValueKeyT> PoolRef getValue(ValueKeyT ValueKey) {
+    typename EntrySetT::iterator I = EntrySet.find_as(ValueKey);
 
-  template <typename CostKeyT>
-  PoolRef getCost(CostKeyT costKey) {
-    typename EntrySet::iterator itr =
-      std::lower_bound(entrySet.begin(), entrySet.end(), costKey,
-                       EntryComparator());
-
-    if (itr != entrySet.end() && costKey == (*itr)->getCost())
-      return PoolRef(*itr);
+    if (I != EntrySet.end())
+      return PoolRef((*I)->shared_from_this(), &(*I)->getValue());
 
-    PoolEntry *p = new PoolEntry(*this, std::move(costKey));
-    entrySet.insert(itr, p);
-    return PoolRef(p);
+    auto P = std::make_shared<PoolEntry>(*this, std::move(ValueKey));
+    EntrySet.insert(P.get());
+    return PoolRef(std::move(P), &P->getValue());
   }
 };
 
-template <typename VectorT, typename VectorTComparator,
-          typename MatrixT, typename MatrixTComparator>
+template <typename VectorT, typename MatrixT>
 class PoolCostAllocator {
 private:
-  typedef CostPool<VectorT, VectorTComparator> VectorCostPool;
-  typedef CostPool<MatrixT, MatrixTComparator> MatrixCostPool;
+  typedef ValuePool<VectorT> VectorCostPool;
+  typedef ValuePool<MatrixT> MatrixCostPool;
 public:
   typedef VectorT Vector;
   typedef MatrixT Matrix;
@@ -133,15 +117,16 @@ public:
   typedef typename MatrixCostPool::PoolRef MatrixPtr;
 
   template <typename VectorKeyT>
-  VectorPtr getVector(VectorKeyT v) { return vectorPool.getCost(std::move(v)); }
+  VectorPtr getVector(VectorKeyT v) { return VectorPool.getValue(std::move(v)); }
 
   template <typename MatrixKeyT>
-  MatrixPtr getMatrix(MatrixKeyT m) { return matrixPool.getCost(std::move(m)); }
+  MatrixPtr getMatrix(MatrixKeyT m) { return MatrixPool.getValue(std::move(m)); }
 private:
-  VectorCostPool vectorPool;
-  MatrixCostPool matrixPool;
+  VectorCostPool VectorPool;
+  MatrixCostPool MatrixPool;
 };
 
-}
+} // namespace PBQP
+} // namespace llvm
 
-#endif // LLVM_COSTALLOCATOR_H
+#endif
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index a55f0ea96c0a..4dc5674ae134 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -17,11 +17,12 @@
 
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include <list>
 #include <map>
 #include <set>
 
+namespace llvm {
 namespace PBQP {
 
   class GraphBase {
@@ -29,12 +30,12 @@ namespace PBQP {
     typedef unsigned NodeId;
     typedef unsigned EdgeId;
 
-    /// \brief Returns a value representing an invalid (non-existent) node.
+    /// @brief Returns a value representing an invalid (non-existent) node.
     static NodeId invalidNodeId() {
       return std::numeric_limits<NodeId>::max();
     }
 
-    /// \brief Returns a value representing an invalid (non-existent) edge.
+    /// @brief Returns a value representing an invalid (non-existent) edge.
     static EdgeId invalidEdgeId() {
       return std::numeric_limits<EdgeId>::max();
     }
@@ -56,6 +57,7 @@ namespace PBQP {
     typedef typename CostAllocator::MatrixPtr MatrixPtr;
     typedef typename SolverT::NodeMetadata NodeMetadata;
     typedef typename SolverT::EdgeMetadata EdgeMetadata;
+    typedef typename SolverT::GraphMetadata GraphMetadata;
 
   private:
 
@@ -172,6 +174,7 @@ namespace PBQP {
 
     // ----- MEMBERS -----
 
+    GraphMetadata Metadata;
     CostAllocator CostAlloc;
     SolverT *Solver;
 
@@ -187,13 +190,19 @@ namespace PBQP {
 
     // ----- INTERNAL METHODS -----
 
-    NodeEntry& getNode(NodeId NId) { return Nodes[NId]; }
-    const NodeEntry& getNode(NodeId NId) const { return Nodes[NId]; }
+    NodeEntry &getNode(NodeId NId) {
+      assert(NId < Nodes.size() && "Out of bound NodeId");
+      return Nodes[NId];
+    }
+    const NodeEntry &getNode(NodeId NId) const {
+      assert(NId < Nodes.size() && "Out of bound NodeId");
+      return Nodes[NId];
+    }
 
     EdgeEntry& getEdge(EdgeId EId) { return Edges[EId]; }
     const EdgeEntry& getEdge(EdgeId EId) const { return Edges[EId]; }
 
-    NodeId addConstructedNode(const NodeEntry &N) {
+    NodeId addConstructedNode(NodeEntry N) {
       NodeId NId = 0;
       if (!FreeNodeIds.empty()) {
         NId = FreeNodeIds.back();
@@ -206,7 +215,7 @@ namespace PBQP {
       return NId;
     }
 
-    EdgeId addConstructedEdge(const EdgeEntry &E) {
+    EdgeId addConstructedEdge(EdgeEntry E) {
       assert(findEdge(E.getN1Id(), E.getN2Id()) == invalidEdgeId() &&
              "Attempt to add duplicate edge.");
       EdgeId EId = 0;
@@ -235,6 +244,12 @@ namespace PBQP {
 
     class NodeItr {
     public:
+      typedef std::forward_iterator_tag iterator_category;
+      typedef NodeId value_type;
+      typedef int difference_type;
+      typedef NodeId* pointer;
+      typedef NodeId& reference;
+
       NodeItr(NodeId CurNId, const Graph &G)
         : CurNId(CurNId), EndNId(G.Nodes.size()), FreeNodeIds(G.FreeNodeIds) {
         this->CurNId = findNextInUse(CurNId); // Move to first in-use node id
@@ -249,7 +264,7 @@ namespace PBQP {
       NodeId findNextInUse(NodeId NId) const {
         while (NId < EndNId &&
                std::find(FreeNodeIds.begin(), FreeNodeIds.end(), NId) !=
-                 FreeNodeIds.end()) {
+               FreeNodeIds.end()) {
           ++NId;
         }
         return NId;
@@ -328,10 +343,19 @@ namespace PBQP {
       const NodeEntry &NE;
     };
 
-    /// \brief Construct an empty PBQP graph.
-    Graph() : Solver(nullptr) { }
+    /// @brief Construct an empty PBQP graph.
+    Graph() : Solver(nullptr) {}
+
+    /// @brief Construct an empty PBQP graph with the given graph metadata.
+    Graph(GraphMetadata Metadata) : Metadata(Metadata), Solver(nullptr) {}
+
+    /// @brief Get a reference to the graph metadata.
+    GraphMetadata& getMetadata() { return Metadata; }
 
-    /// \brief Lock this graph to the given solver instance in preparation
+    /// @brief Get a const-reference to the graph metadata.
+    const GraphMetadata& getMetadata() const { return Metadata; }
+
+    /// @brief Lock this graph to the given solver instance in preparation
     /// for running the solver. This method will call solver.handleAddNode for
     /// each node in the graph, and handleAddEdge for each edge, to give the
     /// solver an opportunity to set up any requried metadata.
@@ -344,13 +368,13 @@ namespace PBQP {
         Solver->handleAddEdge(EId);
     }
 
-    /// \brief Release from solver instance.
+    /// @brief Release from solver instance.
     void unsetSolver() {
       assert(Solver && "Solver not set.");
       Solver = nullptr;
     }
 
-    /// \brief Add a node with the given costs.
+    /// @brief Add a node with the given costs.
     /// @param Costs Cost vector for the new node.
     /// @return Node iterator for the added node.
     template <typename OtherVectorT>
@@ -363,9 +387,29 @@ namespace PBQP {
       return NId;
     }
 
-    /// \brief Add an edge between the given nodes with the given costs.
+    /// @brief Add a node bypassing the cost allocator.
+    /// @param Costs Cost vector ptr for the new node (must be convertible to
+    ///        VectorPtr).
+    /// @return Node iterator for the added node.
+    ///
+    ///   This method allows for fast addition of a node whose costs don't need
+    /// to be passed through the cost allocator. The most common use case for
+    /// this is when duplicating costs from an existing node (when using a
+    /// pooling allocator). These have already been uniqued, so we can avoid
+    /// re-constructing and re-uniquing them by attaching them directly to the
+    /// new node.
+    template <typename OtherVectorPtrT>
+    NodeId addNodeBypassingCostAllocator(OtherVectorPtrT Costs) {
+      NodeId NId = addConstructedNode(NodeEntry(Costs));
+      if (Solver)
+        Solver->handleAddNode(NId);
+      return NId;
+    }
+
+    /// @brief Add an edge between the given nodes with the given costs.
     /// @param N1Id First node.
     /// @param N2Id Second node.
+    /// @param Costs Cost matrix for new edge.
     /// @return Edge iterator for the added edge.
     template <typename OtherVectorT>
     EdgeId addEdge(NodeId N1Id, NodeId N2Id, OtherVectorT Costs) {
@@ -380,7 +424,32 @@ namespace PBQP {
       return EId;
     }
 
-    /// \brief Returns true if the graph is empty.
+    /// @brief Add an edge bypassing the cost allocator.
+    /// @param N1Id First node.
+    /// @param N2Id Second node.
+    /// @param Costs Cost matrix for new edge.
+    /// @return Edge iterator for the added edge.
+    ///
+    ///   This method allows for fast addition of an edge whose costs don't need
+    /// to be passed through the cost allocator. The most common use case for
+    /// this is when duplicating costs from an existing edge (when using a
+    /// pooling allocator). These have already been uniqued, so we can avoid
+    /// re-constructing and re-uniquing them by attaching them directly to the
+    /// new edge.
+    template <typename OtherMatrixPtrT>
+    NodeId addEdgeBypassingCostAllocator(NodeId N1Id, NodeId N2Id,
+                                         OtherMatrixPtrT Costs) {
+      assert(getNodeCosts(N1Id).getLength() == Costs->getRows() &&
+             getNodeCosts(N2Id).getLength() == Costs->getCols() &&
+             "Matrix dimensions mismatch.");
+      // Get cost matrix from the problem domain.
+      EdgeId EId = addConstructedEdge(EdgeEntry(N1Id, N2Id, Costs));
+      if (Solver)
+        Solver->handleAddEdge(EId);
+      return EId;
+    }
+
+    /// @brief Returns true if the graph is empty.
     bool empty() const { return NodeIdSet(*this).empty(); }
 
     NodeIdSet nodeIds() const { return NodeIdSet(*this); }
@@ -388,15 +457,15 @@ namespace PBQP {
 
     AdjEdgeIdSet adjEdgeIds(NodeId NId) { return AdjEdgeIdSet(getNode(NId)); }
 
-    /// \brief Get the number of nodes in the graph.
+    /// @brief Get the number of nodes in the graph.
     /// @return Number of nodes in the graph.
     unsigned getNumNodes() const { return NodeIdSet(*this).size(); }
 
-    /// \brief Get the number of edges in the graph.
+    /// @brief Get the number of edges in the graph.
     /// @return Number of edges in the graph.
     unsigned getNumEdges() const { return EdgeIdSet(*this).size(); }
 
-    /// \brief Set a node's cost vector.
+    /// @brief Set a node's cost vector.
     /// @param NId Node to update.
     /// @param Costs New costs to set.
     template <typename OtherVectorT>
@@ -407,11 +476,23 @@ namespace PBQP {
       getNode(NId).Costs = AllocatedCosts;
     }
 
-    /// \brief Get a node's cost vector (const version).
+    /// @brief Get a VectorPtr to a node's cost vector. Rarely useful - use
+    ///        getNodeCosts where possible.
+    /// @param NId Node id.
+    /// @return VectorPtr to node cost vector.
+    ///
+    ///   This method is primarily useful for duplicating costs quickly by
+    /// bypassing the cost allocator. See addNodeBypassingCostAllocator. Prefer
+    /// getNodeCosts when dealing with node cost values.
+    const VectorPtr& getNodeCostsPtr(NodeId NId) const {
+      return getNode(NId).Costs;
+    }
+
+    /// @brief Get a node's cost vector.
     /// @param NId Node id.
     /// @return Node cost vector.
     const Vector& getNodeCosts(NodeId NId) const {
-      return *getNode(NId).Costs;
+      return *getNodeCostsPtr(NId);
     }
 
     NodeMetadata& getNodeMetadata(NodeId NId) {
@@ -426,7 +507,7 @@ namespace PBQP {
       return getNode(NId).getAdjEdgeIds().size();
     }
 
-    /// \brief Set an edge's cost matrix.
+    /// @brief Set an edge's cost matrix.
     /// @param EId Edge id.
     /// @param Costs New cost matrix.
     template <typename OtherMatrixT>
@@ -437,34 +518,48 @@ namespace PBQP {
       getEdge(EId).Costs = AllocatedCosts;
     }
 
-    /// \brief Get an edge's cost matrix (const version).
+    /// @brief Get a MatrixPtr to a node's cost matrix. Rarely useful - use
+    ///        getEdgeCosts where possible.
+    /// @param EId Edge id.
+    /// @return MatrixPtr to edge cost matrix.
+    ///
+    ///   This method is primarily useful for duplicating costs quickly by
+    /// bypassing the cost allocator. See addNodeBypassingCostAllocator. Prefer
+    /// getEdgeCosts when dealing with edge cost values.
+    const MatrixPtr& getEdgeCostsPtr(EdgeId EId) const {
+      return getEdge(EId).Costs;
+    }
+
+    /// @brief Get an edge's cost matrix.
     /// @param EId Edge id.
     /// @return Edge cost matrix.
-    const Matrix& getEdgeCosts(EdgeId EId) const { return *getEdge(EId).Costs; }
+    const Matrix& getEdgeCosts(EdgeId EId) const {
+      return *getEdge(EId).Costs;
+    }
 
-    EdgeMetadata& getEdgeMetadata(EdgeId NId) {
-      return getEdge(NId).Metadata;
+    EdgeMetadata& getEdgeMetadata(EdgeId EId) {
+      return getEdge(EId).Metadata;
     }
 
-    const EdgeMetadata& getEdgeMetadata(EdgeId NId) const {
-      return getEdge(NId).Metadata;
+    const EdgeMetadata& getEdgeMetadata(EdgeId EId) const {
+      return getEdge(EId).Metadata;
     }
 
-    /// \brief Get the first node connected to this edge.
+    /// @brief Get the first node connected to this edge.
     /// @param EId Edge id.
     /// @return The first node connected to the given edge.
     NodeId getEdgeNode1Id(EdgeId EId) {
       return getEdge(EId).getN1Id();
     }
 
-    /// \brief Get the second node connected to this edge.
+    /// @brief Get the second node connected to this edge.
     /// @param EId Edge id.
     /// @return The second node connected to the given edge.
     NodeId getEdgeNode2Id(EdgeId EId) {
       return getEdge(EId).getN2Id();
     }
 
-    /// \brief Get the "other" node connected to this edge.
+    /// @brief Get the "other" node connected to this edge.
     /// @param EId Edge id.
     /// @param NId Node id for the "given" node.
     /// @return The iterator for the "other" node connected to this edge.
@@ -476,7 +571,7 @@ namespace PBQP {
       return E.getN1Id();
     }
 
-    /// \brief Get the edge connecting two nodes.
+    /// @brief Get the edge connecting two nodes.
     /// @param N1Id First node id.
     /// @param N2Id Second node id.
     /// @return An id for edge (N1Id, N2Id) if such an edge exists,
@@ -491,7 +586,7 @@ namespace PBQP {
       return invalidEdgeId();
     }
 
-    /// \brief Remove a node from the graph.
+    /// @brief Remove a node from the graph.
     /// @param NId Node id.
     void removeNode(NodeId NId) {
       if (Solver)
@@ -499,7 +594,7 @@ namespace PBQP {
       NodeEntry &N = getNode(NId);
       // TODO: Can this be for-each'd?
       for (AdjEdgeItr AEItr = N.adjEdgesBegin(),
-                      AEEnd = N.adjEdgesEnd();
+             AEEnd = N.adjEdgesEnd();
            AEItr != AEEnd;) {
         EdgeId EId = *AEItr;
         ++AEItr;
@@ -508,7 +603,7 @@ namespace PBQP {
       FreeNodeIds.push_back(NId);
     }
 
-    /// \brief Disconnect an edge from the given node.
+    /// @brief Disconnect an edge from the given node.
     ///
     /// Removes the given edge from the adjacency list of the given node.
     /// This operation leaves the edge in an 'asymmetric' state: It will no
@@ -541,14 +636,14 @@ namespace PBQP {
       E.disconnectFrom(*this, NId);
     }
 
-    /// \brief Convenience method to disconnect all neighbours from the given
+    /// @brief Convenience method to disconnect all neighbours from the given
     ///        node.
     void disconnectAllNeighborsFromNode(NodeId NId) {
       for (auto AEId : adjEdgeIds(NId))
         disconnectEdge(AEId, getEdgeOtherNodeId(AEId, NId));
     }
 
-    /// \brief Re-attach an edge to its nodes.
+    /// @brief Re-attach an edge to its nodes.
     ///
     /// Adds an edge that had been previously disconnected back into the
     /// adjacency set of the nodes that the edge connects.
@@ -559,7 +654,7 @@ namespace PBQP {
         Solver->handleReconnectEdge(EId, NId);
     }
 
-    /// \brief Remove an edge from the graph.
+    /// @brief Remove an edge from the graph.
     /// @param EId Edge id.
     void removeEdge(EdgeId EId) {
       if (Solver)
@@ -570,7 +665,7 @@ namespace PBQP {
       Edges[EId].invalidate();
     }
 
-    /// \brief Remove all nodes and edges from the graph.
+    /// @brief Remove all nodes and edges from the graph.
     void clear() {
       Nodes.clear();
       FreeNodeIds.clear();
@@ -578,9 +673,9 @@ namespace PBQP {
       FreeEdgeIds.clear();
     }
 
-    /// \brief Dump a graph to an output stream.
+    /// @brief Dump a graph to an output stream.
     template <typename OStream>
-    void dump(OStream &OS) {
+    void dumpToStream(OStream &OS) {
       OS << nodeIds().size() << " " << edgeIds().size() << "\n";
 
       for (auto NId : nodeIds()) {
@@ -613,7 +708,12 @@ namespace PBQP {
       }
     }
 
-    /// \brief Print a representation of this graph in DOT format.
+    /// @brief Dump this graph to dbgs().
+    void dump() {
+      dumpToStream(dbgs());
+    }
+
+    /// @brief Print a representation of this graph in DOT format.
     /// @param OS Output stream to print on.
     template <typename OStream>
     void printDot(OStream &OS) {
@@ -637,6 +737,7 @@ namespace PBQP {
     }
   };
 
-}
+}  // namespace PBQP
+}  // namespace llvm
 
 #endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h
index 69a9d83cc092..2792608e29cc 100644
--- a/include/llvm/CodeGen/PBQP/Math.h
+++ b/include/llvm/CodeGen/PBQP/Math.h
@@ -10,17 +10,19 @@
 #ifndef LLVM_CODEGEN_PBQP_MATH_H
 #define LLVM_CODEGEN_PBQP_MATH_H
 
+#include "llvm/ADT/Hashing.h"
 #include <algorithm>
 #include <cassert>
 #include <functional>
 
+namespace llvm {
 namespace PBQP {
 
 typedef float PBQPNum;
 
 /// \brief PBQP Vector class.
 class Vector {
-  friend class VectorComparator;
+  friend hash_code hash_value(const Vector &);
 public:
 
   /// \brief Construct a PBQP vector of the given size.
@@ -136,21 +138,12 @@ private:
   PBQPNum *Data;
 };
 
-class VectorComparator {
-public:
-  bool operator()(const Vector &A, const Vector &B) {
-    if (A.Length < B.Length)
-      return true;
-    if (B.Length < A.Length)
-      return false;
-    char *AData = reinterpret_cast<char*>(A.Data);
-    char *BData = reinterpret_cast<char*>(B.Data);
-    return std::lexicographical_compare(AData,
-                                        AData + A.Length * sizeof(PBQPNum),
-                                        BData,
-                                        BData + A.Length * sizeof(PBQPNum));
-  }
-};
+/// \brief Return a hash_value for the given vector.
+inline hash_code hash_value(const Vector &V) {
+  unsigned *VBegin = reinterpret_cast<unsigned*>(V.Data);
+  unsigned *VEnd = reinterpret_cast<unsigned*>(V.Data + V.Length);
+  return hash_combine(V.Length, hash_combine_range(VBegin, VEnd));
+}
 
 /// \brief Output a textual representation of the given vector on the given
 ///        output stream.
@@ -166,11 +159,10 @@ OStream& operator<<(OStream &OS, const Vector &V) {
   return OS;
 }
 
-
 /// \brief PBQP Matrix class
 class Matrix {
 private:
-  friend class MatrixComparator;
+  friend hash_code hash_value(const Matrix &);
 public:
 
   /// \brief Construct a PBQP Matrix with the given dimensions.
@@ -384,24 +376,12 @@ private:
   PBQPNum *Data;
 };
 
-class MatrixComparator {
-public:
-  bool operator()(const Matrix &A, const Matrix &B) {
-    if (A.Rows < B.Rows)
-      return true;
-    if (B.Rows < A.Rows)
-      return false;
-    if (A.Cols < B.Cols)
-      return true;
-    if (B.Cols < A.Cols)
-      return false;
-    char *AData = reinterpret_cast<char*>(A.Data);
-    char *BData = reinterpret_cast<char*>(B.Data);
-    return std::lexicographical_compare(
-             AData, AData + (A.Rows * A.Cols * sizeof(PBQPNum)),
-             BData, BData + (A.Rows * A.Cols * sizeof(PBQPNum)));
-  }
-};
+/// \brief Return a hash_code for the given matrix.
+inline hash_code hash_value(const Matrix &M) {
+  unsigned *MBegin = reinterpret_cast<unsigned*>(M.Data);
+  unsigned *MEnd = reinterpret_cast<unsigned*>(M.Data + (M.Rows * M.Cols));
+  return hash_combine(M.Rows, M.Cols, hash_combine_range(MBegin, MEnd));
+}
 
 /// \brief Output a textual representation of the given matrix on the given
 ///        output stream.
@@ -409,7 +389,7 @@ template <typename OStream>
 OStream& operator<<(OStream &OS, const Matrix &M) {
   assert((M.getRows() != 0) && "Zero-row matrix badness.");
   for (unsigned i = 0; i < M.getRows(); ++i)
-    OS << M.getRowAsVector(i);
+    OS << M.getRowAsVector(i) << "\n";
   return OS;
 }
 
@@ -424,6 +404,11 @@ private:
 };
 
 template <typename Metadata>
+inline hash_code hash_value(const MDVector<Metadata> &V) {
+  return hash_value(static_cast<const Vector&>(V));
+}
+
+template <typename Metadata>
 class MDMatrix : public Matrix {
 public:
   MDMatrix(const Matrix &m) : Matrix(m), md(*this) { }
@@ -433,6 +418,12 @@ private:
   Metadata md;
 };
 
+template <typename Metadata>
+inline hash_code hash_value(const MDMatrix<Metadata> &M) {
+  return hash_value(static_cast<const Matrix&>(M));
 }
 
+} // namespace PBQP
+} // namespace llvm
+
 #endif // LLVM_CODEGEN_PBQP_MATH_H
diff --git a/include/llvm/CodeGen/PBQP/ReductionRules.h b/include/llvm/CodeGen/PBQP/ReductionRules.h
index a55a06033c4e..21fde4d8a5cd 100644
--- a/include/llvm/CodeGen/PBQP/ReductionRules.h
+++ b/include/llvm/CodeGen/PBQP/ReductionRules.h
@@ -11,13 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_REDUCTIONRULES_H
-#define LLVM_REDUCTIONRULES_H
+#ifndef LLVM_CODEGEN_PBQP_REDUCTIONRULES_H
+#define LLVM_CODEGEN_PBQP_REDUCTIONRULES_H
 
 #include "Graph.h"
 #include "Math.h"
 #include "Solution.h"
 
+namespace llvm {
 namespace PBQP {
 
   /// \brief Reduce a node of degree one.
@@ -186,6 +187,7 @@ namespace PBQP {
     return s;
   }
 
-}
+} // namespace PBQP
+} // namespace llvm
 
-#endif // LLVM_REDUCTIONRULES_H
+#endif
diff --git a/include/llvm/CodeGen/PBQP/RegAllocSolver.h b/include/llvm/CodeGen/PBQP/RegAllocSolver.h
deleted file mode 100644
index 977c34843bbd..000000000000
--- a/include/llvm/CodeGen/PBQP/RegAllocSolver.h
+++ /dev/null
@@ -1,359 +0,0 @@
-//===-- RegAllocSolver.h - Heuristic PBQP Solver for reg alloc --*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Heuristic PBQP solver for register allocation problems. This solver uses a
-// graph reduction approach. Nodes of degree 0, 1 and 2 are eliminated with
-// optimality-preserving rules (see ReductionRules.h). When no low-degree (<3)
-// nodes are present, a heuristic derived from Brigg's graph coloring approach
-// is used.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_REGALLOCSOLVER_H
-#define LLVM_CODEGEN_PBQP_REGALLOCSOLVER_H
-
-#include "CostAllocator.h"
-#include "Graph.h"
-#include "ReductionRules.h"
-#include "Solution.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <limits>
-#include <vector>
-
-namespace PBQP {
-
-  namespace RegAlloc {
-
-    /// \brief Metadata to speed allocatability test.
-    ///
-    /// Keeps track of the number of infinities in each row and column.
-    class MatrixMetadata {
-    private:
-      MatrixMetadata(const MatrixMetadata&);
-      void operator=(const MatrixMetadata&);
-    public:
-      MatrixMetadata(const PBQP::Matrix& M)
-        : WorstRow(0), WorstCol(0),
-          UnsafeRows(new bool[M.getRows() - 1]()),
-          UnsafeCols(new bool[M.getCols() - 1]()) {
-
-        unsigned* ColCounts = new unsigned[M.getCols() - 1]();
-
-        for (unsigned i = 1; i < M.getRows(); ++i) {
-          unsigned RowCount = 0;
-          for (unsigned j = 1; j < M.getCols(); ++j) {
-            if (M[i][j] == std::numeric_limits<PBQP::PBQPNum>::infinity()) {
-              ++RowCount;
-              ++ColCounts[j - 1];
-              UnsafeRows[i - 1] = true;
-              UnsafeCols[j - 1] = true;
-            }
-          }
-          WorstRow = std::max(WorstRow, RowCount);
-        }
-        unsigned WorstColCountForCurRow =
-          *std::max_element(ColCounts, ColCounts + M.getCols() - 1);
-        WorstCol = std::max(WorstCol, WorstColCountForCurRow);
-        delete[] ColCounts;
-      }
-
-      ~MatrixMetadata() {
-        delete[] UnsafeRows;
-        delete[] UnsafeCols;
-      }
-
-      unsigned getWorstRow() const { return WorstRow; }
-      unsigned getWorstCol() const { return WorstCol; }
-      const bool* getUnsafeRows() const { return UnsafeRows; }
-      const bool* getUnsafeCols() const { return UnsafeCols; }
-
-    private:
-      unsigned WorstRow, WorstCol;
-      bool* UnsafeRows;
-      bool* UnsafeCols;
-    };
-
-    class NodeMetadata {
-    public:
-      typedef enum { Unprocessed,
-                     OptimallyReducible,
-                     ConservativelyAllocatable,
-                     NotProvablyAllocatable } ReductionState;
-
-      NodeMetadata() : RS(Unprocessed), DeniedOpts(0), OptUnsafeEdges(nullptr){}
-      ~NodeMetadata() { delete[] OptUnsafeEdges; }
-
-      void setup(const Vector& Costs) {
-        NumOpts = Costs.getLength() - 1;
-        OptUnsafeEdges = new unsigned[NumOpts]();
-      }
-
-      ReductionState getReductionState() const { return RS; }
-      void setReductionState(ReductionState RS) { this->RS = RS; }
-
-      void handleAddEdge(const MatrixMetadata& MD, bool Transpose) {
-        DeniedOpts += Transpose ? MD.getWorstCol() : MD.getWorstRow();
-        const bool* UnsafeOpts =
-          Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();
-        for (unsigned i = 0; i < NumOpts; ++i)
-          OptUnsafeEdges[i] += UnsafeOpts[i];
-      }
-
-      void handleRemoveEdge(const MatrixMetadata& MD, bool Transpose) {
-        DeniedOpts -= Transpose ? MD.getWorstCol() : MD.getWorstRow();
-        const bool* UnsafeOpts =
-          Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();
-        for (unsigned i = 0; i < NumOpts; ++i)
-          OptUnsafeEdges[i] -= UnsafeOpts[i];
-      }
-
-      bool isConservativelyAllocatable() const {
-        return (DeniedOpts < NumOpts) ||
-               (std::find(OptUnsafeEdges, OptUnsafeEdges + NumOpts, 0) !=
-                  OptUnsafeEdges + NumOpts);
-      }
-
-    private:
-      ReductionState RS;
-      unsigned NumOpts;
-      unsigned DeniedOpts;
-      unsigned* OptUnsafeEdges;
-    };
-
-    class RegAllocSolverImpl {
-    private:
-      typedef PBQP::MDMatrix<MatrixMetadata> RAMatrix;
-    public:
-      typedef PBQP::Vector RawVector;
-      typedef PBQP::Matrix RawMatrix;
-      typedef PBQP::Vector Vector;
-      typedef RAMatrix     Matrix;
-      typedef PBQP::PoolCostAllocator<
-                Vector, PBQP::VectorComparator,
-                Matrix, PBQP::MatrixComparator> CostAllocator;
-
-      typedef PBQP::GraphBase::NodeId NodeId;
-      typedef PBQP::GraphBase::EdgeId EdgeId;
-
-      typedef RegAlloc::NodeMetadata NodeMetadata;
-
-      struct EdgeMetadata { };
-
-      typedef PBQP::Graph<RegAllocSolverImpl> Graph;
-
-      RegAllocSolverImpl(Graph &G) : G(G) {}
-
-      Solution solve() {
-        G.setSolver(*this);
-        Solution S;
-        setup();
-        S = backpropagate(G, reduce());
-        G.unsetSolver();
-        return S;
-      }
-
-      void handleAddNode(NodeId NId) {
-        G.getNodeMetadata(NId).setup(G.getNodeCosts(NId));
-      }
-      void handleRemoveNode(NodeId NId) {}
-      void handleSetNodeCosts(NodeId NId, const Vector& newCosts) {}
-
-      void handleAddEdge(EdgeId EId) {
-        handleReconnectEdge(EId, G.getEdgeNode1Id(EId));
-        handleReconnectEdge(EId, G.getEdgeNode2Id(EId));
-      }
-
-      void handleRemoveEdge(EdgeId EId) {
-        handleDisconnectEdge(EId, G.getEdgeNode1Id(EId));
-        handleDisconnectEdge(EId, G.getEdgeNode2Id(EId));
-      }
-
-      void handleDisconnectEdge(EdgeId EId, NodeId NId) {
-        NodeMetadata& NMd = G.getNodeMetadata(NId);
-        const MatrixMetadata& MMd = G.getEdgeCosts(EId).getMetadata();
-        NMd.handleRemoveEdge(MMd, NId == G.getEdgeNode2Id(EId));
-        if (G.getNodeDegree(NId) == 3) {
-          // This node is becoming optimally reducible.
-          moveToOptimallyReducibleNodes(NId);
-        } else if (NMd.getReductionState() ==
-                     NodeMetadata::NotProvablyAllocatable &&
-                   NMd.isConservativelyAllocatable()) {
-          // This node just became conservatively allocatable.
-          moveToConservativelyAllocatableNodes(NId);
-        }
-      }
-
-      void handleReconnectEdge(EdgeId EId, NodeId NId) {
-        NodeMetadata& NMd = G.getNodeMetadata(NId);
-        const MatrixMetadata& MMd = G.getEdgeCosts(EId).getMetadata();
-        NMd.handleAddEdge(MMd, NId == G.getEdgeNode2Id(EId));
-      }
-
-      void handleSetEdgeCosts(EdgeId EId, const Matrix& NewCosts) {
-        handleRemoveEdge(EId);
-
-        NodeId N1Id = G.getEdgeNode1Id(EId);
-        NodeId N2Id = G.getEdgeNode2Id(EId);
-        NodeMetadata& N1Md = G.getNodeMetadata(N1Id);
-        NodeMetadata& N2Md = G.getNodeMetadata(N2Id);
-        const MatrixMetadata& MMd = NewCosts.getMetadata();
-        N1Md.handleAddEdge(MMd, N1Id != G.getEdgeNode1Id(EId));
-        N2Md.handleAddEdge(MMd, N2Id != G.getEdgeNode1Id(EId));
-      }
-
-    private:
-
-      void removeFromCurrentSet(NodeId NId) {
-        switch (G.getNodeMetadata(NId).getReductionState()) {
-          case NodeMetadata::Unprocessed: break;
-          case NodeMetadata::OptimallyReducible:
-            assert(OptimallyReducibleNodes.find(NId) !=
-                     OptimallyReducibleNodes.end() &&
-                   "Node not in optimally reducible set.");
-            OptimallyReducibleNodes.erase(NId);
-            break;
-          case NodeMetadata::ConservativelyAllocatable:
-            assert(ConservativelyAllocatableNodes.find(NId) !=
-                     ConservativelyAllocatableNodes.end() &&
-                   "Node not in conservatively allocatable set.");
-            ConservativelyAllocatableNodes.erase(NId);
-            break;
-          case NodeMetadata::NotProvablyAllocatable:
-            assert(NotProvablyAllocatableNodes.find(NId) !=
-                     NotProvablyAllocatableNodes.end() &&
-                   "Node not in not-provably-allocatable set.");
-            NotProvablyAllocatableNodes.erase(NId);
-            break;
-        }
-      }
-
-      void moveToOptimallyReducibleNodes(NodeId NId) {
-        removeFromCurrentSet(NId);
-        OptimallyReducibleNodes.insert(NId);
-        G.getNodeMetadata(NId).setReductionState(
-          NodeMetadata::OptimallyReducible);
-      }
-
-      void moveToConservativelyAllocatableNodes(NodeId NId) {
-        removeFromCurrentSet(NId);
-        ConservativelyAllocatableNodes.insert(NId);
-        G.getNodeMetadata(NId).setReductionState(
-          NodeMetadata::ConservativelyAllocatable);
-      }
-
-      void moveToNotProvablyAllocatableNodes(NodeId NId) {
-        removeFromCurrentSet(NId);
-        NotProvablyAllocatableNodes.insert(NId);
-        G.getNodeMetadata(NId).setReductionState(
-          NodeMetadata::NotProvablyAllocatable);
-      }
-
-      void setup() {
-        // Set up worklists.
-        for (auto NId : G.nodeIds()) {
-          if (G.getNodeDegree(NId) < 3)
-            moveToOptimallyReducibleNodes(NId);
-          else if (G.getNodeMetadata(NId).isConservativelyAllocatable())
-            moveToConservativelyAllocatableNodes(NId);
-          else
-            moveToNotProvablyAllocatableNodes(NId);
-        }
-      }
-
-      // Compute a reduction order for the graph by iteratively applying PBQP
-      // reduction rules. Locally optimal rules are applied whenever possible (R0,
-      // R1, R2). If no locally-optimal rules apply then any conservatively
-      // allocatable node is reduced. Finally, if no conservatively allocatable
-      // node exists then the node with the lowest spill-cost:degree ratio is
-      // selected.
-      std::vector<GraphBase::NodeId> reduce() {
-        assert(!G.empty() && "Cannot reduce empty graph.");
-
-        typedef GraphBase::NodeId NodeId;
-        std::vector<NodeId> NodeStack;
-
-        // Consume worklists.
-        while (true) {
-          if (!OptimallyReducibleNodes.empty()) {
-            NodeSet::iterator NItr = OptimallyReducibleNodes.begin();
-            NodeId NId = *NItr;
-            OptimallyReducibleNodes.erase(NItr);
-            NodeStack.push_back(NId);
-            switch (G.getNodeDegree(NId)) {
-              case 0:
-                break;
-              case 1:
-                applyR1(G, NId);
-                break;
-              case 2:
-                applyR2(G, NId);
-                break;
-              default: llvm_unreachable("Not an optimally reducible node.");
-            }
-          } else if (!ConservativelyAllocatableNodes.empty()) {
-            // Conservatively allocatable nodes will never spill. For now just
-            // take the first node in the set and push it on the stack. When we
-            // start optimizing more heavily for register preferencing, it may
-            // would be better to push nodes with lower 'expected' or worst-case
-            // register costs first (since early nodes are the most
-            // constrained).
-            NodeSet::iterator NItr = ConservativelyAllocatableNodes.begin();
-            NodeId NId = *NItr;
-            ConservativelyAllocatableNodes.erase(NItr);
-            NodeStack.push_back(NId);
-            G.disconnectAllNeighborsFromNode(NId);
-
-          } else if (!NotProvablyAllocatableNodes.empty()) {
-            NodeSet::iterator NItr =
-              std::min_element(NotProvablyAllocatableNodes.begin(),
-                               NotProvablyAllocatableNodes.end(),
-                               SpillCostComparator(G));
-            NodeId NId = *NItr;
-            NotProvablyAllocatableNodes.erase(NItr);
-            NodeStack.push_back(NId);
-            G.disconnectAllNeighborsFromNode(NId);
-          } else
-            break;
-        }
-
-        return NodeStack;
-      }
-
-      class SpillCostComparator {
-      public:
-        SpillCostComparator(const Graph& G) : G(G) {}
-        bool operator()(NodeId N1Id, NodeId N2Id) {
-          PBQPNum N1SC = G.getNodeCosts(N1Id)[0] / G.getNodeDegree(N1Id);
-          PBQPNum N2SC = G.getNodeCosts(N2Id)[0] / G.getNodeDegree(N2Id);
-          return N1SC < N2SC;
-        }
-      private:
-        const Graph& G;
-      };
-
-      Graph& G;
-      typedef std::set<NodeId> NodeSet;
-      NodeSet OptimallyReducibleNodes;
-      NodeSet ConservativelyAllocatableNodes;
-      NodeSet NotProvablyAllocatableNodes;
-    };
-
-    typedef Graph<RegAllocSolverImpl> Graph;
-
-    inline Solution solve(Graph& G) {
-      if (G.empty())
-        return Solution();
-      RegAllocSolverImpl RegAllocSolver(G);
-      return RegAllocSolver.solve();
-    }
-
-  }
-}
-
-#endif // LLVM_CODEGEN_PBQP_REGALLOCSOLVER_H
diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h
index 3556e60f3967..a3bfaeb7e6c7 100644
--- a/include/llvm/CodeGen/PBQP/Solution.h
+++ b/include/llvm/CodeGen/PBQP/Solution.h
@@ -18,6 +18,7 @@
 #include "Math.h"
 #include <map>
 
+namespace llvm {
 namespace PBQP {
 
   /// \brief Represents a solution to a PBQP problem.
@@ -87,6 +88,7 @@ namespace PBQP {
 
   };
 
-}
+} // namespace PBQP
+} // namespace llvm
 
 #endif // LLVM_CODEGEN_PBQP_SOLUTION_H
diff --git a/include/llvm/CodeGen/PBQPRAConstraint.h b/include/llvm/CodeGen/PBQPRAConstraint.h
new file mode 100644
index 000000000000..833b9bad613f
--- /dev/null
+++ b/include/llvm/CodeGen/PBQPRAConstraint.h
@@ -0,0 +1,69 @@
+//===-- RegAllocPBQP.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PBQPBuilder interface, for classes which build PBQP
+// instances to represent register allocation problems, and the RegAllocPBQP
+// interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQPRACONSTRAINT_H
+#define LLVM_CODEGEN_PBQPRACONSTRAINT_H
+
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace PBQP {
+namespace RegAlloc {
+// Forward declare PBQP graph class.
+class PBQPRAGraph;
+}
+}
+
+class LiveIntervals;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+class TargetRegisterInfo;
+
+typedef PBQP::RegAlloc::PBQPRAGraph PBQPRAGraph;
+
+/// @brief Abstract base for classes implementing PBQP register allocation
+///        constraints (e.g. Spill-costs, interference, coalescing).
+class PBQPRAConstraint {
+public:
+  virtual ~PBQPRAConstraint() = 0;
+  virtual void apply(PBQPRAGraph &G) = 0;
+private:
+  virtual void anchor();
+};
+
+/// @brief PBQP register allocation constraint composer.
+///
+///   Constraints added to this list will be applied, in the order that they are
+/// added, to the PBQP graph.
+class PBQPRAConstraintList : public PBQPRAConstraint {
+public:
+  void apply(PBQPRAGraph &G) override {
+    for (auto &C : Constraints)
+      C->apply(G);
+  }
+
+  void addConstraint(std::unique_ptr<PBQPRAConstraint> C) {
+    if (C)
+      Constraints.push_back(std::move(C));
+  }
+private:
+  std::vector<std::unique_ptr<PBQPRAConstraint>> Constraints;
+  void anchor() override;
+};
+
+}
+
+#endif /* LLVM_CODEGEN_PBQPRACONSTRAINT_H */
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 87f55e8572fe..8ed32b8a8dd5 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -105,6 +105,7 @@ private:
   AnalysisID StopAfter;
   bool Started;
   bool Stopped;
+  bool AddingMachinePasses;
 
 protected:
   TargetMachine *TM;
@@ -178,6 +179,10 @@ public:
   /// Return true if the optimized regalloc pipeline is enabled.
   bool getOptimizeRegAlloc() const;
 
+  /// Return true if the default global register allocator is in use and
+  /// has not be overriden on the command line with '-regalloc=...'
+  bool usingDefaultRegAlloc() const;
+
   /// Add common target configurable passes that perform LLVM IR to IR
   /// transforms following machine independent optimization.
   virtual void addIRPasses();
@@ -255,12 +260,9 @@ protected:
     return false;
   }
 
-  /// addPreRegAlloc - This method may be implemented by targets that want to
-  /// run passes immediately before register allocation. This should return
-  /// true if -print-machineinstrs should print after these passes.
-  virtual bool addPreRegAlloc() {
-    return false;
-  }
+  /// This method may be implemented by targets that want to run passes
+  /// immediately before register allocation.
+  virtual void addPreRegAlloc() { }
 
   /// createTargetRegisterAllocator - Create the register allocator pass for
   /// this target at the current optimization level.
@@ -286,24 +288,16 @@ protected:
     return false;
   }
 
-  /// addPostRegAlloc - This method may be implemented by targets that want to
-  /// run passes after register allocation pass pipeline but before
-  /// prolog-epilog insertion.  This should return true if -print-machineinstrs
-  /// should print after these passes.
-  virtual bool addPostRegAlloc() {
-    return false;
-  }
+  /// This method may be implemented by targets that want to run passes after
+  /// register allocation pass pipeline but before prolog-epilog insertion.
+  virtual void addPostRegAlloc() { }
 
   /// Add passes that optimize machine instructions after register allocation.
   virtual void addMachineLateOptimization();
 
-  /// addPreSched2 - This method may be implemented by targets that want to
-  /// run passes after prolog-epilog insertion and before the second instruction
-  /// scheduling pass.  This should return true if -print-machineinstrs should
-  /// print after these passes.
-  virtual bool addPreSched2() {
-    return false;
-  }
+  /// This method may be implemented by targets that want to run passes after
+  /// prolog-epilog insertion and before the second instruction scheduling pass.
+  virtual void addPreSched2() { }
 
   /// addGCPasses - Add late codegen passes that analyze code for garbage
   /// collection. This should return true if GC info should be printed after
@@ -313,24 +307,30 @@ protected:
   /// Add standard basic block placement passes.
   virtual void addBlockPlacement();
 
-  /// addPreEmitPass - This pass may be implemented by targets that want to run
-  /// passes immediately before machine code is emitted.  This should return
-  /// true if -print-machineinstrs should print out the code after the passes.
-  virtual bool addPreEmitPass() {
-    return false;
-  }
+  /// This pass may be implemented by targets that want to run passes
+  /// immediately before machine code is emitted.
+  virtual void addPreEmitPass() { }
 
   /// Utilities for targets to add passes to the pass manager.
   ///
 
   /// Add a CodeGen pass at this point in the pipeline after checking overrides.
   /// Return the pass that was added, or zero if no pass was added.
-  AnalysisID addPass(AnalysisID PassID);
+  /// @p printAfter    if true and adding a machine function pass add an extra
+  ///                  machine printer pass afterwards
+  /// @p verifyAfter   if true and adding a machine function pass add an extra
+  ///                  machine verification pass afterwards.
+  AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true,
+                     bool printAfter = true);
 
   /// Add a pass to the PassManager if that pass is supposed to be run, as
   /// determined by the StartAfter and StopAfter options. Takes ownership of the
   /// pass.
-  void addPass(Pass *P);
+  /// @p printAfter    if true and adding a machine function pass add an extra
+  ///                  machine printer pass afterwards
+  /// @p verifyAfter   if true and adding a machine function pass add an extra
+  ///                  machine verification pass afterwards.
+  void addPass(Pass *P, bool verifyAfter = true, bool printAfter = true);
 
   /// addMachinePasses helper to create the target-selected or overriden
   /// regalloc pass.
@@ -339,13 +339,20 @@ protected:
   /// printAndVerify - Add a pass to dump then verify the machine function, if
   /// those steps are enabled.
   ///
-  void printAndVerify(const char *Banner);
+  void printAndVerify(const std::string &Banner);
+
+  /// Add a pass to print the machine function if printing is enabled.
+  void addPrintPass(const std::string &Banner);
+
+  /// Add a pass to perform basic verification of the machine function if
+  /// verification is enabled.
+  void addVerifyPass(const std::string &Banner);
 };
 } // namespace llvm
 
 /// List of target independent CodeGen pass IDs.
 namespace llvm {
-  FunctionPass *createAtomicExpandLoadLinkedPass(const TargetMachine *TM);
+  FunctionPass *createAtomicExpandPass(const TargetMachine *TM);
 
   /// \brief Create a basic TargetTransformInfo analysis pass.
   ///
@@ -372,8 +379,9 @@ namespace llvm {
   /// matching during instruction selection.
   FunctionPass *createCodeGenPreparePass(const TargetMachine *TM = nullptr);
 
-  /// AtomicExpandLoadLinkedID -- FIXME
-  extern char &AtomicExpandLoadLinkedID;
+  /// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg
+  /// load-linked/store-conditional loops.
+  extern char &AtomicExpandID;
 
   /// MachineLoopInfo - This pass is a loop analysis pass.
   extern char &MachineLoopInfoID;
@@ -489,6 +497,10 @@ namespace llvm {
   /// inserting cmov instructions.
   extern char &EarlyIfConverterID;
 
+  /// This pass performs instruction combining using trace metrics to estimate
+  /// critical-path and resource depth.
+  extern char &MachineCombinerID;
+
   /// StackSlotColoring - This pass performs stack coloring and merging.
   /// It merges disjoint allocas to reduce the stack size.
   extern char &StackColoringID;
@@ -551,7 +563,7 @@ namespace llvm {
   /// createMachineVerifierPass - This pass verifies cenerated machine code
   /// instructions for correctness.
   ///
-  FunctionPass *createMachineVerifierPass(const char *Banner = nullptr);
+  FunctionPass *createMachineVerifierPass(const std::string& Banner);
 
   /// createDwarfEHPass - This pass mulches exception handling code into a form
   /// adapted to code generation.  Required if using dwarf exception handling.
@@ -593,6 +605,10 @@ namespace llvm {
 
   /// createJumpInstrTables - This pass creates jump-instruction tables.
   ModulePass *createJumpInstrTablesPass();
+
+  /// createForwardControlFlowIntegrityPass - This pass adds control-flow
+  /// integrity.
+  ModulePass *createForwardControlFlowIntegrityPass();
 } // End llvm namespace
 
 /// This initializer registers TargetMachine constructor, so the pass being
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index 441b0f084e69..eceb790c547d 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -16,150 +16,505 @@
 #ifndef LLVM_CODEGEN_REGALLOCPBQP_H
 #define LLVM_CODEGEN_REGALLOCPBQP_H
 
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/PBQP/RegAllocSolver.h"
-#include <map>
-#include <set>
+#include "llvm/CodeGen/PBQP/CostAllocator.h"
+#include "llvm/CodeGen/PBQP/ReductionRules.h"
+#include "llvm/CodeGen/PBQPRAConstraint.h"
+#include "llvm/Support/ErrorHandling.h"
 
 namespace llvm {
-
-  class LiveIntervals;
-  class MachineBlockFrequencyInfo;
-  class MachineFunction;
-  class TargetRegisterInfo;
-
-  typedef PBQP::RegAlloc::Graph PBQPRAGraph;
-
-  /// This class wraps up a PBQP instance representing a register allocation
-  /// problem, plus the structures necessary to map back from the PBQP solution
-  /// to a register allocation solution. (i.e. The PBQP-node <--> vreg map,
-  /// and the PBQP option <--> storage location map).
-  class PBQPRAProblem {
-  public:
-
-    typedef SmallVector<unsigned, 16> AllowedSet;
-
-    PBQPRAGraph& getGraph() { return graph; }
-
-    const PBQPRAGraph& getGraph() const { return graph; }
-
-    /// Record the mapping between the given virtual register and PBQP node,
-    /// and the set of allowed pregs for the vreg.
-    ///
-    /// If you are extending
-    /// PBQPBuilder you are unlikely to need this: Nodes and options for all
-    /// vregs will already have been set up for you by the base class.
-    template <typename AllowedRegsItr>
-    void recordVReg(unsigned vreg, PBQPRAGraph::NodeId nodeId,
-                    AllowedRegsItr arBegin, AllowedRegsItr arEnd) {
-      assert(node2VReg.find(nodeId) == node2VReg.end() && "Re-mapping node.");
-      assert(vreg2Node.find(vreg) == vreg2Node.end() && "Re-mapping vreg.");
-      assert(allowedSets[vreg].empty() && "vreg already has pregs.");
-
-      node2VReg[nodeId] = vreg;
-      vreg2Node[vreg] = nodeId;
-      std::copy(arBegin, arEnd, std::back_inserter(allowedSets[vreg]));
+namespace PBQP {
+namespace RegAlloc {
+
+/// @brief Spill option index.
+inline unsigned getSpillOptionIdx() { return 0; }
+
+/// \brief Metadata to speed allocatability test.
+///
+/// Keeps track of the number of infinities in each row and column.
+class MatrixMetadata {
+private:
+  MatrixMetadata(const MatrixMetadata&);
+  void operator=(const MatrixMetadata&);
+public:
+  MatrixMetadata(const Matrix& M)
+    : WorstRow(0), WorstCol(0),
+      UnsafeRows(new bool[M.getRows() - 1]()),
+      UnsafeCols(new bool[M.getCols() - 1]()) {
+
+    unsigned* ColCounts = new unsigned[M.getCols() - 1]();
+
+    for (unsigned i = 1; i < M.getRows(); ++i) {
+      unsigned RowCount = 0;
+      for (unsigned j = 1; j < M.getCols(); ++j) {
+        if (M[i][j] == std::numeric_limits<PBQPNum>::infinity()) {
+          ++RowCount;
+          ++ColCounts[j - 1];
+          UnsafeRows[i - 1] = true;
+          UnsafeCols[j - 1] = true;
+        }
+      }
+      WorstRow = std::max(WorstRow, RowCount);
     }
+    unsigned WorstColCountForCurRow =
+      *std::max_element(ColCounts, ColCounts + M.getCols() - 1);
+    WorstCol = std::max(WorstCol, WorstColCountForCurRow);
+    delete[] ColCounts;
+  }
+
+  unsigned getWorstRow() const { return WorstRow; }
+  unsigned getWorstCol() const { return WorstCol; }
+  const bool* getUnsafeRows() const { return UnsafeRows.get(); }
+  const bool* getUnsafeCols() const { return UnsafeCols.get(); }
+
+private:
+  unsigned WorstRow, WorstCol;
+  std::unique_ptr<bool[]> UnsafeRows;
+  std::unique_ptr<bool[]> UnsafeCols;
+};
+
+/// \brief Holds a vector of the allowed physical regs for a vreg.
+class AllowedRegVector {
+  friend hash_code hash_value(const AllowedRegVector &);
+public:
+
+  AllowedRegVector() : NumOpts(0), Opts(nullptr) {}
+
+  AllowedRegVector(const std::vector<unsigned> &OptVec)
+    : NumOpts(OptVec.size()), Opts(new unsigned[NumOpts]) {
+    std::copy(OptVec.begin(), OptVec.end(), Opts.get());
+  }
+
+  AllowedRegVector(const AllowedRegVector &Other)
+    : NumOpts(Other.NumOpts), Opts(new unsigned[NumOpts]) {
+    std::copy(Other.Opts.get(), Other.Opts.get() + NumOpts, Opts.get());
+  }
+
+  AllowedRegVector(AllowedRegVector &&Other)
+    : NumOpts(std::move(Other.NumOpts)), Opts(std::move(Other.Opts)) {}
+
+  AllowedRegVector& operator=(const AllowedRegVector &Other) {
+    NumOpts = Other.NumOpts;
+    Opts.reset(new unsigned[NumOpts]);
+    std::copy(Other.Opts.get(), Other.Opts.get() + NumOpts, Opts.get());
+    return *this;
+  }
+
+  AllowedRegVector& operator=(AllowedRegVector &&Other) {
+    NumOpts = std::move(Other.NumOpts);
+    Opts = std::move(Other.Opts);
+    return *this;
+  }
+
+  unsigned size() const { return NumOpts; }
+  unsigned operator[](size_t I) const { return Opts[I]; }
+
+  bool operator==(const AllowedRegVector &Other) const {
+    if (NumOpts != Other.NumOpts)
+      return false;
+    return std::equal(Opts.get(), Opts.get() + NumOpts, Other.Opts.get());
+  }
+
+  bool operator!=(const AllowedRegVector &Other) const {
+    return !(*this == Other);
+  }
+
+private:
+  unsigned NumOpts;
+  std::unique_ptr<unsigned[]> Opts;
+};
+
+inline hash_code hash_value(const AllowedRegVector &OptRegs) {
+  unsigned *OStart = OptRegs.Opts.get();
+  unsigned *OEnd = OptRegs.Opts.get() + OptRegs.NumOpts;
+  return hash_combine(OptRegs.NumOpts,
+                      hash_combine_range(OStart, OEnd));
+}
 
-    /// Get the virtual register corresponding to the given PBQP node.
-    unsigned getVRegForNode(PBQPRAGraph::NodeId nodeId) const;
-
-    /// Get the PBQP node corresponding to the given virtual register.
-    PBQPRAGraph::NodeId getNodeForVReg(unsigned vreg) const;
-
-    /// Returns true if the given PBQP option represents a physical register,
-    /// false otherwise.
-    bool isPRegOption(unsigned vreg, unsigned option) const {
-      // At present we only have spills or pregs, so anything that's not a
-      // spill is a preg. (This might be extended one day to support remat).
-      return !isSpillOption(vreg, option);
+/// \brief Holds graph-level metadata relevent to PBQP RA problems.
+class GraphMetadata {
+private:
+  typedef ValuePool<AllowedRegVector> AllowedRegVecPool;
+public:
+
+  typedef AllowedRegVecPool::PoolRef AllowedRegVecRef;
+
+  GraphMetadata(MachineFunction &MF,
+                LiveIntervals &LIS,
+                MachineBlockFrequencyInfo &MBFI)
+    : MF(MF), LIS(LIS), MBFI(MBFI) {}
+
+  MachineFunction &MF;
+  LiveIntervals &LIS;
+  MachineBlockFrequencyInfo &MBFI;
+
+  void setNodeIdForVReg(unsigned VReg, GraphBase::NodeId NId) {
+    VRegToNodeId[VReg] = NId;
+  }
+
+  GraphBase::NodeId getNodeIdForVReg(unsigned VReg) const {
+    auto VRegItr = VRegToNodeId.find(VReg);
+    if (VRegItr == VRegToNodeId.end())
+      return GraphBase::invalidNodeId();
+    return VRegItr->second;
+  }
+
+  void eraseNodeIdForVReg(unsigned VReg) {
+    VRegToNodeId.erase(VReg);
+  }
+
+  AllowedRegVecRef getAllowedRegs(AllowedRegVector Allowed) {
+    return AllowedRegVecs.getValue(std::move(Allowed));
+  }
+
+private:
+  DenseMap<unsigned, GraphBase::NodeId> VRegToNodeId;
+  AllowedRegVecPool AllowedRegVecs;
+};
+
+/// \brief Holds solver state and other metadata relevant to each PBQP RA node.
+class NodeMetadata {
+public:
+  typedef RegAlloc::AllowedRegVector AllowedRegVector;
+
+  typedef enum { Unprocessed,
+                 OptimallyReducible,
+                 ConservativelyAllocatable,
+                 NotProvablyAllocatable } ReductionState;
+
+  NodeMetadata()
+    : RS(Unprocessed), NumOpts(0), DeniedOpts(0), OptUnsafeEdges(nullptr),
+      VReg(0) {}
+
+  // FIXME: Re-implementing default behavior to work around MSVC. Remove once
+  // MSVC synthesizes move constructors properly.
+  NodeMetadata(const NodeMetadata &Other)
+    : RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts),
+      OptUnsafeEdges(new unsigned[NumOpts]), VReg(Other.VReg),
+      AllowedRegs(Other.AllowedRegs) {
+    if (NumOpts > 0) {
+      std::copy(&Other.OptUnsafeEdges[0], &Other.OptUnsafeEdges[NumOpts],
+                &OptUnsafeEdges[0]);
     }
-
-    /// Returns true if the given PBQP option represents spilling, false
-    /// otherwise.
-    bool isSpillOption(unsigned vreg, unsigned option) const {
-      // We hardcode option zero as the spill option.
-      return option == 0;
+  }
+
+  // FIXME: Re-implementing default behavior to work around MSVC. Remove once
+  // MSVC synthesizes move constructors properly.
+  NodeMetadata(NodeMetadata &&Other)
+    : RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts),
+      OptUnsafeEdges(std::move(Other.OptUnsafeEdges)), VReg(Other.VReg),
+      AllowedRegs(std::move(Other.AllowedRegs)) {}
+
+  // FIXME: Re-implementing default behavior to work around MSVC. Remove once
+  // MSVC synthesizes move constructors properly.
+  NodeMetadata& operator=(const NodeMetadata &Other) {
+    RS = Other.RS;
+    NumOpts = Other.NumOpts;
+    DeniedOpts = Other.DeniedOpts;
+    OptUnsafeEdges.reset(new unsigned[NumOpts]);
+    std::copy(Other.OptUnsafeEdges.get(), Other.OptUnsafeEdges.get() + NumOpts,
+              OptUnsafeEdges.get());
+    VReg = Other.VReg;
+    AllowedRegs = Other.AllowedRegs;
+    return *this;
+  }
+
+  // FIXME: Re-implementing default behavior to work around MSVC. Remove once
+  // MSVC synthesizes move constructors properly.
+  NodeMetadata& operator=(NodeMetadata &&Other) {
+    RS = Other.RS;
+    NumOpts = Other.NumOpts;
+    DeniedOpts = Other.DeniedOpts;
+    OptUnsafeEdges = std::move(Other.OptUnsafeEdges);
+    VReg = Other.VReg;
+    AllowedRegs = std::move(Other.AllowedRegs);
+    return *this;
+  }
+
+  void setVReg(unsigned VReg) { this->VReg = VReg; }
+  unsigned getVReg() const { return VReg; }
+
+  void setAllowedRegs(GraphMetadata::AllowedRegVecRef AllowedRegs) {
+    this->AllowedRegs = std::move(AllowedRegs);
+  }
+  const AllowedRegVector& getAllowedRegs() const { return *AllowedRegs; }
+
+  void setup(const Vector& Costs) {
+    NumOpts = Costs.getLength() - 1;
+    OptUnsafeEdges = std::unique_ptr<unsigned[]>(new unsigned[NumOpts]());
+  }
+
+  ReductionState getReductionState() const { return RS; }
+  void setReductionState(ReductionState RS) { this->RS = RS; }
+
+  void handleAddEdge(const MatrixMetadata& MD, bool Transpose) {
+    DeniedOpts += Transpose ? MD.getWorstCol() : MD.getWorstRow();
+    const bool* UnsafeOpts =
+      Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();
+    for (unsigned i = 0; i < NumOpts; ++i)
+      OptUnsafeEdges[i] += UnsafeOpts[i];
+  }
+
+  void handleRemoveEdge(const MatrixMetadata& MD, bool Transpose) {
+    DeniedOpts -= Transpose ? MD.getWorstCol() : MD.getWorstRow();
+    const bool* UnsafeOpts =
+      Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();
+    for (unsigned i = 0; i < NumOpts; ++i)
+      OptUnsafeEdges[i] -= UnsafeOpts[i];
+  }
+
+  bool isConservativelyAllocatable() const {
+    return (DeniedOpts < NumOpts) ||
+      (std::find(&OptUnsafeEdges[0], &OptUnsafeEdges[NumOpts], 0) !=
+       &OptUnsafeEdges[NumOpts]);
+  }
+
+private:
+  ReductionState RS;
+  unsigned NumOpts;
+  unsigned DeniedOpts;
+  std::unique_ptr<unsigned[]> OptUnsafeEdges;
+  unsigned VReg;
+  GraphMetadata::AllowedRegVecRef AllowedRegs;
+};
+
+class RegAllocSolverImpl {
+private:
+  typedef MDMatrix<MatrixMetadata> RAMatrix;
+public:
+  typedef PBQP::Vector RawVector;
+  typedef PBQP::Matrix RawMatrix;
+  typedef PBQP::Vector Vector;
+  typedef RAMatrix     Matrix;
+  typedef PBQP::PoolCostAllocator<Vector, Matrix> CostAllocator;
+
+  typedef GraphBase::NodeId NodeId;
+  typedef GraphBase::EdgeId EdgeId;
+
+  typedef RegAlloc::NodeMetadata NodeMetadata;
+  struct EdgeMetadata { };
+  typedef RegAlloc::GraphMetadata GraphMetadata;
+
+  typedef PBQP::Graph<RegAllocSolverImpl> Graph;
+
+  RegAllocSolverImpl(Graph &G) : G(G) {}
+
+  Solution solve() {
+    G.setSolver(*this);
+    Solution S;
+    setup();
+    S = backpropagate(G, reduce());
+    G.unsetSolver();
+    return S;
+  }
+
+  void handleAddNode(NodeId NId) {
+    G.getNodeMetadata(NId).setup(G.getNodeCosts(NId));
+  }
+  void handleRemoveNode(NodeId NId) {}
+  void handleSetNodeCosts(NodeId NId, const Vector& newCosts) {}
+
+  void handleAddEdge(EdgeId EId) {
+    handleReconnectEdge(EId, G.getEdgeNode1Id(EId));
+    handleReconnectEdge(EId, G.getEdgeNode2Id(EId));
+  }
+
+  void handleRemoveEdge(EdgeId EId) {
+    handleDisconnectEdge(EId, G.getEdgeNode1Id(EId));
+    handleDisconnectEdge(EId, G.getEdgeNode2Id(EId));
+  }
+
+  void handleDisconnectEdge(EdgeId EId, NodeId NId) {
+    NodeMetadata& NMd = G.getNodeMetadata(NId);
+    const MatrixMetadata& MMd = G.getEdgeCosts(EId).getMetadata();
+    NMd.handleRemoveEdge(MMd, NId == G.getEdgeNode2Id(EId));
+    if (G.getNodeDegree(NId) == 3) {
+      // This node is becoming optimally reducible.
+      moveToOptimallyReducibleNodes(NId);
+    } else if (NMd.getReductionState() ==
+               NodeMetadata::NotProvablyAllocatable &&
+               NMd.isConservativelyAllocatable()) {
+      // This node just became conservatively allocatable.
+      moveToConservativelyAllocatableNodes(NId);
+    }
+  }
+
+  void handleReconnectEdge(EdgeId EId, NodeId NId) {
+    NodeMetadata& NMd = G.getNodeMetadata(NId);
+    const MatrixMetadata& MMd = G.getEdgeCosts(EId).getMetadata();
+    NMd.handleAddEdge(MMd, NId == G.getEdgeNode2Id(EId));
+  }
+
+  void handleSetEdgeCosts(EdgeId EId, const Matrix& NewCosts) {
+    handleRemoveEdge(EId);
+
+    NodeId N1Id = G.getEdgeNode1Id(EId);
+    NodeId N2Id = G.getEdgeNode2Id(EId);
+    NodeMetadata& N1Md = G.getNodeMetadata(N1Id);
+    NodeMetadata& N2Md = G.getNodeMetadata(N2Id);
+    const MatrixMetadata& MMd = NewCosts.getMetadata();
+    N1Md.handleAddEdge(MMd, N1Id != G.getEdgeNode1Id(EId));
+    N2Md.handleAddEdge(MMd, N2Id != G.getEdgeNode1Id(EId));
+  }
+
+private:
+
+  void removeFromCurrentSet(NodeId NId) {
+    switch (G.getNodeMetadata(NId).getReductionState()) {
+    case NodeMetadata::Unprocessed: break;
+    case NodeMetadata::OptimallyReducible:
+      assert(OptimallyReducibleNodes.find(NId) !=
+             OptimallyReducibleNodes.end() &&
+             "Node not in optimally reducible set.");
+      OptimallyReducibleNodes.erase(NId);
+      break;
+    case NodeMetadata::ConservativelyAllocatable:
+      assert(ConservativelyAllocatableNodes.find(NId) !=
+             ConservativelyAllocatableNodes.end() &&
+             "Node not in conservatively allocatable set.");
+      ConservativelyAllocatableNodes.erase(NId);
+      break;
+    case NodeMetadata::NotProvablyAllocatable:
+      assert(NotProvablyAllocatableNodes.find(NId) !=
+             NotProvablyAllocatableNodes.end() &&
+             "Node not in not-provably-allocatable set.");
+      NotProvablyAllocatableNodes.erase(NId);
+      break;
+    }
+  }
+
+  void moveToOptimallyReducibleNodes(NodeId NId) {
+    removeFromCurrentSet(NId);
+    OptimallyReducibleNodes.insert(NId);
+    G.getNodeMetadata(NId).setReductionState(
+      NodeMetadata::OptimallyReducible);
+  }
+
+  void moveToConservativelyAllocatableNodes(NodeId NId) {
+    removeFromCurrentSet(NId);
+    ConservativelyAllocatableNodes.insert(NId);
+    G.getNodeMetadata(NId).setReductionState(
+      NodeMetadata::ConservativelyAllocatable);
+  }
+
+  void moveToNotProvablyAllocatableNodes(NodeId NId) {
+    removeFromCurrentSet(NId);
+    NotProvablyAllocatableNodes.insert(NId);
+    G.getNodeMetadata(NId).setReductionState(
+      NodeMetadata::NotProvablyAllocatable);
+  }
+
+  void setup() {
+    // Set up worklists.
+    for (auto NId : G.nodeIds()) {
+      if (G.getNodeDegree(NId) < 3)
+        moveToOptimallyReducibleNodes(NId);
+      else if (G.getNodeMetadata(NId).isConservativelyAllocatable())
+        moveToConservativelyAllocatableNodes(NId);
+      else
+        moveToNotProvablyAllocatableNodes(NId);
+    }
+  }
+
+  // Compute a reduction order for the graph by iteratively applying PBQP
+  // reduction rules. Locally optimal rules are applied whenever possible (R0,
+  // R1, R2). If no locally-optimal rules apply then any conservatively
+  // allocatable node is reduced. Finally, if no conservatively allocatable
+  // node exists then the node with the lowest spill-cost:degree ratio is
+  // selected.
+  std::vector<GraphBase::NodeId> reduce() {
+    assert(!G.empty() && "Cannot reduce empty graph.");
+
+    typedef GraphBase::NodeId NodeId;
+    std::vector<NodeId> NodeStack;
+
+    // Consume worklists.
+    while (true) {
+      if (!OptimallyReducibleNodes.empty()) {
+        NodeSet::iterator NItr = OptimallyReducibleNodes.begin();
+        NodeId NId = *NItr;
+        OptimallyReducibleNodes.erase(NItr);
+        NodeStack.push_back(NId);
+        switch (G.getNodeDegree(NId)) {
+        case 0:
+          break;
+        case 1:
+          applyR1(G, NId);
+          break;
+        case 2:
+          applyR2(G, NId);
+          break;
+        default: llvm_unreachable("Not an optimally reducible node.");
+        }
+      } else if (!ConservativelyAllocatableNodes.empty()) {
+        // Conservatively allocatable nodes will never spill. For now just
+        // take the first node in the set and push it on the stack. When we
+        // start optimizing more heavily for register preferencing, it may
+        // would be better to push nodes with lower 'expected' or worst-case
+        // register costs first (since early nodes are the most
+        // constrained).
+        NodeSet::iterator NItr = ConservativelyAllocatableNodes.begin();
+        NodeId NId = *NItr;
+        ConservativelyAllocatableNodes.erase(NItr);
+        NodeStack.push_back(NId);
+        G.disconnectAllNeighborsFromNode(NId);
+
+      } else if (!NotProvablyAllocatableNodes.empty()) {
+        NodeSet::iterator NItr =
+          std::min_element(NotProvablyAllocatableNodes.begin(),
+                           NotProvablyAllocatableNodes.end(),
+                           SpillCostComparator(G));
+        NodeId NId = *NItr;
+        NotProvablyAllocatableNodes.erase(NItr);
+        NodeStack.push_back(NId);
+        G.disconnectAllNeighborsFromNode(NId);
+      } else
+        break;
     }
 
-    /// Returns the allowed set for the given virtual register.
-    const AllowedSet& getAllowedSet(unsigned vreg) const;
-
-    /// Get PReg for option.
-    unsigned getPRegForOption(unsigned vreg, unsigned option) const;
-
-  private:
-
-    typedef std::map<PBQPRAGraph::NodeId, unsigned>  Node2VReg;
-    typedef DenseMap<unsigned, PBQPRAGraph::NodeId> VReg2Node;
-    typedef DenseMap<unsigned, AllowedSet> AllowedSetMap;
-
-    PBQPRAGraph graph;
-    Node2VReg node2VReg;
-    VReg2Node vreg2Node;
-
-    AllowedSetMap allowedSets;
-
-  };
+    return NodeStack;
+  }
 
-  /// Builds PBQP instances to represent register allocation problems. Includes
-  /// spill, interference and coalescing costs by default. You can extend this
-  /// class to support additional constraints for your architecture.
-  class PBQPBuilder {
-  private:
-    PBQPBuilder(const PBQPBuilder&) LLVM_DELETED_FUNCTION;
-    void operator=(const PBQPBuilder&) LLVM_DELETED_FUNCTION;
+  class SpillCostComparator {
   public:
-
-    typedef std::set<unsigned> RegSet;
-
-    /// Default constructor.
-    PBQPBuilder() {}
-
-    /// Clean up a PBQPBuilder.
-    virtual ~PBQPBuilder() {}
-
-    /// Build a PBQP instance to represent the register allocation problem for
-    /// the given MachineFunction.
-    virtual PBQPRAProblem *build(MachineFunction *mf, const LiveIntervals *lis,
-                                 const MachineBlockFrequencyInfo *mbfi,
-                                 const RegSet &vregs);
+    SpillCostComparator(const Graph& G) : G(G) {}
+    bool operator()(NodeId N1Id, NodeId N2Id) {
+      PBQPNum N1SC = G.getNodeCosts(N1Id)[0] / G.getNodeDegree(N1Id);
+      PBQPNum N2SC = G.getNodeCosts(N2Id)[0] / G.getNodeDegree(N2Id);
+      return N1SC < N2SC;
+    }
   private:
-
-    void addSpillCosts(PBQP::Vector &costVec, PBQP::PBQPNum spillCost);
-
-    void addInterferenceCosts(PBQP::Matrix &costMat,
-                              const PBQPRAProblem::AllowedSet &vr1Allowed,
-                              const PBQPRAProblem::AllowedSet &vr2Allowed,
-                              const TargetRegisterInfo *tri);
+    const Graph& G;
   };
 
-  /// Extended builder which adds coalescing constraints to a problem.
-  class PBQPBuilderWithCoalescing : public PBQPBuilder {
-  public:
-
-    /// Build a PBQP instance to represent the register allocation problem for
-    /// the given MachineFunction.
-    PBQPRAProblem *build(MachineFunction *mf, const LiveIntervals *lis,
-                         const MachineBlockFrequencyInfo *mbfi,
-                         const RegSet &vregs) override;
-
-  private:
+  Graph& G;
+  typedef std::set<NodeId> NodeSet;
+  NodeSet OptimallyReducibleNodes;
+  NodeSet ConservativelyAllocatableNodes;
+  NodeSet NotProvablyAllocatableNodes;
+};
+
+class PBQPRAGraph : public PBQP::Graph<RegAllocSolverImpl> {
+private:
+  typedef PBQP::Graph<RegAllocSolverImpl> BaseT;
+public:
+  PBQPRAGraph(GraphMetadata Metadata) : BaseT(Metadata) {}
+};
+
+inline Solution solve(PBQPRAGraph& G) {
+  if (G.empty())
+    return Solution();
+  RegAllocSolverImpl RegAllocSolver(G);
+  return RegAllocSolver.solve();
+}
 
-    void addPhysRegCoalesce(PBQP::Vector &costVec, unsigned pregOption,
-                            PBQP::PBQPNum benefit);
+} // namespace RegAlloc
+} // namespace PBQP
 
-    void addVirtRegCoalesce(PBQP::Matrix &costMat,
-                            const PBQPRAProblem::AllowedSet &vr1Allowed,
-                            const PBQPRAProblem::AllowedSet &vr2Allowed,
-                            PBQP::PBQPNum benefit);
-  };
+/// @brief Create a PBQP register allocator instance.
+FunctionPass *
+createPBQPRegisterAllocator(char *customPassID = nullptr);
 
-  FunctionPass *
-  createPBQPRegisterAllocator(std::unique_ptr<PBQPBuilder> builder,
-                              char *customPassID = nullptr);
-}
+} // namespace llvm
 
 #endif /* LLVM_CODEGEN_REGALLOCPBQP_H */
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 335dd7f084c1..474861e45df1 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -34,7 +34,7 @@ class RegScavenger {
   MachineRegisterInfo* MRI;
   MachineBasicBlock *MBB;
   MachineBasicBlock::iterator MBBI;
-  unsigned NumPhysRegs;
+  unsigned NumRegUnits;
 
   /// Tracking - True if RegScavenger is currently tracking the liveness of 
   /// registers.
@@ -58,22 +58,19 @@ class RegScavenger {
   /// A vector of information on scavenged registers.
   SmallVector<ScavengedInfo, 2> Scavenged;
 
-  /// CalleeSavedrRegs - A bitvector of callee saved registers for the target.
-  ///
-  BitVector CalleeSavedRegs;
-
-  /// RegsAvailable - The current state of all the physical registers immediately
-  /// before MBBI. One bit per physical register. If bit is set that means it's
-  /// available, unset means the register is currently being used.
-  BitVector RegsAvailable;
+  /// RegUnitsAvailable - The current state of each reg unit immediatelly
+  /// before MBBI. One bit per register unit. If bit is not set it means any
+  /// register containing that register unit is currently being used.
+  BitVector RegUnitsAvailable;
 
   // These BitVectors are only used internally to forward(). They are members
   // to avoid frequent reallocations.
-  BitVector KillRegs, DefRegs;
+  BitVector KillRegUnits, DefRegUnits;
+  BitVector TmpRegUnits;
 
 public:
   RegScavenger()
-    : MBB(nullptr), NumPhysRegs(0), Tracking(false) {}
+    : MBB(nullptr), NumRegUnits(0), Tracking(false) {}
 
   /// enterBasicBlock - Start tracking liveness from the begin of the specific
   /// basic block.
@@ -112,9 +109,9 @@ public:
   MachineBasicBlock::iterator getCurrentPosition() const {
     return MBBI;
   }
-
-  /// getRegsUsed - return all registers currently in use in used.
-  void getRegsUsed(BitVector &used, bool includeReserved);
+  
+  /// isRegUsed - return if a specific register is currently used.
+  bool isRegUsed(unsigned Reg, bool includeReserved = true) const;
 
   /// getRegsAvailable - Return all available registers in the register class
   /// in Mask.
@@ -157,40 +154,29 @@ public:
     return scavengeRegister(RegClass, MBBI, SPAdj);
   }
 
-  /// setUsed - Tell the scavenger a register is used.
+  /// setRegUsed - Tell the scavenger a register is used.
   ///
-  void setUsed(unsigned Reg);
+  void setRegUsed(unsigned Reg);
 private:
   /// isReserved - Returns true if a register is reserved. It is never "unused".
   bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
 
-  /// isUsed - Test if a register is currently being used.  When called by the
-  /// isAliasUsed function, we only check isReserved if this is the original
-  /// register, not an alias register.
+  /// setUsed / setUnused - Mark the state of one or a number of register units.
   ///
-  bool isUsed(unsigned Reg, bool CheckReserved = true) const   {
-    return !RegsAvailable.test(Reg) || (CheckReserved && isReserved(Reg));
+  void setUsed(BitVector &RegUnits) {
+    RegUnitsAvailable.reset(RegUnits);
   }
-
-  /// isAliasUsed - Is Reg or an alias currently in use?
-  bool isAliasUsed(unsigned Reg) const;
-
-  /// setUsed / setUnused - Mark the state of one or a number of registers.
-  ///
-  void setUsed(BitVector &Regs) {
-    RegsAvailable.reset(Regs);
-  }
-  void setUnused(BitVector &Regs) {
-    RegsAvailable |= Regs;
+  void setUnused(BitVector &RegUnits) {
+    RegUnitsAvailable |= RegUnits;
   }
 
-  /// Processes the current instruction and fill the KillRegs and DefRegs bit
-  /// vectors.
+  /// Processes the current instruction and fill the KillRegUnits and
+  /// DefRegUnits bit vectors.
   void determineKillsAndDefs();
-
-  /// Add Reg and all its sub-registers to BV.
-  void addRegWithSubRegs(BitVector &BV, unsigned Reg);
-
+  
+  /// Add all Reg Units that Reg contains to BV.
+  void addRegUnits(BitVector &BV, unsigned Reg);
+  
   /// findSurvivorReg - Return the candidate register that is unused for the
   /// longest after StartMI. UseMI is set to the instruction where the search
   /// stopped.
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 81db8a2f79b5..64c9c4729e92 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -203,6 +203,16 @@ namespace RTLIB {
     COPYSIGN_F80,
     COPYSIGN_F128,
     COPYSIGN_PPCF128,
+    FMIN_F32,
+    FMIN_F64,
+    FMIN_F80,
+    FMIN_F128,
+    FMIN_PPCF128,
+    FMAX_F32,
+    FMAX_F64,
+    FMAX_F80,
+    FMAX_F128,
+    FMAX_PPCF128,
 
     // CONVERSION
     FPEXT_F64_F128,
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 5a65d590802a..80aee8c62880 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -190,6 +190,12 @@ namespace llvm {
       return getKind() == Order && Contents.OrdKind == Barrier;
     }
 
+    /// isNormalMemoryOrBarrier - Test if this is could be any kind of memory
+    /// dependence.
+    bool isNormalMemoryOrBarrier() const {
+      return (isNormalMemory() || isBarrier());
+    }
+
     /// isMustAlias - Test if this is an Order dependence that is marked
     /// as "must alias", meaning that the SUnits at either end of the edge
     /// have a memory dependence on a known memory location.
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index e6754a2c0342..00dd8f9a633e 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -75,8 +75,7 @@ namespace llvm {
   /// MachineInstrs.
   class ScheduleDAGInstrs : public ScheduleDAG {
   protected:
-    const MachineLoopInfo &MLI;
-    const MachineDominatorTree &MDT;
+    const MachineLoopInfo *MLI;
     const MachineFrameInfo *MFI;
 
     /// Live Intervals provides reaching defs in preRA scheduling.
@@ -154,8 +153,7 @@ namespace llvm {
 
   public:
     explicit ScheduleDAGInstrs(MachineFunction &mf,
-                               const MachineLoopInfo &mli,
-                               const MachineDominatorTree &mdt,
+                               const MachineLoopInfo *mli,
                                bool IsPostRAFlag,
                                bool RemoveKillFlags = false,
                                LiveIntervals *LIS = nullptr);
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index bb87f82d2def..4950797bb1e0 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -16,9 +16,11 @@
 #define LLVM_CODEGEN_SELECTIONDAG_H
 
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/Support/RecyclingAllocator.h"
 #include "llvm/Target/TargetMachine.h"
@@ -126,6 +128,10 @@ public:
       DbgValMap[Node].push_back(V);
   }
 
+  /// \brief Invalidate all DbgValues attached to the node and remove
+  /// it from the Node-to-DbgValues map.
+  void erase(const SDNode *Node);
+
   void clear() {
     DbgValMap.clear();
     DbgValues.clear();
@@ -166,7 +172,7 @@ void checkForCycles(const SelectionDAG *DAG, bool force = false);
 ///
 class SelectionDAG {
   const TargetMachine &TM;
-  const TargetSelectionDAGInfo &TSI;
+  const TargetSelectionDAGInfo *TSI;
   const TargetLowering *TLI;
   MachineFunction *MF;
   LLVMContext *Context;
@@ -266,7 +272,7 @@ public:
   /// init - Prepare this SelectionDAG to process code in the given
   /// MachineFunction.
   ///
-  void init(MachineFunction &mf, const TargetLowering *TLI);
+  void init(MachineFunction &mf);
 
   /// clear - Clear state and free memory necessary to make this
   /// SelectionDAG ready to process a new block.
@@ -275,8 +281,9 @@ public:
 
   MachineFunction &getMachineFunction() const { return *MF; }
   const TargetMachine &getTarget() const { return TM; }
+  const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); }
   const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
-  const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return TSI; }
+  const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return *TSI; }
   LLVMContext *getContext() const {return Context; }
 
   /// viewGraph - Pop up a GraphViz/gv window with the DAG rendered using 'dot'.
@@ -364,6 +371,27 @@ public:
   /// the graph.
   void Legalize();
 
+  /// \brief Transforms a SelectionDAG node and any operands to it into a node
+  /// that is compatible with the target instruction selector, as indicated by
+  /// the TargetLowering object.
+  ///
+  /// \returns true if \c N is a valid, legal node after calling this.
+  ///
+  /// This essentially runs a single recursive walk of the \c Legalize process
+  /// over the given node (and its operands). This can be used to incrementally
+  /// legalize the DAG. All of the nodes which are directly replaced,
+  /// potentially including N, are added to the output parameter \c
+  /// UpdatedNodes so that the delta to the DAG can be understood by the
+  /// caller.
+  ///
+  /// When this returns false, N has been legalized in a way that make the
+  /// pointer passed in no longer valid. It may have even been deleted from the
+  /// DAG, and so it shouldn't be used further. When this returns true, the
+  /// N passed in is a legal node, and can be immediately processed as such.
+  /// This may still have done some work on the DAG, and will still populate
+  /// UpdatedNodes with any new nodes replacing those originally in the DAG.
+  bool LegalizeOp(SDNode *N, SmallSetVector<SDNode *, 16> &UpdatedNodes);
+
   /// LegalizeVectors - This transforms the SelectionDAG into a SelectionDAG
   /// that only uses vector math operations supported by the target.  This is
   /// necessary as a separate step from Legalize because unrolling a vector
@@ -725,7 +753,7 @@ public:
                    SDValue SV, unsigned Align);
 
   /// getAtomicCmpSwap - Gets a node for an atomic cmpxchg op. There are two
-  /// valid Opcodes. ISD::ATOMIC_CMO_SWAP produces a the value loaded and a
+  /// valid Opcodes. ISD::ATOMIC_CMO_SWAP produces the value loaded and a
   /// chain result. ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS produces the value loaded,
   /// a success flag (initially i1), and a chain.
   SDValue getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs,
@@ -778,7 +806,8 @@ public:
                               ArrayRef<SDValue> Ops,
                               EVT MemVT, MachinePointerInfo PtrInfo,
                               unsigned Align = 0, bool Vol = false,
-                              bool ReadMem = true, bool WriteMem = true);
+                              bool ReadMem = true, bool WriteMem = true,
+                              unsigned Size = 0);
 
   SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
                               ArrayRef<SDValue> Ops,
@@ -793,15 +822,15 @@ public:
   SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
                   MachinePointerInfo PtrInfo, bool isVolatile,
                   bool isNonTemporal, bool isInvariant, unsigned Alignment,
-                  const MDNode *TBAAInfo = nullptr,
+                  const AAMDNodes &AAInfo = AAMDNodes(),
                   const MDNode *Ranges = nullptr);
   SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
                   MachineMemOperand *MMO);
   SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
                      SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo,
                      EVT MemVT, bool isVolatile,
-                     bool isNonTemporal, unsigned Alignment,
-                     const MDNode *TBAAInfo = nullptr);
+                     bool isNonTemporal, bool isInvariant, unsigned Alignment,
+                     const AAMDNodes &AAInfo = AAMDNodes());
   SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
                      SDValue Chain, SDValue Ptr, EVT MemVT,
                      MachineMemOperand *MMO);
@@ -812,7 +841,7 @@ public:
                   SDValue Chain, SDValue Ptr, SDValue Offset,
                   MachinePointerInfo PtrInfo, EVT MemVT,
                   bool isVolatile, bool isNonTemporal, bool isInvariant,
-                  unsigned Alignment, const MDNode *TBAAInfo = nullptr,
+                  unsigned Alignment, const AAMDNodes &AAInfo = AAMDNodes(),
                   const MDNode *Ranges = nullptr);
   SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
                   EVT VT, SDLoc dl,
@@ -824,19 +853,23 @@ public:
   SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr,
                    MachinePointerInfo PtrInfo, bool isVolatile,
                    bool isNonTemporal, unsigned Alignment,
-                   const MDNode *TBAAInfo = nullptr);
+                   const AAMDNodes &AAInfo = AAMDNodes());
   SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr,
                    MachineMemOperand *MMO);
   SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr,
                         MachinePointerInfo PtrInfo, EVT TVT,
                         bool isNonTemporal, bool isVolatile,
                         unsigned Alignment,
-                        const MDNode *TBAAInfo = nullptr);
+                        const AAMDNodes &AAInfo = AAMDNodes());
   SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr,
                         EVT TVT, MachineMemOperand *MMO);
   SDValue getIndexedStore(SDValue OrigStoe, SDLoc dl, SDValue Base,
                            SDValue Offset, ISD::MemIndexedMode AM);
 
+  SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
+                        SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
+  SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
+                         SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
   /// getSrcValue - Construct a node to track a Value* through the backend.
   SDValue getSrcValue(const Value *v);
 
@@ -959,15 +992,18 @@ public:
 
   /// getDbgValue - Creates a SDDbgValue node.
   ///
-  SDDbgValue *getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R,
-			  bool IsIndirect, uint64_t Off,
-                          DebugLoc DL, unsigned O);
-  /// Constant.
-  SDDbgValue *getConstantDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
-				  DebugLoc DL, unsigned O);
-  /// Frame index.
-  SDDbgValue *getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
-				    DebugLoc DL, unsigned O);
+  /// SDNode
+  SDDbgValue *getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R,
+                          bool IsIndirect, uint64_t Off, DebugLoc DL,
+                          unsigned O);
+
+  /// Constant
+  SDDbgValue *getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C,
+                                  uint64_t Off, DebugLoc DL, unsigned O);
+
+  /// FrameIndex
+  SDDbgValue *getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI,
+                                    uint64_t Off, DebugLoc DL, unsigned O);
 
   /// RemoveDeadNode - Remove the specified node from the system. If any of its
   /// operands then becomes dead, remove them as well. Inform UpdateListener
@@ -1039,7 +1075,10 @@ public:
     case ISD::SADDO:
     case ISD::UADDO:
     case ISD::ADDC:
-    case ISD::ADDE: return true;
+    case ISD::ADDE:
+    case ISD::FMINNUM:
+    case ISD::FMAXNUM:
+      return true;
     default: return false;
     }
   }
@@ -1198,6 +1237,7 @@ public:
   unsigned getEVTAlignment(EVT MemoryVT) const;
 
 private:
+  void InsertNode(SDNode *N);
   bool RemoveNodeFromCSEMaps(SDNode *N);
   void AddModifiedNodeToCSEMaps(SDNode *N);
   SDNode *FindModifiedNodeSlot(SDNode *N, SDValue Op, void *&InsertPos);
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 520be402cfc5..d53e66da5a71 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/Pass.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 namespace llvm {
   class FastISel;
@@ -50,15 +51,16 @@ public:
   AliasAnalysis *AA;
   GCFunctionInfo *GFI;
   CodeGenOpt::Level OptLevel;
+  const TargetInstrInfo *TII;
+  const TargetLowering *TLI;
+
   static char ID;
 
   explicit SelectionDAGISel(TargetMachine &tm,
                             CodeGenOpt::Level OL = CodeGenOpt::Default);
   virtual ~SelectionDAGISel();
 
-  const TargetLowering *getTargetLowering() const {
-    return TM.getTargetLowering();
-  }
+  const TargetLowering *getTargetLowering() const { return TLI; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
@@ -238,6 +240,12 @@ public:
                            const unsigned char *MatcherTable,
                            unsigned TableSize);
 
+  /// \brief Return true if complex patterns for this target can mutate the
+  /// DAG.
+  virtual bool ComplexPatternFuncMutatesDAG() const {
+    return false;
+  }
+
 private:
 
   // Calls to these functions are generated by tblgen.
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 223151105b0d..8e7fd547626c 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -19,7 +19,6 @@
 #ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
 #define LLVM_CODEGEN_SELECTIONDAGNODES_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/GraphTraits.h"
@@ -27,6 +26,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/ValueTypes.h"
@@ -117,11 +117,13 @@ namespace ISD {
 /// of information is represented with the SDValue value type.
 ///
 class SDValue {
+  friend struct DenseMapInfo<SDValue>;
+
   SDNode *Node;       // The node defining the value we are using.
   unsigned ResNo;     // Which return value of the node we are using.
 public:
   SDValue() : Node(nullptr), ResNo(0) {}
-  SDValue(SDNode *node, unsigned resno) : Node(node), ResNo(resno) {}
+  SDValue(SDNode *node, unsigned resno);
 
   /// get the index which selects a specific result in the SDNode
   unsigned getResNo() const { return ResNo; }
@@ -208,10 +210,14 @@ public:
 
 template<> struct DenseMapInfo<SDValue> {
   static inline SDValue getEmptyKey() {
-    return SDValue((SDNode*)-1, -1U);
+    SDValue V;
+    V.ResNo = -1U;
+    return V;
   }
   static inline SDValue getTombstoneKey() {
-    return SDValue((SDNode*)-1, 0);
+    SDValue V;
+    V.ResNo = -2U;
+    return V;
   }
   static unsigned getHashValue(const SDValue &Val) {
     return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
@@ -411,6 +417,16 @@ public:
     return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
   }
 
+  /// Test if this node is a memory intrinsic (with valid pointer information).
+  /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
+  /// non-memory intrinsics (with chains) that are not really instances of
+  /// MemSDNode. For such nodes, we need some extra state to determine the
+  /// proper classof relationship.
+  bool isMemIntrinsic() const {
+    return (NodeType == ISD::INTRINSIC_W_CHAIN ||
+            NodeType == ISD::INTRINSIC_VOID) && ((SubclassData >> 13) & 1);
+  }
+
   /// isMachineOpcode - Test if this node has a post-isel opcode, directly
   /// corresponding to a MachineInstr opcode.
   bool isMachineOpcode() const { return NodeType < 0; }
@@ -578,7 +594,7 @@ public:
   /// changes.
   /// NOTE: This is still very expensive. Use carefully.
   bool hasPredecessorHelper(const SDNode *N,
-                            SmallPtrSet<const SDNode *, 32> &Visited,
+                            SmallPtrSetImpl<const SDNode *> &Visited,
                             SmallVectorImpl<const SDNode *> &Worklist) const;
 
   /// getNumOperands - Return the number of values used by this operation.
@@ -746,7 +762,13 @@ protected:
       ValueList(VTs.VTs), UseList(nullptr),
       NumOperands(Ops.size()), NumValues(VTs.NumVTs),
       debugLoc(dl), IROrder(Order) {
+    assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+    assert(NumOperands == Ops.size() &&
+           "NumOperands wasn't wide enough for its operands!");
+    assert(NumValues == VTs.NumVTs &&
+           "NumValues wasn't wide enough for its operands!");
     for (unsigned i = 0; i != Ops.size(); ++i) {
+      assert(OperandList && "no operands available");
       OperandList[i].setUser(this);
       OperandList[i].setInitial(Ops[i]);
     }
@@ -759,7 +781,11 @@ protected:
     : NodeType(Opc), OperandsNeedDelete(false), HasDebugValue(false),
       SubclassData(0), NodeId(-1), OperandList(nullptr), ValueList(VTs.VTs),
       UseList(nullptr), NumOperands(0), NumValues(VTs.NumVTs), debugLoc(dl),
-      IROrder(Order) {}
+      IROrder(Order) {
+    assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+    assert(NumValues == VTs.NumVTs &&
+           "NumValues wasn't wide enough for its operands!");
+  }
 
   /// InitOperands - Initialize the operands list of this with 1 operand.
   void InitOperands(SDUse *Ops, const SDValue &Op0) {
@@ -818,6 +844,8 @@ protected:
       Ops[i].setInitial(Vals[i]);
     }
     NumOperands = N;
+    assert(NumOperands == N &&
+           "NumOperands wasn't wide enough for its operands!");
     OperandList = Ops;
     checkForCycles(this);
   }
@@ -877,6 +905,13 @@ public:
 
 // Define inline functions from the SDValue class.
 
+inline SDValue::SDValue(SDNode *node, unsigned resno)
+    : Node(node), ResNo(resno) {
+  assert((!Node || ResNo < Node->getNumValues()) &&
+         "Invalid result number for the given node!");
+  assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
+}
+
 inline unsigned SDValue::getOpcode() const {
   return Node->getOpcode();
 }
@@ -1088,8 +1123,8 @@ public:
   // Returns the offset from the location of the access.
   int64_t getSrcValueOffset() const { return MMO->getOffset(); }
 
-  /// Returns the TBAAInfo that describes the dereference.
-  const MDNode *getTBAAInfo() const { return MMO->getTBAAInfo(); }
+  /// Returns the AA info that describes the dereference.
+  AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
 
   /// Returns the Ranges that describes the dereference.
   const MDNode *getRanges() const { return MMO->getRanges(); }
@@ -1145,6 +1180,9 @@ public:
            N->getOpcode() == ISD::ATOMIC_LOAD_UMAX    ||
            N->getOpcode() == ISD::ATOMIC_LOAD         ||
            N->getOpcode() == ISD::ATOMIC_STORE        ||
+           N->getOpcode() == ISD::MLOAD               ||
+           N->getOpcode() == ISD::MSTORE              ||
+           N->isMemIntrinsic()                        ||
            N->isTargetMemoryOpcode();
   }
 };
@@ -1273,14 +1311,14 @@ public:
                      ArrayRef<SDValue> Ops, EVT MemoryVT,
                      MachineMemOperand *MMO)
     : MemSDNode(Opc, Order, dl, VTs, Ops, MemoryVT, MMO) {
+    SubclassData |= 1u << 13;
   }
 
   // Methods to support isa and dyn_cast
   static bool classof(const SDNode *N) {
     // We lower some target intrinsics to their target opcode
     // early a node with a target opcode can be of this class
-    return N->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
-           N->getOpcode() == ISD::INTRINSIC_VOID ||
+    return N->isMemIntrinsic()             ||
            N->getOpcode() == ISD::PREFETCH ||
            N->isTargetMemoryOpcode();
   }
@@ -1380,6 +1418,12 @@ public:
   /// isNaN - Return true if the value is a NaN.
   bool isNaN() const { return Value->isNaN(); }
 
+  /// isInfinity - Return true if the value is an infinity
+  bool isInfinity() const { return Value->isInfinity(); }
+
+  /// isNegative - Return true if the value is negative.
+  bool isNegative() const { return Value->isNegative(); }
+
   /// isExactlyValue - We don't rely on operator== working on double values, as
   /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
   /// As such, this method can be used to do an exact bit-for-bit comparison of
@@ -1893,6 +1937,72 @@ public:
   }
 };
 
+/// MaskedLoadStoreSDNode - This is a base class is used to represent MLOAD and
+/// MSTORE nodes
+///
+class MaskedLoadStoreSDNode : public MemSDNode {
+  // Operands
+  SDUse Ops[4];
+public:
+  friend class SelectionDAG;
+  MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl,
+                   SDValue *Operands, unsigned numOperands, 
+                   SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+    : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+    InitOperands(Ops, Operands, numOperands);
+  }
+
+  // In the both nodes address is Op1, mask is Op2:
+  // MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value
+  // MaskedStoreSDNode (Chain, ptr, mask, data)
+  // Mask is a vector of i1 elements
+  const SDValue &getBasePtr() const { return getOperand(1); }
+  const SDValue &getMask() const    { return getOperand(2); }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::MLOAD ||
+           N->getOpcode() == ISD::MSTORE;
+  }
+};
+
+/// MaskedLoadSDNode - This class is used to represent an MLOAD node
+///
+class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
+public:
+  friend class SelectionDAG;
+  MaskedLoadSDNode(unsigned Order, DebugLoc dl,
+                   SDValue *Operands, unsigned numOperands, 
+                   SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+    : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
+                            VTs, MemVT, MMO) 
+  {}
+
+  const SDValue &getSrc0() const { return getOperand(3); }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::MLOAD;
+  }
+};
+
+/// MaskedStoreSDNode - This class is used to represent an MSTORE node
+///
+class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
+
+public:
+  friend class SelectionDAG;
+  MaskedStoreSDNode(unsigned Order, DebugLoc dl,
+                   SDValue *Operands, unsigned numOperands, 
+                   SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+    : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
+                            VTs, MemVT, MMO) 
+  {}
+
+  const SDValue &getData() const { return getOperand(3); }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::MSTORE;
+  }
+};
+
 /// MachineSDNode - An SDNode that represents everything that will be needed
 /// to construct a MachineInstr. These nodes are created during the
 /// instruction selection proper phase.
diff --git a/include/llvm/CodeGen/StackMapLivenessAnalysis.h b/include/llvm/CodeGen/StackMapLivenessAnalysis.h
index 6f0754616206..f67a6e95191d 100644
--- a/include/llvm/CodeGen/StackMapLivenessAnalysis.h
+++ b/include/llvm/CodeGen/StackMapLivenessAnalysis.h
@@ -13,8 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
-#define LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
+#ifndef LLVM_CODEGEN_STACKMAPLIVENESSANALYSIS_H
+#define LLVM_CODEGEN_STACKMAPLIVENESSANALYSIS_H
 
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -61,4 +61,4 @@ private:
 
 } // llvm namespace
 
-#endif // LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
+#endif
diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h
index 5eddbb65259e..4e48afe14004 100644
--- a/include/llvm/CodeGen/StackMaps.h
+++ b/include/llvm/CodeGen/StackMaps.h
@@ -8,8 +8,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_STACKMAPS
-#define LLVM_STACKMAPS
+#ifndef LLVM_CODEGEN_STACKMAPS_H
+#define LLVM_CODEGEN_STACKMAPS_H
 
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
@@ -81,6 +81,52 @@ public:
   unsigned getNextScratchIdx(unsigned StartIdx = 0) const;
 };
 
+/// MI-level Statepoint operands
+///
+/// Statepoint operands take the form:
+///   <num call arguments>, <call target>, [call arguments],
+///   <StackMaps::ConstantOp>, <flags>,
+///   <StackMaps::ConstantOp>, <num other args>, [other args],
+///   [gc values]
+class StatepointOpers {
+private:
+  enum {
+    NCallArgsPos = 0,
+    CallTargetPos = 1
+  };
+
+public:
+  explicit StatepointOpers(const MachineInstr *MI):
+    MI(MI) { }
+
+  /// Get starting index of non call related arguments
+  /// (statepoint flags, vm state and gc state).
+  unsigned getVarIdx() const {
+    return MI->getOperand(NCallArgsPos).getImm() + 2;
+  }
+
+  /// Returns the index of the operand containing the number of non-gc non-call
+  /// arguments. 
+  unsigned getNumVMSArgsIdx() const {
+    return getVarIdx() + 3;
+  }
+
+  /// Returns the number of non-gc non-call arguments attached to the
+  /// statepoint.  Note that this is the number of arguments, not the number of
+  /// operands required to represent those arguments.
+  unsigned getNumVMSArgs() const {
+    return MI->getOperand(getNumVMSArgsIdx()).getImm();
+  }
+
+  /// Returns the target of the underlying call.
+  const MachineOperand &getCallTarget() const {
+    return MI->getOperand(CallTargetPos);
+  }
+
+private:
+  const MachineInstr *MI;
+};
+
 class StackMaps {
 public:
   struct Location {
@@ -118,6 +164,12 @@ public:
 
   StackMaps(AsmPrinter &AP);
 
+  void reset() {
+    CSInfos.clear();
+    ConstPool.clear();
+    FnStackSize.clear();
+  }
+
   /// \brief Generate a stackmap record for a stackmap instruction.
   ///
   /// MI must be a raw STACKMAP, not a PATCHPOINT.
@@ -126,6 +178,9 @@ public:
   /// \brief Generate a stackmap record for a patchpoint instruction.
   void recordPatchPoint(const MachineInstr &MI);
 
+  /// \brief Generate a stackmap record for a statepoint instruction.
+  void recordStatepoint(const MachineInstr &MI);
+
   /// If there is any stack map data, create a stack map section and serialize
   /// the map info into it. This clears the stack map data structures
   /// afterwards.
@@ -133,10 +188,9 @@ public:
 
 private:
   static const char *WSMP;
-
   typedef SmallVector<Location, 8> LocationVec;
   typedef SmallVector<LiveOutReg, 8> LiveOutVec;
-  typedef MapVector<int64_t, int64_t> ConstantPool;
+  typedef MapVector<uint64_t, uint64_t> ConstantPool;
   typedef MapVector<const MCSymbol *, uint64_t> FnStackSizeMap;
 
   struct CallsiteInfo {
@@ -146,9 +200,9 @@ private:
     LiveOutVec LiveOuts;
     CallsiteInfo() : CSOffsetExpr(nullptr), ID(0) {}
     CallsiteInfo(const MCExpr *CSOffsetExpr, uint64_t ID,
-                 LocationVec &Locations, LiveOutVec &LiveOuts)
-      : CSOffsetExpr(CSOffsetExpr), ID(ID), Locations(Locations),
-        LiveOuts(LiveOuts) {}
+                 LocationVec &&Locations, LiveOutVec &&LiveOuts)
+      : CSOffsetExpr(CSOffsetExpr), ID(ID), Locations(std::move(Locations)),
+        LiveOuts(std::move(LiveOuts)) {}
   };
 
   typedef std::vector<CallsiteInfo> CallsiteInfoList;
@@ -196,4 +250,4 @@ private:
 
 }
 
-#endif // LLVM_STACKMAPS
+#endif
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 87f140190a75..9209e1c67c1b 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -89,8 +89,6 @@ public:
                        ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
                        Mangler &Mang, const TargetMachine &TM) const override;
 
-  bool isSectionAtomizableBySymbols(const MCSection &Section) const override;
-
   const MCSection *
     SelectSectionForGlobal(const GlobalValue *GV,
                            SectionKind Kind, Mangler &Mang,
diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h
index 690b70fad89b..b6136665b968 100644
--- a/include/llvm/CodeGen/TargetSchedule.h
+++ b/include/llvm/CodeGen/TargetSchedule.h
@@ -41,7 +41,7 @@ class TargetSchedModel {
   unsigned MicroOpFactor; // Multiply to normalize microops to resource units.
   unsigned ResourceLCM;   // Resource units per cycle. Latency normalization factor.
 public:
-  TargetSchedModel(): STI(nullptr), TII(nullptr) {}
+  TargetSchedModel(): SchedModel(MCSchedModel::GetDefaultSchedModel()), STI(nullptr), TII(nullptr) {}
 
   /// \brief Initialize the machine model for instruction scheduling.
   ///
@@ -167,6 +167,7 @@ public:
   /// if converter after moving it to TargetSchedModel).
   unsigned computeInstrLatency(const MachineInstr *MI,
                                bool UseDefaultDefLatency = true) const;
+  unsigned computeInstrLatency(unsigned Opcode) const;
 
   /// \brief Output dependency latency of a pair of defs of the same register.
   ///