From c82ad72f63369bc462e59458f09960d66daa58a9 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 4 Jan 2017 22:11:11 +0000
Subject: Vendor import of llvm trunk r291012:
 https://llvm.org/svn/llvm-project/llvm/trunk@291012

---
 lib/Target/AArch64/AArch64.td                      |   6 +-
 lib/Target/AArch64/AArch64AsmPrinter.cpp           |  56 +----
 lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp   |   3 +
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp           |   1 +
 lib/Target/AMDGPU/AMDGPUISelLowering.h             |   1 +
 lib/Target/AMDGPU/AMDGPUInstrInfo.td               |   4 +
 lib/Target/AMDGPU/SIISelLowering.cpp               |   9 +-
 lib/Target/AMDGPU/SIInsertWaits.cpp                |   5 +-
 lib/Target/AMDGPU/SOPInstructions.td               |   5 +-
 lib/Target/ARM/ARMAsmPrinter.cpp                   |   3 -
 lib/Target/ARM/ARMAsmPrinter.h                     |   3 -
 lib/Target/ARM/ARMMCInstLower.cpp                  |  38 ----
 lib/Target/Hexagon/BitTracker.cpp                  |  70 ++-----
 lib/Target/Hexagon/BitTracker.h                    |  53 ++---
 lib/Target/Hexagon/HexagonBitTracker.cpp           |  52 +++--
 lib/Target/Hexagon/HexagonBitTracker.h             |  22 +-
 lib/Target/Hexagon/HexagonInstrInfo.cpp            | 181 +++-------------
 lib/Target/Hexagon/HexagonInstrInfo.h              |  19 +-
 lib/Target/Hexagon/HexagonMachineFunctionInfo.h    |  27 ++-
 lib/Target/Hexagon/HexagonTargetObjectFile.cpp     |  39 ++--
 .../Hexagon/MCTargetDesc/HexagonMCCompound.cpp     |  45 ++--
 lib/Target/Hexagon/RDFCopy.h                       |  19 +-
 lib/Target/Hexagon/RDFGraph.cpp                    |  60 +++---
 lib/Target/Hexagon/RDFGraph.h                      |  99 ++++++---
 lib/Target/Mips/MipsSEISelDAGToDAG.cpp             |   6 +
 lib/Target/Mips/MipsSEISelDAGToDAG.h               |   2 +
 lib/Target/PowerPC/PPCISelLowering.cpp             |  79 ++++---
 lib/Target/X86/X86AsmPrinter.cpp                   |   2 +-
 lib/Target/X86/X86FrameLowering.cpp                |  22 +-
 lib/Target/X86/X86ISelLowering.cpp                 | 232 ++++++++++++++++-----
 lib/Target/X86/X86InstrAVX512.td                   |  43 +---
 lib/Target/X86/X86InstrSSE.td                      |  53 +++--
 lib/Target/X86/X86InstrTablesInfo.h                |  90 ++++----
 lib/Target/X86/X86IntrinsicsInfo.h                 |  26 +--
 lib/Target/X86/X86MCInstLower.cpp                  |  50 -----
 lib/Target/X86/X86TargetTransformInfo.cpp          | 220 +++++++------------
 36 files changed, 749 insertions(+), 896 deletions(-)

diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index c40391d5ad9d..740766b151bb 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -264,9 +264,13 @@ def ProcFalkor  : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
                                    "Qualcomm Falkor processors", [
                                    FeatureCRC,
                                    FeatureCrypto,
+                                   FeatureCustomCheapAsMoveHandling,
                                    FeatureFPARMv8,
                                    FeatureNEON,
-                                   FeaturePerfMon
+                                   FeaturePerfMon,
+                                   FeaturePostRAScheduler,
+                                   FeaturePredictableSelectIsExpensive,
+                                   FeatureZCZeroing
                                    ]>;
 
 def ProcVulcan  : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
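
Each SubtargetFeature listed above becomes a bit in the CPU's feature set, and
TableGen generates a boolean accessor that backend code queries. A minimal
self-contained model of that mechanism (the enum and struct below are
illustrative stand-ins, not LLVM's real types):

    #include <cstdint>
    #include <cstdio>

    // Sketch: each named feature sets a bit; optimizations test a predicate.
    enum Feature : uint64_t {
      FeatureZCZeroing       = 1u << 0, // zero-cycle zeroing idioms
      FeaturePostRAScheduler = 1u << 1, // run the post-RA scheduler
    };

    struct Subtarget {
      uint64_t Bits;
      bool hasZeroCycleZeroing() const { return Bits & FeatureZCZeroing; }
      bool enablePostRAScheduler() const {
        return Bits & FeaturePostRAScheduler;
      }
    };

    int main() {
      Subtarget Falkor{FeatureZCZeroing | FeaturePostRAScheduler};
      std::printf("zcz=%d postRA=%d\n", Falkor.hasZeroCycleZeroing(),
                  Falkor.enablePostRAScheduler());
    }
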
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index b2d96a32fd3a..efc221893782 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -76,7 +76,6 @@ public:
   void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
   void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
 
-  void EmitXRayTable();
   void EmitSled(const MachineInstr &MI, SledKind Kind);
 
   /// \brief tblgen'erated driver function for lowering simple MI->MC
@@ -95,7 +94,7 @@ public:
     AArch64FI = F.getInfo<AArch64FunctionInfo>();
     STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
     bool Result = AsmPrinter::runOnMachineFunction(F);
-    EmitXRayTable();
+    emitXRayTable();
     return Result;
   }
 
@@ -150,59 +149,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
   EmitSled(MI, SledKind::TAIL_CALL);
 }
 
-void AArch64AsmPrinter::EmitXRayTable()
-{
-  //TODO: merge the logic for ELF XRay sleds at a higher level, so to avoid
-  // code duplication as it is now for x86_64, ARM32 and AArch64.
-  if (Sleds.empty())
-    return;
-
-  auto PrevSection = OutStreamer->getCurrentSectionOnly();
-  auto Fn = MF->getFunction();
-  MCSection *Section;
-
-  if (STI->isTargetELF()) {
-    if (Fn->hasComdat())
-      Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
-        ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
-        Fn->getComdat()->getName());
-    else
-      Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
-        ELF::SHF_ALLOC);
-  } else if (STI->isTargetMachO()) {
-    Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
-                                         SectionKind::getReadOnlyWithRel());
-  } else {
-    llvm_unreachable("Unsupported target");
-  }
-
-  // Before we switch over, we force a reference to a label inside the
-  // xray_instr_map section. Since EmitXRayTable() is always called just
-  // before the function's end, we assume that this is happening after the
-  // last return instruction.
-  //
-  // We then align the reference to 16 byte boundaries, which we determined
-  // experimentally to be beneficial to avoid causing decoder stalls.
-  MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
-  OutStreamer->EmitCodeAlignment(16);
-  OutStreamer->EmitSymbolValue(Tmp, 8, false);
-  OutStreamer->SwitchSection(Section);
-  OutStreamer->EmitLabel(Tmp);
-  for (const auto &Sled : Sleds) {
-    OutStreamer->EmitSymbolValue(Sled.Sled, 8);
-    OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
-    auto Kind = static_cast<uint8_t>(Sled.Kind);
-    OutStreamer->EmitBytes(
-      StringRef(reinterpret_cast<const char *>(&Kind), 1));
-    OutStreamer->EmitBytes(
-      StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
-    OutStreamer->EmitZeros(14);
-  }
-  OutStreamer->SwitchSection(PrevSection);
-
-  Sleds.clear();
-}
-
 void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
 {
   static const int8_t NoopsInSledCount = 7;
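
For reference, each record the deleted loop wrote is 32 bytes, and the shared
emitXRayTable() now called above emits the same layout from one place. A sketch
of that on-disk entry, with field sizes taken from the EmitSymbolValue,
EmitBytes, and EmitZeros calls in the removed code (the struct name is
illustrative, not a type from the patch; the ARM emitter removed later in this
patch is the 4-byte-pointer variant with 6 bytes of padding, giving 16-byte
entries):

    #include <cstdint>

    struct XRaySledEntry64 {
      uint64_t SledAddress;      // EmitSymbolValue(Sled.Sled, 8)
      uint64_t FunctionAddress;  // EmitSymbolValue(CurrentFnSym, 8)
      uint8_t  Kind;             // FUNCTION_ENTER / FUNCTION_EXIT / TAIL_CALL
      uint8_t  AlwaysInstrument; // function asked to always be instrumented
      uint8_t  Padding[14];      // EmitZeros(14)
    };
    static_assert(sizeof(XRaySledEntry64) == 32,
                  "sled entries are 32 bytes on 64-bit targets");
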
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index dcb05601e5f4..8a76c42b5898 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1470,6 +1470,9 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   bool IsUnscaled = TII->isUnscaledLdSt(MI);
   int Offset = getLdStOffsetOp(MI).getImm();
   int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
+  // Allow one more for offset.
+  if (Offset > 0)
+    Offset -= OffsetStride;
   if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
     return false;
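
The new adjustment widens what counts as pairable: a paired access is addressed
from the lower of the two offsets, so a candidate sitting one stride past the
immediate range can still form a pair with its lower neighbor. A self-contained
restatement of the check (the bounds below assume LDP/STP's signed 7-bit scaled
immediate, [-64, 63] in units of the access size; simplified from the pass):

    #include <cassert>

    static bool inBoundsForPairScaled(int Offset) {
      return Offset >= -64 && Offset <= 63; // ldp/stp imm7 range
    }

    int main() {
      int Offset = 64;          // too big to be the pair's base by itself
      int OffsetStride = 1;     // scaled addressing: one unit per element
      if (Offset > 0)
        Offset -= OffsetStride; // pair is addressed from the lower neighbor
      assert(inBoundsForPairScaled(Offset)); // 63 is in range: pairable
    }
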
 
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a87204d46eae..0b0a0e7d083e 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(KILL)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(SENDMSG)
+  NODE_NAME_CASE(SENDMSGHALT)
   NODE_NAME_CASE(INTERP_MOV)
   NODE_NAME_CASE(INTERP_P1)
   NODE_NAME_CASE(INTERP_P2)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 5cc5efb331e3..745c9923de2e 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -313,6 +313,7 @@ enum NodeType : unsigned {
   /// Pointer to the start of the shader's constant data.
   CONST_DATA_PTR,
   SENDMSG,
+  SENDMSGHALT,
   INTERP_MOV,
   INTERP_P1,
   INTERP_P2,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index e7b40016e272..f079c8d0c70c 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
                     SDTypeProfile<0, 1, [SDTCisInt<0>]>,
                     [SDNPHasChain, SDNPInGlue]>;
 
+def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
+                    SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+                    [SDNPHasChain, SDNPInGlue]>;
+
 def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
                         SDTypeProfile<1, 3, [SDTCisFP<0>]>,
                         [SDNPInGlue]>;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index fa53831cbe16..c78e97dfd46f 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 
   switch (IntrinsicID) {
-  case AMDGPUIntrinsic::SI_sendmsg: {
+  case AMDGPUIntrinsic::SI_sendmsg:
+  case Intrinsic::amdgcn_s_sendmsg: {
     Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
     SDValue Glue = Chain.getValue(1);
     return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
                        Op.getOperand(2), Glue);
   }
+  case Intrinsic::amdgcn_s_sendmsghalt: {
+    Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
+    SDValue Glue = Chain.getValue(1);
+    return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
+                       Op.getOperand(2), Glue);
+  }
   case AMDGPUIntrinsic::SI_tbuffer_store: {
     SDValue Ops[] = {
       Chain,
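
From IR, the new path is reached through the llvm.amdgcn.s.sendmsghalt
intrinsic, whose two i32 operands are the message immediate and the value
copied to M0 (operands 2 and 3 in the lowering above). A minimal sketch of
emitting it from IR-building code; the helper name is an assumption, but the
intrinsic id is the one this patch handles:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Hypothetical helper: emit s_sendmsghalt with a given message immediate
    // and stream value (the value the lowering above copies into M0).
    static void emitSendMsgHalt(Module &M, IRBuilder<> &B, unsigned MsgImm,
                                Value *StreamVal) {
      Function *F =
          Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_s_sendmsghalt);
      B.CreateCall(F, {B.getInt32(MsgImm), StreamVal});
    }
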
diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp
index 202a1e9ed8ac..fceabd7a8fdd 100644
--- a/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
     return;
 
   // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
-  if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
+  if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
     BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
     LastInstWritesM0 = false;
     return;
@@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
       // signalling other hardware blocks
       if ((I->getOpcode() == AMDGPU::S_BARRIER &&
                ST->needWaitcntBeforeBarrier()) ||
-           I->getOpcode() == AMDGPU::S_SENDMSG)
+           I->getOpcode() == AMDGPU::S_SENDMSG ||
+           I->getOpcode() == AMDGPU::S_SENDMSGHALT)
         Required = LastIssued;
       else
         Required = handleOperands(*I);
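
Both hunks repeat the same two-opcode test; a small predicate would keep the
send-message opcodes in one place. A refactoring sketch (the helper is a
suggestion, not code from the patch):

    // Hypothetical helper, next to the other opcode tests in SIInsertWaits:
    static bool isSendMsg(unsigned Opc) {
      return Opc == AMDGPU::S_SENDMSG || Opc == AMDGPU::S_SENDMSGHALT;
    }
    // Usage at the two sites above:
    //   if (LastInstWritesM0 && isSendMsg(I->getOpcode())) { ... }
    //   if ((I->getOpcode() == AMDGPU::S_BARRIER &&
    //        ST->needWaitcntBeforeBarrier()) ||
    //       isSendMsg(I->getOpcode()))
    //     Required = LastIssued;
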
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index 0aeb1297d3a7..73cd5774128e 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in {
 def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
   [(AMDGPUsendmsg (i32 imm:$simm16))]
 >;
+
+def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
+  [(AMDGPUsendmsghalt (i32 imm:$simm16))]
+>;
 } // End Uses = [EXEC, M0]
 
-def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
 def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
 def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
   let simm16 = 0;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index f20768ab77a5..8ec9cb02813c 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -164,9 +164,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   // Emit the rest of the function body.
   EmitFunctionBody();
 
-  // Emit the XRay table for this function.
-  EmitXRayTable();
-
   // If we need V4T thumb mode Register Indirect Jump pads, emit them.
   // These are created per function, rather than per TU, since it's
   // relatively easy to exceed the thumb branch range within a TU.
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index ce0b04d56d9e..93fed10eb2d0 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -113,9 +113,6 @@ public:
   void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
   void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
   void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
-  // Helper function that emits the XRay sleds we've collected for a particular
-  // function.
-  void EmitXRayTable();
 
 private:
   void EmitSled(const MachineInstr &MI, SledKind Kind);
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 293a527b09e8..07044b9697b6 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -22,9 +22,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCStreamer.h"
 using namespace llvm;
@@ -226,38 +223,3 @@ void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
 {
   EmitSled(MI, SledKind::TAIL_CALL);
 }
-
-void ARMAsmPrinter::EmitXRayTable()
-{
-  if (Sleds.empty())
-    return;
-
-  MCSection *Section = nullptr;
-  if (Subtarget->isTargetELF()) {
-    Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
-                                       ELF::SHF_ALLOC | ELF::SHF_GROUP |
-                                           ELF::SHF_MERGE,
-                                       0, CurrentFnSym->getName());
-  } else if (Subtarget->isTargetMachO()) {
-    Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
-                                         SectionKind::getReadOnlyWithRel());
-  } else {
-    llvm_unreachable("Unsupported target");
-  }
-
-  auto PrevSection = OutStreamer->getCurrentSectionOnly();
-  OutStreamer->SwitchSection(Section);
-  for (const auto &Sled : Sleds) {
-    OutStreamer->EmitSymbolValue(Sled.Sled, 4);
-    OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
-    auto Kind = static_cast<uint8_t>(Sled.Kind);
-    OutStreamer->EmitBytes(
-      StringRef(reinterpret_cast<const char *>(&Kind), 1));
-    OutStreamer->EmitBytes(
-      StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
-    OutStreamer->EmitZeros(6);
-  }
-  OutStreamer->SwitchSection(PrevSection);
-
-  Sleds.clear();
-}
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index c0591c332dea..963fb99ce09b 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -53,28 +53,36 @@
 //
 // The code below is intended to be fully target-independent.
 
+#include "BitTracker.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-
-#include "BitTracker.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
 
 using namespace llvm;
 
 typedef BitTracker BT;
 
 namespace {
+
   // Local trickery to pretty print a register (without the whole "%vreg"
   // business).
   struct printv {
     printv(unsigned r) : R(r) {}
+
     unsigned R;
   };
+
   raw_ostream &operator<< (raw_ostream &OS, const printv &PV) {
     if (PV.R)
       OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R);
@@ -82,9 +90,11 @@ namespace {
       OS << 's';
     return OS;
   }
-}
+
+} // end anonymous namespace
 
 namespace llvm {
+
   raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) {
     switch (BV.Type) {
       case BT::BitValue::Top:
@@ -167,14 +177,14 @@ namespace llvm {
 
     return OS;
   }
-}
+
+} // end namespace llvm
 
 void BitTracker::print_cells(raw_ostream &OS) const {
   for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I)
     dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n";
 }
 
-
 BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
     : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
 
@@ -182,7 +192,6 @@ BitTracker::~BitTracker() {
   delete &Map;
 }
 
-
 // If we were allowed to update a cell for a part of a register, the meet
 // operation would need to be parametrized by the register number and the
 // exact part of the register, so that the computer BitRefs correspond to
@@ -201,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
   return Changed;
 }
 
-
 // Insert the entire cell RC into the current cell at position given by M.
 BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
       const BitMask &M) {
@@ -224,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
   return *this;
 }
 
-
 BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
   uint16_t B = M.first(), E = M.last(), W = width();
   assert(B < W && E < W);
@@ -243,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
   return RC;
 }
 
-
 BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
   // Rotate left (i.e. towards increasing bit indices).
   // Swap the two parts:  [0..W-Sh-1] [W-Sh..W-1]
@@ -265,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
   return *this;
 }
 
-
 BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
       const BitValue &V) {
   assert(B <= E);
@@ -274,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
   return *this;
 }
 
-
 BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
   // Append the cell given as the argument to the "this" cell.
   // Bit 0 of RC becomes bit W of the result, where W is this->width().
@@ -285,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
   return *this;
 }
 
-
 uint16_t BT::RegisterCell::ct(bool B) const {
   uint16_t W = width();
   uint16_t C = 0;
@@ -295,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const {
   return C;
 }
 
-
 uint16_t BT::RegisterCell::cl(bool B) const {
   uint16_t W = width();
   uint16_t C = 0;
@@ -305,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const {
   return C;
 }
 
-
 bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
   uint16_t W = Bits.size();
   if (RC.Bits.size() != W)
@@ -316,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
   return true;
 }
 
-
 uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
   // The general problem is with finding a register class that corresponds
   // to a given reference reg:sub. There can be several such classes, and
@@ -342,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
   return BW;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
       const CellMapType &M) const {
   uint16_t BW = getRegBitWidth(RR);
@@ -370,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
   return RegisterCell::top(BW);
 }
 
-
 void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
       CellMapType &M) const {
   // While updating the cell map can be done in a meaningful way for
@@ -388,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
   M[RR.Reg] = RC;
 }
 
-
 // Check if the cell represents a compile-time integer value.
 bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
   uint16_t W = A.width();
@@ -398,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
   return true;
 }
 
-
 // Convert a cell to the integer value. The result must fit in uint64_t.
 uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
   assert(isInt(A));
@@ -411,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
   return Val;
 }
 
-
 // Evaluator helper functions. These implement some common operation on
 // register cells that can be used to implement target-specific instructions
 // in a target-specific evaluator.
@@ -426,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const {
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
   const APInt &A = CI->getValue();
   uint16_t BW = A.getBitWidth();
@@ -437,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
       const RegisterCell &A2) const {
   uint16_t W = A1.width();
@@ -471,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
       const RegisterCell &A2) const {
   uint16_t W = A1.width();
@@ -505,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1,
       const RegisterCell &A2) const {
   uint16_t W = A1.width() + A2.width();
-  uint16_t Z = A1.ct(0) + A2.ct(0);
+  uint16_t Z = A1.ct(false) + A2.ct(false);
   RegisterCell Res(W);
   Res.fill(0, Z, BitValue::Zero);
   Res.fill(Z, W, BitValue::self());
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1,
       const RegisterCell &A2) const {
   uint16_t W = A1.width() + A2.width();
-  uint16_t Z = A1.ct(0) + A2.ct(0);
+  uint16_t Z = A1.ct(false) + A2.ct(false);
   RegisterCell Res(W);
   Res.fill(0, Z, BitValue::Zero);
   Res.fill(Z, W, BitValue::self());
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
       uint16_t Sh) const {
   assert(Sh <= A1.width());
@@ -537,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
       uint16_t Sh) const {
   uint16_t W = A1.width();
@@ -548,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
       uint16_t Sh) const {
   uint16_t W = A1.width();
@@ -560,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
       const RegisterCell &A2) const {
   uint16_t W = A1.width();
@@ -583,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
       const RegisterCell &A2) const {
   uint16_t W = A1.width();
@@ -606,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
       const RegisterCell &A2) const {
   uint16_t W = A1.width();
@@ -627,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
   uint16_t W = A1.width();
   RegisterCell Res(W);
@@ -643,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
       uint16_t BitN) const {
   assert(BitN < A1.width());
@@ -652,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
       uint16_t BitN) const {
   assert(BitN < A1.width());
@@ -661,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
       uint16_t W) const {
   uint16_t C = A1.cl(B), AW = A1.width();
@@ -672,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
   return RegisterCell::self(0, W);
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
       uint16_t W) const {
   uint16_t C = A1.ct(B), AW = A1.width();
@@ -683,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
   return RegisterCell::self(0, W);
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
       uint16_t FromN) const {
   uint16_t W = A1.width();
@@ -695,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
       uint16_t FromN) const {
   uint16_t W = A1.width();
@@ -705,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
       uint16_t B, uint16_t E) const {
   uint16_t W = A1.width();
@@ -718,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
       const RegisterCell &A2, uint16_t AtN) const {
   uint16_t W1 = A1.width(), W2 = A2.width();
@@ -731,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
   return Res;
 }
 
-
 BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const {
   assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0");
   uint16_t W = getRegBitWidth(Reg);
@@ -785,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI,
   return true;
 }
 
-
 // Main W-Z implementation.
 
 void BT::visitPHI(const MachineInstr &PI) {
@@ -977,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) {
   }
 }
 
-
 void BT::visitUsesOf(unsigned Reg) {
   if (Trace)
     dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n";
@@ -997,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) {
   }
 }
 
-
 BT::RegisterCell BT::get(RegisterRef RR) const {
   return ME.getCell(RR, Map);
 }
 
-
 void BT::put(RegisterRef RR, const RegisterCell &RC) {
   ME.putCell(RR, RC, Map);
 }
 
-
 // Replace all references to bits from OldRR with the corresponding bits
 // in NewRR.
 void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
@@ -1033,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
   }
 }
 
-
 // Check if the block has been "executed" during propagation. (If not, the
 // block is dead, but it may still appear to be reachable.)
 bool BT::reached(const MachineBasicBlock *B) const {
@@ -1047,7 +1015,6 @@ bool BT::reached(const MachineBasicBlock *B) const {
   return false;
 }
 
-
 // Visit an individual instruction. This could be a newly added instruction,
 // or one that has been modified by an optimization.
 void BT::visit(const MachineInstr &MI) {
@@ -1061,14 +1028,12 @@ void BT::visit(const MachineInstr &MI) {
     FlowQ.pop();
 }
 
-
 void BT::reset() {
   EdgeExec.clear();
   InstrExec.clear();
   Map.clear();
 }
 
-
 void BT::run() {
   reset();
   assert(FlowQ.empty());
@@ -1141,4 +1106,3 @@ void BT::run() {
   if (Trace)
     print_cells(dbgs() << "Cells after propagation:\n");
 }
-
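
The eMLS/eMLU context above relies on a bit-level fact: a product has at least
as many trailing zero bits as its factors combined, since each factor
contributes its own power of two. Hence Z = A1.ct(false) + A2.ct(false) low
bits of the result can be pinned to Zero while the rest stay unknown. A
self-contained check of that arithmetic:

    #include <cassert>
    #include <cstdint>

    // Trailing-zero count by repeated shifting (returns 0 for V == 0 here,
    // which keeps the inequality below trivially true).
    static unsigned ctz64(uint64_t V) {
      unsigned N = 0;
      for (; V && !(V & 1); V >>= 1)
        ++N;
      return N;
    }

    int main() {
      uint64_t A = 0b101000; // 40: three trailing zeros
      uint64_t B = 0b1100;   // 12: two trailing zeros
      assert(ctz64(A * B) >= ctz64(A) + ctz64(B)); // 480 has five
    }
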
diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h
index 74cafcd00b60..48c5f2266acf 100644
--- a/lib/Target/Hexagon/BitTracker.h
+++ b/lib/Target/Hexagon/BitTracker.h
@@ -1,4 +1,4 @@
-//===--- BitTracker.h -----------------------------------------------------===//
+//===--- BitTracker.h -------------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,24 +7,27 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef BITTRACKER_H
-#define BITTRACKER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
+#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
 
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
-
+#include "llvm/CodeGen/MachineOperand.h"
+#include <cassert>
+#include <cstdint>
 #include <map>
 #include <queue>
 #include <set>
+#include <utility>
 
 namespace llvm {
-  class ConstantInt;
-  class MachineRegisterInfo;
-  class MachineBasicBlock;
-  class MachineInstr;
-  class MachineOperand;
-  class raw_ostream;
+
+class ConstantInt;
+class MachineRegisterInfo;
+class MachineBasicBlock;
+class MachineInstr;
+class raw_ostream;
 
 struct BitTracker {
   struct BitRef;
@@ -76,19 +79,19 @@ private:
   CellMapType &Map;
 };
 
-
 // Abstraction of a reference to bit at position Pos from a register Reg.
 struct BitTracker::BitRef {
   BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
+
   bool operator== (const BitRef &BR) const {
     // If Reg is 0, disregard Pos.
     return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
   }
+
   unsigned Reg;
   uint16_t Pos;
 };
 
-
 // Abstraction of a register reference in MachineOperand.  It contains the
 // register number and the subregister index.
 struct BitTracker::RegisterRef {
@@ -96,10 +99,10 @@ struct BitTracker::RegisterRef {
     : Reg(R), Sub(S) {}
   RegisterRef(const MachineOperand &MO)
       : Reg(MO.getReg()), Sub(MO.getSubReg()) {}
+
   unsigned Reg, Sub;
 };
 
-
 // Value that a single bit can take.  This is outside of the context of
 // any register, it is more of an abstraction of the two-element set of
 // possible bit values.  One extension here is the "Ref" type, which
@@ -158,6 +161,7 @@ struct BitTracker::BitValue {
   bool operator!= (const BitValue &V) const {
     return !operator==(V);
   }
+
   bool is(unsigned T) const {
     assert(T == 0 || T == 1);
     return T == 0 ? Type == Zero
@@ -209,6 +213,7 @@ struct BitTracker::BitValue {
   bool num() const {
     return Type == Zero || Type == One;
   }
+
   operator bool() const {
     assert(Type == Zero || Type == One);
     return Type == One;
@@ -217,7 +222,6 @@ struct BitTracker::BitValue {
   friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV);
 };
 
-
 // This operation must be idempotent, i.e. ref(ref(V)) == ref(V).
 inline BitTracker::BitValue
 BitTracker::BitValue::ref(const BitValue &V) {
@@ -228,26 +232,26 @@ BitTracker::BitValue::ref(const BitValue &V) {
   return self();
 }
 
-
 inline BitTracker::BitValue
 BitTracker::BitValue::self(const BitRef &Self) {
   return BitValue(Self.Reg, Self.Pos);
 }
 
-
 // A sequence of bits starting from index B up to and including index E.
 // If E < B, the mask represents two sections: [0..E] and [B..W) where
 // W is the width of the register.
 struct BitTracker::BitMask {
-  BitMask() : B(0), E(0) {}
+  BitMask() = default;
   BitMask(uint16_t b, uint16_t e) : B(b), E(e) {}
+
   uint16_t first() const { return B; }
   uint16_t last() const { return E; }
+
 private:
-  uint16_t B, E;
+  uint16_t B = 0;
+  uint16_t E = 0;
 };
 
-
 // Representation of a register: a list of BitValues.
 struct BitTracker::RegisterCell {
   RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {}
@@ -255,6 +259,7 @@ struct BitTracker::RegisterCell {
   uint16_t width() const {
     return Bits.size();
   }
+
   const BitValue &operator[](uint16_t BitN) const {
     assert(BitN < Bits.size());
     return Bits[BitN];
@@ -297,12 +302,10 @@ private:
   friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC);
 };
 
-
 inline bool BitTracker::has(unsigned Reg) const {
   return Map.find(Reg) != Map.end();
 }
 
-
 inline const BitTracker::RegisterCell&
 BitTracker::lookup(unsigned Reg) const {
   CellMapType::const_iterator F = Map.find(Reg);
@@ -310,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const {
   return F->second;
 }
 
-
 inline BitTracker::RegisterCell
 BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
   RegisterCell RC(Width);
@@ -319,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
   return RC;
 }
 
-
 inline BitTracker::RegisterCell
 BitTracker::RegisterCell::top(uint16_t Width) {
   RegisterCell RC(Width);
@@ -328,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) {
   return RC;
 }
 
-
 inline BitTracker::RegisterCell
 BitTracker::RegisterCell::ref(const RegisterCell &C) {
   uint16_t W = C.width();
@@ -345,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) {
 struct BitTracker::MachineEvaluator {
   MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M)
       : TRI(T), MRI(M) {}
-  virtual ~MachineEvaluator() {}
+  virtual ~MachineEvaluator() = default;
 
   uint16_t getRegBitWidth(const RegisterRef &RR) const;
 
   RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const;
   void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const;
+
   // A result of any operation should use refs to the source cells, not
   // the cells directly. This function is a convenience wrapper to quickly
   // generate a ref for a cell corresponding to a register reference.
@@ -435,4 +436,4 @@ struct BitTracker::MachineEvaluator {
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
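
The BitMask convention documented in the header above (a range [B..E] that
wraps around when E < B, covering [0..E] and [B..W)) is easiest to see on a
concrete register width. An illustrative expansion of a mask into its covered
bits; BitTracker itself stores only B and E:

    #include <cstdint>
    #include <cstdio>

    // Expand BitMask(B, E) over a W-bit register into a literal bit mask.
    static uint32_t maskBits(uint16_t B, uint16_t E, uint16_t W) {
      uint32_t M = 0;
      for (uint16_t i = 0; i < W; ++i) {
        bool In = (B <= E) ? (i >= B && i <= E)  // one contiguous section
                           : (i <= E || i >= B); // wrapped: two sections
        if (In)
          M |= uint32_t(1) << i;
      }
      return M;
    }

    int main() {
      std::printf("%04x\n", maskBits(4, 11, 16)); // 0ff0: bits 4..11
      std::printf("%04x\n", maskBits(12, 3, 16)); // f00f: bits 0..3, 12..15
    }
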
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index b78c4126e0b1..436f88dcd450 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -7,16 +7,30 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
 #include "Hexagon.h"
+#include "HexagonBitTracker.h"
 #include "HexagonInstrInfo.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonTargetMachine.h"
-#include "HexagonBitTracker.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -76,11 +90,11 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
   }
 }
 
-
 BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
+  using namespace Hexagon;
+
   if (Sub == 0)
     return MachineEvaluator::mask(Reg, 0);
-  using namespace Hexagon;
   const TargetRegisterClass *RC = MRI.getRegClass(Reg);
   unsigned ID = RC->getID();
   uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
@@ -102,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
 }
 
 namespace {
+
 class RegisterRefs {
   std::vector<BT::RegisterRef> Vector;
 
@@ -117,17 +132,21 @@ public:
   }
 
   size_t size() const { return Vector.size(); }
+
   const BT::RegisterRef &operator[](unsigned n) const {
     // The main purpose of this operator is to assert with bad argument.
     assert(n < Vector.size());
     return Vector[n];
   }
 };
-}
+
+} // end anonymous namespace
 
 bool HexagonEvaluator::evaluate(const MachineInstr &MI,
                                 const CellMapType &Inputs,
                                 CellMapType &Outputs) const {
+  using namespace Hexagon;
+
   unsigned NumDefs = 0;
 
   // Sanity verification: there should not be any defs with subregisters.
@@ -142,7 +161,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
   if (NumDefs == 0)
     return false;
 
-  using namespace Hexagon;
   unsigned Opc = MI.getOpcode();
 
   if (MI.mayLoad()) {
@@ -779,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
     case S2_cl0:
     case S2_cl0p:
       // Always produce a 32-bit result.
-      return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs);
+      return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs);
     case S2_cl1:
     case S2_cl1p:
-      return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs);
+      return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs);
     case S2_clb:
     case S2_clbp: {
       uint16_t W1 = getRegBitWidth(Reg[1]);
@@ -794,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
     }
     case S2_ct0:
     case S2_ct0p:
-      return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs);
+      return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs);
     case S2_ct1:
     case S2_ct1p:
-      return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs);
+      return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs);
     case S5_popcountp:
       // TODO
       break;
@@ -953,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
 bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
                                     const CellMapType &Inputs,
                                     CellMapType &Outputs) const {
+  using namespace Hexagon;
+
   if (TII.isPredicated(MI))
     return false;
   assert(MI.mayLoad() && "A load that mayn't?");
@@ -960,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
 
   uint16_t BitNum;
   bool SignEx;
-  using namespace Hexagon;
 
   switch (Opc) {
     default:
@@ -1141,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI,
   return true;
 }
 
-
 unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
   using namespace Hexagon;
+
   bool Is64 = DoubleRegsRegClass.contains(PReg);
   assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg));
 
@@ -1180,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
   return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0;
 }
 
-
 unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const {
   typedef MachineRegisterInfo::livein_iterator iterator;
   for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
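
In the hunks above, the bool now passed to eCLB/eCTB selects which bit value is
counted: S2_cl0/S2_ct0 count zeros (false) and S2_cl1/S2_ct1 count ones (true).
A minimal model of the "count leading B" operation on a concrete 32-bit value:

    #include <cassert>
    #include <cstdint>

    static unsigned countLeading(uint32_t V, bool B) {
      unsigned N = 0;
      for (int i = 31; i >= 0 && ((V >> i) & 1) == unsigned(B); --i)
        ++N;
      return N;
    }

    int main() {
      assert(countLeading(0x0000FFFFu, false) == 16); // cl0 semantics
      assert(countLeading(0xFFFF0000u, true)  == 16); // cl1 semantics
    }
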
diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h
index 9e7b1dbe298f..2cbf65e66ca6 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.h
+++ b/lib/Target/Hexagon/HexagonBitTracker.h
@@ -1,4 +1,4 @@
-//===--- HexagonBitTracker.h ----------------------------------------------===//
+//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,15 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef HEXAGONBITTRACKER_H
-#define HEXAGONBITTRACKER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
 
 #include "BitTracker.h"
 #include "llvm/ADT/DenseMap.h"
+#include <cstdint>
 
 namespace llvm {
-  class HexagonInstrInfo;
-  class HexagonRegisterInfo;
+
+class HexagonInstrInfo;
+class HexagonRegisterInfo;
 
 struct HexagonEvaluator : public BitTracker::MachineEvaluator {
   typedef BitTracker::CellMapType CellMapType;
@@ -49,10 +51,12 @@ private:
   // Type of formal parameter extension.
   struct ExtType {
     enum { SExt, ZExt };
-    char Type;
-    uint16_t Width;
-    ExtType() : Type(0), Width(0) {}
+
+    ExtType() = default;
     ExtType(char t, uint16_t w) : Type(t), Width(w) {}
+
+    char Type = 0;
+    uint16_t Width = 0;
   };
   // Map VR -> extension type.
   typedef DenseMap<unsigned, ExtType> RegExtMap;
@@ -61,4 +65,4 @@ private:
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 34ce3e652995..0a7dc6b49d00 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -11,26 +11,45 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "Hexagon.h"
 #include "HexagonHazardRecognizer.h"
 #include "HexagonInstrInfo.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
 #include <cctype>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
 
 using namespace llvm;
 
@@ -108,19 +127,16 @@ HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
     : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
       RI() {}
 
-
 static bool isIntRegForSubInst(unsigned Reg) {
   return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
          (Reg >= Hexagon::R16 && Reg <= Hexagon::R23);
 }
 
-
 static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) {
   return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_lo)) &&
          isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_hi));
 }
 
-
 /// Calculate number of instructions excluding the debug instructions.
 static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
                               MachineBasicBlock::const_instr_iterator MIE) {
@@ -132,7 +148,6 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
   return Count;
 }
 
-
 /// Find the hardware loop instruction used to set-up the specified loop.
 /// On Hexagon, we have two instructions used to set-up the hardware loop
 /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
@@ -164,17 +179,16 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
         return &*I;
       // We've reached a different loop, which means the loop0 has been removed.
       if (Opc == EndLoopOp)
-        return 0;
+        return nullptr;
     }
     // Check the predecessors for the LOOP instruction.
     MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
     if (loop)
       return loop;
   }
-  return 0;
+  return nullptr;
 }
 
-
 /// Gather register def/uses from MI.
 /// This treats possible (predicated) defs as actually happening ones
 /// (conservatively).
@@ -201,7 +215,6 @@ static inline void parseOperands(const MachineInstr &MI,
   }
 }
 
-
 // Position dependent, so check twice for swap.
 static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
   switch (Ga) {
@@ -228,8 +241,6 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
   return false;
 }
 
-
-
 /// isLoadFromStackSlot - If the specified machine instruction is a direct
 /// load from a stack slot, return the virtual or physical register number of
 /// the destination along with the FrameIndex of the loaded stack slot.  If
@@ -280,7 +291,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
   return 0;
 }
 
-
 /// isStoreToStackSlot - If the specified machine instruction is a direct
 /// store to a stack slot, return the virtual or physical register number of
 /// the source reg along with the FrameIndex of the loaded stack slot.  If
@@ -337,7 +347,6 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
   return 0;
 }
 
-
 /// This function can analyze one/two way branching only and should (mostly) be
 /// called by target independent side.
 /// First entry is always the opcode of the branching instruction, except when
@@ -401,7 +410,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   // Delete the J2_jump if it's equivalent to a fall-through.
   if (AllowModify && JumpToBlock &&
       MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
-    DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
+    DEBUG(dbgs() << "\nErasing the jump to successor block\n";);
     I->eraseFromParent();
     I = MBB.instr_end();
     if (I == MBB.instr_begin())
@@ -415,7 +424,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   MachineInstr *LastInst = &*I;
   MachineInstr *SecondLastInst = nullptr;
   // Find one more terminator if present.
-  for (;;) {
+  while (true) {
     if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(*I)) {
       if (!SecondLastInst)
         SecondLastInst = &*I;
@@ -524,7 +533,6 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   return true;
 }
 
-
 unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                         int *BytesRemoved) const {
   assert(!BytesRemoved && "code size not handled");
@@ -730,7 +738,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
   return nonDbgBBSize(&MBB) <= 3;
 }
 
-
 bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
       unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB,
       unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability)
@@ -738,7 +745,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
   return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3;
 }
 
-
 bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
       unsigned NumInstrs, BranchProbability Probability) const {
   return NumInstrs <= 4;
@@ -853,7 +859,6 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   llvm_unreachable("Unimplemented");
 }
 
-
 void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI,
       const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
@@ -976,7 +981,6 @@ void HexagonInstrInfo::loadRegFromStackSlot(
   }
 }
 
-
 static void getLiveRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) {
   const MachineBasicBlock &B = *MI.getParent();
   Regs.addLiveOuts(B);
@@ -1307,7 +1311,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   return false;
 }
 
-
 // We indicate that we want to reverse the branch by
 // inserting the reversed branching opcode.
 bool HexagonInstrInfo::reverseBranchCondition(
@@ -1325,19 +1328,16 @@ bool HexagonInstrInfo::reverseBranchCondition(
   return false;
 }
 
-
 void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB,
       MachineBasicBlock::iterator MI) const {
   DebugLoc DL;
   BuildMI(MBB, MI, DL, get(Hexagon::A2_nop));
 }
 
-
 bool HexagonInstrInfo::isPostIncrement(const MachineInstr &MI) const {
   return getAddrMode(MI) == HexagonII::PostInc;
 }
 
-
 // Returns true if an instruction is predicated irrespective of the predicate
 // sense. For example, all of the following will return true.
 // if (p0) R1 = add(R2, R3)
@@ -1351,7 +1351,6 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr &MI) const {
   return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
 }
 
-
 bool HexagonInstrInfo::PredicateInstruction(
     MachineInstr &MI, ArrayRef<MachineOperand> Cond) const {
   if (Cond.empty() || isNewValueJump(Cond[0].getImm()) ||
@@ -1403,14 +1402,12 @@ bool HexagonInstrInfo::PredicateInstruction(
   return true;
 }
 
-
 bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
       ArrayRef<MachineOperand> Pred2) const {
   // TODO: Fix this
   return false;
 }
 
-
 bool HexagonInstrInfo::DefinesPredicate(
     MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
   auto &HRI = getRegisterInfo();
@@ -1427,7 +1424,6 @@ bool HexagonInstrInfo::DefinesPredicate(
   return false;
 }
 
-
 bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const {
   return MI.getDesc().isPredicable();
 }
@@ -1466,7 +1462,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
   return false;
 }
 
-
 /// Measure the specified inline asm to determine an approximation of its
 /// length.
 /// Comments (which run till the next SeparatorString or newline) do not
@@ -1502,7 +1497,6 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
   return Length;
 }
 
-
 ScheduleHazardRecognizer*
 HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
       const InstrItineraryData *II, const ScheduleDAG *DAG) const {
@@ -1513,7 +1507,6 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
   return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
 }
 
-
 /// \brief For a comparison instruction, return the source registers in
 /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
 /// compares against in CmpValue. Return true if the comparison instruction
@@ -1609,14 +1602,12 @@ unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   return getInstrTimingClassLatency(ItinData, MI);
 }
 
-
 DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
     const TargetSubtargetInfo &STI) const {
   const InstrItineraryData *II = STI.getInstrItineraryData();
   return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II);
 }
 
-
 // Inspired by this pair:
 //  %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
 //  S2_storeri_io %R29, 132, %R1<kill>; flags:  mem:ST4[FixedStack1]
@@ -1661,7 +1652,6 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
   return false;
 }
 
-
 /// If the instruction is an increment of a constant value, return the amount.
 bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI,
       int &Value) const {
@@ -1677,7 +1667,6 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI,
   return false;
 }
 
-
 unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const {
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const TargetRegisterClass *TRC;
@@ -1695,18 +1684,15 @@ unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const {
   return NewReg;
 }
 
-
 bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr &MI) const {
   return (getAddrMode(MI) == HexagonII::AbsoluteSet);
 }
 
-
 bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
 }
 
-
 bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const {
   const MachineFunction *MF = MI.getParent()->getParent();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -1727,13 +1713,11 @@ bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const {
   return false;
 }
 
-
 // Return true if the instruction is a compound branch instruction.
 bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const {
   return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch());
 }
 
-
 bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const {
   return (MI.isBranch() && isPredicated(MI)) ||
          isConditionalTransfer(MI) ||
@@ -1744,7 +1728,6 @@ bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const {
           !isPredicatedNew(MI));
 }
 
-
 bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
     case Hexagon::A2_paddf:
@@ -1802,7 +1785,6 @@ bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const {
   return false;
 }
 
-
 // FIXME - Function name and its functionality don't match.
 // It should be renamed to hasPredNewOpcode()
 bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const {
@@ -1814,7 +1796,6 @@ bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const {
   return PNewOpcode >= 0;
 }
 
-
 // Returns true if an instruction is a conditional store.
 //
 // Note: It doesn't include conditional new-value stores as they can't be
@@ -1872,7 +1853,6 @@ bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const {
   }
 }
 
-
 bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
     case Hexagon::A2_tfrt:
@@ -1893,7 +1873,6 @@ bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const {
   return false;
 }
 
-
 // TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle
 // isFPImm and later getFPImm as well.
 bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const {
@@ -1942,7 +1921,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const {
   return (ImmValue < MinValue || ImmValue > MaxValue);
 }
 
-
 bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case Hexagon::L4_return :
@@ -1957,7 +1935,6 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const {
   return false;
 }
 
-
 // Return true when ConsMI uses a register defined by ProdMI.
 bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI,
       const MachineInstr &ConsMI) const {
@@ -1994,7 +1971,6 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI,
   return false;
 }
 
-
 // Returns true if the instruction is already a .cur.
 bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
@@ -2007,7 +1983,6 @@ bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const {
   return false;
 }
 
-
 // Returns true if any one of the operands is a dot new
 // insn, whether it is predicated dot new or register dot new.
 bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const {
@@ -2017,7 +1992,6 @@ bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const {
   return false;
 }
 
-
 /// Symmetrical. See if these two instructions are fit for duplex pair.
 bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa,
       const MachineInstr &MIb) const {
@@ -2026,7 +2000,6 @@ bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa,
   return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
 }
 
-
 bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const {
   if (MI.mayLoad() || MI.mayStore() || MI.isCompare())
     return true;
@@ -2038,13 +2011,11 @@ bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const {
   return false;
 }
 
-
 bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const {
   return (Opcode == Hexagon::ENDLOOP0 ||
           Opcode == Hexagon::ENDLOOP1);
 }
 
-
 bool HexagonInstrInfo::isExpr(unsigned OpType) const {
   switch(OpType) {
   case MachineOperand::MO_MachineBasicBlock:
@@ -2059,7 +2030,6 @@ bool HexagonInstrInfo::isExpr(unsigned OpType) const {
   }
 }
 
-
 bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const {
   const MCInstrDesc &MID = MI.getDesc();
   const uint64_t F = MID.TSFlags;
@@ -2079,7 +2049,6 @@ bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const {
   return  false;
 }
 
-
 // This returns true in two cases:
 // - The OP code itself indicates that this is an extended instruction.
 // - One of MOs has been marked with HMOTF_ConstExtended flag.
@@ -2098,14 +2067,12 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const {
   return false;
 }
 
-
 bool HexagonInstrInfo::isFloat(const MachineInstr &MI) const {
   unsigned Opcode = MI.getOpcode();
   const uint64_t F = get(Opcode).TSFlags;
   return (F >> HexagonII::FPPos) & HexagonII::FPMask;
 }
 
-
 // No V60 HVX VMEM with A_INDIRECT.
 bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I,
       const MachineInstr &J) const {
@@ -2116,7 +2083,6 @@ bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I,
   return J.isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
 }
 
-
 bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case Hexagon::J2_callr :
@@ -2128,7 +2094,6 @@ bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const {
   return false;
 }
 
-
 bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case Hexagon::L4_return :
@@ -2143,7 +2108,6 @@ bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const {
   return false;
 }
 
-
 bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case Hexagon::J2_jumpr :
@@ -2158,7 +2122,6 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const {
   return false;
 }
 
-
 // Return true if a given MI can accommodate a given offset.
 // Use an absolute estimate as opposed to the exact number.
 // TODO: This will need to be changed to use MC level
@@ -2203,7 +2166,6 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI,
   }
 }
 
-
 bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
       const MachineInstr &ESMI) const {
   bool isLate = isLateResultInstr(LRMI);
@@ -2222,7 +2184,6 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
   return false;
 }
 
-
 bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case TargetOpcode::EXTRACT_SUBREG:
@@ -2259,14 +2220,12 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const {
   return true;
 }
 
-
 bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const {
 // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE use a multiply
   // resource, but all operands can be received late like an ALU instruction.
   return MI.getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE;
 }
 
-
 bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const {
   unsigned Opcode = MI.getOpcode();
   return Opcode == Hexagon::J2_loop0i    ||
@@ -2279,7 +2238,6 @@ bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const {
          Opcode == Hexagon::J2_loop1rext;
 }
 
-
 bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
     default: return false;
@@ -2312,46 +2270,38 @@ bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const {
   return false;
 }
 
-
 bool HexagonInstrInfo::isNewValue(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
 }
 
-
 bool HexagonInstrInfo::isNewValue(unsigned Opcode) const {
   const uint64_t F = get(Opcode).TSFlags;
   return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
 }
 
-
 bool HexagonInstrInfo::isNewValueInst(const MachineInstr &MI) const {
   return isNewValueJump(MI) || isNewValueStore(MI);
 }
 
-
 bool HexagonInstrInfo::isNewValueJump(const MachineInstr &MI) const {
   return isNewValue(MI) && MI.isBranch();
 }
 
-
 bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const {
   return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
 }
 
-
 bool HexagonInstrInfo::isNewValueStore(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
 }
 
-
 bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
   const uint64_t F = get(Opcode).TSFlags;
   return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
 }
 
-
 // Returns true if a particular operand is extendable for an instruction.
 bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI,
     unsigned OperandNum) const {
@@ -2360,28 +2310,24 @@ bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI,
           == OperandNum;
 }
 
-
 bool HexagonInstrInfo::isPredicatedNew(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   assert(isPredicated(MI));
   return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
 }
 
-
 bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const {
   const uint64_t F = get(Opcode).TSFlags;
   assert(isPredicated(Opcode));
   return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
 }
 
-
 bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   return !((F >> HexagonII::PredicatedFalsePos) &
            HexagonII::PredicatedFalseMask);
 }
 
-
 bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
   const uint64_t F = get(Opcode).TSFlags;
   // Make sure that the instruction is predicated.
@@ -2390,19 +2336,16 @@ bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
            HexagonII::PredicatedFalseMask);
 }
 
-
 bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
   const uint64_t F = get(Opcode).TSFlags;
   return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
 }
 
-
 bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const {
   const uint64_t F = get(Opcode).TSFlags;
   return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask;
 }
 
-
 bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
   const uint64_t F = get(Opcode).TSFlags;
   assert(get(Opcode).isBranch() &&
@@ -2410,7 +2353,6 @@ bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
   return (F >> HexagonII::TakenPos) & HexagonII::TakenMask;
 }
 
-
 bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr &MI) const {
   return MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 ||
          MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT ||
@@ -2496,13 +2438,11 @@ bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const {
   }
 }
 
-
 bool HexagonInstrInfo::isSolo(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   return (F >> HexagonII::SoloPos) & HexagonII::SoloMask;
 }
 
-
 bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case Hexagon::STriw_pred :
@@ -2513,7 +2453,6 @@ bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const {
   }
 }
 
-
 bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const {
   if (!MI.isBranch())
     return false;
@@ -2524,7 +2463,6 @@ bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const {
   return false;
 }
 
-
 // Returns true when SU has a timing class TC1.
 bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const {
   unsigned SchedClass = MI.getDesc().getSchedClass();
@@ -2544,7 +2482,6 @@ bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const {
   }
 }
 
-
 bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const {
   unsigned SchedClass = MI.getDesc().getSchedClass();
   switch (SchedClass) {
@@ -2561,7 +2498,6 @@ bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const {
   }
 }
 
-
 bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const {
   unsigned SchedClass = MI.getDesc().getSchedClass();
   switch (SchedClass) {
@@ -2582,13 +2518,11 @@ bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const {
   }
 }
 
-
 bool HexagonInstrInfo::isTC4x(const MachineInstr &MI) const {
   unsigned SchedClass = MI.getDesc().getSchedClass();
   return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23;
 }
 
-
 // Schedule this ASAP.
 bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1,
       const MachineInstr &MI2) const {
@@ -2608,13 +2542,11 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1,
   return false;
 }
 
-
 bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr &MI) const {
   const uint64_t V = getType(MI);
   return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST;
 }
 
-
 // Check if the Offset is a valid auto-inc imm by Load/Store Type.
 //
 bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const {
@@ -2653,7 +2585,6 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const {
   llvm_unreachable("Not an auto-inc opc!");
 }
 
-
 bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
       bool Extend) const {
   // This function is to check whether the "Offset" is in the correct range of
@@ -2808,12 +2739,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
                    "Please define it in the above switch statement!");
 }
 
-
 bool HexagonInstrInfo::isVecAcc(const MachineInstr &MI) const {
   return isV60VectorInstruction(MI) && isAccumulator(MI);
 }
 
-
 bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const {
   const uint64_t F = get(MI.getOpcode()).TSFlags;
   const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
@@ -2822,7 +2751,6 @@ bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const {
     V == HexagonII::TypeCVI_VA_DV;
 }
 
-
 bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr &ProdMI,
       const MachineInstr &ConsMI) const {
   if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI))
@@ -2915,7 +2843,6 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const {
   }
 }
 
-
 // Add latency to instruction.
 bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
       const MachineInstr &MI2) const {
@@ -2925,7 +2852,6 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
   return false;
 }
 
-
 /// \brief Get the base register and byte offset of a load/store instr.
 bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
       unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI)
@@ -2937,7 +2863,6 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
   return BaseReg != 0;
 }
 
-
 /// \brief Can these instructions execute at the same time in a bundle.
 bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First,
       const MachineInstr &Second) const {
@@ -2959,13 +2884,11 @@ bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First,
   return false;
 }
 
-
 bool HexagonInstrInfo::doesNotReturn(const MachineInstr &CallMI) const {
   unsigned Opc = CallMI.getOpcode();
   return Opc == Hexagon::PS_call_nr || Opc == Hexagon::PS_callr_nr;
 }
 
-
 bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const {
   for (auto &I : *B)
     if (I.isEHLabel())
@@ -2973,7 +2896,6 @@ bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const {
   return false;
 }
 
-
 // Returns true if an instruction can be converted into a non-extended
 // equivalent instruction.
 bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const {
@@ -3011,13 +2933,11 @@ bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const {
   return false;
 }
 
-
 bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr &MI) const {
   return Hexagon::getRealHWInstr(MI.getOpcode(),
                                  Hexagon::InstrType_Pseudo) >= 0;
 }
 
-
 bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
       const {
   MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end();
@@ -3029,7 +2949,6 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
   return false;
 }
 
-
 // Returns true if an LD insn can be promoted to a cur load.
 bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const {
   auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>();
@@ -3038,14 +2957,12 @@ bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const {
          HST.hasV60TOps();
 }
 
-
 // Returns true if an ST insn can be promoted to a new-value store.
 bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask;
 }
 
-
 bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI,
       const MachineInstr &ConsMI) const {
   // There is no stall when ProdMI is not a V60 vector.
@@ -3064,7 +2981,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI,
   return true;
 }
 
-
 bool HexagonInstrInfo::producesStall(const MachineInstr &MI,
       MachineBasicBlock::const_instr_iterator BII) const {
   // There is no stall when MI is not a V60 vector.
@@ -3091,7 +3007,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI,
   return false;
 }
 
-
 bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
       unsigned PredReg) const {
   for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) {
@@ -3106,7 +3021,6 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
   return MI.getOpcode() != Hexagon::A4_tlbmatch;
 }
 
-
 bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
   return (Opcode == Hexagon::J2_jumpt)      ||
          (Opcode == Hexagon::J2_jumpf)      ||
@@ -3116,25 +3030,21 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
          (Opcode == Hexagon::J2_jumpfnewpt);
 }
 
-
 bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
   if (Cond.empty() || !isPredicated(Cond[0].getImm()))
     return false;
   return !isPredicatedTrue(Cond[0].getImm());
 }
 
-
 short HexagonInstrInfo::getAbsoluteForm(const MachineInstr &MI) const {
   return Hexagon::getAbsoluteForm(MI.getOpcode());
 }
 
-
 unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
 }
 
-
 // Returns the base register in a memory access (load/store). The offset is
 // returned in Offset and the access size is returned in AccessSize.
 unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
@@ -3171,7 +3081,6 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
   return MI.getOperand(basePos).getReg();
 }
 
-
 /// Return the position of the base and offset operands for this instruction.
 bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI,
       unsigned &BasePos, unsigned &OffsetPos) const {
@@ -3203,7 +3112,6 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI,
   return true;
 }
 
-
 // Inserts branching instructions in reverse order of their occurrence.
 // e.g. jump_t t1 (i1)
 // jump t2        (i2)
@@ -3265,24 +3173,20 @@ SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs(
   return Jumpers;
 }
 
-
 short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const {
   if (Opcode < 0)
     return -1;
   return Hexagon::getBaseWithLongOffset(Opcode);
 }
 
-
 short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr &MI) const {
   return Hexagon::getBaseWithLongOffset(MI.getOpcode());
 }
 
-
 short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr &MI) const {
   return Hexagon::getBaseWithRegOffset(MI.getOpcode());
 }
 
-
 // Returns Operand Index for the constant extended instruction.
 unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
@@ -3379,7 +3283,6 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
   return HexagonII::HCG_None;
 }
 
-
 // Returns -1 when there is no opcode found.
 unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA,
       const MachineInstr &GB) const {
@@ -3398,7 +3301,6 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA,
   return -1;
 }
 
-
 int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
   enum Hexagon::PredSense inPredSense;
   inPredSense = invertPredicate ? Hexagon::PredSense_false :
@@ -3410,7 +3312,6 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
   llvm_unreachable("Unexpected predicable instruction");
 }
 
-
 // Return the cur value instruction for a given store.
 int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
@@ -3428,8 +3329,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
   return 0;
 }
 
-
-
 // The diagram below shows the steps involved in the conversion of a predicated
 // store instruction to its .new predicated new-value form.
 //
@@ -3509,7 +3408,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
 // promoted. Therefore, in case of dependence check failure (due to R5) during
 // the next iteration, it should be converted back to its most basic form.
 
-
 // Return the new value instruction for a given store.
 int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
   int NVOpcode = Hexagon::getNewValueOpcode(MI.getOpcode());
@@ -3552,7 +3450,6 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
   return 0;
 }
 
-
 // Returns the opcode to use when converting MI, which is a conditional jump,
 // into a conditional instruction which uses the .new value of the predicate.
 // We also use branch probabilities to add a hint to the jump.
@@ -3579,7 +3476,6 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
   }
 }
 
-
 // Return .new predicate version for an instruction.
 int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
       const MachineBranchProbabilityInfo *MBPI) const {
@@ -3599,7 +3495,6 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
   return 0;
 }
 
-
 int HexagonInstrInfo::getDotOldOp(const int opc) const {
   int NewOp = opc;
   if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
@@ -3615,7 +3510,6 @@ int HexagonInstrInfo::getDotOldOp(const int opc) const {
   return NewOp;
 }
 
-
 // See if instruction could potentially be a duplex candidate.
 // If so, return its group. Zero otherwise.
 HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
@@ -3960,12 +3854,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
   return HexagonII::HSIG_None;
 }
 
-
 short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const {
   return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real);
 }
 
-
 // Return first non-debug instruction in the basic block.
 MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
       const {
@@ -3978,7 +3870,6 @@ MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
   return nullptr;
 }
 
-
 unsigned HexagonInstrInfo::getInstrTimingClassLatency(
       const InstrItineraryData *ItinData, const MachineInstr &MI) const {
   // Default to one cycle for no itinerary. However, an "empty" itinerary may
@@ -4000,7 +3891,6 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency(
   return Latency;
 }
 
-
 // Inverts the predication logic.
 // p -> NotP
 // NotP -> P
@@ -4013,7 +3903,6 @@ bool HexagonInstrInfo::getInvertedPredSense(
   return true;
 }
 
-
 unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
   int InvPredOpcode;
   InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
@@ -4024,7 +3913,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
   llvm_unreachable("Unexpected predicated instruction");
 }
 
-
 // Returns the max value that doesn't need to be extended.
 int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
@@ -4039,13 +3927,11 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const {
     return ~(-1U << bits);
 }
 
-
 unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask;
 }
 
-
 // Returns the min value that doesn't need to be extended.
 int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
@@ -4060,7 +3946,6 @@ int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const {
     return 0;
 }
 
-
 // Returns opcode of the non-extended equivalent instruction.
 short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const {
   // Check if the instruction has a register form that uses register in place
@@ -4086,7 +3971,6 @@ short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const {
   return -1;
 }
 
-
 bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
       unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const {
   if (Cond.empty())
@@ -4107,17 +3991,14 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
   return true;
 }
 
-
 short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr &MI) const {
   return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo);
 }
 
-
 short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const {
   return Hexagon::getRegForm(MI.getOpcode());
 }
 
-
 // Return the number of bytes required to encode the instruction.
 // Hexagon instructions are fixed length, 4 bytes, unless they
 // use a constant extender, which requires another 4 bytes.
@@ -4156,13 +4037,11 @@ unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const {
   return Size;
 }
 
-
 uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const {
   const uint64_t F = MI.getDesc().TSFlags;
   return (F >> HexagonII::TypePos) & HexagonII::TypeMask;
 }
 
-
 unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const {
   const TargetSubtargetInfo &ST = MI.getParent()->getParent()->getSubtarget();
   const InstrItineraryData &II = *ST.getInstrItineraryData();
@@ -4171,19 +4050,16 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const {
   return IS.getUnits();
 }
 
-
 unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const {
   const uint64_t F = get(Opcode).TSFlags;
   return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask;
 }
 
-
 // Calculate size of the basic block without debug instructions.
 unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const {
   return nonDbgMICount(BB->instr_begin(), BB->instr_end());
 }
 
-
 unsigned HexagonInstrInfo::nonDbgBundleSize(
       MachineBasicBlock::const_iterator BundleHead) const {
   assert(BundleHead->isBundle() && "Not a bundle header");
@@ -4192,7 +4068,6 @@ unsigned HexagonInstrInfo::nonDbgBundleSize(
   return nonDbgMICount(++MII, getBundleEnd(BundleHead.getInstrIterator()));
 }
 
-
 /// immediateExtend - Changes the instruction in place to one using an immediate
 /// extender.
 void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const {
@@ -4208,7 +4083,6 @@ void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const {
   MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
 }
 
-
 bool HexagonInstrInfo::invertAndChangeJumpTarget(
       MachineInstr &MI, MachineBasicBlock *NewTarget) const {
   DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#"
@@ -4229,7 +4103,6 @@ bool HexagonInstrInfo::invertAndChangeJumpTarget(
   return true;
 }
 
-
 void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
   /* +++ The code below is used to generate complete set of Hexagon Insn +++ */
   MachineFunction::iterator A = MF.begin();
@@ -4248,7 +4121,6 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
   /* --- The code above is used to generate complete set of Hexagon Insn --- */
 }
 
-
 // Inverts the predication logic.
 // p -> NotP
 // NotP -> P
@@ -4258,7 +4130,6 @@ bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const {
   return true;
 }
 
-
 // Reverse the branch prediction.
 unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const {
   int PredRevOpcode = -1;
@@ -4270,14 +4141,12 @@ unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const {
   return PredRevOpcode;
 }
 
-
 // TODO: Add more rigorous validation.
 bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond)
       const {
   return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1));
 }
 
-
 short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr &MI) const {
   return Hexagon::xformRegToImmOffset(MI.getOpcode());
 }
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2d184d1484e9..2358d4b7e4c0 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -16,9 +16,14 @@
 
 #include "HexagonRegisterInfo.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include <cstdint>
+#include <vector>
 
 #define GET_INSTRINFO_HEADER
 #include "HexagonGenInstrInfo.inc"
@@ -29,9 +34,10 @@ struct EVT;
 class HexagonSubtarget;
 
 class HexagonInstrInfo : public HexagonGenInstrInfo {
-  virtual void anchor();
   const HexagonRegisterInfo RI;
 
+  virtual void anchor();
+
 public:
   explicit HexagonInstrInfo(HexagonSubtarget &ST);
 
@@ -260,7 +266,7 @@ public:
   /// PredCost.
   unsigned getInstrLatency(const InstrItineraryData *ItinData,
                            const MachineInstr &MI,
-                           unsigned *PredCost = 0) const override;
+                           unsigned *PredCost = nullptr) const override;
 
   /// Create machine specific model for scheduling.
   DFAPacketizer *
@@ -378,7 +384,6 @@ public:
   bool PredOpcodeHasJMP_c(unsigned Opcode) const;
   bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
 
-
   short getAbsoluteForm(const MachineInstr &MI) const;
   unsigned getAddrMode(const MachineInstr &MI) const;
   unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset,
@@ -421,13 +426,11 @@ public:
   unsigned getUnits(const MachineInstr &MI) const;
   unsigned getValidSubTargets(const unsigned Opcode) const;
 
-
   /// getInstrTimingClassLatency - Compute the instruction latency of a given
   /// instruction using Timing Class information, if available.
   unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
   unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
 
-
   void immediateExtend(MachineInstr &MI) const;
   bool invertAndChangeJumpTarget(MachineInstr &MI,
                                  MachineBasicBlock* NewTarget) const;
@@ -438,6 +441,6 @@ public:
   short xformRegToImmOffset(const MachineInstr &MI) const;
 };
 
-}
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H
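
The out-of-line "virtual void anchor();" member kept above follows the usual
LLVM vtable-anchoring idiom (background, not part of this change): one
non-inline virtual function pins the class's vtable to a single object file.
A minimal sketch:

    class Widget {
      virtual void anchor();  // declared in the header...
    public:
      virtual ~Widget() = default;
    };

    // ...and defined in exactly one .cpp file, so the vtable and type info
    // are emitted only there.
    void Widget::anchor() {}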
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 371b52108b9b..d83bcbc41553 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -15,33 +15,31 @@
 
 namespace llvm {
 
-  namespace Hexagon {
+namespace Hexagon {
+
     const unsigned int StartPacket = 0x1;
     const unsigned int EndPacket = 0x2;
-  }
 
+} // end namespace Hexagon
 
 /// Hexagon target-specific information for each MachineFunction.
 class HexagonMachineFunctionInfo : public MachineFunctionInfo {
   // SRetReturnReg - Some subtargets require that sret lowering includes
   // returning the value of the returned struct in a register. This field
   // holds the virtual register into which the sret argument is passed.
-  unsigned SRetReturnReg;
-  unsigned StackAlignBaseVReg;    // Aligned-stack base register (virtual)
-  unsigned StackAlignBasePhysReg; //                             (physical)
+  unsigned SRetReturnReg = 0;
+  unsigned StackAlignBaseVReg = 0;    // Aligned-stack base register (virtual)
+  unsigned StackAlignBasePhysReg = 0; //                             (physical)
   int VarArgsFrameIndex;
-  bool HasClobberLR;
-  bool HasEHReturn;
+  bool HasClobberLR = false;
+  bool HasEHReturn = false;
   std::map<const MachineInstr*, unsigned> PacketInfo;
   virtual void anchor();
 
 public:
-  HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0),
-      StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {}
+  HexagonMachineFunctionInfo() = default;
 
-  HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
-      StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0),
-      HasEHReturn(false) {}
+  HexagonMachineFunctionInfo(MachineFunction &MF) {}
 
   unsigned getSRetReturnReg() const { return SRetReturnReg; }
   void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@@ -75,6 +73,7 @@ public:
   void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; }
   unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; }
 };
-} // End llvm namespace
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H
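
The rewrite above swaps hand-written member-initializer lists for C++11
in-class initializers, which is what lets the default constructor become
"= default". A minimal sketch of the same pattern, with hypothetical names:

    class Info {
      unsigned SRetReg = 0;      // every constructor starts from these values
      bool HasEHReturn = false;

    public:
      Info() = default;          // nothing left to repeat per constructor
    };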
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index e902f600e881..c9c4f95dbaaa 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -10,17 +10,27 @@
 // This file contains the declarations of the HexagonTargetAsmInfo properties.
 //
 //===----------------------------------------------------------------------===//
+
 #define DEBUG_TYPE "hexagon-sdata"
 
-#include "HexagonTargetMachine.h"
 #include "HexagonTargetObjectFile.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Type.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ELF.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -44,13 +54,21 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement",
 // (e.g. -debug and -debug-only=globallayout)
 #define TRACE_TO(s, X) s << X
 #ifdef NDEBUG
-#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0)
+#define TRACE(X)                                                               \
+  do {                                                                         \
+    if (TraceGVPlacement) {                                                    \
+      TRACE_TO(errs(), X);                                                     \
+    }                                                                          \
+  } while (false)
 #else
-#define TRACE(X) \
-  do { \
-    if (TraceGVPlacement) { TRACE_TO(errs(), X); } \
-    else { DEBUG( TRACE_TO(dbgs(), X) ); } \
-  } while (0)
+#define TRACE(X)                                                               \
+  do {                                                                         \
+    if (TraceGVPlacement) {                                                    \
+      TRACE_TO(errs(), X);                                                     \
+    } else {                                                                   \
+      DEBUG(TRACE_TO(dbgs(), X));                                              \
+    }                                                                          \
+  } while (false)
 #endif
 
 // Returns true if the section name is such that the symbol will be put
@@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) {
          Sec.find(".scommon.") != StringRef::npos;
 }
 
-
 static const char *getSectionSuffixForSize(unsigned Size) {
   switch (Size) {
   default:
@@ -163,7 +180,6 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
   return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM);
 }
 
-
 /// Return true if this global value should be placed into small data/bss
 /// section.
 bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
@@ -232,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
   return true;
 }
 
-
 bool HexagonTargetObjectFile::isSmallDataEnabled() const {
   return SmallDataThreshold > 0;
 }
 
-
 unsigned HexagonTargetObjectFile::getSmallDataSize() const {
   return SmallDataThreshold;
 }
 
-
 /// Descends any type down to "elementary" components,
 /// discovering the smallest addressable one.
 /// If zero is returned, declaration will not be modified.
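
The reformatted TRACE macros above rely on the do { ... } while (false)
idiom: the wrapper turns a multi-statement macro into a single statement so
it composes safely with if/else. A self-contained sketch (LOG and
LoggingEnabled are hypothetical, not from this file):

    #include <cstdio>

    static bool LoggingEnabled = true;

    #define LOG(X)                                                             \
      do {                                                                     \
        if (LoggingEnabled)                                                    \
          std::printf("%s\n", (X));                                            \
      } while (false)

    int main() {
      if (LoggingEnabled)
        LOG("enabled");  // expands to one statement, so the else still binds
      else
        std::puts("disabled");
      return 0;
    }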
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 5feaffe6efb9..9a09a17767a6 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -1,5 +1,4 @@
-
-//=== HexagonMCCompound.cpp - Hexagon Compound checker  -------===//
+//=== HexagonMCCompound.cpp - Hexagon Compound checker  -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,18 +10,17 @@
 // This file looks at a packet and tries to form compound insns.
 //
 //===----------------------------------------------------------------------===//
+
 #include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCShuffler.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/MC/MCAssembler.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
 
 using namespace llvm;
 using namespace Hexagon;
@@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = {
 };
 
 // enum HexagonII::CompoundGroup
-namespace {
-unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
+static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
   unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
 
   switch (MI.getOpcode()) {
@@ -173,11 +170,9 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
 
   return HexagonII::HCG_None;
 }
-}
 
 /// getCompoundOp - Return the index from 0-7 into the above opcode lists.
-namespace {
-unsigned getCompoundOp(MCInst const &HMCI) {
+static unsigned getCompoundOp(MCInst const &HMCI) {
   const MCOperand &Predicate = HMCI.getOperand(0);
   unsigned PredReg = Predicate.getReg();
 
@@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) {
     return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
   }
 }
-}
 
-namespace {
-MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
-  MCInst *CompoundInsn = 0;
+static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
+                               MCInst const &R) {
+  MCInst *CompoundInsn = nullptr;
   unsigned compoundOpcode;
   MCOperand Rs, Rt;
   int64_t Value;
@@ -336,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
 
   return CompoundInsn;
 }
-}
 
 /// Non-Symmetrical. See if these two instructions are fit for a compound pair.
-namespace {
-bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
-                           MCInst const &MIb, bool IsExtendedB) {
+static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
+                                  MCInst const &MIb, bool IsExtendedB) {
   unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA);
   unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB);
   // We have two candidates - check that this is the same register
@@ -353,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
   return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
           (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
 }
-}
 
-namespace {
-bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
+static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
+                            MCInst &MCI) {
   assert(HexagonMCInstrInfo::isBundle(MCI));
   bool JExtended = false;
   for (MCInst::iterator J =
@@ -367,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
       JExtended = true;
       continue;
     }
-    if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) ==
-        HexagonII::TypeJ) {
+    if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) {
       // Try to pair with another insn (B)undled with jump.
       bool BExtended = false;
       for (MCInst::iterator B =
@@ -401,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
   }
   return false;
 }
-}
 
 /// tryCompound - Given a bundle, check for compound insns; when one
 /// is found, update the contents of the bundle with the compound insn.
@@ -420,6 +409,4 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
   // a compound is found.
   while (lookForCompound(MCII, Context, MCI))
     ;
-
-  return;
 }
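
The conversion above from anonymous-namespace functions to static functions
matches the LLVM convention: anonymous namespaces are reserved for type
declarations, while file-local functions get internal linkage via "static".
A minimal sketch:

    // File-local helper: internal linkage through 'static'.
    static unsigned twice(unsigned N) { return 2 * N; }

    // Anonymous namespace kept as small as possible, for types only.
    namespace {
      struct LocalState {
        unsigned Count = 0;
      };
    } // end anonymous namespace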
diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h
index 517f17cc9c64..5ece11bd5ce4 100644
--- a/lib/Target/Hexagon/RDFCopy.h
+++ b/lib/Target/Hexagon/RDFCopy.h
@@ -1,4 +1,4 @@
-//===--- RDFCopy.h --------------------------------------------------------===//
+//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,23 +7,26 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef RDF_COPY_H
-#define RDF_COPY_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
 
 #include "RDFGraph.h"
 #include <map>
 #include <vector>
 
 namespace llvm {
+
   class MachineBasicBlock;
   class MachineDominatorTree;
   class MachineInstr;
 
 namespace rdf {
+
   struct CopyPropagation {
     CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
         Trace(false) {}
-    virtual ~CopyPropagation() {}
+
+    virtual ~CopyPropagation() = default;
 
     bool run();
     void trace(bool On) { Trace = On; }
@@ -49,7 +52,9 @@ namespace rdf {
     void updateMap(NodeAddr<InstrNode*> IA);
     bool scanBlock(MachineBasicBlock *B);
   };
-} // namespace rdf
-} // namespace llvm
 
-#endif
+} // end namespace rdf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index 33c3f03790f3..fa272ea1a76a 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -10,16 +10,31 @@
 // Target-independent, SSA-based data flow graph for register data flow (RDF).
 //
 #include "RDFGraph.h"
-
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominanceFrontier.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 using namespace rdf;
@@ -88,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
   return OS;
 }
 
-namespace {
-  void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
-        const DataFlowGraph &G) {
-    OS << Print<NodeId>(RA.Id, G) << '<'
-       << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
-    if (RA.Addr->getFlags() & NodeAttrs::Fixed)
-      OS << '!';
-  }
+static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
+                const DataFlowGraph &G) {
+  OS << Print<NodeId>(RA.Id, G) << '<'
+     << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
+  if (RA.Addr->getFlags() & NodeAttrs::Fixed)
+    OS << '!';
 }
 
 template<>
@@ -183,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
 }
 
 namespace {
+
   template <typename T>
   struct PrintListV {
     PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
+
     typedef T Type;
     const NodeList &List;
     const DataFlowGraph &G;
@@ -201,7 +216,8 @@ namespace {
     }
     return OS;
   }
-}
+
+} // end anonymous namespace
 
 template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
@@ -219,10 +235,10 @@ raw_ostream &operator<< (raw_ostream &OS,
   // Print the target for calls and branches (for readability).
   if (MI.isCall() || MI.isBranch()) {
     MachineInstr::const_mop_iterator T =
-          find_if(MI.operands(),
-                  [] (const MachineOperand &Op) -> bool {
-                    return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
-                  });
+          llvm::find_if(MI.operands(),
+                        [] (const MachineOperand &Op) -> bool {
+                          return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
+                        });
     if (T != MI.operands_end()) {
       OS << ' ';
       if (T->isMBB())
@@ -327,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS,
   return OS;
 }
 
-} // namespace rdf
-} // namespace llvm
+} // end namespace rdf
+} // end namespace llvm
 
 // Node allocation functions.
 //
@@ -390,7 +406,6 @@ void NodeAllocator::clear() {
   ActiveEnd = nullptr;
 }
 
-
 // Insert node NA after "this" in the circular chain.
 void NodeBase::append(NodeAddr<NodeBase*> NA) {
   NodeId Nx = Next;
@@ -401,7 +416,6 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
   }
 }
 
-
 // Fundamental node manipulator functions.
 
 // Obtain the register reference from a reference node.
@@ -590,7 +604,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
   return findBlock(EntryB, G);
 }
 
-
 // Target operand information.
 //
 
@@ -641,7 +654,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
   return false;
 }
 
-
 RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
   RegisterId SuperReg = RR.Reg;
   while (true) {
@@ -745,7 +757,6 @@ void RegisterAggr::print(raw_ostream &OS) const {
   OS << " }";
 }
 
-
 //
 // The data flow graph construction.
 //
@@ -753,10 +764,9 @@ void RegisterAggr::print(raw_ostream &OS) const {
 DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
       const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
       const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
-    : LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
+    : MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
 }
 
-
 // The implementation of the definition stack.
 // Each register reference has its own definition stack. In particular,
 // for a register references "Reg" and "Reg:subreg" will each have their
@@ -845,7 +855,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
   return P;
 }
 
-
 // Register information.
 
 // Get the list of references aliased to RR. Lane masks are ignored.
@@ -915,7 +924,6 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
   return NA;
 }
 
-
 // Allocation routines for specific node types/kinds.
 
 NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
@@ -1248,7 +1256,6 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
   return false;
 }
 
-
 // Clear all information in the graph.
 void DataFlowGraph::reset() {
   Memory.clear();
@@ -1256,7 +1263,6 @@ void DataFlowGraph::reset() {
   Func = NodeAddr<FuncNode*>();
 }
 
-
 // Return the next reference node in the instruction node IA that is related
 // to RA. Conceptually, two reference nodes are related if they refer to the
 // same instance of a register access, but differ in flags or other minor
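
The llvm::find_if call in the hunk above is a first-match scan over an
instruction's operands. The same shape in plain C++, as a minimal sketch:

    #include <algorithm>
    #include <vector>

    // Return the first positive element, or 0 if none exists, mirroring the
    // "find the first interesting operand" scan above.
    static int firstPositive(const std::vector<int> &V) {
      auto It = std::find_if(V.begin(), V.end(),
                             [](int N) { return N > 0; });
      return It != V.end() ? *It : 0;
    }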
diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h
index 871062ff2b05..49d78a8b22b5 100644
--- a/lib/Target/Hexagon/RDFGraph.h
+++ b/lib/Target/Hexagon/RDFGraph.h
@@ -1,4 +1,4 @@
-//===--- RDFGraph.h -------------------------------------------------------===//
+//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -221,20 +221,25 @@
 // The statement s5 has two use nodes for t0: u7" and u9". The quotation
 // mark " indicates that the node is a shadow.
 //
-#ifndef RDF_GRAPH_H
-#define RDF_GRAPH_H
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
 
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/LaneBitmask.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Timer.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-
+#include <cassert>
+#include <cstdint>
+#include <cstring>
 #include <functional>
 #include <map>
 #include <set>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 
 // RDF uses uint32_t to refer to registers. This is to ensure that the type
@@ -243,6 +248,7 @@
 static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
 
 namespace llvm {
+
   class MachineBasicBlock;
   class MachineFunction;
   class MachineInstr;
@@ -252,6 +258,7 @@ namespace llvm {
   class TargetInstrInfo;
 
 namespace rdf {
+
   typedef uint32_t NodeId;
   typedef uint32_t RegisterId;
 
@@ -293,9 +300,11 @@ namespace rdf {
     static uint16_t set_type(uint16_t A, uint16_t T) {
       return (A & ~TypeMask) | T;
     }
+
     static uint16_t set_kind(uint16_t A, uint16_t K) {
       return (A & ~KindMask) | K;
     }
+
     static uint16_t set_flags(uint16_t A, uint16_t F) {
       return (A & ~FlagMask) | F;
     }
@@ -326,9 +335,14 @@ namespace rdf {
   };
 
   template <typename T> struct NodeAddr {
-    NodeAddr() : Addr(nullptr), Id(0) {}
+    NodeAddr() : Addr(nullptr) {}
     NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
 
+    // Type cast (casting constructor). This is the reason for having this
+    // class instead of std::pair.
+    template <typename S> NodeAddr(const NodeAddr<S> &NA)
+      : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
+
     bool operator== (const NodeAddr<T> &NA) const {
       assert((Addr == NA.Addr) == (Id == NA.Id));
       return Addr == NA.Addr;
@@ -336,13 +350,9 @@ namespace rdf {
     bool operator!= (const NodeAddr<T> &NA) const {
       return !operator==(NA);
     }
-    // Type cast (casting constructor). The reason for having this class
-    // instead of std::pair.
-    template <typename S> NodeAddr(const NodeAddr<S> &NA)
-      : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
 
     T Addr;
-    NodeId Id;
+    NodeId Id = 0;
   };
 
   struct NodeBase;
@@ -366,17 +376,20 @@ namespace rdf {
   struct NodeAllocator {
     // Amount of storage for a single node.
     enum { NodeMemSize = 32 };
+
     NodeAllocator(uint32_t NPB = 4096)
         : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
-          IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) {
+          IndexMask((1 << BitsPerIndex)-1) {
       assert(isPowerOf2_32(NPB));
     }
+
     NodeBase *ptr(NodeId N) const {
       uint32_t N1 = N-1;
       uint32_t BlockN = N1 >> BitsPerIndex;
       uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
       return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
     }
+
     NodeId id(const NodeBase *P) const;
     NodeAddr<NodeBase*> New();
     void clear();
@@ -384,6 +397,7 @@ namespace rdf {
   private:
     void startNewBlock();
     bool needNewBlock();
+
     uint32_t makeId(uint32_t Block, uint32_t Index) const {
       // Add 1 to the id, to avoid the id of 0, which is treated as "null".
       return ((Block << BitsPerIndex) | Index) + 1;
@@ -392,7 +406,7 @@ namespace rdf {
     const uint32_t NodesPerBlock;
     const uint32_t BitsPerIndex;
     const uint32_t IndexMask;
-    char *ActiveEnd;
+    char *ActiveEnd = nullptr;
     std::vector<char*> Blocks;
     typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy;
     AllocatorTy MemPool;
@@ -405,6 +419,7 @@ namespace rdf {
     RegisterRef() : RegisterRef(0) {}
     explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
       : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
+
     operator bool() const { return Reg != 0 && Mask.any(); }
     bool operator== (const RegisterRef &RR) const {
       return Reg == RR.Reg && Mask == RR.Mask;
@@ -420,7 +435,8 @@ namespace rdf {
 
   struct TargetOperandInfo {
     TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
-    virtual ~TargetOperandInfo() {}
+    virtual ~TargetOperandInfo() = default;
+
     virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
     virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
     virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
@@ -428,7 +444,6 @@ namespace rdf {
     const TargetInstrInfo &TII;
   };
 
-
   // Packed register reference. Only used for storage.
   struct PackedRegisterRef {
     RegisterId Reg;
@@ -442,11 +457,13 @@ namespace rdf {
   template <typename T, unsigned N = 32>
   struct IndexedSet {
     IndexedSet() : Map() { Map.reserve(N); }
+
     T get(uint32_t Idx) const {
       // Index Idx corresponds to Map[Idx-1].
       assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
       return Map[Idx-1];
     }
+
     uint32_t insert(T Val) {
       // Linear search.
       auto F = llvm::find(Map, Val);
@@ -455,11 +472,13 @@ namespace rdf {
       Map.push_back(Val);
       return Map.size();  // Return actual_index + 1.
     }
+
     uint32_t find(T Val) const {
       auto F = llvm::find(Map, Val);
       assert(F != Map.end());
       return F - Map.begin();
     }
+
   private:
     std::vector<T> Map;
   };
@@ -478,12 +497,14 @@ namespace rdf {
       assert(LM.any());
       return LM.all() ? 0 : find(LM);
     }
+
     PackedRegisterRef pack(RegisterRef RR) {
       return { RR.Reg, getIndexForLaneMask(RR.Mask) };
     }
     PackedRegisterRef pack(RegisterRef RR) const {
       return { RR.Reg, getIndexForLaneMask(RR.Mask) };
     }
+
     RegisterRef unpack(PackedRegisterRef PR) const {
       return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId));
     }
@@ -491,11 +512,8 @@ namespace rdf {
 
   struct RegisterAggr {
     RegisterAggr(const TargetRegisterInfo &tri)
-        : Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false),
-          TRI(tri) {}
-    RegisterAggr(const RegisterAggr &RG)
-        : Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits),
-          CheckUnits(RG.CheckUnits), TRI(RG.TRI) {}
+        : ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {}
+    RegisterAggr(const RegisterAggr &RG) = default;
 
     bool empty() const { return Masks.empty(); }
     bool hasAliasOf(RegisterRef RR) const;
@@ -530,11 +548,11 @@ namespace rdf {
     const TargetRegisterInfo &TRI;
   };
 
-
   struct NodeBase {
   public:
     // Make sure this is a POD.
     NodeBase() = default;
+
     uint16_t getType()  const { return NodeAttrs::type(Attrs); }
     uint16_t getKind()  const { return NodeAttrs::kind(Attrs); }
     uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
@@ -596,29 +614,36 @@ namespace rdf {
 
   struct RefNode : public NodeBase {
     RefNode() = default;
+
     RegisterRef getRegRef(const DataFlowGraph &G) const;
+
     MachineOperand &getOp() {
       assert(!(getFlags() & NodeAttrs::PhiRef));
       return *Ref.Op;
     }
+
     void setRegRef(RegisterRef RR, DataFlowGraph &G);
     void setRegRef(MachineOperand *Op, DataFlowGraph &G);
+
     NodeId getReachingDef() const {
       return Ref.RD;
     }
     void setReachingDef(NodeId RD) {
       Ref.RD = RD;
     }
+
     NodeId getSibling() const {
       return Ref.Sib;
     }
     void setSibling(NodeId Sib) {
       Ref.Sib = Sib;
     }
+
     bool isUse() const {
       assert(getType() == NodeAttrs::Ref);
       return getKind() == NodeAttrs::Use;
     }
+
     bool isDef() const {
       assert(getType() == NodeAttrs::Ref);
       return getKind() == NodeAttrs::Def;
@@ -702,6 +727,7 @@ namespace rdf {
     MachineBasicBlock *getCode() const {
       return CodeNode::getCode<MachineBasicBlock*>();
     }
+
     void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
   };
 
@@ -709,6 +735,7 @@ namespace rdf {
     MachineFunction *getCode() const {
       return CodeNode::getCode<MachineFunction*>();
     }
+
     NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
         const DataFlowGraph &G) const;
     NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
@@ -723,6 +750,7 @@ namespace rdf {
     template <typename T> T ptr(NodeId N) const {
       return static_cast<T>(ptr(N));
     }
+
     NodeId id(const NodeBase *P) const;
 
     template <typename T> NodeAddr<T> addr(NodeId N) const {
@@ -738,13 +766,17 @@ namespace rdf {
 
     struct DefStack {
       DefStack() = default;
+
       bool empty() const { return Stack.empty() || top() == bottom(); }
+
     private:
       typedef NodeAddr<DefNode*> value_type;
       struct Iterator {
         typedef DefStack::value_type value_type;
+
         Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
         Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
+
         value_type operator*() const {
           assert(Pos >= 1);
           return DS.Stack[Pos-1];
@@ -755,14 +787,17 @@ namespace rdf {
         }
         bool operator==(const Iterator &It) const { return Pos == It.Pos; }
         bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
+
       private:
         Iterator(const DefStack &S, bool Top);
+
         // Pos-1 is the index in the StorageType object that corresponds to
         // the top of the DefStack.
         const DefStack &DS;
         unsigned Pos;
         friend struct DefStack;
       };
+
     public:
       typedef Iterator iterator;
       iterator top() const { return Iterator(*this, true); }
@@ -773,14 +808,18 @@ namespace rdf {
       void pop();
       void start_block(NodeId N);
       void clear_block(NodeId N);
+
     private:
       friend struct Iterator;
       typedef std::vector<value_type> StorageType;
+
       bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
         return (P.Addr == nullptr) && (N == 0 || P.Id == N);
       }
+
       unsigned nextUp(unsigned P) const;
       unsigned nextDown(unsigned P) const;
+
       StorageType Stack;
     };
 
@@ -819,6 +858,7 @@ namespace rdf {
       if (RemoveFromOwner)
         removeFromOwner(UA);
     }
+
     void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
       unlinkDefDF(DA);
       if (RemoveFromOwner)
@@ -831,23 +871,28 @@ namespace rdf {
       return BA.Addr->getType() == NodeAttrs::Ref &&
              BA.Addr->getKind() == Kind;
     }
+
     template <uint16_t Kind>
     static bool IsCode(const NodeAddr<NodeBase*> BA) {
       return BA.Addr->getType() == NodeAttrs::Code &&
              BA.Addr->getKind() == Kind;
     }
+
     static bool IsDef(const NodeAddr<NodeBase*> BA) {
       return BA.Addr->getType() == NodeAttrs::Ref &&
              BA.Addr->getKind() == NodeAttrs::Def;
     }
+
     static bool IsUse(const NodeAddr<NodeBase*> BA) {
       return BA.Addr->getType() == NodeAttrs::Ref &&
              BA.Addr->getKind() == NodeAttrs::Use;
     }
+
     static bool IsPhi(const NodeAddr<NodeBase*> BA) {
       return BA.Addr->getType() == NodeAttrs::Code &&
              BA.Addr->getKind() == NodeAttrs::Phi;
     }
+
     static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
       uint16_t Flags = DA.Addr->getFlags();
       return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
@@ -902,6 +947,7 @@ namespace rdf {
 
     void unlinkUseDF(NodeAddr<UseNode*> UA);
     void unlinkDefDF(NodeAddr<DefNode*> DA);
+
     void removeFromOwner(NodeAddr<RefNode*> RA) {
       NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
       IA.Addr->removeMember(RA, *this);
@@ -967,7 +1013,6 @@ namespace rdf {
     return MM;
   }
 
-
   // Optionally print the lane mask, if it is not ~0.
   struct PrintLaneMaskOpt {
     PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
@@ -991,7 +1036,9 @@ namespace rdf {
     PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
       : Print<NodeAddr<T>>(x, g) {}
   };
-} // namespace rdf
-} // namespace llvm
 
-#endif // RDF_GRAPH_H
+} // end namespace rdf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 6f0fdddd7d55..92d3c001df94 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -28,6 +28,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -43,6 +44,11 @@ bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
   return MipsDAGToDAGISel::runOnMachineFunction(MF);
 }
 
+void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<DominatorTreeWrapperPass>();
+  SelectionDAGISel::getAnalysisUsage(AU);
+}
+
 void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
                                                MachineFunction &MF) {
   MachineInstrBuilder MIB(MF, &MI);
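
The getAnalysisUsage override added above follows the stock LLVM pass idiom: declare the dependency, then delegate to the base class so inherited requirements are preserved. A generic sketch of the pattern (MyPass is a hypothetical example, not part of this patch):

    #include "llvm/IR/Dominators.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    struct MyPass : FunctionPass {   // hypothetical example pass
      static char ID;
      MyPass() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<DominatorTreeWrapperPass>(); // run DomTree first
        FunctionPass::getAnalysisUsage(AU);         // keep base requirements
      }

      bool runOnFunction(Function &F) override {
        // The pass manager guarantees the analysis is available here.
        DominatorTree &DT =
            getAnalysis<DominatorTreeWrapperPass>().getDomTree();
        (void)DT;
        return false;
      }
    };
    char MyPass::ID = 0;
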
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 2a8e5877e848..f89a350cab04 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -28,6 +28,8 @@ private:
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
   void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
                              MachineFunction &MF);
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index aa3ffde24b99..2b9195b095e1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3981,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
 static bool isFunctionGlobalAddress(SDValue Callee);
 
 static bool
-resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
+resideInSameSection(const Function *Caller, SDValue Callee,
+                    const TargetMachine &TM) {
   // If !G, Callee can be an external symbol.
   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
-  if (!G) return false;
+  if (!G)
+    return false;
 
   const GlobalValue *GV = G->getGlobal();
-
-  if (GV->isDeclaration()) return false;
-
-  switch(GV->getLinkage()) {
-  default: llvm_unreachable("unknow linkage type");
-  case GlobalValue::AvailableExternallyLinkage:
-  case GlobalValue::ExternalWeakLinkage:
+  if (!GV->isStrongDefinitionForLinker())
     return false;
 
-  // Callee with weak linkage is allowed if it has hidden or protected
-  // visibility
-  case GlobalValue::LinkOnceAnyLinkage:
-  case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
-  case GlobalValue::WeakAnyLinkage:
-  case GlobalValue::WeakODRLinkage:     // e.g. c++ template instantiation
-    if (GV->hasDefaultVisibility())
+  // Any explicitly-specified sections and section prefixes must also match.
+  // Also, if we're using -ffunction-sections, then each function is always in
+  // a different section (the same is true for COMDAT functions).
+  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
+      GV->getSection() != Caller->getSection())
+    return false;
+  if (const auto *F = dyn_cast<Function>(GV)) {
+    if (F->getSectionPrefix() != Caller->getSectionPrefix())
       return false;
-
-  case GlobalValue::ExternalLinkage:
-  case GlobalValue::InternalLinkage:
-  case GlobalValue::PrivateLinkage:
-    break;
   }
 
-  // With '-fPIC', calling default visiblity function need insert 'nop' after
-  // function call, no matter that function resides in same module or not, so
-  // we treat it as in different module.
-  if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
+  // If the callee might be interposed, then we can't assume the ultimate call
+  // target will be in the same section. Even in cases where we can assume that
+  // interposition won't happen, in any case where the linker might insert a
+  // stub to allow for interposition, we must generate code as though
+  // interposition might occur. To understand why this matters, consider a
+  // situation where: a -> b -> c where the arrows indicate calls. b and c are
+  // in the same section, but a is in a different module (i.e. has a different
+  // TOC base pointer). If the linker allows for interposition between b and c,
+  // then it will generate a stub for the call edge between b and c which will
+  // save the TOC pointer into the designated stack slot allocated by b. If we
+  // return true here, and therefore allow a tail call between b and c, that
+  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
+  // pointer into the stack slot allocated by a (where the a -> b stub saved
+  // a's TOC base pointer). If we're not considering a tail call, but rather,
+  // whether a nop is needed after the call instruction in b: because the
+  // linker will insert a stub, it might complain about a missing nop if we
+  // omit one (although many linkers don't complain in this case).
+  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
     return false;
 
   return true;
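
Condensing the comment above into the predicate it justifies: a tail call (or an omitted TOC-restore nop) is only safe when the callee is known to land in the caller's section and cannot be interposed. A rough sketch of the rule with a hypothetical helper name (the section-prefix check is elided for brevity):

    #include "llvm/IR/Function.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    // Hypothetical condensation of resideInSameSection's checks.
    static bool sameSectionAndNotInterposable(const Function &Caller,
                                              const GlobalValue &Callee,
                                              const TargetMachine &TM) {
      if (!Callee.isStrongDefinitionForLinker())
        return false;  // definition may be replaced at link/load time
      if (TM.getFunctionSections() || Callee.hasComdat() || Caller.hasComdat())
        return false;  // each function gets its own section
      if (Callee.getSection() != Caller.getSection())
        return false;  // explicit section mismatch
      // Interposition would route the call through a TOC-saving stub.
      return TM.shouldAssumeDSOLocal(*Caller.getParent(), &Callee);
    }
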
@@ -4130,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
       !isa<ExternalSymbolSDNode>(Callee))
     return false;
 
-  // Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI
-  // (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another
-  // module.
+  // Check if Callee resides in the same section, because for now, PPC64 SVR4
+  // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
+  // another section.
   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
-  if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
+  if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
     return false;
 
   // TCO allows altering callee ABI, so we don't have to check further.
@@ -4592,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
   return CallOpc;
 }
 
-static
-bool isLocalCall(const SDValue &Callee)
-{
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    return G->getGlobal()->isStrongDefinitionForLinker();
-  return false;
-}
-
 SDValue PPCTargetLowering::LowerCallResult(
     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -4701,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall(
   // stack frame. If caller and callee belong to the same module (and have the
   // same TOC), the NOP will remain unchanged.
 
+  MachineFunction &MF = DAG.getMachineFunction();
   if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
       !isPatchPoint) {
     if (CallOpc == PPCISD::BCTRL) {
@@ -4724,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall(
       // The address needs to go after the chain input but before the flag (or
       // any other variadic arguments).
       Ops.insert(std::next(Ops.begin()), AddTOC);
-    } else if ((CallOpc == PPCISD::CALL) &&
-               (!isLocalCall(Callee) ||
-                DAG.getTarget().getRelocationModel() == Reloc::PIC_))
+    } else if (CallOpc == PPCISD::CALL &&
+      !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
       // Otherwise insert NOP for non-local calls.
       CallOpc = PPCISD::CALL_NOP;
+    }
   }
 
   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index d42e1187ce64..e1825ca1eda1 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -70,7 +70,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   EmitFunctionBody();
 
   // Emit the XRay table for this function.
-  EmitXRayTable();
+  emitXRayTable();
 
   // We didn't modify anything.
   return false;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 1deefe1231ca..cd690442bb9f 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -373,6 +373,10 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
   MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
   MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
                                                        : std::next(MBBI);
+  PI = skipDebugInstructionsBackward(PI, MBB.begin());
+  if (NI != nullptr)
+    NI = skipDebugInstructionsForward(NI, MBB.end());
+
   unsigned Opc = PI->getOpcode();
   int Offset = 0;
 
@@ -2586,6 +2590,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
   uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
   uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
   I = MBB.erase(I);
+  auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
 
   if (!reserveCallFrame) {
     // If the stack pointer can be changed after prologue, turn the
@@ -2615,7 +2620,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 
     if (HasDwarfEHHandlers && !isDestroy &&
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
-      BuildCFI(MBB, I, DL,
+      BuildCFI(MBB, InsertPos, DL,
                MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
 
     if (Amount == 0)
@@ -2629,7 +2634,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // If this is a callee-pop calling convention, emit a CFA adjust for
     // the amount the callee popped.
     if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
-      BuildCFI(MBB, I, DL, 
+      BuildCFI(MBB, InsertPos, DL,
                MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
 
     // Add Amount to SP to destroy a frame, or subtract to setup.
@@ -2640,13 +2645,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       // Merge with any previous or following adjustment instruction. Note: the
       // instructions merged with here do not have CFI, so their stack
       // adjustments do not feed into CfaAdjustment.
-      StackAdjustment += mergeSPUpdates(MBB, I, true);
-      StackAdjustment += mergeSPUpdates(MBB, I, false);
+      StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
+      StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
 
       if (StackAdjustment) {
         if (!(Fn->optForMinSize() &&
-              adjustStackWithPops(MBB, I, DL, StackAdjustment)))
-          BuildStackAdjustment(MBB, I, DL, StackAdjustment,
+              adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
+          BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
                                /*InEpilogue=*/false);
       }
     }
@@ -2662,8 +2667,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       // TODO: When not using precise CFA, we also need to adjust for the
       // InternalAmt here.
       if (CfaAdjustment) {
-        BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
-                                 nullptr, CfaAdjustment));
+        BuildCFI(MBB, InsertPos, DL,
+                 MCCFIInstruction::createAdjustCfaOffset(nullptr,
+                                                         CfaAdjustment));
       }
     }
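
The two helpers introduced above simply walk past debug pseudo-instructions so that the presence of debug info cannot perturb the SP-update merging; roughly (a sketch of their behavior, not the exact library code):

    // Roughly what skipDebugInstructionsForward/Backward do: move the
    // iterator past debug values, staying within the block's bounds.
    template <typename IterT>
    IterT skipDebugForward(IterT It, IterT End) {
      while (It != End && It->isDebugValue())
        ++It;
      return It;
    }

    template <typename IterT>
    IterT skipDebugBackward(IterT It, IterT Begin) {
      while (It != Begin && It->isDebugValue())
        --It;
      return It;
    }
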
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b293dfa98f82..fd2189397279 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11474,6 +11474,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                         const SmallBitVector &Zeroable,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
+  SmallVector<int, 4> WidenedMask;
+  if (!canWidenShuffleElements(Mask, WidenedMask))
+    return SDValue();
+
   // TODO: If minimizing size and one of the inputs is a zero vector and the
   // zero vector has only one use, we could use a VPERM2X128 to save the
   // instruction bytes needed to explicitly generate the zero vector.
@@ -11521,15 +11525,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   //    [6]   - ignore
   //    [7]   - zero high half of destination
 
-  int MaskLO = Mask[0];
-  if (MaskLO == SM_SentinelUndef)
-    MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
-
-  int MaskHI = Mask[2];
-  if (MaskHI == SM_SentinelUndef)
-    MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
+  int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
+  int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];
 
-  unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
+  unsigned PermMask = MaskLO | (MaskHI << 4);
 
   // If either input is a zero vector, replace it with an undef input.
   // Shuffle mask values <  4 are selecting elements of V1.
@@ -11538,16 +11537,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   // selecting the zero vector and setting the zero mask bit.
   if (IsV1Zero) {
     V1 = DAG.getUNDEF(VT);
-    if (MaskLO < 4)
+    if (MaskLO < 2)
       PermMask = (PermMask & 0xf0) | 0x08;
-    if (MaskHI < 4)
+    if (MaskHI < 2)
       PermMask = (PermMask & 0x0f) | 0x80;
   }
   if (IsV2Zero) {
     V2 = DAG.getUNDEF(VT);
-    if (MaskLO >= 4)
+    if (MaskLO >= 2)
       PermMask = (PermMask & 0xf0) | 0x08;
-    if (MaskHI >= 4)
+    if (MaskHI >= 2)
       PermMask = (PermMask & 0x0f) | 0x80;
   }
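
With the mask pre-widened to 128-bit lane granularity, the immediate computation above becomes a direct encoding: the low nibble selects the result's low lane and the high nibble its high lane, with values 0-1 naming V1's lanes and 2-3 naming V2's. A worked example (sketch):

    // A v4i64 shuffle mask {2,3,6,7} widens to the lane mask {1,3}:
    // result.lo = V1's high lane (1), result.hi = V2's high lane (3).
    constexpr unsigned MaskLO = 1;
    constexpr unsigned MaskHI = 3;
    constexpr unsigned PermMask = MaskLO | (MaskHI << 4);
    static_assert(PermMask == 0x31, "vperm2x128 immediate for {2,3,6,7}");
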
 
@@ -12012,11 +12011,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
 
-  SmallVector<int, 4> WidenedMask;
-  if (canWidenShuffleElements(Mask, WidenedMask))
-    if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
-                                             Zeroable, Subtarget, DAG))
-      return V;
+  if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
+                                           Zeroable, Subtarget, DAG))
+    return V;
 
   if (V2.isUndef()) {
     // Check for being able to broadcast a single element.
@@ -12107,11 +12104,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
   assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
 
-  SmallVector<int, 4> WidenedMask;
-  if (canWidenShuffleElements(Mask, WidenedMask))
-    if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
-                                             Zeroable, Subtarget, DAG))
-      return V;
+  if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
+                                           Zeroable, Subtarget, DAG))
+    return V;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
                                                 Zeroable, Subtarget, DAG))
@@ -12605,33 +12600,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
   if (!canWidenShuffleElements(Mask, WidenedMask))
     return SDValue();
 
+  // Check for patterns which can be matched with a single insert of a 256-bit
+  // subvector.
+  bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
+                                        {0, 1, 2, 3, 0, 1, 2, 3});
+  if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
+                                        {0, 1, 2, 3, 8, 9, 10, 11})) {
+    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
+    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+                              DAG.getIntPtrConstant(0, DL));
+    SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+                              OnlyUsesV1 ? V1 : V2,
+                              DAG.getIntPtrConstant(0, DL));
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+  }
+
+  assert(WidenedMask.size() == 4);
+
+  // See if this is an insertion of the lower 128 bits of V2 into V1.
+  bool IsInsert = true;
+  int V2Index = -1;
+  for (int i = 0; i < 4; ++i) {
+    assert(WidenedMask[i] >= -1);
+    if (WidenedMask[i] < 0)
+      continue;
+
+    // Make sure all V1 subvectors are in place.
+    if (WidenedMask[i] < 4) {
+      if (WidenedMask[i] != i) {
+        IsInsert = false;
+        break;
+      }
+    } else {
+      // Make sure we only have a single V2 index and it's the lowest 128 bits.
+      if (V2Index >= 0 || WidenedMask[i] != 4) {
+        IsInsert = false;
+        break;
+      }
+      V2Index = i;
+    }
+  }
+  if (IsInsert && V2Index >= 0) {
+    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
+    SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
+                                 DAG.getIntPtrConstant(0, DL));
+    return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
+  }
+
+  // Try to lower to vshuf64x2/vshuf32x4.
   SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+  unsigned PermMask = 0;
   // Ensure elements came from the same Op.
-  int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
-  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    if (WidenedMask[i] == SM_SentinelZero)
-      return SDValue();
-    if (WidenedMask[i] == SM_SentinelUndef)
+  for (int i = 0; i < 4; ++i) {
+    assert(WidenedMask[i] >= -1);
+    if (WidenedMask[i] < 0)
       continue;
 
-    SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
-    unsigned OpIndex = (i < Size/2) ? 0 : 1;
+    SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
+    unsigned OpIndex = i / 2;
     if (Ops[OpIndex].isUndef())
       Ops[OpIndex] = Op;
     else if (Ops[OpIndex] != Op)
       return SDValue();
-  }
 
-  // Form a 128-bit permutation.
-  // Convert the 64-bit shuffle mask selection values into 128-bit selection
-  // bits defined by a vshuf64x2 instruction's immediate control byte.
-  unsigned PermMask = 0, Imm = 0;
-  unsigned ControlBitsNum = WidenedMask.size() / 2;
-
-  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    // Use first element in place of undef mask.
-    Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
-    PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
+    // Convert the 128-bit shuffle mask selection values into 128-bit selection
+    // bits defined by a vshuf64x2 instruction's immediate control byte.
+    PermMask |= (WidenedMask[i] % 4) << (i * 2);
   }
 
   return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
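
Before falling back to vshuf64x2/vshuf32x4, the rewritten code above first peels off two cheaper forms (a 256-bit concat and a single 128-bit insert). When neither applies, each widened lane contributes two immediate bits at position 2*i, taken modulo 4 because the instruction selects lanes within each source independently. A worked example (sketch):

    // A v8i64 element mask {2,3,0,1,10,11,8,9} widens to {1,0,5,4}: both
    // sources swap their own 128-bit lanes. Each widened index mod 4 lands
    // at bit position 2*i of the immediate.
    constexpr unsigned WM[4] = {1, 0, 5, 4};
    constexpr unsigned PermMask = (WM[0] % 4) << 0 | (WM[1] % 4) << 2 |
                                  (WM[2] % 4) << 4 | (WM[3] % 4) << 6;
    static_assert(PermMask == 0x11, "vshuf64x2 immediate for {1,0,5,4}");
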
@@ -13051,10 +13085,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
   int NumElements = Mask.size();
 
-  int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0;
+  int NumV1Elements = 0, NumV2Elements = 0;
   for (int M : Mask)
     if (M < 0)
-      ++NumSentinelElements;
+      continue;
     else if (M < NumElements)
       ++NumV1Elements;
     else
@@ -18660,8 +18694,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
                                   Mask, PassThru, Subtarget, DAG);
     }
     case INTR_TYPE_3OP_IMM8_MASK:
-    case INTR_TYPE_3OP_MASK:
-    case INSERT_SUBVEC: {
+    case INTR_TYPE_3OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
@@ -18670,13 +18703,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
 
       if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
         Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
-      else if (IntrData->Type == INSERT_SUBVEC) {
-        // imm should be adapted to ISD::INSERT_SUBVECTOR behavior
-        assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
-        unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
-        Imm *= Src2.getSimpleValueType().getVectorNumElements();
-        Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
-      }
 
       // We specify 2 possible opcodes for intrinsics with rounding modes.
       // First, we check if the intrinsic may have non-default rounding mode,
@@ -28693,6 +28719,29 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
     return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
                                     Op.getOperand(2));
   }
+  case ISD::INSERT_SUBVECTOR: {
+    unsigned EltSize = EltVT.getSizeInBits();
+    if (EltSize != 32 && EltSize != 64)
+      return false;
+    MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
+    // Only change element size, not type.
+    if (VT.isInteger() != OpEltVT.isInteger())
+      return false;
+    uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+    Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
+    SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
+    DCI.AddToWorklist(Op0.getNode());
+    // Op1 needs to be bitcasted to a smaller vector with the same element type.
+    SDValue Op1 = Op.getOperand(1);
+    MVT Op1VT = MVT::getVectorVT(EltVT,
+                            Op1.getSimpleValueType().getSizeInBits() / EltSize);
+    Op1 = DAG.getBitcast(Op1VT, Op1);
+    DCI.AddToWorklist(Op1.getNode());
+    DCI.CombineTo(OrigOp.getNode(),
+                  DAG.getNode(Opcode, DL, VT, Op0, Op1,
+                              DAG.getConstant(Imm, DL, MVT::i8)));
+    return true;
+  }
   }
 
   return false;
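
The immediate rescaling in the INSERT_SUBVECTOR case above keeps the insertion at the same byte offset while counting in the new element size; a worked example (sketch):

    // Inserting a 128-bit subvector into v16i32 at element index 4 is the
    // same byte offset as inserting into v8i64 at element index 2.
    constexpr unsigned OldImm = 4, OpEltBits = 32, NewEltBits = 64;
    constexpr unsigned NewImm = (OldImm * OpEltBits) / NewEltBits;
    static_assert(NewImm == 2, "index rescaled to 64-bit elements");
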
@@ -31784,6 +31833,83 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
+/// the codegen.
+/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
+static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
+                                          const X86Subtarget &Subtarget,
+                                          SDLoc &DL) {
+  assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
+  SDValue Src = N->getOperand(0);
+  unsigned Opcode = Src.getOpcode();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  EVT VT = N->getValueType(0);
+  EVT SrcVT = Src.getValueType();
+
+  auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) {
+    // TODO: Add extra cases where we can truncate both inputs for the
+    // cost of one (or none).
+    // e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y )
+    if (Op0 == Op1)
+      return true;
+
+    SDValue BC0 = peekThroughOneUseBitcasts(Op0);
+    SDValue BC1 = peekThroughOneUseBitcasts(Op1);
+    return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
+           ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
+  };
+
+  auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
+    SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
+    SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+    return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
+  };
+
+  // Don't combine if the operation has other uses.
+  if (!N->isOnlyUserOf(Src.getNode()))
+    return SDValue();
+
+  // Only support vector truncation for now.
+  // TODO: i64 scalar math would benefit as well.
+  if (!VT.isVector())
+    return SDValue();
+
+  // In most cases it's only worth pre-truncating if we're only facing the cost
+  // of one truncation.
+  // i.e. if one of the inputs will constant fold or the input is repeated.
+  switch (Opcode) {
+  case ISD::AND:
+  case ISD::XOR:
+  case ISD::OR: {
+    SDValue Op0 = Src.getOperand(0);
+    SDValue Op1 = Src.getOperand(1);
+    if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
+        IsRepeatedOpOrOneUseConstant(Op0, Op1))
+      return TruncateArithmetic(Op0, Op1);
+    break;
+  }
+
+  case ISD::MUL:
+    // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) -
+    // it's better to truncate if we have the chance.
+    if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
+        !TLI.isOperationLegal(Opcode, SrcVT))
+      return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
+    LLVM_FALLTHROUGH;
+  case ISD::ADD: {
+    SDValue Op0 = Src.getOperand(0);
+    SDValue Op1 = Src.getOperand(1);
+    if (TLI.isOperationLegal(Opcode, VT) &&
+        IsRepeatedOpOrOneUseConstant(Op0, Op1))
+      return TruncateArithmetic(Op0, Op1);
+    break;
+  }
+  }
+
+  return SDValue();
+}
+
 /// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS.
 static SDValue
 combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
@@ -31970,6 +32096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   SDValue Src = N->getOperand(0);
   SDLoc DL(N);
 
+  // Attempt to pre-truncate inputs to arithmetic ops instead.
+  if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
+    return V;
+
   // Try to detect AVG pattern first.
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;
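
The new combine only fires when at most one truncation must be materialized: either one operand constant-folds or the operands are identical. The underlying identity is that truncation distributes over add/mul/and/or/xor, so the narrow op computes the same low bits as truncating the wide result; a scalar sketch:

    #include <cstdint>

    constexpr uint64_t X = 0x123456789abcdef0ULL;
    constexpr uint64_t Y = 0x0fedcba987654321ULL;
    static_assert(static_cast<uint32_t>(X * Y) ==
                      static_cast<uint32_t>(X) * static_cast<uint32_t>(Y),
                  "trunc(mul(x, y)) == mul(trunc(x), trunc(y))");
    static_assert(static_cast<uint32_t>(X + Y) ==
                      static_cast<uint32_t>(X) + static_cast<uint32_t>(Y),
                  "trunc(add(x, y)) == add(trunc(x), trunc(y))");
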
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index da7437ea0ccb..908053e1342d 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -650,33 +650,6 @@ multiclass vextract_for_size<int Opcode,
                                 From.ZSuffix # "rrkz")
                 To.KRCWM:$mask, From.RC:$src1,
                 (EXTRACT_get_vextract_imm To.RC:$ext))>;
-
-  // Intrinsic call with masking.
-  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                              "x" # To.NumElts # "_" # From.Size)
-                From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
-            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
-                                From.ZSuffix # "rrk")
-                To.RC:$src0,
-                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
-                From.RC:$src1, imm:$idx)>;
-
-  // Intrinsic call with zero-masking.
-  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                              "x" # To.NumElts # "_" # From.Size)
-                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
-            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
-                                From.ZSuffix # "rrkz")
-                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
-                From.RC:$src1, imm:$idx)>;
-
-  // Intrinsic call without masking.
-  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                              "x" # To.NumElts # "_" # From.Size)
-                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
-            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
-                                From.ZSuffix # "rr")
-                From.RC:$src1, imm:$idx)>;
 }
 
 // Codegen pattern for the alternative types
@@ -6871,18 +6844,18 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
                                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
   }
   let isCodeGenOnly = 1 in {
-    defm Int_VUCOMISSZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
-                              load, "ucomiss">, PS, EVEX, VEX_LIG,
+    defm Int_VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
+                              sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
                               EVEX_CD8<32, CD8VT1>;
-    defm Int_VUCOMISDZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
-                              load, "ucomisd">, PD, EVEX,
+    defm Int_VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
+                              sse_load_f64, "ucomisd">, PD, EVEX,
                               VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
 
-    defm Int_VCOMISSZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
-                              load, "comiss">, PS, EVEX, VEX_LIG,
+    defm Int_VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
+                              sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
                               EVEX_CD8<32, CD8VT1>;
-    defm Int_VCOMISDZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
-                              load, "comisd">, PD, EVEX,
+    defm Int_VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
+                              sse_load_f64, "comisd">, PD, EVEX,
                               VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
   }
 }
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 9d6a89363044..4cd6ae563f03 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2373,6 +2373,23 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
           Sched<[WriteFAddLd, ReadAfterLd]>;
 }
 
+// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
+multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
+                            ValueType vt, Operand memop,
+                            ComplexPattern mem_cpat, string OpcodeStr> {
+  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
+                     IIC_SSE_COMIS_RR>,
+          Sched<[WriteFAdd]>;
+  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
+                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                     [(set EFLAGS, (OpNode (vt RC:$src1),
+                                           mem_cpat:$src2))],
+                     IIC_SSE_COMIS_RM>,
+          Sched<[WriteFAddLd, ReadAfterLd]>;
+}
+
 let Defs = [EFLAGS] in {
   defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                   "ucomiss">, PS, VEX, VEX_LIG;
@@ -2386,15 +2403,15 @@ let Defs = [EFLAGS] in {
   }
 
   let isCodeGenOnly = 1 in {
-    defm Int_VUCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
-                              load, "ucomiss">, PS, VEX;
-    defm Int_VUCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
-                              load, "ucomisd">, PD, VEX;
-
-    defm Int_VCOMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
-                              load, "comiss">, PS, VEX;
-    defm Int_VCOMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
-                              load, "comisd">, PD, VEX;
+    defm Int_VUCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
+                              sse_load_f32, "ucomiss">, PS, VEX;
+    defm Int_VUCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
+                              sse_load_f64, "ucomisd">, PD, VEX;
+
+    defm Int_VCOMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
+                              sse_load_f32, "comiss">, PS, VEX;
+    defm Int_VCOMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
+                              sse_load_f64, "comisd">, PD, VEX;
   }
   defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                   "ucomiss">, PS;
@@ -2409,15 +2426,15 @@ let Defs = [EFLAGS] in {
   }
 
   let isCodeGenOnly = 1 in {
-    defm Int_UCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
-                                load, "ucomiss">, PS;
-    defm Int_UCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
-                                load, "ucomisd">, PD;
-
-    defm Int_COMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
-                                    "comiss">, PS;
-    defm Int_COMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
-                                    "comisd">, PD;
+    defm Int_UCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
+                                sse_load_f32, "ucomiss">, PS;
+    defm Int_UCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
+                                sse_load_f64, "ucomisd">, PD;
+
+    defm Int_COMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
+                                    sse_load_f32, "comiss">, PS;
+    defm Int_COMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
+                                    sse_load_f64, "comisd">, PD;
   }
 } // Defs = [EFLAGS]
 
diff --git a/lib/Target/X86/X86InstrTablesInfo.h b/lib/Target/X86/X86InstrTablesInfo.h
index 5d2af829028a..415a891bfd97 100755
--- a/lib/Target/X86/X86InstrTablesInfo.h
+++ b/lib/Target/X86/X86InstrTablesInfo.h
@@ -1,4 +1,4 @@
-//===-- X86AVX512Info.h - X86 Instruction Tables Information ----*- C++ -*-===//
+//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -25,8 +25,7 @@ struct X86EvexToVexCompressTableEntry {
 
 // X86 EVEX encoded instructions that have a VEX 128 encoding
 // (table format: <EVEX opcode, VEX-128 opcode>).
-static const X86EvexToVexCompressTableEntry 
-   X86EvexToVex128CompressTable[] = {
+static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
   // EVEX scalar with corresponding VEX.
   { X86::Int_VCOMISDZrm         ,  X86::Int_VCOMISDrm            },
   { X86::Int_VCOMISDZrr         ,  X86::Int_VCOMISDrr            },
@@ -250,20 +249,20 @@ static const X86EvexToVexCompressTableEntry
   { X86::VUCOMISDZrr            ,  X86::VUCOMISDrr               },
   { X86::VUCOMISSZrm            ,  X86::VUCOMISSrm               },
   { X86::VUCOMISSZrr            ,  X86::VUCOMISSrr               },
-                                                                               
+
   { X86::VMOV64toPQIZrr         ,   X86::VMOV64toPQIrr           },
   { X86::VMOV64toSDZrr          ,   X86::VMOV64toSDrr            },
   { X86::VMOVDI2PDIZrm          ,   X86::VMOVDI2PDIrm            },
   { X86::VMOVDI2PDIZrr          ,   X86::VMOVDI2PDIrr            },
   { X86::VMOVLHPSZrr            ,   X86::VMOVLHPSrr              },
-  { X86::VMOVHLPSZrr            ,   X86::VMOVHLPSrr              },  
+  { X86::VMOVHLPSZrr            ,   X86::VMOVHLPSrr              },
   { X86::VMOVPDI2DIZmr          ,   X86::VMOVPDI2DImr            },
   { X86::VMOVPDI2DIZrr          ,   X86::VMOVPDI2DIrr            },
   { X86::VMOVPQI2QIZmr          ,   X86::VMOVPQI2QImr            },
   { X86::VMOVPQIto64Zrr         ,   X86::VMOVPQIto64rr           },
   { X86::VMOVQI2PQIZrm          ,   X86::VMOVQI2PQIrm            },
   { X86::VMOVZPQILo2PQIZrr      ,   X86::VMOVZPQILo2PQIrr        },
-                                                                               
+
   { X86::VPEXTRBZmr             ,   X86::VPEXTRBmr               },
   { X86::VPEXTRBZrr             ,   X86::VPEXTRBrr               },
   { X86::VPEXTRDZmr             ,   X86::VPEXTRDmr               },
@@ -272,7 +271,7 @@ static const X86EvexToVexCompressTableEntry
   { X86::VPEXTRQZrr             ,   X86::VPEXTRQrr               },
   { X86::VPEXTRWZmr             ,   X86::VPEXTRWmr               },
   { X86::VPEXTRWZrr             ,   X86::VPEXTRWri               },
-                                                                               
+
   { X86::VPINSRBZrm             ,   X86::VPINSRBrm               },
   { X86::VPINSRBZrr             ,   X86::VPINSRBrr               },
   { X86::VPINSRDZrm             ,   X86::VPINSRDrm               },
@@ -294,7 +293,7 @@ static const X86EvexToVexCompressTableEntry
   { X86::VANDPDZ128rm           ,    X86::VANDPDrm               },
   { X86::VANDPDZ128rr           ,    X86::VANDPDrr               },
   { X86::VANDPSZ128rm           ,    X86::VANDPSrm               },
-  { X86::VANDPSZ128rr           ,    X86::VANDPSrr               },      
+  { X86::VANDPSZ128rr           ,    X86::VANDPSrr               },
   { X86::VBROADCASTSSZ128m      ,    X86::VBROADCASTSSrm         },
   { X86::VBROADCASTSSZ128r      ,    X86::VBROADCASTSSrr         },
   { X86::VBROADCASTSSZ128r_s    ,    X86::VBROADCASTSSrr         },
@@ -414,8 +413,8 @@ static const X86EvexToVexCompressTableEntry
   { X86::VMOVAPDZ128rm          ,    X86::VMOVAPDrm              },
   { X86::VMOVAPDZ128rr          ,    X86::VMOVAPDrr              },
   { X86::VMOVAPDZ128rr_REV      ,    X86::VMOVAPDrr_REV          },
-  { X86::VMOVAPSZ128mr          ,    X86::VMOVAPSmr              },    
-  { X86::VMOVAPSZ128rm          ,    X86::VMOVAPSrm              },    
+  { X86::VMOVAPSZ128mr          ,    X86::VMOVAPSmr              },
+  { X86::VMOVAPSZ128rm          ,    X86::VMOVAPSrm              },
   { X86::VMOVAPSZ128rr          ,    X86::VMOVAPSrr              },
   { X86::VMOVAPSZ128rr_REV      ,    X86::VMOVAPSrr_REV          },
   { X86::VMOVDDUPZ128rm         ,    X86::VMOVDDUPrm             },
@@ -464,8 +463,8 @@ static const X86EvexToVexCompressTableEntry
   { X86::VMOVUPDZ128rm          ,    X86::VMOVUPDrm              },
   { X86::VMOVUPDZ128rr          ,    X86::VMOVUPDrr              },
   { X86::VMOVUPDZ128rr_REV      ,    X86::VMOVUPDrr_REV          },
-  { X86::VMOVUPSZ128mr          ,    X86::VMOVUPSmr              },    
-  { X86::VMOVUPSZ128rm          ,    X86::VMOVUPSrm              },    
+  { X86::VMOVUPSZ128mr          ,    X86::VMOVUPSmr              },
+  { X86::VMOVUPSZ128rm          ,    X86::VMOVUPSrm              },
   { X86::VMOVUPSZ128rr          ,    X86::VMOVUPSrr              },
   { X86::VMOVUPSZ128rr_REV      ,    X86::VMOVUPSrr_REV          },
   { X86::VMULPDZ128rm           ,    X86::VMULPDrm               },
@@ -520,9 +519,9 @@ static const X86EvexToVexCompressTableEntry
   { X86::VPBROADCASTBZ128r      ,    X86::VPBROADCASTBrr         },
   { X86::VPBROADCASTDZ128m      ,    X86::VPBROADCASTDrm         },
   { X86::VPBROADCASTDZ128r      ,    X86::VPBROADCASTDrr         },
-  { X86::VPBROADCASTQZ128m      ,    X86::VPBROADCASTQrm         }, 
-  { X86::VPBROADCASTQZ128r      ,    X86::VPBROADCASTQrr         }, 
-  { X86::VPBROADCASTWZ128m      ,    X86::VPBROADCASTWrm         },    
+  { X86::VPBROADCASTQZ128m      ,    X86::VPBROADCASTQrm         },
+  { X86::VPBROADCASTQZ128r      ,    X86::VPBROADCASTQrr         },
+  { X86::VPBROADCASTWZ128m      ,    X86::VPBROADCASTWrm         },
   { X86::VPBROADCASTWZ128r      ,    X86::VPBROADCASTWrr         },
   { X86::VPERMILPDZ128mi        ,    X86::VPERMILPDmi            },
   { X86::VPERMILPDZ128ri        ,    X86::VPERMILPDri            },
@@ -583,7 +582,7 @@ static const X86EvexToVexCompressTableEntry
   { X86::VPMOVZXWDZ128rm        ,    X86::VPMOVZXWDrm            },
   { X86::VPMOVZXWDZ128rr        ,    X86::VPMOVZXWDrr            },
   { X86::VPMOVZXWQZ128rm        ,    X86::VPMOVZXWQrm            },
-  { X86::VPMOVZXWQZ128rr        ,    X86::VPMOVZXWQrr            },    
+  { X86::VPMOVZXWQZ128rr        ,    X86::VPMOVZXWQrr            },
   { X86::VPMULDQZ128rm          ,    X86::VPMULDQrm              },
   { X86::VPMULDQZ128rr          ,    X86::VPMULDQrr              },
   { X86::VPMULHRSWZ128rm        ,    X86::VPMULHRSWrm            },
@@ -612,10 +611,10 @@ static const X86EvexToVexCompressTableEntry
   { X86::VPSHUFHWZ128ri         ,    X86::VPSHUFHWri             },
   { X86::VPSHUFLWZ128mi         ,    X86::VPSHUFLWmi             },
   { X86::VPSHUFLWZ128ri         ,    X86::VPSHUFLWri             },
-  { X86::VPSLLDQZ128rr          ,    X86::VPSLLDQri              },    
+  { X86::VPSLLDQZ128rr          ,    X86::VPSLLDQri              },
   { X86::VPSLLDZ128ri           ,    X86::VPSLLDri               },
   { X86::VPSLLDZ128rm           ,    X86::VPSLLDrm               },
-  { X86::VPSLLDZ128rr           ,    X86::VPSLLDrr               },    
+  { X86::VPSLLDZ128rr           ,    X86::VPSLLDrr               },
   { X86::VPSLLQZ128ri           ,    X86::VPSLLQri               },
   { X86::VPSLLQZ128rm           ,    X86::VPSLLQrm               },
   { X86::VPSLLQZ128rr           ,    X86::VPSLLQrr               },
@@ -713,8 +712,7 @@ static const X86EvexToVexCompressTableEntry
 
 // X86 EVEX encoded instructions that have a VEX 256 encoding
 // (table format: <EVEX opcode, VEX-256 opcode>).
- static const X86EvexToVexCompressTableEntry 
-   X86EvexToVex256CompressTable[] = {
+static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = {
   { X86::VADDPDZ256rm           ,     X86::VADDPDYrm             },
   { X86::VADDPDZ256rr           ,     X86::VADDPDYrr             },
   { X86::VADDPSZ256rm           ,     X86::VADDPSYrm             },
@@ -727,11 +725,11 @@ static const X86EvexToVexCompressTableEntry
   { X86::VANDPDZ256rr           ,     X86::VANDPDYrr             },
   { X86::VANDPSZ256rm           ,     X86::VANDPSYrm             },
   { X86::VANDPSZ256rr           ,     X86::VANDPSYrr             },
-  { X86::VBROADCASTSDZ256m      ,     X86::VBROADCASTSDYrm       }, 
-  { X86::VBROADCASTSDZ256r      ,     X86::VBROADCASTSDYrr       }, 
-  { X86::VBROADCASTSDZ256r_s    ,     X86::VBROADCASTSDYrr       }, 
+  { X86::VBROADCASTSDZ256m      ,     X86::VBROADCASTSDYrm       },
+  { X86::VBROADCASTSDZ256r      ,     X86::VBROADCASTSDYrr       },
+  { X86::VBROADCASTSDZ256r_s    ,     X86::VBROADCASTSDYrr       },
   { X86::VBROADCASTSSZ256m      ,     X86::VBROADCASTSSYrm       },
-  { X86::VBROADCASTSSZ256r      ,     X86::VBROADCASTSSYrr       }, 
+  { X86::VBROADCASTSSZ256r      ,     X86::VBROADCASTSSYrr       },
   { X86::VBROADCASTSSZ256r_s    ,     X86::VBROADCASTSSYrr       },
   { X86::VCVTDQ2PDZ256rm        ,     X86::VCVTDQ2PDYrm          },
   { X86::VCVTDQ2PDZ256rr        ,     X86::VCVTDQ2PDYrr          },
@@ -757,6 +755,14 @@ static const X86EvexToVexCompressTableEntry
   { X86::VDIVPDZ256rr           ,     X86::VDIVPDYrr             },
   { X86::VDIVPSZ256rm           ,     X86::VDIVPSYrm             },
   { X86::VDIVPSZ256rr           ,     X86::VDIVPSYrr             },
+  { X86::VEXTRACTF32x4Z256mr    ,    X86::VEXTRACTF128mr         },
+  { X86::VEXTRACTF64x2Z256mr    ,    X86::VEXTRACTF128mr         },
+  { X86::VEXTRACTF32x4Z256rr    ,    X86::VEXTRACTF128rr         },
+  { X86::VEXTRACTF64x2Z256rr    ,    X86::VEXTRACTF128rr         },
+  { X86::VEXTRACTI32x4Z256mr    ,    X86::VEXTRACTI128mr         },
+  { X86::VEXTRACTI64x2Z256mr    ,    X86::VEXTRACTI128mr         },
+  { X86::VEXTRACTI32x4Z256rr    ,    X86::VEXTRACTI128rr         },
+  { X86::VEXTRACTI64x2Z256rr    ,    X86::VEXTRACTI128rr         },
   { X86::VFMADD132PDZ256m       ,     X86::VFMADD132PDYm         },
   { X86::VFMADD132PDZ256r       ,     X86::VFMADD132PDYr         },
   { X86::VFMADD132PSZ256m       ,     X86::VFMADD132PSYm         },
@@ -829,6 +835,14 @@ static const X86EvexToVexCompressTableEntry
   { X86::VFNMSUB231PDZ256r      ,     X86::VFNMSUB231PDYr        },
   { X86::VFNMSUB231PSZ256m      ,     X86::VFNMSUB231PSYm        },
   { X86::VFNMSUB231PSZ256r      ,     X86::VFNMSUB231PSYr        },
+  { X86::VINSERTF32x4Z256rm     ,    X86::VINSERTF128rm          },
+  { X86::VINSERTF64x2Z256rm     ,    X86::VINSERTF128rm          },
+  { X86::VINSERTF32x4Z256rr     ,    X86::VINSERTF128rr          },
+  { X86::VINSERTF64x2Z256rr     ,    X86::VINSERTF128rr          },
+  { X86::VINSERTI32x4Z256rm     ,    X86::VINSERTI128rm          },
+  { X86::VINSERTI64x2Z256rm     ,    X86::VINSERTI128rm          },
+  { X86::VINSERTI32x4Z256rr     ,    X86::VINSERTI128rr          },
+  { X86::VINSERTI64x2Z256rr     ,    X86::VINSERTI128rr          },
   { X86::VMAXCPDZ256rm          ,     X86::VMAXCPDYrm            },
   { X86::VMAXCPDZ256rr          ,     X86::VMAXCPDYrr            },
   { X86::VMAXCPSZ256rm          ,     X86::VMAXCPSYrm            },
@@ -849,8 +863,8 @@ static const X86EvexToVexCompressTableEntry
   { X86::VMOVAPDZ256rm          ,     X86::VMOVAPDYrm            },
   { X86::VMOVAPDZ256rr          ,     X86::VMOVAPDYrr            },
   { X86::VMOVAPDZ256rr_REV      ,     X86::VMOVAPDYrr_REV        },
-  { X86::VMOVAPSZ256mr          ,     X86::VMOVAPSYmr            },    
-  { X86::VMOVAPSZ256rm          ,     X86::VMOVAPSYrm            },    
+  { X86::VMOVAPSZ256mr          ,     X86::VMOVAPSYmr            },
+  { X86::VMOVAPSZ256rm          ,     X86::VMOVAPSYrm            },
   { X86::VMOVAPSZ256rr          ,     X86::VMOVAPSYrr            },
   { X86::VMOVAPSZ256rr_REV      ,     X86::VMOVAPSYrr_REV        },
   { X86::VMOVDDUPZ256rm         ,     X86::VMOVDDUPYrm           },
@@ -943,14 +957,14 @@ static const X86EvexToVexCompressTableEntry
   { X86::VPAVGBZ256rr           ,     X86::VPAVGBYrr             },
   { X86::VPAVGWZ256rm           ,     X86::VPAVGWYrm             },
   { X86::VPAVGWZ256rr           ,     X86::VPAVGWYrr             },
-  { X86::VPBROADCASTBZ256m      ,     X86::VPBROADCASTBYrm       }, 
-  { X86::VPBROADCASTBZ256r      ,     X86::VPBROADCASTBYrr       },   
-  { X86::VPBROADCASTDZ256m      ,     X86::VPBROADCASTDYrm       }, 
-  { X86::VPBROADCASTDZ256r      ,     X86::VPBROADCASTDYrr       },   
-  { X86::VPBROADCASTQZ256m      ,     X86::VPBROADCASTQYrm       }, 
-  { X86::VPBROADCASTQZ256r      ,     X86::VPBROADCASTQYrr       }, 
-  { X86::VPBROADCASTWZ256m      ,     X86::VPBROADCASTWYrm       }, 
-  { X86::VPBROADCASTWZ256r      ,     X86::VPBROADCASTWYrr       }, 
+  { X86::VPBROADCASTBZ256m      ,     X86::VPBROADCASTBYrm       },
+  { X86::VPBROADCASTBZ256r      ,     X86::VPBROADCASTBYrr       },
+  { X86::VPBROADCASTDZ256m      ,     X86::VPBROADCASTDYrm       },
+  { X86::VPBROADCASTDZ256r      ,     X86::VPBROADCASTDYrr       },
+  { X86::VPBROADCASTQZ256m      ,     X86::VPBROADCASTQYrm       },
+  { X86::VPBROADCASTQZ256r      ,     X86::VPBROADCASTQYrr       },
+  { X86::VPBROADCASTWZ256m      ,     X86::VPBROADCASTWYrm       },
+  { X86::VPBROADCASTWZ256r      ,     X86::VPBROADCASTWYrr       },
   { X86::VPERMDZ256rm           ,     X86::VPERMDYrm             },
   { X86::VPERMDZ256rr           ,     X86::VPERMDYrr             },
   { X86::VPERMILPDZ256mi        ,     X86::VPERMILPDYmi          },
@@ -1050,7 +1064,7 @@ static const X86EvexToVexCompressTableEntry
   { X86::VPSLLDQZ256rr          ,     X86::VPSLLDQYri            },
   { X86::VPSLLDZ256ri           ,     X86::VPSLLDYri             },
   { X86::VPSLLDZ256rm           ,     X86::VPSLLDYrm             },
-  { X86::VPSLLDZ256rr           ,     X86::VPSLLDYrr             },    
+  { X86::VPSLLDZ256rr           ,     X86::VPSLLDYrr             },
   { X86::VPSLLQZ256ri           ,     X86::VPSLLQYri             },
   { X86::VPSLLQZ256rm           ,     X86::VPSLLQYrm             },
   { X86::VPSLLQZ256rr           ,     X86::VPSLLQYrr             },
@@ -1060,7 +1074,7 @@ static const X86EvexToVexCompressTableEntry
   { X86::VPSLLVQZ256rr          ,     X86::VPSLLVQYrr            },
   { X86::VPSLLWZ256ri           ,     X86::VPSLLWYri             },
   { X86::VPSLLWZ256rm           ,     X86::VPSLLWYrm             },
-  { X86::VPSLLWZ256rr           ,     X86::VPSLLWYrr             },    
+  { X86::VPSLLWZ256rr           ,     X86::VPSLLWYrr             },
   { X86::VPSRADZ256ri           ,     X86::VPSRADYri             },
   { X86::VPSRADZ256rm           ,     X86::VPSRADYrm             },
   { X86::VPSRADZ256rr           ,     X86::VPSRADYrr             },
@@ -1072,7 +1086,7 @@ static const X86EvexToVexCompressTableEntry
   { X86::VPSRLDQZ256rr          ,     X86::VPSRLDQYri            },
   { X86::VPSRLDZ256ri           ,     X86::VPSRLDYri             },
   { X86::VPSRLDZ256rm           ,     X86::VPSRLDYrm             },
-  { X86::VPSRLDZ256rr           ,     X86::VPSRLDYrr             },   
+  { X86::VPSRLDZ256rr           ,     X86::VPSRLDYrr             },
   { X86::VPSRLQZ256ri           ,     X86::VPSRLQYri             },
   { X86::VPSRLQZ256rm           ,     X86::VPSRLQYrm             },
   { X86::VPSRLQZ256rr           ,     X86::VPSRLQYrr             },
@@ -1145,4 +1159,4 @@ static const X86EvexToVexCompressTableEntry
   { X86::VXORPSZ256rr           ,     X86::VXORPSYrr             },
 };
 
-#endif
\ No newline at end of file
+#endif
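
Each row in the two tables above pairs an EVEX opcode with its VEX-128/256 replacement; a consumer scans for the EVEX opcode and rewrites to the paired entry. A lookup sketch (the field names EvexOpcode/VexOpcode are assumed from the struct's definition; a real pass might sort the table once and binary-search instead of scanning):

    #include <cstddef>

    static unsigned lookupVexOpcode(const X86EvexToVexCompressTableEntry *Tbl,
                                    size_t N, unsigned EvexOpc) {
      for (size_t I = 0; I != N; ++I)    // linear scan for illustration
        if (Tbl[I].EvexOpcode == EvexOpc)
          return Tbl[I].VexOpcode;
      return 0;  // no VEX equivalent: leave the instruction EVEX-encoded
    }

For example, looking up X86::VMOVAPSZ128rr in X86EvexToVex128CompressTable would yield X86::VMOVAPSrr.
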
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index df47b4ad583d..63a02af02faa 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t {
   INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
   COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
-  EXPAND_FROM_MEM, INSERT_SUBVEC,
+  EXPAND_FROM_MEM,
   TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
   FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
 };
@@ -795,30 +795,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::VGETMANTS, 0),
   X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
                      X86ISD::VGETMANTS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
                      ISD::CTLZ, 0),
   X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 2f69df064e7f..a38a4b30b77d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -1115,56 +1115,6 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLo
   OutStreamer->EmitInstruction(TC, getSubtargetInfo());
 }
 
-void X86AsmPrinter::EmitXRayTable() {
-  if (Sleds.empty())
-    return;
-  
-  auto PrevSection = OutStreamer->getCurrentSectionOnly();
-  auto Fn = MF->getFunction();
-  MCSection *Section = nullptr;
-  if (Subtarget->isTargetELF()) {
-    if (Fn->hasComdat()) {
-      Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
-                                         ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
-                                         Fn->getComdat()->getName());
-    } else {
-      Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
-                                         ELF::SHF_ALLOC);
-    }
-  } else if (Subtarget->isTargetMachO()) {
-    Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
-                                         SectionKind::getReadOnlyWithRel());
-  } else {
-    llvm_unreachable("Unsupported target");
-  }
-
-  // Before we switch over, we force a reference to a label inside the
-  // xray_instr_map section. Since EmitXRayTable() is always called just
-  // before the function's end, we assume that this is happening after the
-  // last return instruction.
-  //
-  // We then align the reference to 16 byte boundaries, which we determined
-  // experimentally to be beneficial to avoid causing decoder stalls.
-  MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
-  OutStreamer->EmitCodeAlignment(16);
-  OutStreamer->EmitSymbolValue(Tmp, 8, false);
-  OutStreamer->SwitchSection(Section);
-  OutStreamer->EmitLabel(Tmp);
-  for (const auto &Sled : Sleds) {
-    OutStreamer->EmitSymbolValue(Sled.Sled, 8);
-    OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
-    auto Kind = static_cast<uint8_t>(Sled.Kind);
-    OutStreamer->EmitBytes(
-        StringRef(reinterpret_cast<const char *>(&Kind), 1));
-    OutStreamer->EmitBytes(
-        StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
-    OutStreamer->EmitZeros(14);
-  }
-  OutStreamer->SwitchSection(PrevSection);
-
-  Sleds.clear();
-}
-
 // Returns instruction preceding MBBI in MachineFunction.
 // If MBBI is the first instruction of the first basic block, returns null.
 static MachineBasicBlock::const_iterator
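
The emitter removed above fixed the xray_instr_map record layout: two 8-byte addresses, two flag bytes, and 14 bytes of zero padding, i.e. 32 bytes per sled. A sketch of that layout as a struct (illustrative only, not a declared LLVM type):

    #include <cstdint>

    struct XRaySledEntry {        // mirrors the bytes the emitter wrote
      uint64_t SledAddress;       // label of the patchable sled
      uint64_t FunctionAddress;   // CurrentFnSym, the enclosing function
      uint8_t  Kind;              // sled kind (entry/exit/tail-call)
      uint8_t  AlwaysInstrument;  // from the always-instrument attribute
      uint8_t  Padding[14];       // EmitZeros(14): pad record to 32 bytes
    };
    static_assert(sizeof(XRaySledEntry) == 32, "32 bytes per sled record");
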
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 2b0e672d56f2..d7792e296a58 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -598,198 +598,136 @@ int X86TTIImpl::getArithmeticInstrCost(
 
 int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                Type *SubTp) {
-
-  if (Kind == TTI::SK_Reverse) {
+  if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) {
+    // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+    // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
 
     static const CostTblEntry AVX512VBMIShuffleTbl[] = {
-      { ISD::VECTOR_SHUFFLE, MVT::v64i8,  1 }, // vpermb
-      { ISD::VECTOR_SHUFFLE, MVT::v32i8,  1 }  // vpermb
+      { TTI::SK_Reverse, MVT::v64i8,  1 }, // vpermb
+      { TTI::SK_Reverse, MVT::v32i8,  1 }  // vpermb
     };
 
     if (ST->hasVBMI())
-      if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
-                                              ISD::VECTOR_SHUFFLE, LT.second))
+      if (const auto *Entry =
+              CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
         return LT.first * Entry->Cost;
 
     static const CostTblEntry AVX512BWShuffleTbl[] = {
-      { ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw
-      { ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw
-      { ISD::VECTOR_SHUFFLE, MVT::v64i8,  6 }  // vextracti64x4 + 2*vperm2i128
-                                               // + 2*pshufb + vinserti64x4
+      { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
+      { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
+      { TTI::SK_Reverse, MVT::v64i8,  6 }  // vextracti64x4 + 2*vperm2i128
+                                           // + 2*pshufb + vinserti64x4
     };
 
     if (ST->hasBWI())
-      if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
-                                              ISD::VECTOR_SHUFFLE, LT.second))
+      if (const auto *Entry =
+              CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
         return LT.first * Entry->Cost;
 
     static const CostTblEntry AVX512ShuffleTbl[] = {
-      { ISD::VECTOR_SHUFFLE, MVT::v8f64,  1 }, // vpermpd
-      { ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps
-      { ISD::VECTOR_SHUFFLE, MVT::v8i64,  1 }, // vpermq
-      { ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd
+      { TTI::SK_Reverse, MVT::v8f64,  1 }, // vpermpd
+      { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
+      { TTI::SK_Reverse, MVT::v8i64,  1 }, // vpermq
+      { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
     };
 
     if (ST->hasAVX512())
       if (const auto *Entry =
-              CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+              CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
         return LT.first * Entry->Cost;
 
     static const CostTblEntry AVX2ShuffleTbl[] = {
-      { ISD::VECTOR_SHUFFLE, MVT::v4f64,  1 }, // vpermpd
-      { ISD::VECTOR_SHUFFLE, MVT::v8f32,  1 }, // vpermps
-      { ISD::VECTOR_SHUFFLE, MVT::v4i64,  1 }, // vpermq
-      { ISD::VECTOR_SHUFFLE, MVT::v8i32,  1 }, // vpermd
-      { ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb
-      { ISD::VECTOR_SHUFFLE, MVT::v32i8,  2 }  // vperm2i128 + pshufb
+      { TTI::SK_Reverse,   MVT::v4f64,  1 }, // vpermpd
+      { TTI::SK_Reverse,   MVT::v8f32,  1 }, // vpermps
+      { TTI::SK_Reverse,   MVT::v4i64,  1 }, // vpermq
+      { TTI::SK_Reverse,   MVT::v8i32,  1 }, // vpermd
+      { TTI::SK_Reverse,   MVT::v16i16, 2 }, // vperm2i128 + pshufb
+      { TTI::SK_Reverse,   MVT::v32i8,  2 }, // vperm2i128 + pshufb
+
+      { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
+      { TTI::SK_Alternate, MVT::v32i8,  1 }  // vpblendvb
     };
 
     if (ST->hasAVX2())
-      if (const auto *Entry =
-              CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+      if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
         return LT.first * Entry->Cost;
 
     static const CostTblEntry AVX1ShuffleTbl[] = {
-      { ISD::VECTOR_SHUFFLE, MVT::v4f64,  2 }, // vperm2f128 + vpermilpd
-      { ISD::VECTOR_SHUFFLE, MVT::v8f32,  2 }, // vperm2f128 + vpermilps
-      { ISD::VECTOR_SHUFFLE, MVT::v4i64,  2 }, // vperm2f128 + vpermilpd
-      { ISD::VECTOR_SHUFFLE, MVT::v8i32,  2 }, // vperm2f128 + vpermilps
-      { ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
-                                               // + vinsertf128
-      { ISD::VECTOR_SHUFFLE, MVT::v32i8,  4 }  // vextractf128 + 2*pshufb
-                                               // + vinsertf128
+      { TTI::SK_Reverse,   MVT::v4f64,  2 }, // vperm2f128 + vpermilpd
+      { TTI::SK_Reverse,   MVT::v8f32,  2 }, // vperm2f128 + vpermilps
+      { TTI::SK_Reverse,   MVT::v4i64,  2 }, // vperm2f128 + vpermilpd
+      { TTI::SK_Reverse,   MVT::v8i32,  2 }, // vperm2f128 + vpermilps
+      { TTI::SK_Reverse,   MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
+                                             // + vinsertf128
+      { TTI::SK_Reverse,   MVT::v32i8,  4 }, // vextractf128 + 2*pshufb
+                                             // + vinsertf128
+
+      { TTI::SK_Alternate, MVT::v4i64,  1 }, // vblendpd
+      { TTI::SK_Alternate, MVT::v4f64,  1 }, // vblendpd
+      { TTI::SK_Alternate, MVT::v8i32,  1 }, // vblendps
+      { TTI::SK_Alternate, MVT::v8f32,  1 }, // vblendps
+      { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
+      { TTI::SK_Alternate, MVT::v32i8,  3 }  // vpand + vpandn + vpor
     };
 
     if (ST->hasAVX())
-      if (const auto *Entry =
-              CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+      if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
+        return LT.first * Entry->Cost;
+
+    static const CostTblEntry SSE41ShuffleTbl[] = {
+      { TTI::SK_Alternate, MVT::v2i64,  1 }, // pblendw
+      { TTI::SK_Alternate, MVT::v2f64,  1 }, // movsd
+      { TTI::SK_Alternate, MVT::v4i32,  1 }, // pblendw
+      { TTI::SK_Alternate, MVT::v4f32,  1 }, // blendps
+      { TTI::SK_Alternate, MVT::v8i16,  1 }, // pblendw
+      { TTI::SK_Alternate, MVT::v16i8,  1 }  // pblendvb
+    };
+
+    if (ST->hasSSE41())
+      if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
         return LT.first * Entry->Cost;
 
     static const CostTblEntry SSSE3ShuffleTbl[] = {
-      { ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb
-      { ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 }  // pshufb
+      { TTI::SK_Reverse,   MVT::v8i16,  1 }, // pshufb
+      { TTI::SK_Reverse,   MVT::v16i8,  1 }, // pshufb
+
+      { TTI::SK_Alternate, MVT::v8i16,  3 }, // pshufb + pshufb + por
+      { TTI::SK_Alternate, MVT::v16i8,  3 }  // pshufb + pshufb + por
     };
 
     if (ST->hasSSSE3())
-      if (const auto *Entry =
-              CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+      if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
         return LT.first * Entry->Cost;
 
     static const CostTblEntry SSE2ShuffleTbl[] = {
-      { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd
-      { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd
-      { ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd
-      { ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw  + pshufd
-      { ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 }  // 2*pshuflw + 2*pshufhw
-                                              // + 2*pshufd + 2*unpck + packus
+      { TTI::SK_Reverse,   MVT::v2f64,  1 }, // shufpd
+      { TTI::SK_Reverse,   MVT::v2i64,  1 }, // pshufd
+      { TTI::SK_Reverse,   MVT::v4i32,  1 }, // pshufd
+      { TTI::SK_Reverse,   MVT::v8i16,  3 }, // pshuflw + pshufhw + pshufd
+      { TTI::SK_Reverse,   MVT::v16i8,  9 }, // 2*pshuflw + 2*pshufhw
+                                             // + 2*pshufd + 2*unpck + packus
+
+      { TTI::SK_Alternate, MVT::v2i64,  1 }, // movsd
+      { TTI::SK_Alternate, MVT::v2f64,  1 }, // movsd
+      { TTI::SK_Alternate, MVT::v4i32,  2 }, // 2*shufps
+      { TTI::SK_Alternate, MVT::v8i16,  3 }, // pand + pandn + por
+      { TTI::SK_Alternate, MVT::v16i8,  3 }  // pand + pandn + por
     };
 
     if (ST->hasSSE2())
-      if (const auto *Entry =
-              CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+      if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
         return LT.first * Entry->Cost;
 
     static const CostTblEntry SSE1ShuffleTbl[] = {
-        { ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps
+      { TTI::SK_Reverse,   MVT::v4f32,  1 }, // shufps
+      { TTI::SK_Alternate, MVT::v4f32,  2 }  // 2*shufps
     };
 
     if (ST->hasSSE1())
-      if (const auto *Entry =
-              CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+      if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
         return LT.first * Entry->Cost;
 
-  } else if (Kind == TTI::SK_Alternate) {
-    // 64-bit packed float vectors (v2f32) are widened to type v4f32.
-    // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
-    // The backend knows how to generate a single VEX.256 version of
-    // instruction VPBLENDW if the target supports AVX2.
-    if (ST->hasAVX2() && LT.second == MVT::v16i16)
-      return LT.first;
-
-    static const CostTblEntry AVXAltShuffleTbl[] = {
-      {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1},  // vblendpd
-      {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1},  // vblendpd
-
-      {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1},  // vblendps
-      {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1},  // vblendps
-
-      // This shuffle is custom lowered into a sequence of:
-      //  2x  vextractf128 , 2x vpblendw , 1x vinsertf128
-      {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
-
-      // This shuffle is custom lowered into a long sequence of:
-      //  2x vextractf128 , 4x vpshufb , 2x vpor ,  1x vinsertf128
-      {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
-    };
-
-    if (ST->hasAVX())
-      if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl,
-                                              ISD::VECTOR_SHUFFLE, LT.second))
-        return LT.first * Entry->Cost;
-
-    static const CostTblEntry SSE41AltShuffleTbl[] = {
-      // These are lowered into movsd.
-      {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
-      {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
-
-      // packed float vectors with four elements are lowered into BLENDI dag
-      // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
-      {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
-      {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
-
-      // This shuffle generates a single pshufw.
-      {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
-
-      // There is no instruction that matches a v16i8 alternate shuffle.
-      // The backend will expand it into the sequence 'pshufb + pshufb + or'.
-      {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
-    };
-
-    if (ST->hasSSE41())
-      if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE,
-                                              LT.second))
-        return LT.first * Entry->Cost;
-
-    static const CostTblEntry SSSE3AltShuffleTbl[] = {
-      {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},  // movsd
-      {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},  // movsd
-
-      // SSE3 doesn't have 'blendps'. The following shuffles are expanded into
-      // the sequence 'shufps + pshufd'
-      {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
-      {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
-
-      {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
-      {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}  // pshufb + pshufb + or
-    };
-
-    if (ST->hasSSSE3())
-      if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl,
-                                              ISD::VECTOR_SHUFFLE, LT.second))
-        return LT.first * Entry->Cost;
-
-    static const CostTblEntry SSEAltShuffleTbl[] = {
-      {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},  // movsd
-      {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},  // movsd
-
-      {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
-      {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
-
-      // This is expanded into a long sequence of four extract + four insert.
-      {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
-
-      // 8 x (pinsrw + pextrw + and + movb + movzb + or)
-      {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
-    };
-
-    // Fall-back (SSE3 and SSE2).
-    if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl,
-                                            ISD::VECTOR_SHUFFLE, LT.second))
-      return LT.first * Entry->Cost;
-
   } else if (Kind == TTI::SK_PermuteTwoSrc) {
     // We assume that source and destination have the same vector type.
     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
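The X86TargetTransformInfo.cpp change above merges the SK_Reverse and
SK_Alternate paths into one cascade of per-ISA cost tables keyed directly on
the TTI shuffle kind rather than on ISD::VECTOR_SHUFFLE, with lookups
falling through from the newest supported ISA to the oldest. A minimal
standalone C++ sketch of that lookup pattern, using simplified stand-ins for
TTI::ShuffleKind, MVT, and CostTableLookup (the real LLVM declarations
differ) and costs taken from the tables above:

    #include <cstddef>
    #include <optional>

    enum ShuffleKind { SK_Reverse, SK_Alternate };  // stand-in for TTI::ShuffleKind
    enum SimpleVT { v2f64, v4i32, v8i16, v16i8 };   // stand-in for MVT

    struct CostEntry { ShuffleKind Kind; SimpleVT Ty; int Cost; };

    // Linear scan keyed on (Kind, Ty), like CostTableLookup's contract.
    template <std::size_t N>
    std::optional<int> lookupCost(const CostEntry (&Tbl)[N],
                                  ShuffleKind Kind, SimpleVT Ty) {
      for (const CostEntry &E : Tbl)
        if (E.Kind == Kind && E.Ty == Ty)
          return E.Cost;
      return std::nullopt;  // fall through to an older ISA's table
    }

    int shuffleCost(ShuffleKind Kind, SimpleVT Ty,
                    bool HasSSE41, bool HasSSE2) {
      static const CostEntry SSE41Tbl[] = {
        { SK_Alternate, v4i32, 1 },  // blendps
        { SK_Alternate, v8i16, 1 },  // pblendw
      };
      static const CostEntry SSE2Tbl[] = {
        { SK_Reverse,   v4i32, 1 },  // pshufd
        { SK_Alternate, v4i32, 2 },  // 2*shufps
      };
      // Newest ISA first; each table lists only what that ISA handles best.
      if (HasSSE41)
        if (auto C = lookupCost(SSE41Tbl, Kind, Ty)) return *C;
      if (HasSSE2)
        if (auto C = lookupCost(SSE2Tbl, Kind, Ty)) return *C;
      return 1;  // placeholder fallback for this sketch only
    }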