46 files changed, 2991 insertions, 1175 deletions
diff --git a/lib/Target/SystemZ/AsmParser/Makefile b/lib/Target/SystemZ/AsmParser/Makefile
deleted file mode 100644
index 623ae2c4e3ef..000000000000
--- a/lib/Target/SystemZ/AsmParser/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/SystemZ/AsmParser/Makefile ---------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZAsmParser
-
-# Hack: we need to include 'main' SystemZ target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 9c995bf42b0b..3923614c89d3 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -13,9 +13,9 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
 #include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
@@ -391,6 +391,9 @@ public:
     : MCTargetAsmParser(Options, sti), Parser(parser) {
     MCAsmParserExtension::Initialize(Parser);
 
+    // Alias the .word directive to .short.
+    parser.addAliasForDirective(".word", ".short");
+
     // Initialize the set of available features.
     setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
   }
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 336f037bb733..4b849ad6491b 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -30,6 +30,7 @@ add_llvm_target(SystemZCodeGen
   SystemZSubtarget.cpp
   SystemZTargetMachine.cpp
   SystemZTargetTransformInfo.cpp
+  SystemZTDC.cpp
   )
 
 add_subdirectory(AsmParser)
diff --git a/lib/Target/SystemZ/Disassembler/Makefile b/lib/Target/SystemZ/Disassembler/Makefile
deleted file mode 100644
index efc4cc8e9cb0..000000000000
--- a/lib/Target/SystemZ/Disassembler/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===-- lib/Target/SystemZ/Disassembler/Makefile -----------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZDisassembler
-
-# Hack: we need to include 'main' x86 target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index bf67b75d5337..20e015b42d21 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SystemZ.h"
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -46,6 +46,34 @@ extern "C" void LLVMInitializeSystemZDisassembler() {
                                          createSystemZDisassembler);
 }
 
+/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
+/// immediate Value in the MCInst.
+///
+/// @param Value      - The immediate Value, has had any PC adjustment made by
+///                     the caller.
+/// @param isBranch   - If the instruction is a branch instruction
+/// @param Address    - The starting address of the instruction
+/// @param Offset     - The byte offset to this immediate in the instruction
+/// @param Width      - The byte width of this immediate in the instruction
+///
+/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
+/// called then that function is called to get any symbolic information for the
+/// immediate in the instruction using the Address, Offset and Width.  If that
+/// returns non-zero then the symbolic information it returns is used to create
+/// an MCExpr and that is added as an operand to the MCInst.  If getOpInfo()
+/// returns zero and isBranch is true then a symbol look up for immediate Value
+/// is done and if a symbol is found an MCExpr is created with that, else
+/// an MCExpr with the immediate Value is created.  This function returns true
+/// if it adds an operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
+                                     uint64_t Address, uint64_t Offset,
+                                     uint64_t Width, MCInst &MI,
+                                     const void *Decoder) {
+  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+  return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
+                                       Offset, Width);
+}
+
 static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
                                         const unsigned *Regs, unsigned Size) {
   assert(RegNo < Size && "Invalid register");
@@ -206,22 +234,35 @@ static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm,
 
 template<unsigned N>
 static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
-                                       uint64_t Address) {
+                                       uint64_t Address,
+                                       bool isBranch,
+                                       const void *Decoder) {
   assert(isUInt<N>(Imm) && "Invalid PC-relative offset");
-  Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) * 2 + Address));
+  uint64_t Value = SignExtend64<N>(Imm) * 2 + Address;
+
+  if (!tryAddingSymbolicOperand(Value, isBranch, Address, 2, N / 8,
+                                Inst, Decoder))
+    Inst.addOperand(MCOperand::createImm(Value));
+
   return MCDisassembler::Success;
 }
 
-static DecodeStatus decodePC16DBLOperand(MCInst &Inst, uint64_t Imm,
-                                         uint64_t Address,
-                                         const void *Decoder) {
-  return decodePCDBLOperand<16>(Inst, Imm, Address);
+static DecodeStatus decodePC16DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  return decodePCDBLOperand<16>(Inst, Imm, Address, true, Decoder);
+}
+
+static DecodeStatus decodePC32DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  return decodePCDBLOperand<32>(Inst, Imm, Address, true, Decoder);
 }
 
 static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm,
                                          uint64_t Address,
                                          const void *Decoder) {
-  return decodePCDBLOperand<32>(Inst, Imm, Address);
+  return decodePCDBLOperand<32>(Inst, Imm, Address, false, Decoder);
 }
 
 static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field,
diff --git a/lib/Target/SystemZ/InstPrinter/Makefile b/lib/Target/SystemZ/InstPrinter/Makefile
deleted file mode 100644
index 3ba8126735a5..000000000000
--- a/lib/Target/SystemZ/InstPrinter/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/SystemZ/AsmPrinter/Makefile --------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZAsmPrinter
-
-# Hack: we need to include 'main' mips target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/MCTargetDesc/Makefile b/lib/Target/SystemZ/MCTargetDesc/Makefile
deleted file mode 100644
index 08f1a9d51fb5..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 57eebe19c044..c4d546cb7dff 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -58,7 +58,8 @@ public:
                             const MCAsmLayout &Layout) const override {
     return false;
   }
-  void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
+  void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+                        MCInst &Res) const override {
     llvm_unreachable("SystemZ does do not have assembler relaxation");
   }
   bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index ee1af023769e..368c95f7bac2 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -24,8 +24,8 @@ public:
 
 protected:
   // Override MCELFObjectTargetWriter.
-  unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
-                        bool IsPCRel) const override;
+  unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+                        const MCFixup &Fixup, bool IsPCRel) const override;
 };
 } // end anonymous namespace
 
@@ -106,7 +106,8 @@ static unsigned getPLTReloc(unsigned Kind) {
   llvm_unreachable("Unsupported absolute address");
 }
 
-unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
+unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
+                                           const MCValue &Target,
                                            const MCFixup &Fixup,
                                            bool IsPCRel) const {
   MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 2115d4480eef..e16ba9e15317 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -10,7 +10,6 @@
 #include "SystemZMCTargetDesc.h"
 #include "InstPrinter/SystemZInstPrinter.h"
 #include "SystemZMCAsmInfo.h"
-#include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
@@ -159,17 +158,8 @@ createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
   return createSystemZMCSubtargetInfoImpl(TT, CPU, FS);
 }
 
-static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT,
-                                                 Reloc::Model RM,
-                                                 CodeModel::Model CM,
-                                                 CodeGenOpt::Level OL) {
-  MCCodeGenInfo *X = new MCCodeGenInfo();
-
-  // Static code is suitable for use in a dynamic executable; there is no
-  // separate DynamicNoPIC model.
-  if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC)
-    RM = Reloc::Static;
-
+static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
+                              CodeModel::Model &CM) {
   // For SystemZ we define the models as follows:
   //
   // Small:  BRASL can call any function and will use a stub if necessary.
@@ -203,8 +193,6 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT,
     CM = CodeModel::Small;
   else if (CM == CodeModel::JITDefault)
     CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
-  X->initMCCodeGenInfo(RM, CM, OL);
-  return X;
 }
 
 static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
@@ -220,9 +208,9 @@ extern "C" void LLVMInitializeSystemZTargetMC() {
   TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget,
                                     createSystemZMCAsmInfo);
 
-  // Register the MCCodeGenInfo.
-  TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget,
-                                        createSystemZMCCodeGenInfo);
+  // Register the adjustCodeGenOpts.
+  TargetRegistry::registerMCAdjustCodeGenOpts(TheSystemZTarget,
+                                              adjustCodeGenOpts);
 
   // Register the MCCodeEmitter.
   TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget,
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
deleted file mode 100644
index 732c31725538..000000000000
--- a/lib/Target/SystemZ/Makefile
+++ /dev/null
@@ -1,28 +0,0 @@
-##===- lib/Target/SystemZ/Makefile -------------------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMSystemZCodeGen
-TARGET = SystemZ
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = SystemZGenRegisterInfo.inc \
-		SystemZGenAsmWriter.inc \
-		SystemZGenAsmMatcher.inc \
-		SystemZGenDisassemblerTables.inc \
-		SystemZGenInstrInfo.inc \
-		SystemZGenDAGISel.inc \
-		SystemZGenSubtargetInfo.inc \
-		SystemZGenCallingConv.inc \
-		SystemZGenMCCodeEmitter.inc
-
-DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index cd367d60bab7..86a1322c9e23 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -7,13 +7,6 @@ for later architectures at some point.
 
 --
 
-SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand() is passed "m" for all
-inline asm memory constraints; it doesn't get to see the original constraint.
-This means that it must conservatively treat all inline asm constraints
-as the most restricted type, "R".
-
---
-
 If an inline asm ties an i32 "r" result to an i64 input, the input
 will be treated as an i32, leaving the upper bits uninitialised.
 For example:
@@ -43,15 +36,6 @@ We don't use the BRANCH ON INDEX instructions.
 
 --
 
-We might want to use BRANCH ON CONDITION for conditional indirect calls
-and conditional returns.
-
---
-
-We don't use the TEST DATA CLASS instructions.
-
---
-
 We only use MVC, XC and CLC for constant-length block operations.
 We could extend them to variable-length operations too,
 using EXECUTE RELATIVE LONG.
@@ -79,11 +63,6 @@ via a register.)
 
 --
 
-We don't use the halfword forms of LOAD REVERSED and STORE REVERSED
-(LRVH and STRVH).
-
---
-
 We don't use ICM or STCM.
 
 --
@@ -123,7 +102,7 @@ ought to be implemented as:
         ngr     %r2, %r0
         br      %r14
 
-but two-address optimisations reverse the order of the AND and force:
+but two-address optimizations reverse the order of the AND and force:
 
         lhi     %r0, 1
         ngr     %r0, %r2
@@ -166,3 +145,10 @@ If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG.
 
 We might want to model all access registers and use them to spill
 32-bit values.
+
+--
+
+We might want to use the 'overflow' condition of eg. AR to support
+llvm.sadd.with.overflow.i32 and related instructions - the generated code
+for signed overflow check is currently quite bad.  This would improve
+the results of using -ftrapv.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index cafe2c5948c4..c8ea9641fb62 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -87,6 +87,11 @@ const unsigned CCMASK_VCMP_MIXED     = CCMASK_1;
 const unsigned CCMASK_VCMP_NONE      = CCMASK_3;
 const unsigned CCMASK_VCMP           = CCMASK_0 | CCMASK_1 | CCMASK_3;
 
+// Condition-code mask assignments for Test Data Class.
+const unsigned CCMASK_TDC_NOMATCH   = CCMASK_0;
+const unsigned CCMASK_TDC_MATCH     = CCMASK_1;
+const unsigned CCMASK_TDC           = CCMASK_TDC_NOMATCH | CCMASK_TDC_MATCH;
+
 // The position of the low CC bit in an IPM result.
 const unsigned IPM_CC = 28;
 
@@ -94,6 +99,41 @@ const unsigned IPM_CC = 28;
 const unsigned PFD_READ  = 1;
 const unsigned PFD_WRITE = 2;
 
+// Mask assignments for TDC
+const unsigned TDCMASK_ZERO_PLUS       = 0x800;
+const unsigned TDCMASK_ZERO_MINUS      = 0x400;
+const unsigned TDCMASK_NORMAL_PLUS     = 0x200;
+const unsigned TDCMASK_NORMAL_MINUS    = 0x100;
+const unsigned TDCMASK_SUBNORMAL_PLUS  = 0x080;
+const unsigned TDCMASK_SUBNORMAL_MINUS = 0x040;
+const unsigned TDCMASK_INFINITY_PLUS   = 0x020;
+const unsigned TDCMASK_INFINITY_MINUS  = 0x010;
+const unsigned TDCMASK_QNAN_PLUS       = 0x008;
+const unsigned TDCMASK_QNAN_MINUS      = 0x004;
+const unsigned TDCMASK_SNAN_PLUS       = 0x002;
+const unsigned TDCMASK_SNAN_MINUS      = 0x001;
+
+const unsigned TDCMASK_ZERO            = TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS;
+const unsigned TDCMASK_POSITIVE        = TDCMASK_NORMAL_PLUS |
+                                         TDCMASK_SUBNORMAL_PLUS |
+                                         TDCMASK_INFINITY_PLUS;
+const unsigned TDCMASK_NEGATIVE        = TDCMASK_NORMAL_MINUS |
+                                         TDCMASK_SUBNORMAL_MINUS |
+                                         TDCMASK_INFINITY_MINUS;
+const unsigned TDCMASK_NAN             = TDCMASK_QNAN_PLUS |
+                                         TDCMASK_QNAN_MINUS |
+                                         TDCMASK_SNAN_PLUS |
+                                         TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_PLUS            = TDCMASK_POSITIVE |
+                                         TDCMASK_ZERO_PLUS |
+                                         TDCMASK_QNAN_PLUS |
+                                         TDCMASK_SNAN_PLUS;
+const unsigned TDCMASK_MINUS           = TDCMASK_NEGATIVE |
+                                         TDCMASK_ZERO_MINUS |
+                                         TDCMASK_QNAN_MINUS |
+                                         TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_ALL             = TDCMASK_PLUS | TDCMASK_MINUS;
+
 // Number of bits in a vector register.
 const unsigned VectorBits = 128;
 
@@ -138,6 +178,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZTDCPass();
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 75273114d62f..9c0f327ff744 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -109,6 +109,85 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D);
     break;
 
+  case SystemZ::CondReturn:
+    LoweredMI = MCInstBuilder(SystemZ::BCR)
+      .addImm(MI->getOperand(0).getImm())
+      .addImm(MI->getOperand(1).getImm())
+      .addReg(SystemZ::R14D);
+    break;
+
+  case SystemZ::CRBReturn:
+    LoweredMI = MCInstBuilder(SystemZ::CRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R14D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CGRBReturn:
+    LoweredMI = MCInstBuilder(SystemZ::CGRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R14D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CIBReturn:
+    LoweredMI = MCInstBuilder(SystemZ::CIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R14D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CGIBReturn:
+    LoweredMI = MCInstBuilder(SystemZ::CGIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R14D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLRBReturn:
+    LoweredMI = MCInstBuilder(SystemZ::CLRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R14D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLGRBReturn:
+    LoweredMI = MCInstBuilder(SystemZ::CLGRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R14D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLIBReturn:
+    LoweredMI = MCInstBuilder(SystemZ::CLIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R14D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLGIBReturn:
+    LoweredMI = MCInstBuilder(SystemZ::CLGIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R14D)
+      .addImm(0);
+    break;
+
   case SystemZ::CallBRASL:
     LoweredMI = MCInstBuilder(SystemZ::BRASL)
       .addReg(SystemZ::R14D)
@@ -126,10 +205,96 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
     break;
 
+  case SystemZ::CallBRCL:
+    LoweredMI = MCInstBuilder(SystemZ::BRCL)
+      .addImm(MI->getOperand(0).getImm())
+      .addImm(MI->getOperand(1).getImm())
+      .addExpr(Lower.getExpr(MI->getOperand(2), MCSymbolRefExpr::VK_PLT));
+    break;
+
   case SystemZ::CallBR:
     LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
     break;
 
+  case SystemZ::CallBCR:
+    LoweredMI = MCInstBuilder(SystemZ::BCR)
+      .addImm(MI->getOperand(0).getImm())
+      .addImm(MI->getOperand(1).getImm())
+      .addReg(SystemZ::R1D);
+    break;
+
+  case SystemZ::CRBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CGRBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CGRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CIBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CGIBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CGIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLRBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CLRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLGRBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CLGRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLIBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CLIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLGIBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CLGIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
   case SystemZ::TLS_GDCALL:
     LoweredMI = MCInstBuilder(SystemZ::BRASL)
       .addReg(SystemZ::R14D)
@@ -260,6 +425,41 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
         .addImm(15).addReg(SystemZ::R0D);
     break;
 
+  // Emit nothing here but a comment if we can.
+  case SystemZ::MemBarrier:
+    OutStreamer->emitRawComment("MEMBARRIER");
+    return;
+
+  // We want to emit "j .+2" for traps, jumping to the relative immediate field
+  // of the jump instruction, which is an illegal instruction. We cannot emit a
+  // "." symbol, so create and emit a temp label before the instruction and use
+  // that instead.
+  case SystemZ::Trap: {
+    MCSymbol *DotSym = OutContext.createTempSymbol();
+    OutStreamer->EmitLabel(DotSym);
+
+    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
+    const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
+    LoweredMI = MCInstBuilder(SystemZ::J)
+      .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext));
+    }
+    break;
+
+  // Conditional traps will create a branch on condition instruction that jumps
+  // to the relative immediate field of the jump instruction. (eg. "jo .+2")
+  case SystemZ::CondTrap: {
+    MCSymbol *DotSym = OutContext.createTempSymbol();
+    OutStreamer->EmitLabel(DotSym);
+
+    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
+    const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
+    LoweredMI = MCInstBuilder(SystemZ::BRC)
+      .addImm(MI->getOperand(0).getImm())
+      .addImm(MI->getOperand(1).getImm())
+      .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext));
+    }
+    break;
+
   default:
     Lower.lower(MI, LoweredMI);
     break;
diff --git a/lib/Target/SystemZ/SystemZCallingConv.cpp b/lib/Target/SystemZ/SystemZCallingConv.cpp
index cc9c84b6a058..72da51f74b10 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -12,10 +12,10 @@
 
 using namespace llvm;
 
-const unsigned SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = {
+const MCPhysReg SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = {
   SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D
 };
 
-const unsigned SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = {
+const MCPhysReg SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = {
   SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
 };
diff --git a/lib/Target/SystemZ/SystemZCallingConv.h b/lib/Target/SystemZ/SystemZCallingConv.h
index bff0706618aa..b5523e586f4c 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/lib/Target/SystemZ/SystemZCallingConv.h
@@ -12,14 +12,15 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/MC/MCRegisterInfo.h"
 
 namespace llvm {
 namespace SystemZ {
   const unsigned NumArgGPRs = 5;
-  extern const unsigned ArgGPRs[NumArgGPRs];
+  extern const MCPhysReg ArgGPRs[NumArgGPRs];
 
   const unsigned NumArgFPRs = 4;
-  extern const unsigned ArgFPRs[NumArgFPRs];
+  extern const MCPhysReg ArgFPRs[NumArgFPRs];
 } // end namespace SystemZ
 
 class SystemZCCState : public CCState {
@@ -79,6 +80,51 @@ public:
   bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; }
 };
 
+// Handle i128 argument types.  These need to be passed by implicit
+// reference.  This could be as simple as the following .td line:
+//    CCIfType<[i128], CCPassIndirect<i64>>,
+// except that i128 is not a legal type, and therefore gets split by
+// common code into a pair of i64 arguments.
+inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
+                                    MVT &LocVT,
+                                    CCValAssign::LocInfo &LocInfo,
+                                    ISD::ArgFlagsTy &ArgFlags,
+                                    CCState &State) {
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // ArgFlags.isSplit() is true on the first part of a i128 argument;
+  // PendingMembers.empty() is false on all subsequent parts.
+  if (!ArgFlags.isSplit() && PendingMembers.empty())
+    return false;
+
+  // Push a pending Indirect value location for each part.
+  LocVT = MVT::i64;
+  LocInfo = CCValAssign::Indirect;
+  PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT,
+                                                   LocVT, LocInfo));
+  if (!ArgFlags.isSplitEnd())
+    return true;
+
+  // OK, we've collected all parts in the pending list.  Allocate
+  // the location (register or stack slot) for the indirect pointer.
+  // (This duplicates the usual i64 calling convention rules.)
+  unsigned Reg = State.AllocateReg(SystemZ::ArgGPRs);
+  unsigned Offset = Reg ? 0 : State.AllocateStack(8, 8);
+
+  // Use that same location for all the pending parts.
+  for (auto &It : PendingMembers) {
+    if (Reg)
+      It.convertToReg(Reg);
+    else
+      It.convertToMem(Offset);
+    State.addLoc(It);
+  }
+
+  PendingMembers.clear();
+
+  return true;
+}
+
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index bdd1b1598adb..2bf5ac29865f 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -33,6 +33,9 @@ def RetCC_SystemZ : CallingConv<[
   // Promote i32 to i64 if it has an explicit extension type.
   CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
 
+  // A SwiftError is returned in R9.
+  CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>,
+
   // ABI-compliant code returns 64-bit integers in R2.  Make the other
   // call-clobbered argument registers available for code that doesn't
   // care about the ABI.  (R6 is an argument register too, but is
@@ -65,8 +68,17 @@ def CC_SystemZ : CallingConv<[
   // are smaller than 64 bits shouldn't.
   CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
 
+  // A SwiftSelf is passed in callee-saved R10.
+  CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>,
+
+  // A SwiftError is passed in callee-saved R9.
+  CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>,
+
   // Force long double values to the stack and pass i64 pointers to them.
   CCIfType<[f128], CCPassIndirect<i64>>,
+  // Same for i128 values.  These are already split into two i64 here,
+  // so we have to use a custom handler.
+  CCIfType<[i64], CCCustom<"CC_SystemZ_I128Indirect">>,
 
   // The first 5 integer arguments are passed in R2-R6.  Note that R6
   // is call-saved.
@@ -105,3 +117,6 @@ def CC_SystemZ : CallingConv<[
 //===----------------------------------------------------------------------===//
 def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15),
                                        (sequence "F%dD", 8, 15))>;
+
+// R9 is used to return SwiftError; remove it from CSR.
+def CSR_SystemZ_SwiftError : CalleeSavedRegs<(sub CSR_SystemZ, R9D)>;
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index 4818ed015522..27350b88554d 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -18,7 +18,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -65,18 +64,22 @@ public:
 
   bool processBlock(MachineBasicBlock &MBB);
   bool runOnMachineFunction(MachineFunction &F) override;
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::AllVRegsAllocated);
+  }
 
 private:
-  Reference getRegReferences(MachineInstr *MI, unsigned Reg);
-  bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
+  Reference getRegReferences(MachineInstr &MI, unsigned Reg);
+  bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare,
                      SmallVectorImpl<MachineInstr *> &CCUsers);
-  bool convertToLoadAndTest(MachineInstr *MI);
-  bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
+  bool convertToLoadAndTest(MachineInstr &MI);
+  bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare,
                              SmallVectorImpl<MachineInstr *> &CCUsers);
-  bool optimizeCompareZero(MachineInstr *Compare,
+  bool optimizeCompareZero(MachineInstr &Compare,
                            SmallVectorImpl<MachineInstr *> &CCUsers);
-  bool fuseCompareAndBranch(MachineInstr *Compare,
-                            SmallVectorImpl<MachineInstr *> &CCUsers);
+  bool fuseCompareOperations(MachineInstr &Compare,
+                             SmallVectorImpl<MachineInstr *> &CCUsers);
 
   const SystemZInstrInfo *TII;
   const TargetRegisterInfo *TRI;
@@ -98,14 +101,12 @@ static bool isCCLiveOut(MachineBasicBlock &MBB) {
 }
 
 // Return true if any CC result of MI would reflect the value of Reg.
-static bool resultTests(MachineInstr *MI, unsigned Reg) {
-  if (MI->getNumOperands() > 0 &&
-      MI->getOperand(0).isReg() &&
-      MI->getOperand(0).isDef() &&
-      MI->getOperand(0).getReg() == Reg)
+static bool resultTests(MachineInstr &MI, unsigned Reg) {
+  if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() &&
+      MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg)
     return true;
 
-  switch (MI->getOpcode()) {
+  switch (MI.getOpcode()) {
   case SystemZ::LR:
   case SystemZ::LGR:
   case SystemZ::LGFR:
@@ -118,7 +119,7 @@ static bool resultTests(MachineInstr *MI, unsigned Reg) {
   case SystemZ::LTEBR:
   case SystemZ::LTDBR:
   case SystemZ::LTXBR:
-    if (MI->getOperand(1).getReg() == Reg)
+    if (MI.getOperand(1).getReg() == Reg)
       return true;
   }
 
@@ -126,10 +127,10 @@ static bool resultTests(MachineInstr *MI, unsigned Reg) {
 }
 
 // Describe the references to Reg or any of its aliases in MI.
-Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
+Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
   Reference Ref;
-  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
-    const MachineOperand &MO = MI->getOperand(I);
+  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
     if (MO.isReg()) {
       if (unsigned MOReg = MO.getReg()) {
         if (TRI->regsOverlap(MOReg, Reg)) {
@@ -146,23 +147,23 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
 
 // Return true if this is a load and test which can be optimized the
 // same way as compare instruction.
-static bool isLoadAndTestAsCmp(MachineInstr *MI) {
+static bool isLoadAndTestAsCmp(MachineInstr &MI) {
   // If we during isel used a load-and-test as a compare with 0, the
   // def operand is dead.
-  return ((MI->getOpcode() == SystemZ::LTEBR ||
-           MI->getOpcode() == SystemZ::LTDBR ||
-           MI->getOpcode() == SystemZ::LTXBR) &&
-          MI->getOperand(0).isDead());
+  return (MI.getOpcode() == SystemZ::LTEBR ||
+          MI.getOpcode() == SystemZ::LTDBR ||
+          MI.getOpcode() == SystemZ::LTXBR) &&
+         MI.getOperand(0).isDead();
 }
 
 // Return the source register of Compare, which is the unknown value
 // being tested.
-static unsigned getCompareSourceReg(MachineInstr *Compare) {
+static unsigned getCompareSourceReg(MachineInstr &Compare) {
   unsigned reg = 0;
-  if (Compare->isCompare())
-    reg = Compare->getOperand(0).getReg();
+  if (Compare.isCompare())
+    reg = Compare.getOperand(0).getReg();
   else if (isLoadAndTestAsCmp(Compare))
-    reg = Compare->getOperand(1).getReg();
+    reg = Compare.getOperand(1).getReg();
   assert (reg);
 
   return reg;
@@ -171,11 +172,11 @@ static unsigned getCompareSourceReg(MachineInstr *Compare) {
 // Compare compares the result of MI against zero.  If MI is an addition
 // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
 // and convert the branch to a BRCT(G).  Return true on success.
-bool
-SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
-                                  SmallVectorImpl<MachineInstr *> &CCUsers) {
+bool SystemZElimCompare::convertToBRCT(
+    MachineInstr &MI, MachineInstr &Compare,
+    SmallVectorImpl<MachineInstr *> &CCUsers) {
   // Check whether we have an addition of -1.
-  unsigned Opcode = MI->getOpcode();
+  unsigned Opcode = MI.getOpcode();
   unsigned BRCT;
   if (Opcode == SystemZ::AHI)
     BRCT = SystemZ::BRCT;
@@ -183,7 +184,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
     BRCT = SystemZ::BRCTG;
   else
     return false;
-  if (MI->getOperand(2).getImm() != -1)
+  if (MI.getOperand(2).getImm() != -1)
     return false;
 
   // Check whether we have a single JLH.
@@ -201,7 +202,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
   unsigned SrcReg = getCompareSourceReg(Compare);
   MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
   for (++MBBI; MBBI != MBBE; ++MBBI)
-    if (getRegReferences(MBBI, SrcReg))
+    if (getRegReferences(*MBBI, SrcReg))
       return false;
 
   // The transformation is OK.  Rebuild Branch as a BRCT(G).
@@ -210,24 +211,24 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
     Branch->RemoveOperand(0);
   Branch->setDesc(TII->get(BRCT));
   MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
-    .addOperand(MI->getOperand(0))
-    .addOperand(MI->getOperand(1))
-    .addOperand(Target)
-    .addReg(SystemZ::CC, RegState::ImplicitDefine);
-  MI->eraseFromParent();
+      .addOperand(MI.getOperand(0))
+      .addOperand(MI.getOperand(1))
+      .addOperand(Target)
+      .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+  MI.eraseFromParent();
   return true;
 }
 
 // If MI is a load instruction, try to convert it into a LOAD AND TEST.
 // Return true on success.
-bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) {
-  unsigned Opcode = TII->getLoadAndTest(MI->getOpcode());
+bool SystemZElimCompare::convertToLoadAndTest(MachineInstr &MI) {
+  unsigned Opcode = TII->getLoadAndTest(MI.getOpcode());
   if (!Opcode)
     return false;
 
-  MI->setDesc(TII->get(Opcode));
-  MachineInstrBuilder(*MI->getParent()->getParent(), MI)
-    .addReg(SystemZ::CC, RegState::ImplicitDefine);
+  MI.setDesc(TII->get(Opcode));
+  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+      .addReg(SystemZ::CC, RegState::ImplicitDefine);
   return true;
 }
 
@@ -236,10 +237,10 @@ bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) {
 // would also reflect the value of X.  Try to adjust CCUsers so that
 // they test the result of MI directly, returning true on success.
 // Leave everything unchanged on failure.
-bool SystemZElimCompare::
-adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
-                      SmallVectorImpl<MachineInstr *> &CCUsers) {
-  int Opcode = MI->getOpcode();
+bool SystemZElimCompare::adjustCCMasksForInstr(
+    MachineInstr &MI, MachineInstr &Compare,
+    SmallVectorImpl<MachineInstr *> &CCUsers) {
+  int Opcode = MI.getOpcode();
   const MCInstrDesc &Desc = TII->get(Opcode);
   unsigned MIFlags = Desc.TSFlags;
 
@@ -247,7 +248,7 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
   unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags);
 
   // For unsigned comparisons with zero, only equality makes sense.
-  unsigned CompareFlags = Compare->getDesc().TSFlags;
+  unsigned CompareFlags = Compare.getDesc().TSFlags;
   if (CompareFlags & SystemZII::IsLogical)
     ReusableCCMask &= SystemZ::CCMASK_CMP_EQ;
 
@@ -296,9 +297,9 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
   }
 
   // CC is now live after MI.
-  int CCDef = MI->findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI);
+  int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI);
   assert(CCDef >= 0 && "Couldn't find CC set");
-  MI->getOperand(CCDef).setIsDead(false);
+  MI.getOperand(CCDef).setIsDead(false);
 
   // Clear any intervening kills of CC.
   MachineBasicBlock::iterator MBBI = MI, MBBE = Compare;
@@ -309,8 +310,8 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
 }
 
 // Return true if Compare is a comparison against zero.
-static bool isCompareZero(MachineInstr *Compare) {
-  switch (Compare->getOpcode()) {
+static bool isCompareZero(MachineInstr &Compare) {
+  switch (Compare.getOpcode()) {
   case SystemZ::LTEBRCompare:
   case SystemZ::LTDBRCompare:
   case SystemZ::LTXBRCompare:
@@ -321,9 +322,8 @@ static bool isCompareZero(MachineInstr *Compare) {
     if (isLoadAndTestAsCmp(Compare))
       return true;
 
-    return (Compare->getNumExplicitOperands() == 2 &&
-            Compare->getOperand(1).isImm() &&
-            Compare->getOperand(1).getImm() == 0);
+    return Compare.getNumExplicitOperands() == 2 &&
+           Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
   }
 }
 
@@ -331,21 +331,20 @@ static bool isCompareZero(MachineInstr *Compare) {
 // a value against zero.  Return true on success and if Compare should be
 // deleted as dead.  CCUsers is the list of instructions that use the CC
 // value produced by Compare.
-bool SystemZElimCompare::
-optimizeCompareZero(MachineInstr *Compare,
-                    SmallVectorImpl<MachineInstr *> &CCUsers) {
+bool SystemZElimCompare::optimizeCompareZero(
+    MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
   if (!isCompareZero(Compare))
     return false;
 
   // Search back for CC results that are based on the first operand.
   unsigned SrcReg = getCompareSourceReg(Compare);
-  MachineBasicBlock &MBB = *Compare->getParent();
+  MachineBasicBlock &MBB = *Compare.getParent();
   MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin();
   Reference CCRefs;
   Reference SrcRefs;
   while (MBBI != MBBE) {
     --MBBI;
-    MachineInstr *MI = MBBI;
+    MachineInstr &MI = *MBBI;
     if (resultTests(MI, SrcReg)) {
       // Try to remove both MI and Compare by converting a branch to BRCT(G).
       // We don't care in this case whether CC is modified between MI and
@@ -373,54 +372,85 @@ optimizeCompareZero(MachineInstr *Compare,
 
 // Try to fuse comparison instruction Compare into a later branch.
 // Return true on success and if Compare is therefore redundant.
-bool SystemZElimCompare::
-fuseCompareAndBranch(MachineInstr *Compare,
-                     SmallVectorImpl<MachineInstr *> &CCUsers) {
-  // See whether we have a comparison that can be fused.
-  unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
-                                                  Compare);
-  if (!FusedOpcode)
-    return false;
-
+bool SystemZElimCompare::fuseCompareOperations(
+    MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
   // See whether we have a single branch with which to fuse.
   if (CCUsers.size() != 1)
     return false;
   MachineInstr *Branch = CCUsers[0];
-  if (Branch->getOpcode() != SystemZ::BRC)
+  SystemZII::FusedCompareType Type;
+  switch (Branch->getOpcode()) {
+  case SystemZ::BRC:
+    Type = SystemZII::CompareAndBranch;
+    break;
+  case SystemZ::CondReturn:
+    Type = SystemZII::CompareAndReturn;
+    break;
+  case SystemZ::CallBCR:
+    Type = SystemZII::CompareAndSibcall;
+    break;
+  case SystemZ::CondTrap:
+    Type = SystemZII::CompareAndTrap;
+    break;
+  default:
+    return false;
+  }
+
+  // See whether we have a comparison that can be fused.
+  unsigned FusedOpcode =
+      TII->getFusedCompare(Compare.getOpcode(), Type, &Compare);
+  if (!FusedOpcode)
     return false;
 
   // Make sure that the operands are available at the branch.
-  unsigned SrcReg = Compare->getOperand(0).getReg();
-  unsigned SrcReg2 = (Compare->getOperand(1).isReg() ?
-                      Compare->getOperand(1).getReg() : 0);
+  unsigned SrcReg = Compare.getOperand(0).getReg();
+  unsigned SrcReg2 =
+      Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
   MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
   for (++MBBI; MBBI != MBBE; ++MBBI)
     if (MBBI->modifiesRegister(SrcReg, TRI) ||
         (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
       return false;
 
-  // Read the branch mask and target.
+  // Read the branch mask, target (if applicable), regmask (if applicable).
   MachineOperand CCMask(MBBI->getOperand(1));
-  MachineOperand Target(MBBI->getOperand(2));
   assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
          "Invalid condition-code mask for integer comparison");
+  // This is only valid for CompareAndBranch.
+  MachineOperand Target(MBBI->getOperand(
+    Type == SystemZII::CompareAndBranch ? 2 : 0));
+  const uint32_t *RegMask;
+  if (Type == SystemZII::CompareAndSibcall)
+    RegMask = MBBI->getOperand(2).getRegMask();
 
   // Clear out all current operands.
   int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
-  assert(CCUse >= 0 && "BRC must use CC");
+  assert(CCUse >= 0 && "BRC/BCR must use CC");
   Branch->RemoveOperand(CCUse);
-  Branch->RemoveOperand(2);
+  // Remove target (branch) or regmask (sibcall).
+  if (Type == SystemZII::CompareAndBranch ||
+      Type == SystemZII::CompareAndSibcall)
+    Branch->RemoveOperand(2);
   Branch->RemoveOperand(1);
   Branch->RemoveOperand(0);
 
   // Rebuild Branch as a fused compare and branch.
   Branch->setDesc(TII->get(FusedOpcode));
-  MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
-    .addOperand(Compare->getOperand(0))
-    .addOperand(Compare->getOperand(1))
-    .addOperand(CCMask)
-    .addOperand(Target)
-    .addReg(SystemZ::CC, RegState::ImplicitDefine);
+  MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
+  MIB.addOperand(Compare.getOperand(0))
+      .addOperand(Compare.getOperand(1))
+      .addOperand(CCMask);
+
+  if (Type == SystemZII::CompareAndBranch) {
+    // Only conditional branches define CC, as they may be converted back
+    // to a non-fused branch because of a long displacement.  Conditional
+    // returns don't have that problem.
+    MIB.addOperand(Target)
+       .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+  }
+
+  if (Type == SystemZII::CompareAndSibcall)
+    MIB.addRegMask(RegMask);
 
   // Clear any intervening kills of SrcReg and SrcReg2.
   MBBI = Compare;
@@ -445,29 +475,31 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
   SmallVector<MachineInstr *, 4> CCUsers;
   MachineBasicBlock::iterator MBBI = MBB.end();
   while (MBBI != MBB.begin()) {
-    MachineInstr *MI = --MBBI;
-    if (CompleteCCUsers &&
-        (MI->isCompare() || isLoadAndTestAsCmp(MI)) &&
+    MachineInstr &MI = *--MBBI;
+    if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) &&
         (optimizeCompareZero(MI, CCUsers) ||
-         fuseCompareAndBranch(MI, CCUsers))) {
+         fuseCompareOperations(MI, CCUsers))) {
       ++MBBI;
-      MI->eraseFromParent();
+      MI.eraseFromParent();
       Changed = true;
       CCUsers.clear();
       continue;
     }
 
-    if (MI->definesRegister(SystemZ::CC)) {
+    if (MI.definesRegister(SystemZ::CC)) {
       CCUsers.clear();
       CompleteCCUsers = true;
     }
-    if (MI->readsRegister(SystemZ::CC) && CompleteCCUsers)
-      CCUsers.push_back(MI);
+    if (MI.readsRegister(SystemZ::CC) && CompleteCCUsers)
+      CCUsers.push_back(&MI);
   }
   return Changed;
 }
 
 bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
+  if (skipFunction(*F.getFunction()))
+    return false;
+
   TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
   TRI = &TII->getRegisterInfo();
 
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index e1b20d0536d1..ccaed49475ca 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -81,6 +81,12 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
     for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
       SavedRegs.set(SystemZ::ArgGPRs[I]);
 
+  // If there are any landing pads, entering them will modify r6/r7.
+  if (!MF.getMMI().getLandingPads().empty()) {
+    SavedRegs.set(SystemZ::R6D);
+    SavedRegs.set(SystemZ::R7D);
+  }
+
   // If the function requires a frame pointer, record that the hard
   // frame pointer will be clobbered.
   if (HasFP)
@@ -258,7 +264,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     // Do a second scan adding regs as being defined by instruction
     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
       unsigned Reg = CSI[I].getReg();
-      if (Reg != LowGPR && Reg != HighGPR)
+      if (Reg != LowGPR && Reg != HighGPR &&
+          SystemZ::GR64BitRegClass.contains(Reg))
         MIB.addReg(Reg, RegState::ImplicitDefine);
     }
   }
@@ -353,6 +360,15 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
 
   uint64_t StackSize = getAllocatedStackSize(MF);
   if (StackSize) {
+    // Determine if we want to store a backchain.
+    bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+
+    // If we need backchain, save current stack pointer.  R1 is free at this
+    // point.
+    if (StoreBackchain)
+      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR))
+        .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
+
     // Allocate StackSize bytes.
     int64_t Delta = -int64_t(StackSize);
     emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
@@ -363,6 +379,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
     BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex);
     SPOffsetFromCFA += Delta;
+
+    if (StoreBackchain)
+      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0);
   }
 
   if (HasFP) {
@@ -511,7 +531,7 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
   return true;
 }
 
-void SystemZFrameLowering::
+MachineBasicBlock::iterator SystemZFrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI) const {
@@ -520,7 +540,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
   case SystemZ::ADJCALLSTACKUP:
     assert(hasReservedCallFrame(MF) &&
            "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
-    MBB.erase(MI);
+    return MBB.erase(MI);
     break;
 
   default:
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 46bb6b7a7573..d43a176ad874 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -46,10 +46,9 @@ public:
   int getFrameIndexReference(const MachineFunction &MF, int FI,
                              unsigned &FrameReg) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
-  void eliminateCallFramePseudoInstr(MachineFunction &MF,
-                                     MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator MI) const
-    override;
+  MachineBasicBlock::iterator
+  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI) const override;
 
   // Return the number of bytes in the callee-allocated part of the frame.
   uint64_t getAllocatedStackSize(const MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index a9093094d884..cd7fcc3070a4 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -113,7 +113,8 @@ static uint64_t allOnes(unsigned int Count) {
 //   (and (rotl Input, Rotate), Mask)
 //
 // otherwise.  The output value has BitSize bits, although Input may be
-// narrower (in which case the upper bits are don't care).
+// narrower (in which case the upper bits are don't care), or wider (in which
+// case the result will be truncated as part of the operation).
 struct RxSBGOperands {
   RxSBGOperands(unsigned Op, SDValue N)
     : Opcode(Op), BitSize(N.getValueType().getSizeInBits()),
@@ -279,18 +280,18 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
   bool expandRxSBG(RxSBGOperands &RxSBG) const;
 
   // Return an undefined value of type VT.
-  SDValue getUNDEF(SDLoc DL, EVT VT) const;
+  SDValue getUNDEF(const SDLoc &DL, EVT VT) const;
 
   // Convert N to VT, if it isn't already.
-  SDValue convertTo(SDLoc DL, EVT VT, SDValue N) const;
+  SDValue convertTo(const SDLoc &DL, EVT VT, SDValue N) const;
 
   // Try to implement AND or shift node N using RISBG with the zero flag set.
   // Return the selected node on success, otherwise return null.
-  SDNode *tryRISBGZero(SDNode *N);
+  bool tryRISBGZero(SDNode *N);
 
   // Try to use RISBG or Opcode to implement OR or XOR node N.
   // Return the selected node on success, otherwise return null.
-  SDNode *tryRxSBG(SDNode *N, unsigned Opcode);
+  bool tryRxSBG(SDNode *N, unsigned Opcode);
 
   // If Op0 is null, then Node is a constant that can be loaded using:
   //
@@ -299,14 +300,14 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
   // If Op0 is nonnull, then Node can be implemented using:
   //
   //   (Opcode (Opcode Op0 UpperVal) LowerVal)
-  SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
-                              uint64_t UpperVal, uint64_t LowerVal);
+  void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
+                           uint64_t UpperVal, uint64_t LowerVal);
 
   // Try to use gather instruction Opcode to implement vector insertion N.
-  SDNode *tryGather(SDNode *N, unsigned Opcode);
+  bool tryGather(SDNode *N, unsigned Opcode);
 
   // Try to use scatter instruction Opcode to implement store Store.
-  SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode);
+  bool tryScatter(StoreSDNode *Store, unsigned Opcode);
 
   // Return true if Load and Store are loads and stores of the same size
   // and are guaranteed not to overlap.  Such operations can be implemented
@@ -343,7 +344,7 @@ public:
   }
 
   // Override SelectionDAGISel.
-  SDNode *Select(SDNode *Node) override;
+  void Select(SDNode *Node) override;
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
 
@@ -554,6 +555,10 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
       expandDisp(AM, true, SDValue(),
                  cast<ConstantSDNode>(Addr)->getSExtValue()))
     ;
+  // Also see if it's a bare ADJDYNALLOC.
+  else if (Addr.getOpcode() == SystemZISD::ADJDYNALLOC &&
+           expandAdjDynAlloc(AM, true, SDValue()))
+    ;
   else
     // Otherwise try expanding each component.
     while (expandAddress(AM, true) ||
@@ -741,6 +746,16 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
   SDValue N = RxSBG.Input;
   unsigned Opcode = N.getOpcode();
   switch (Opcode) {
+  case ISD::TRUNCATE: {
+    if (RxSBG.Opcode == SystemZ::RNSBG)
+      return false;
+    uint64_t BitSize = N.getValueType().getSizeInBits();
+    uint64_t Mask = allOnes(BitSize);
+    if (!refineRxSBGMask(RxSBG, Mask))
+      return false;
+    RxSBG.Input = N.getOperand(0);
+    return true;
+  }
   case ISD::AND: {
     if (RxSBG.Opcode == SystemZ::RNSBG)
       return false;
@@ -888,12 +903,13 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
   }
 }
 
-SDValue SystemZDAGToDAGISel::getUNDEF(SDLoc DL, EVT VT) const {
+SDValue SystemZDAGToDAGISel::getUNDEF(const SDLoc &DL, EVT VT) const {
   SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
   return SDValue(N, 0);
 }
 
-SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
+SDValue SystemZDAGToDAGISel::convertTo(const SDLoc &DL, EVT VT,
+                                       SDValue N) const {
   if (N.getValueType() == MVT::i32 && VT == MVT::i64)
     return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32,
                                          DL, VT, getUNDEF(DL, MVT::i64), N);
@@ -903,23 +919,27 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
   return N;
 }
 
-SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
+bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
   if (!VT.isInteger() || VT.getSizeInBits() > 64)
-    return nullptr;
+    return false;
   RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
   unsigned Count = 0;
   while (expandRxSBG(RISBG))
-    if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND)
+    // The widening or narrowing is expected to be free.
+    // Counting widening or narrowing as a saved operation will result in
+    // preferring an R*SBG over a simple shift/logical instruction.
+    if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND &&
+        RISBG.Input.getOpcode() != ISD::TRUNCATE)
       Count += 1;
   if (Count == 0)
-    return nullptr;
+    return false;
   if (Count == 1) {
     // Prefer to use normal shift instructions over RISBG, since they can handle
     // all cases and are sometimes shorter.
     if (N->getOpcode() != ISD::AND)
-      return nullptr;
+      return false;
 
     // Prefer register extensions like LLC over RISBG.  Also prefer to start
     // out with normal ANDs if one instruction would be enough.  We can convert
@@ -934,9 +954,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
       if (MaskN->getZExtValue() != RISBG.Mask) {
         SDValue NewMask = CurDAG->getConstant(RISBG.Mask, DL, VT);
         N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask);
-        return SelectCode(N);
+        SelectCode(N);
+        return true;
       }
-      return nullptr;
+      return false;
     }
   }
 
@@ -952,8 +973,11 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
     }
 
     SDValue In = convertTo(DL, VT, RISBG.Input);
-    N = CurDAG->getMachineNode(OpCode, DL, VT, In);
-    return convertTo(DL, VT, SDValue(N, 0)).getNode();
+    SDValue New = convertTo(
+        DL, VT, SDValue(CurDAG->getMachineNode(OpCode, DL, VT, In), 0));
+    ReplaceUses(N, New.getNode());
+    CurDAG->RemoveDeadNode(N);
+    return true;
   }
 
   unsigned Opcode = SystemZ::RISBG;
@@ -974,15 +998,18 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
     CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32),
     CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32)
   };
-  N = CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops);
-  return convertTo(DL, VT, SDValue(N, 0)).getNode();
+  SDValue New = convertTo(
+      DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0));
+  ReplaceUses(N, New.getNode());
+  CurDAG->RemoveDeadNode(N);
+  return true;
 }
 
-SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
+bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
   if (!VT.isInteger() || VT.getSizeInBits() > 64)
-    return nullptr;
+    return false;
   // Try treating each operand of N as the second operand of the RxSBG
   // and see which goes deepest.
   RxSBGOperands RxSBG[] = {
@@ -992,12 +1019,16 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
   unsigned Count[] = { 0, 0 };
   for (unsigned I = 0; I < 2; ++I)
     while (expandRxSBG(RxSBG[I]))
-      if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND)
+      // The widening or narrowing is expected to be free.
+      // Counting widening or narrowing as a saved operation will result in
+      // preferring an R*SBG over a simple shift/logical instruction.
+      if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND &&
+          RxSBG[I].Input.getOpcode() != ISD::TRUNCATE)
         Count[I] += 1;
 
   // Do nothing if neither operand is suitable.
   if (Count[0] == 0 && Count[1] == 0)
-    return nullptr;
+    return false;
 
   // Pick the deepest second operand.
   unsigned I = Count[0] > Count[1] ? 0 : 1;
@@ -1007,7 +1038,7 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
   if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0)
     if (auto *Load = dyn_cast<LoadSDNode>(Op0.getNode()))
       if (Load->getMemoryVT() == MVT::i8)
-        return nullptr;
+        return false;
 
   // See whether we can avoid an AND in the first operand by converting
   // ROSBG to RISBG.
@@ -1025,47 +1056,70 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
     CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32),
     CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32)
   };
-  N = CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops);
-  return convertTo(DL, VT, SDValue(N, 0)).getNode();
+  SDValue New = convertTo(
+      DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops), 0));
+  ReplaceNode(N, New.getNode());
+  return true;
 }
 
-SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
-                                                 SDValue Op0, uint64_t UpperVal,
-                                                 uint64_t LowerVal) {
+void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
+                                              SDValue Op0, uint64_t UpperVal,
+                                              uint64_t LowerVal) {
   EVT VT = Node->getValueType(0);
   SDLoc DL(Node);
   SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT);
   if (Op0.getNode())
     Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper);
-  Upper = SDValue(Select(Upper.getNode()), 0);
+
+  {
+    // When we haven't passed in Op0, Upper will be a constant. In order to
+    // prevent folding back to the large immediate in `Or = getNode(...)` we run
+    // SelectCode first and end up with an opaque machine node. This means that
+    // we need to use a handle to keep track of Upper in case it gets CSE'd by
+    // SelectCode.
+    //
+    // Note that in the case where Op0 is passed in we could just call
+    // SelectCode(Upper) later, along with the SelectCode(Or), and avoid needing
+    // the handle at all, but it's fine to do it here.
+    //
+    // TODO: This is a pretty hacky way to do this. Can we do something that
+    // doesn't require a two paragraph explanation?
+    HandleSDNode Handle(Upper);
+    SelectCode(Upper.getNode());
+    Upper = Handle.getValue();
+  }
 
   SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT);
   SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower);
-  return Or.getNode();
+
+  ReplaceUses(Node, Or.getNode());
+  CurDAG->RemoveDeadNode(Node);
+
+  SelectCode(Or.getNode());
 }
 
-SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
+bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
   SDValue ElemV = N->getOperand(2);
   auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
   if (!ElemN)
-    return 0;
+    return false;
 
   unsigned Elem = ElemN->getZExtValue();
   EVT VT = N->getValueType(0);
   if (Elem >= VT.getVectorNumElements())
-    return 0;
+    return false;
 
   auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
   if (!Load || !Load->hasOneUse())
-    return 0;
+    return false;
   if (Load->getMemoryVT().getSizeInBits() !=
       Load->getValueType(0).getSizeInBits())
-    return 0;
+    return false;
 
   SDValue Base, Disp, Index;
   if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) ||
       Index.getValueType() != VT.changeVectorElementTypeToInteger())
-    return 0;
+    return false;
 
   SDLoc DL(Load);
   SDValue Ops[] = {
@@ -1074,39 +1128,41 @@ SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
   };
   SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);
   ReplaceUses(SDValue(Load, 1), SDValue(Res, 1));
-  return Res;
+  ReplaceNode(N, Res);
+  return true;
 }
 
-SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
+bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
   SDValue Value = Store->getValue();
   if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
-    return 0;
+    return false;
   if (Store->getMemoryVT().getSizeInBits() !=
       Value.getValueType().getSizeInBits())
-    return 0;
+    return false;
 
   SDValue ElemV = Value.getOperand(1);
   auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
   if (!ElemN)
-    return 0;
+    return false;
 
   SDValue Vec = Value.getOperand(0);
   EVT VT = Vec.getValueType();
   unsigned Elem = ElemN->getZExtValue();
   if (Elem >= VT.getVectorNumElements())
-    return 0;
+    return false;
 
   SDValue Base, Disp, Index;
   if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) ||
       Index.getValueType() != VT.changeVectorElementTypeToInteger())
-    return 0;
+    return false;
 
   SDLoc DL(Store);
   SDValue Ops[] = {
     Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32),
     Store->getChain()
   };
-  return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+  ReplaceNode(Store, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
+  return true;
 }
 
 bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
@@ -1167,7 +1223,7 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
   return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
 }
 
-SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
+void SystemZDAGToDAGISel::Select(SDNode *Node) {
   // Dump information about the Node being selected
   DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
 
@@ -1175,43 +1231,47 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
   if (Node->isMachineOpcode()) {
     DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
     Node->setNodeId(-1);
-    return nullptr;
+    return;
   }
 
   unsigned Opcode = Node->getOpcode();
-  SDNode *ResNode = nullptr;
   switch (Opcode) {
   case ISD::OR:
     if (Node->getOperand(1).getOpcode() != ISD::Constant)
-      ResNode = tryRxSBG(Node, SystemZ::ROSBG);
+      if (tryRxSBG(Node, SystemZ::ROSBG))
+        return;
     goto or_xor;
 
   case ISD::XOR:
     if (Node->getOperand(1).getOpcode() != ISD::Constant)
-      ResNode = tryRxSBG(Node, SystemZ::RXSBG);
+      if (tryRxSBG(Node, SystemZ::RXSBG))
+        return;
     // Fall through.
   or_xor:
     // If this is a 64-bit operation in which both 32-bit halves are nonzero,
     // split the operation into two.
-    if (!ResNode && Node->getValueType(0) == MVT::i64)
+    if (Node->getValueType(0) == MVT::i64)
       if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
         uint64_t Val = Op1->getZExtValue();
-        if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val))
-          Node = splitLargeImmediate(Opcode, Node, Node->getOperand(0),
-                                     Val - uint32_t(Val), uint32_t(Val));
+        if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
+          splitLargeImmediate(Opcode, Node, Node->getOperand(0),
+                              Val - uint32_t(Val), uint32_t(Val));
+          return;
+        }
       }
     break;
 
   case ISD::AND:
     if (Node->getOperand(1).getOpcode() != ISD::Constant)
-      ResNode = tryRxSBG(Node, SystemZ::RNSBG);
+      if (tryRxSBG(Node, SystemZ::RNSBG))
+        return;
     // Fall through.
   case ISD::ROTL:
   case ISD::SHL:
   case ISD::SRL:
   case ISD::ZERO_EXTEND:
-    if (!ResNode)
-      ResNode = tryRISBGZero(Node);
+    if (tryRISBGZero(Node))
+      return;
     break;
 
   case ISD::Constant:
@@ -1219,9 +1279,11 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
     // LLIHF and LGFI, split it into two 32-bit pieces.
     if (Node->getValueType(0) == MVT::i64) {
       uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue();
-      if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val))
-        Node = splitLargeImmediate(ISD::OR, Node, SDValue(),
-                                   Val - uint32_t(Val), uint32_t(Val));
+      if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) {
+        splitLargeImmediate(ISD::OR, Node, SDValue(), Val - uint32_t(Val),
+                            uint32_t(Val));
+        return;
+      }
     }
     break;
 
@@ -1249,63 +1311,75 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
   case ISD::INSERT_VECTOR_ELT: {
     EVT VT = Node->getValueType(0);
     unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
-    if (ElemBitSize == 32)
-      ResNode = tryGather(Node, SystemZ::VGEF);
-    else if (ElemBitSize == 64)
-      ResNode = tryGather(Node, SystemZ::VGEG);
+    if (ElemBitSize == 32) {
+      if (tryGather(Node, SystemZ::VGEF))
+        return;
+    } else if (ElemBitSize == 64) {
+      if (tryGather(Node, SystemZ::VGEG))
+        return;
+    }
     break;
   }
 
   case ISD::STORE: {
     auto *Store = cast<StoreSDNode>(Node);
     unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits();
-    if (ElemBitSize == 32)
-      ResNode = tryScatter(Store, SystemZ::VSCEF);
-    else if (ElemBitSize == 64)
-      ResNode = tryScatter(Store, SystemZ::VSCEG);
+    if (ElemBitSize == 32) {
+      if (tryScatter(Store, SystemZ::VSCEF))
+        return;
+    } else if (ElemBitSize == 64) {
+      if (tryScatter(Store, SystemZ::VSCEG))
+        return;
+    }
     break;
   }
   }
 
-  // Select the default instruction
-  if (!ResNode)
-    ResNode = SelectCode(Node);
-
-  DEBUG(errs() << "=> ";
-        if (ResNode == nullptr || ResNode == Node)
-          Node->dump(CurDAG);
-        else
-          ResNode->dump(CurDAG);
-        errs() << "\n";
-        );
-  return ResNode;
+  SelectCode(Node);
 }
 
 bool SystemZDAGToDAGISel::
 SelectInlineAsmMemoryOperand(const SDValue &Op,
                              unsigned ConstraintID,
                              std::vector<SDValue> &OutOps) {
+  SystemZAddressingMode::AddrForm Form;
+  SystemZAddressingMode::DispRange DispRange;
+  SDValue Base, Disp, Index;
+
   switch(ConstraintID) {
   default:
     llvm_unreachable("Unexpected asm memory constraint");
   case InlineAsm::Constraint_i:
-  case InlineAsm::Constraint_m:
   case InlineAsm::Constraint_Q:
+    // Accept an address with a short displacement, but no index.
+    Form = SystemZAddressingMode::FormBD;
+    DispRange = SystemZAddressingMode::Disp12Only;
+    break;
   case InlineAsm::Constraint_R:
+    // Accept an address with a short displacement and an index.
+    Form = SystemZAddressingMode::FormBDXNormal;
+    DispRange = SystemZAddressingMode::Disp12Only;
+    break;
   case InlineAsm::Constraint_S:
+    // Accept an address with a long displacement, but no index.
+    Form = SystemZAddressingMode::FormBD;
+    DispRange = SystemZAddressingMode::Disp20Only;
+    break;
   case InlineAsm::Constraint_T:
-    // Accept addresses with short displacements, which are compatible
-    // with Q, R, S and T.  But keep the index operand for future expansion.
-    SDValue Base, Disp, Index;
-    if (selectBDXAddr(SystemZAddressingMode::FormBD,
-                      SystemZAddressingMode::Disp12Only,
-                      Op, Base, Disp, Index)) {
-      OutOps.push_back(Base);
-      OutOps.push_back(Disp);
-      OutOps.push_back(Index);
-      return false;
-    }
+  case InlineAsm::Constraint_m:
+    // Accept an address with a long displacement and an index.
+    // m works the same as T, as this is the most general case.
+    Form = SystemZAddressingMode::FormBDXNormal;
+    DispRange = SystemZAddressingMode::Disp20Only;
     break;
   }
+
+  if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) {
+    OutOps.push_back(Base);
+    OutOps.push_back(Disp);
+    OutOps.push_back(Index);
+    return false;
+  }
+
   return true;
 }
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index b0a612764636..14991bbbd365 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -184,8 +184,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
 
       // No special instructions for these.
       setOperationAction(ISD::CTTZ,            VT, Expand);
-      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
-      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
       setOperationAction(ISD::ROTR,            VT, Expand);
 
       // Use *MUL_LOHI where possible instead of MULH*.
@@ -216,6 +214,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);
 
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  // Traps are legal, as we will convert them to "j .+2".
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
   // z10 has instructions for signed but not unsigned FP conversion.
   // Handle unsigned 32-bit types as signed 64-bit types.
   if (!Subtarget.hasFPExtension()) {
@@ -253,6 +256,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   // We need to handle dynamic allocations specially because of the
   // 160-byte area at the bottom of the stack.
   setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
+  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
 
   // Use custom expanders so that we can force the function to use
   // a frame pointer.
@@ -310,8 +314,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::CTPOP, VT, Custom);
       setOperationAction(ISD::CTTZ, VT, Legal);
       setOperationAction(ISD::CTLZ, VT, Legal);
-      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
-      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
 
       // Convert a GPR scalar to a vector by inserting it into element 0.
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
@@ -437,6 +439,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
+  setTargetDAGCombine(ISD::BSWAP);
+  setTargetDAGCombine(ISD::SHL);
+  setTargetDAGCombine(ISD::SRA);
+  setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::ROTL);
 
   // Handle intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -799,7 +806,7 @@ static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
 // Value is a value that has been passed to us in the location described by VA
 // (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
 // any loads onto Chain.
-static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                    CCValAssign &VA, SDValue Chain,
                                    SDValue Value) {
   // If the argument has been promoted from a smaller type, insert an
@@ -813,16 +820,12 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
 
   if (VA.isExtInLoc())
     Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
-  else if (VA.getLocInfo() == CCValAssign::Indirect)
-    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
-                        MachinePointerInfo(), false, false, false, 0);
   else if (VA.getLocInfo() == CCValAssign::BCvt) {
     // If this is a short vector argument loaded from the stack,
     // extend from i64 to full vector size and then bitcast.
     assert(VA.getLocVT() == MVT::i64);
     assert(VA.getValVT().isVector());
-    Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
-                        Value, DAG.getUNDEF(MVT::i64));
+    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
     Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
   } else
     assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
@@ -832,7 +835,7 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
 // Value is a value of type VA.getValVT() that we need to copy into
 // the location described by VA.  Return a copy of Value converted to
 // VA.getValVT().  The caller is responsible for handling indirect values.
-static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                    CCValAssign &VA, SDValue Value) {
   switch (VA.getLocInfo()) {
   case CCValAssign::SExt:
@@ -856,11 +859,10 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
   }
 }
 
-SDValue SystemZTargetLowering::
-LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
-                     const SmallVectorImpl<ISD::InputArg> &Ins,
-                     SDLoc DL, SelectionDAG &DAG,
-                     SmallVectorImpl<SDValue> &InVals) const {
+SDValue SystemZTargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -868,6 +870,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
       MF.getInfo<SystemZMachineFunctionInfo>();
   auto *TFL =
       static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
 
   // Detect unsupported vector argument types.
   if (Subtarget.hasVector())
@@ -930,19 +933,34 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
       // Create the SelectionDAG nodes corresponding to a load
       // from this parameter.  Unpromoted ints and floats are
       // passed as right-justified 8-byte values.
-      EVT PtrVT = getPointerTy(DAG.getDataLayout());
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
         FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                           DAG.getIntPtrConstant(4, DL));
       ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
-                             MachinePointerInfo::getFixedStack(MF, FI), false,
-                             false, false, 0);
+                             MachinePointerInfo::getFixedStack(MF, FI));
     }
 
     // Convert the value of the argument register into the value that's
     // being passed.
-    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
+    if (VA.getLocInfo() == CCValAssign::Indirect) {
+      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
+                                   MachinePointerInfo()));
+      // If the original argument was split (e.g. i128), we need
+      // to load all parts of it here (using the same address).
+      unsigned ArgIndex = Ins[I].OrigArgIndex;
+      assert (Ins[I].PartOffset == 0);
+      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
+        CCValAssign &PartVA = ArgLocs[I + 1];
+        unsigned PartOffset = Ins[I + 1].PartOffset;
+        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
+                                      DAG.getIntPtrConstant(PartOffset, DL));
+        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
+                                     MachinePointerInfo()));
+        ++I;
+      }
+    } else
+      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
   }
 
   if (IsVarArg) {
@@ -973,8 +991,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                                      &SystemZ::FP64BitRegClass);
         SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
         MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
-                                 MachinePointerInfo::getFixedStack(MF, FI),
-                                 false, false, 0);
+                                 MachinePointerInfo::getFixedStack(MF, FI));
       }
       // Join the stores, which are independent of one another.
       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
@@ -987,9 +1004,11 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
 }
 
 static bool canUseSiblingCall(const CCState &ArgCCInfo,
-                              SmallVectorImpl<CCValAssign> &ArgLocs) {
+                              SmallVectorImpl<CCValAssign> &ArgLocs,
+                              SmallVectorImpl<ISD::OutputArg> &Outs) {
   // Punt if there are any indirect or stack arguments, or if the call
-  // needs the call-saved argument register R6.
+  // needs the callee-saved argument register R6, or if the call uses
+  // the callee-saved register arguments SwiftSelf and SwiftError.
   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
     CCValAssign &VA = ArgLocs[I];
     if (VA.getLocInfo() == CCValAssign::Indirect)
@@ -999,6 +1018,8 @@ static bool canUseSiblingCall(const CCState &ArgCCInfo,
     unsigned Reg = VA.getLocReg();
     if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
       return false;
+    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
+      return false;
   }
   return true;
 }
@@ -1032,7 +1053,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   // We don't support GuaranteedTailCallOpt, only automatically-detected
   // sibling calls.
-  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
+  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
     IsTailCall = false;
 
   // Get a count of how many bytes are to be pushed on the stack.
@@ -1054,11 +1075,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
     if (VA.getLocInfo() == CCValAssign::Indirect) {
       // Store the argument in a stack slot and pass its address.
-      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
-      MemOpChains.push_back(DAG.getStore(
-          Chain, DL, ArgValue, SpillSlot,
-          MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
+      MemOpChains.push_back(
+          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+                       MachinePointerInfo::getFixedStack(MF, FI)));
+      // If the original argument was split (e.g. i128), we need
+      // to store all parts of it here (and pass just one address).
+      unsigned ArgIndex = Outs[I].OrigArgIndex;
+      assert (Outs[I].PartOffset == 0);
+      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
+        SDValue PartValue = OutVals[I + 1];
+        unsigned PartOffset = Outs[I + 1].PartOffset;
+        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
+                                      DAG.getIntPtrConstant(PartOffset, DL));
+        MemOpChains.push_back(
+            DAG.getStore(Chain, DL, PartValue, Address,
+                         MachinePointerInfo::getFixedStack(MF, FI)));
+        ++I;
+      }
       ArgValue = SpillSlot;
     } else
       ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
@@ -1080,9 +1115,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     DAG.getIntPtrConstant(Offset, DL));
 
       // Emit the store.
-      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
-                                         MachinePointerInfo(),
-                                         false, false, 0));
+      MemOpChains.push_back(
+          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
     }
   }
 
@@ -1180,17 +1214,23 @@ CanLowerReturn(CallingConv::ID CallConv,
   if (Subtarget.hasVector())
     VerifyVectorTypes(Outs);
 
+  // Special case that we cannot easily detect in RetCC_SystemZ since
+  // i128 is not a legal type.
+  for (auto &Out : Outs)
+    if (Out.ArgVT == MVT::i128)
+      return false;
+
   SmallVector<CCValAssign, 16> RetLocs;
   CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
   return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
 }
 
 SDValue
-SystemZTargetLowering::LowerReturn(SDValue Chain,
-                                   CallingConv::ID CallConv, bool IsVarArg,
+SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+                                   bool IsVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
-                                   SDLoc DL, SelectionDAG &DAG) const {
+                                   const SDLoc &DL, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
 
   // Detect unsupported vector return types.
@@ -1235,8 +1275,8 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
   return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
 }
 
-SDValue SystemZTargetLowering::
-prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
+SDValue SystemZTargetLowering::prepareVolatileOrAtomicLoad(
+    SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const {
   return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
 }
 
@@ -1399,6 +1439,11 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
     CCValid = SystemZ::CCMASK_VCMP;
     return true;
 
+  case Intrinsic::s390_tdc:
+    Opcode = SystemZISD::TDC;
+    CCValid = SystemZ::CCMASK_TDC;
+    return true;
+
   default:
     return false;
   }
@@ -1538,7 +1583,7 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
 
 // If C can be converted to a comparison against zero, adjust the operands
 // as necessary.
-static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
   if (C.ICmpType == SystemZICMP::UnsignedOnly)
     return;
 
@@ -1558,7 +1603,8 @@ static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
 
 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
 // adjust the operands as necessary.
-static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
+                             Comparison &C) {
   // For us to make any changes, it must a comparison between a single-use
   // load and a constant.
   if (!C.Op0.hasOneUse() ||
@@ -1614,11 +1660,10 @@ static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
                               ISD::ZEXTLOAD);
   if (C.Op0.getValueType() != MVT::i32 ||
       Load->getExtensionType() != ExtType)
-    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
-                           Load->getChain(), Load->getBasePtr(),
-                           Load->getPointerInfo(), Load->getMemoryVT(),
-                           Load->isVolatile(), Load->isNonTemporal(),
-                           Load->isInvariant(), Load->getAlignment());
+    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
+                           Load->getBasePtr(), Load->getPointerInfo(),
+                           Load->getMemoryVT(), Load->getAlignment(),
+                           Load->getMemOperand()->getFlags());
 
   // Make sure that the second operand is an i32 with the right value.
   if (C.Op1.getValueType() != MVT::i32 ||
@@ -1719,7 +1764,8 @@ static unsigned reverseCCMask(unsigned CCMask) {
 // Check whether C tests for equality between X and Y and whether X - Y
 // or Y - X is also computed.  In that case it's better to compare the
 // result of the subtraction against zero.
-static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
+                                 Comparison &C) {
   if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
       C.CCMask == SystemZ::CCMASK_CMP_NE) {
     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
@@ -1784,7 +1830,8 @@ static void adjustForLTGFR(Comparison &C) {
 // If C compares the truncation of an extending load, try to compare
 // the untruncated value instead.  This exposes more opportunities to
 // reuse CC.
-static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
+                               Comparison &C) {
   if (C.Op0.getOpcode() == ISD::TRUNCATE &&
       C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
       C.Op1.getOpcode() == ISD::Constant &&
@@ -1915,7 +1962,8 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
 
 // See whether C can be implemented as a TEST UNDER MASK instruction.
 // Update the arguments with the TM version if so.
-static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
+                                   Comparison &C) {
   // Check that we have a comparison with a constant.
   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
   if (!ConstOp1)
@@ -2036,7 +2084,7 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
 
 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
-                         ISD::CondCode Cond, SDLoc DL) {
+                         ISD::CondCode Cond, const SDLoc &DL) {
   if (CmpOp1.getOpcode() == ISD::Constant) {
     uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
     unsigned Opcode, CCValid;
@@ -2089,7 +2137,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
 }
 
 // Emit the comparison instruction described by C.
-static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
   if (!C.Op1.getNode()) {
     SDValue Op;
     switch (C.Op0.getOpcode()) {
@@ -2119,9 +2167,9 @@ static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
 // 64 bits.  Extend is the extension type to use.  Store the high part
 // in Hi and the low part in Lo.
-static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
-                            unsigned Extend, SDValue Op0, SDValue Op1,
-                            SDValue &Hi, SDValue &Lo) {
+static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
+                            SDValue Op0, SDValue Op1, SDValue &Hi,
+                            SDValue &Lo) {
   Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
   Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
   SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
@@ -2136,10 +2184,9 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
 // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
 // on the extended Op0 and (unextended) Op1.  Store the even register result
 // in Even and the odd register result in Odd.
-static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
-                             unsigned Extend, unsigned Opcode,
-                             SDValue Op0, SDValue Op1,
-                             SDValue &Even, SDValue &Odd) {
+static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+                             unsigned Extend, unsigned Opcode, SDValue Op0,
+                             SDValue Op1, SDValue &Even, SDValue &Odd) {
   SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
   SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
                                SDValue(In128, 0), Op1);
@@ -2151,7 +2198,7 @@ static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
 // Return an i32 value that is 1 if the CC value produced by Glue is
 // in the mask CCMask and 0 otherwise.  CC is known to have a value
 // in CCValid, so other values can be ignored.
-static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
+static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue,
                          unsigned CCValid, unsigned CCMask) {
   IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
   SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
@@ -2220,7 +2267,7 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
 
 // Return a v2f64 that contains the extended form of elements Start and Start+1
 // of v4f32 value Op.
-static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
                                   SDValue Op) {
   int Mask[] = { Start, -1, Start + 1, -1 };
   Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
@@ -2229,7 +2276,7 @@ static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
 
 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
 // producing a result of type VT.
-static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
+static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL,
                             EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
   // There is no hardware support for v4f32, so extend the vector into
   // two v2f64s and compare those.
@@ -2247,7 +2294,7 @@ static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
 
 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
 // an integer mask of type VT.
-static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                 ISD::CondCode CC, SDValue CmpOp0,
                                 SDValue CmpOp1) {
   bool IsFP = CmpOp0.getValueType().isFloatingPoint();
@@ -2342,7 +2389,7 @@ static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
 }
 
 // Return the absolute or negative absolute of Op; IsNegative decides which.
-static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
+static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
                            bool IsNegative) {
   Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
   if (IsNegative)
@@ -2414,11 +2461,10 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
   const GlobalValue *GV = Node->getGlobal();
   int64_t Offset = Node->getOffset();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  Reloc::Model RM = DAG.getTarget().getRelocationModel();
   CodeModel::Model CM = DAG.getTarget().getCodeModel();
 
   SDValue Result;
-  if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
+  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
     // Assign anchors at 1<<12 byte boundaries.
     uint64_t Anchor = Offset & ~uint64_t(0xfff);
     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
@@ -2435,8 +2481,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
     Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
-                         MachinePointerInfo::getGOT(DAG.getMachineFunction()),
-                         false, false, false, 0);
+                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
   }
 
   // If there was a non-zero offset that we didn't fold, create an explicit
@@ -2495,14 +2540,9 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
   return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
 }
 
-SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
-                                                     SelectionDAG &DAG) const {
-  if (DAG.getTarget().Options.EmulatedTLS)
-    return LowerToTLSEmulatedModel(Node, DAG);
-  SDLoc DL(Node);
-  const GlobalValue *GV = Node->getGlobal();
+SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
+                                                  SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
 
   // The high part of the thread pointer is in access register 0.
   SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
@@ -2517,7 +2557,19 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
   // Merge them into a single 64-bit address.
   SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                     DAG.getConstant(32, DL, PtrVT));
-  SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
+  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
+}
+
+SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
+                                                     SelectionDAG &DAG) const {
+  if (DAG.getTarget().Options.EmulatedTLS)
+    return LowerToTLSEmulatedModel(Node, DAG);
+  SDLoc DL(Node);
+  const GlobalValue *GV = Node->getGlobal();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
+
+  SDValue TP = lowerThreadPointer(DL, DAG);
 
   // Get the offset of GA from the thread pointer, based on the TLS model.
   SDValue Offset;
@@ -2530,8 +2582,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
       Offset = DAG.getLoad(
           PtrVT, DL, DAG.getEntryNode(), Offset,
-          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
-          false, false, 0);
+          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 
       // Call __tls_get_offset to retrieve the offset.
       Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
@@ -2546,8 +2597,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
       Offset = DAG.getLoad(
           PtrVT, DL, DAG.getEntryNode(), Offset,
-          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
-          false, false, 0);
+          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 
       // Call __tls_get_offset to retrieve the module base offset.
       Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
@@ -2565,8 +2615,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
       SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
       DTPOffset = DAG.getLoad(
           PtrVT, DL, DAG.getEntryNode(), DTPOffset,
-          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
-          false, false, 0);
+          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 
       Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
       break;
@@ -2577,9 +2626,9 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
       Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                           SystemZII::MO_INDNTPOFF);
       Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
-      Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
-                           MachinePointerInfo::getGOT(DAG.getMachineFunction()),
-                           false, false, false, 0);
+      Offset =
+          DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
+                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
       break;
     }
 
@@ -2591,8 +2640,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
       Offset = DAG.getLoad(
           PtrVT, DL, DAG.getEntryNode(), Offset,
-          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
-          false, false, 0);
+          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
       break;
     }
   }
@@ -2640,6 +2688,57 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
 }
 
+SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MFI->setFrameAddressIsTaken(true);
+
+  SDLoc DL(Op);
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+  // If the back chain frame index has not been allocated yet, do so.
+  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
+  int BackChainIdx = FI->getFramePointerSaveIndex();
+  if (!BackChainIdx) {
+    // By definition, the frame address is the address of the back chain.
+    BackChainIdx = MFI->CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+    FI->setFramePointerSaveIndex(BackChainIdx);
+  }
+  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
+
+  // FIXME The frontend should detect this case.
+  if (Depth > 0) {
+    report_fatal_error("Unsupported stack frame traversal count");
+  }
+
+  return BackChain;
+}
+
+SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MFI->setReturnAddressIsTaken(true);
+
+  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+    return SDValue();
+
+  SDLoc DL(Op);
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+  // FIXME The frontend should detect this case.
+  if (Depth > 0) {
+    report_fatal_error("Unsupported stack frame traversal count");
+  }
+
+  // Return R14D, which has the return address. Mark it an implicit live-in.
+  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
+  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
+}
+
 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -2715,8 +2814,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
       FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                               DAG.getIntPtrConstant(Offset, DL));
     MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
-                             MachinePointerInfo(SV, Offset),
-                             false, false, 0);
+                             MachinePointerInfo(SV, Offset));
     Offset += 8;
   }
   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
@@ -2740,8 +2838,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
 SDValue SystemZTargetLowering::
 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
-  bool RealignOpt = !DAG.getMachineFunction().getFunction()->
-    hasFnAttribute("no-realign-stack");
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool RealignOpt = !MF.getFunction()-> hasFnAttribute("no-realign-stack");
+  bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
 
   SDValue Chain = Op.getOperand(0);
   SDValue Size  = Op.getOperand(1);
@@ -2763,10 +2862,15 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
   // Get a reference to the stack pointer.
   SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
 
+  // If we need a backchain, save it now.
+  SDValue Backchain;
+  if (StoreBackchain)
+    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+
   // Add extra space for alignment if needed.
   if (ExtraAlignSpace)
     NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
-                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 
+                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
 
   // Get the new stack pointer value.
   SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
@@ -2790,10 +2894,20 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
                   DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
   }
 
+  if (StoreBackchain)
+    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+
   SDValue Ops[2] = { Result, Chain };
   return DAG.getMergeValues(Ops, DL);
 }
 
+SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
+    SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
+}
+
 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                               SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
@@ -3031,6 +3145,27 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
   return Op;
 }
 
+SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+    cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+  SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+    cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+  // The only fence that needs an instruction is a sequentially-consistent
+  // cross-thread fence.
+  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
+      FenceScope == CrossThread) {
+    return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
+                                      Op.getOperand(0)),
+                   0);
+  }
+
+  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
 // Op is an atomic load.  Lower it into a normal volatile load.
 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                 SelectionDAG &DAG) const {
@@ -3220,8 +3355,24 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                  SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
-  return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
-                          SystemZ::R15D, Op.getOperand(1));
+  bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+
+  SDValue Chain = Op.getOperand(0);
+  SDValue NewSP = Op.getOperand(1);
+  SDValue Backchain;
+  SDLoc DL(Op);
+
+  if (StoreBackchain) {
+    SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
+    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+  }
+
+  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
+
+  if (StoreBackchain)
+    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+
+  return Chain;
 }
 
 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
@@ -3286,6 +3437,9 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
 
   unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   switch (Id) {
+  case Intrinsic::thread_pointer:
+    return lowerThreadPointer(SDLoc(Op), DAG);
+
   case Intrinsic::s390_vpdi:
     return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
@@ -3553,7 +3707,7 @@ static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
 
 // Create a node that performs P on operands Op0 and Op1, casting the
 // operands to the appropriate type.  The type of the result is determined by P.
-static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
+static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                               const Permute &P, SDValue Op0, SDValue Op1) {
   // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
   // elements of a PACK are twice as wide as the outputs.
@@ -3582,7 +3736,8 @@ static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
 // Bytes is a VPERM-like permute vector, except that -1 is used for
 // undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
 // VSLDI or VPERM.
-static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
+static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
+                                     SDValue *Ops,
                                      const SmallVectorImpl<int> &Bytes) {
   for (unsigned I = 0; I < 2; ++I)
     Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
@@ -3600,7 +3755,7 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
       IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
     else
       IndexNodes[I] = DAG.getUNDEF(MVT::i32);
-  SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
+  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
   return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
 }
 
@@ -3610,7 +3765,7 @@ struct GeneralShuffle {
   GeneralShuffle(EVT vt) : VT(vt) {}
   void addUndef();
   void add(SDValue, unsigned);
-  SDValue getNode(SelectionDAG &, SDLoc);
+  SDValue getNode(SelectionDAG &, const SDLoc &);
 
   // The operands of the shuffle.
   SmallVector<SDValue, SystemZ::VectorBytes> Ops;
@@ -3667,7 +3822,7 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
       }
       Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
       Byte = unsigned(NewByte) % SystemZ::VectorBytes;
-    } else if (Op.getOpcode() == ISD::UNDEF) {
+    } else if (Op.isUndef()) {
       addUndef();
       return;
     } else
@@ -3689,7 +3844,7 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
 }
 
 // Return SDNodes for the completed shuffle.
-SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
+SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
   assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
 
   if (Ops.size() == 0)
@@ -3770,37 +3925,37 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
 static bool isScalarToVector(SDValue Op) {
   for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
-    if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
+    if (!Op.getOperand(I).isUndef())
       return false;
   return true;
 }
 
 // Return a vector of type VT that contains Value in the first element.
 // The other elements don't matter.
-static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                    SDValue Value) {
   // If we have a constant, replicate it to all elements and let the
   // BUILD_VECTOR lowering take care of it.
   if (Value.getOpcode() == ISD::Constant ||
       Value.getOpcode() == ISD::ConstantFP) {
     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
-    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+    return DAG.getBuildVector(VT, DL, Ops);
   }
-  if (Value.getOpcode() == ISD::UNDEF)
+  if (Value.isUndef())
     return DAG.getUNDEF(VT);
   return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
 }
 
 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
 // element 1.  Used for cases in which replication is cheap.
-static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                  SDValue Op0, SDValue Op1) {
-  if (Op0.getOpcode() == ISD::UNDEF) {
-    if (Op1.getOpcode() == ISD::UNDEF)
+  if (Op0.isUndef()) {
+    if (Op1.isUndef())
       return DAG.getUNDEF(VT);
     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
   }
-  if (Op1.getOpcode() == ISD::UNDEF)
+  if (Op1.isUndef())
     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
   return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
                      buildScalarToVector(DAG, DL, VT, Op0),
@@ -3809,15 +3964,15 @@ static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
 
 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
 // vector for them.
-static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
+static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
                           SDValue Op1) {
-  if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
+  if (Op0.isUndef() && Op1.isUndef())
     return DAG.getUNDEF(MVT::v2i64);
   // If one of the two inputs is undefined then replicate the other one,
   // in order to avoid using another register unnecessarily.
-  if (Op0.getOpcode() == ISD::UNDEF)
+  if (Op0.isUndef())
     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
-  else if (Op1.getOpcode() == ISD::UNDEF)
+  else if (Op1.isUndef())
     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
   else {
     Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
@@ -3834,7 +3989,7 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
   unsigned BytesPerElement = ElemVT.getStoreSize();
   for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
     SDValue Op = BVN->getOperand(I);
-    if (Op.getOpcode() != ISD::UNDEF) {
+    if (!Op.isUndef()) {
       uint64_t Value;
       if (Op.getOpcode() == ISD::Constant)
         Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
@@ -3862,7 +4017,7 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
 // an empty value.
 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
                                        const SystemZInstrInfo *TII,
-                                       SDLoc DL, EVT VT, uint64_t Value,
+                                       const SDLoc &DL, EVT VT, uint64_t Value,
                                        unsigned BitsPerElement) {
   // Signed 16-bit values can be replicated using VREPI.
   int64_t SignedValue = SignExtend64(Value, BitsPerElement);
@@ -3919,7 +4074,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
       unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
       GS.add(Op.getOperand(0), Elem);
       FoundOne = true;
-    } else if (Op.getOpcode() == ISD::UNDEF) {
+    } else if (Op.isUndef()) {
       GS.addUndef();
     } else {
       GS.add(SDValue(), ResidueOps.size());
@@ -3937,7 +4092,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
       ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
     for (auto &Op : GS.Ops) {
       if (!Op.getNode()) {
-        Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
+        Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
         break;
       }
     }
@@ -3946,14 +4101,14 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
 }
 
 // Combine GPR scalar values Elems into a vector of type VT.
-static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                            SmallVectorImpl<SDValue> &Elems) {
   // See whether there is a single replicated value.
   SDValue Single;
   unsigned int NumElements = Elems.size();
   unsigned int Count = 0;
   for (auto Elem : Elems) {
-    if (Elem.getOpcode() != ISD::UNDEF) {
+    if (!Elem.isUndef()) {
       if (!Single.getNode())
         Single = Elem;
       else if (Elem != Single) {
@@ -3998,9 +4153,9 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
     SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
     SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
     // Avoid unnecessary undefs by reusing the other operand.
-    if (Op01.getOpcode() == ISD::UNDEF)
+    if (Op01.isUndef())
       Op01 = Op23;
-    else if (Op23.getOpcode() == ISD::UNDEF)
+    else if (Op23.isUndef())
       Op23 = Op01;
     // Merging identical replications is a no-op.
     if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
@@ -4034,7 +4189,7 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
     for (unsigned I = 0; I < NumElements; ++I)
       if (!Constants[I].getNode())
         Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
-    Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
+    Result = DAG.getBuildVector(VT, DL, Constants);
   } else {
     // Otherwise try to use VLVGP to start the sequence in order to
     // avoid a false dependency on any previous contents of the vector
@@ -4042,8 +4197,8 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
     // is defined.
     unsigned I1 = NumElements / 2 - 1;
     unsigned I2 = NumElements - 1;
-    bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
-    bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
+    bool Def1 = !Elems[I1].isUndef();
+    bool Def2 = !Elems[I2].isUndef();
     if (Def1 || Def2) {
       SDValue Elem1 = Elems[Def1 ? I1 : I2];
       SDValue Elem2 = Elems[Def2 ? I2 : I1];
@@ -4057,7 +4212,7 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
 
   // Use VLVGx to insert the other elements.
   for (unsigned I = 0; I < NumElements; ++I)
-    if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
+    if (!Done[I] && !Elems[I].isUndef())
       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                            DAG.getConstant(I, DL, MVT::i32));
   return Result;
@@ -4120,8 +4275,7 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   }
 
   // See if we should use shuffles to construct the vector from other vectors.
-  SDValue Res = tryBuildVectorShuffle(DAG, BVN);
-  if (Res.getNode())
+  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
     return Res;
 
   // Detect SCALAR_TO_VECTOR conversions.
@@ -4312,6 +4466,10 @@ SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                               SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
+  case ISD::FRAMEADDR:
+    return lowerFRAMEADDR(Op, DAG);
+  case ISD::RETURNADDR:
+    return lowerRETURNADDR(Op, DAG);
   case ISD::BR_CC:
     return lowerBR_CC(Op, DAG);
   case ISD::SELECT_CC:
@@ -4336,6 +4494,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
     return lowerVACOPY(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return lowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::GET_DYNAMIC_AREA_OFFSET:
+    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
   case ISD::SMUL_LOHI:
     return lowerSMUL_LOHI(Op, DAG);
   case ISD::UMUL_LOHI:
@@ -4348,12 +4508,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
     return lowerOR(Op, DAG);
   case ISD::CTPOP:
     return lowerCTPOP(Op, DAG);
-  case ISD::CTLZ_ZERO_UNDEF:
-    return DAG.getNode(ISD::CTLZ, SDLoc(Op),
-                       Op.getValueType(), Op.getOperand(0));
-  case ISD::CTTZ_ZERO_UNDEF:
-    return DAG.getNode(ISD::CTTZ, SDLoc(Op),
-                       Op.getValueType(), Op.getOperand(0));
+  case ISD::ATOMIC_FENCE:
+    return lowerATOMIC_FENCE(Op, DAG);
   case ISD::ATOMIC_SWAP:
     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
   case ISD::ATOMIC_STORE:
@@ -4457,6 +4613,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(SEARCH_STRING);
     OPCODE(IPM);
     OPCODE(SERIALIZE);
+    OPCODE(MEMBARRIER);
     OPCODE(TBEGIN);
     OPCODE(TBEGIN_NOFLOAT);
     OPCODE(TEND);
@@ -4506,6 +4663,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(VISTR_CC);
     OPCODE(VSTRC_CC);
     OPCODE(VSTRCZ_CC);
+    OPCODE(TDC);
     OPCODE(ATOMIC_SWAPW);
     OPCODE(ATOMIC_LOADW_ADD);
     OPCODE(ATOMIC_LOADW_SUB);
@@ -4518,6 +4676,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(ATOMIC_LOADW_UMIN);
     OPCODE(ATOMIC_LOADW_UMAX);
     OPCODE(ATOMIC_CMP_SWAPW);
+    OPCODE(LRV);
+    OPCODE(STRV);
     OPCODE(PREFETCH);
   }
   return nullptr;
@@ -4535,8 +4695,9 @@ static bool canTreatAsByteVector(EVT VT) {
 // of the input vector and Index is the index (based on type VecVT) that
 // should be extracted.  Return the new extraction if a simplification
 // was possible or if Force is true.
-SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
-                                              SDValue Op, unsigned Index,
+SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
+                                              EVT VecVT, SDValue Op,
+                                              unsigned Index,
                                               DAGCombinerInfo &DCI,
                                               bool Force) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -4639,9 +4800,8 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
 
 // Optimize vector operations in scalar value Op on the basis that Op
 // is truncated to TruncVT.
-SDValue
-SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
-                                              DAGCombinerInfo &DCI) const {
+SDValue SystemZTargetLowering::combineTruncateExtract(
+    const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
   // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
   // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
   // of type TruncVT.
@@ -4675,145 +4835,295 @@ SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
   return SDValue();
 }
 
-SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
-                                                 DAGCombinerInfo &DCI) const {
+SDValue SystemZTargetLowering::combineSIGN_EXTEND(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  // Convert (sext (ashr (shl X, C1), C2)) to
+  // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
+  // cheap as narrower ones.
   SelectionDAG &DAG = DCI.DAG;
-  unsigned Opcode = N->getOpcode();
-  if (Opcode == ISD::SIGN_EXTEND) {
-    // Convert (sext (ashr (shl X, C1), C2)) to
-    // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
-    // cheap as narrower ones.
-    SDValue N0 = N->getOperand(0);
-    EVT VT = N->getValueType(0);
-    if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
-      auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-      SDValue Inner = N0.getOperand(0);
-      if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
-        if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
-          unsigned Extra = (VT.getSizeInBits() -
-                            N0.getValueType().getSizeInBits());
-          unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
-          unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
-          EVT ShiftVT = N0.getOperand(1).getValueType();
-          SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
-                                    Inner.getOperand(0));
-          SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
-                                    DAG.getConstant(NewShlAmt, SDLoc(Inner),
-                                                    ShiftVT));
-          return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
-                             DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
-        }
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
+    auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    SDValue Inner = N0.getOperand(0);
+    if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
+      if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
+        unsigned Extra = (VT.getSizeInBits() -
+                          N0.getValueType().getSizeInBits());
+        unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
+        unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
+        EVT ShiftVT = N0.getOperand(1).getValueType();
+        SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
+                                  Inner.getOperand(0));
+        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
+                                  DAG.getConstant(NewShlAmt, SDLoc(Inner),
+                                                  ShiftVT));
+        return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
+                           DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
       }
     }
   }
-  if (Opcode == SystemZISD::MERGE_HIGH ||
-      Opcode == SystemZISD::MERGE_LOW) {
-    SDValue Op0 = N->getOperand(0);
-    SDValue Op1 = N->getOperand(1);
-    if (Op0.getOpcode() == ISD::BITCAST)
-      Op0 = Op0.getOperand(0);
-    if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
-        cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
-      // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
-      // for v4f32.
-      if (Op1 == N->getOperand(0))
-        return Op1;
-      // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
-      EVT VT = Op1.getValueType();
-      unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
-      if (ElemBytes <= 4) {
-        Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
-                  SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
-        EVT InVT = VT.changeVectorElementTypeToInteger();
-        EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
-                                     SystemZ::VectorBytes / ElemBytes / 2);
-        if (VT != InVT) {
-          Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
-          DCI.AddToWorklist(Op1.getNode());
-        }
-        SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
-        DCI.AddToWorklist(Op.getNode());
-        return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineMERGE(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  unsigned Opcode = N->getOpcode();
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0.getOpcode() == ISD::BITCAST)
+    Op0 = Op0.getOperand(0);
+  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
+      cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+    // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
+    // for v4f32.
+    if (Op1 == N->getOperand(0))
+      return Op1;
+    // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
+    EVT VT = Op1.getValueType();
+    unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
+    if (ElemBytes <= 4) {
+      Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
+                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
+      EVT InVT = VT.changeVectorElementTypeToInteger();
+      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
+                                   SystemZ::VectorBytes / ElemBytes / 2);
+      if (VT != InVT) {
+        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
+        DCI.AddToWorklist(Op1.getNode());
       }
+      SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
+      DCI.AddToWorklist(Op.getNode());
+      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
     }
   }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSTORE(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  auto *SN = cast<StoreSDNode>(N);
+  auto &Op1 = N->getOperand(1);
+  EVT MemVT = SN->getMemoryVT();
   // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
   // for the extraction to be done on a vMiN value, so that we can use VSTE.
   // If X has wider elements then convert it to:
   // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
-  if (Opcode == ISD::STORE) {
-    auto *SN = cast<StoreSDNode>(N);
-    EVT MemVT = SN->getMemoryVT();
-    if (MemVT.isInteger()) {
-      SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
-                                             SN->getValue(), DCI);
-      if (Value.getNode()) {
-        DCI.AddToWorklist(Value.getNode());
-
-        // Rewrite the store with the new form of stored value.
-        return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
-                                 SN->getBasePtr(), SN->getMemoryVT(),
-                                 SN->getMemOperand());
-      }
+  if (MemVT.isInteger()) {
+    if (SDValue Value =
+            combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
+      DCI.AddToWorklist(Value.getNode());
+
+      // Rewrite the store with the new form of stored value.
+      return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
+                               SN->getBasePtr(), SN->getMemoryVT(),
+                               SN->getMemOperand());
     }
   }
-  // Try to simplify a vector extraction.
-  if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
-    if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
-      SDValue Op0 = N->getOperand(0);
-      EVT VecVT = Op0.getValueType();
-      return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
-                            IndexN->getZExtValue(), DCI, false);
+  // Combine STORE (BSWAP) into STRVH/STRV/STRVG
+  // See comment in combineBSWAP about volatile accesses.
+  if (!SN->isVolatile() &&
+      Op1.getOpcode() == ISD::BSWAP &&
+      Op1.getNode()->hasOneUse() &&
+      (Op1.getValueType() == MVT::i16 ||
+       Op1.getValueType() == MVT::i32 ||
+       Op1.getValueType() == MVT::i64)) {
+
+      SDValue BSwapOp = Op1.getOperand(0);
+
+      if (BSwapOp.getValueType() == MVT::i16)
+        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
+
+      SDValue Ops[] = {
+        N->getOperand(0), BSwapOp, N->getOperand(2),
+        DAG.getValueType(Op1.getValueType())
+      };
+
+      return
+        DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
+                                Ops, MemVT, SN->getMemOperand());
     }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  // Try to simplify a vector extraction.
+  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+    SDValue Op0 = N->getOperand(0);
+    EVT VecVT = Op0.getValueType();
+    return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
+                          IndexN->getZExtValue(), DCI, false);
   }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineJOIN_DWORDS(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
   // (join_dwords X, X) == (replicate X)
-  if (Opcode == SystemZISD::JOIN_DWORDS &&
-      N->getOperand(0) == N->getOperand(1))
+  if (N->getOperand(0) == N->getOperand(1))
     return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
                        N->getOperand(0));
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineFP_ROUND(
+    SDNode *N, DAGCombinerInfo &DCI) const {
   // (fround (extract_vector_elt X 0))
   // (fround (extract_vector_elt X 1)) ->
   // (extract_vector_elt (VROUND X) 0)
   // (extract_vector_elt (VROUND X) 1)
   //
   // This is a special case since the target doesn't really support v2f32s.
-  if (Opcode == ISD::FP_ROUND) {
-    SDValue Op0 = N->getOperand(0);
-    if (N->getValueType(0) == MVT::f32 &&
-        Op0.hasOneUse() &&
-        Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-        Op0.getOperand(0).getValueType() == MVT::v2f64 &&
-        Op0.getOperand(1).getOpcode() == ISD::Constant &&
-        cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
-      SDValue Vec = Op0.getOperand(0);
-      for (auto *U : Vec->uses()) {
-        if (U != Op0.getNode() &&
-            U->hasOneUse() &&
-            U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-            U->getOperand(0) == Vec &&
-            U->getOperand(1).getOpcode() == ISD::Constant &&
-            cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
-          SDValue OtherRound = SDValue(*U->use_begin(), 0);
-          if (OtherRound.getOpcode() == ISD::FP_ROUND &&
-              OtherRound.getOperand(0) == SDValue(U, 0) &&
-              OtherRound.getValueType() == MVT::f32) {
-            SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
-                                         MVT::v4f32, Vec);
-            DCI.AddToWorklist(VRound.getNode());
-            SDValue Extract1 =
-              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
-                          VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
-            DCI.AddToWorklist(Extract1.getNode());
-            DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
-            SDValue Extract0 =
-              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
-                          VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
-            return Extract0;
-          }
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Op0 = N->getOperand(0);
+  if (N->getValueType(0) == MVT::f32 &&
+      Op0.hasOneUse() &&
+      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      Op0.getOperand(0).getValueType() == MVT::v2f64 &&
+      Op0.getOperand(1).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+    SDValue Vec = Op0.getOperand(0);
+    for (auto *U : Vec->uses()) {
+      if (U != Op0.getNode() &&
+          U->hasOneUse() &&
+          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+          U->getOperand(0) == Vec &&
+          U->getOperand(1).getOpcode() == ISD::Constant &&
+          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
+        SDValue OtherRound = SDValue(*U->use_begin(), 0);
+        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
+            OtherRound.getOperand(0) == SDValue(U, 0) &&
+            OtherRound.getValueType() == MVT::f32) {
+          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+                                       MVT::v4f32, Vec);
+          DCI.AddToWorklist(VRound.getNode());
+          SDValue Extract1 =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
+                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
+          DCI.AddToWorklist(Extract1.getNode());
+          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+          SDValue Extract0 =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
+                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+          return Extract0;
+        }
+      }
+    }
+  }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineBSWAP(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
+  // These loads are allowed to access memory multiple times, and so we must check
+  // that the loads are not volatile before performing the combine.
+  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+      N->getOperand(0).hasOneUse() &&
+      (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
+       N->getValueType(0) == MVT::i64) &&
+       !cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
+      SDValue Load = N->getOperand(0);
+      LoadSDNode *LD = cast<LoadSDNode>(Load);
+
+      // Create the byte-swapping load.
+      SDValue Ops[] = {
+        LD->getChain(),    // Chain
+        LD->getBasePtr(),  // Ptr
+        DAG.getValueType(N->getValueType(0)) // VT
+      };
+      SDValue BSLoad =
+        DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
+                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+                                              MVT::i64 : MVT::i32, MVT::Other),
+                                Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+      // If this is an i16 load, insert the truncate.
+      SDValue ResVal = BSLoad;
+      if (N->getValueType(0) == MVT::i16)
+        ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
+
+      // First, combine the bswap away.  This makes the value produced by the
+      // load dead.
+      DCI.CombineTo(N, ResVal);
+
+      // Next, combine the load away, we give it a bogus result value but a real
+      // chain result.  The result value is dead because the bswap is dead.
+      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+      // Return N so it doesn't get rechecked!
+      return SDValue(N, 0);
+    }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSHIFTROT(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  // Shift/rotate instructions only use the last 6 bits of the second operand
+  // register. If the second operand is the result of an AND with an immediate
+  // value that has its last 6 bits set, we can safely remove the AND operation.
+  SDValue N1 = N->getOperand(1);
+  if (N1.getOpcode() == ISD::AND) {
+    auto *AndMask = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+
+    // The AND mask is constant
+    if (AndMask) {
+      auto AmtVal = AndMask->getZExtValue();
+
+      // Bottom 6 bits are set
+      if ((AmtVal & 0x3f) == 0x3f) {
+        SDValue AndOp = N1->getOperand(0);
+
+        // This is the only use, so remove the node
+        if (N1.hasOneUse()) {
+          // Combine the AND away
+          DCI.CombineTo(N1.getNode(), AndOp);
+
+          // Return N so it isn't rechecked
+          return SDValue(N, 0);
+
+        // The node will be reused, so create a new node for this one use
+        } else {
+          SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
+                                        N->getValueType(0), N->getOperand(0),
+                                        AndOp);
+          DCI.AddToWorklist(Replace.getNode());
+
+          return Replace;
         }
       }
     }
   }
+
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
+                                                 DAGCombinerInfo &DCI) const {
+  switch(N->getOpcode()) {
+  default: break;
+  case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
+  case SystemZISD::MERGE_HIGH:
+  case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
+  case ISD::STORE:              return combineSTORE(N, DCI);
+  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
+  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+  case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
+  case ISD::BSWAP:              return combineBSWAP(N, DCI);
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:               return combineSHIFTROT(N, DCI);
+  }
+
   return SDValue();
 }
 
@@ -4831,7 +5141,7 @@ static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
 
 // Split MBB after MI and return the new block (the one that contains
 // instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
+static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
                                           MachineBasicBlock *MBB) {
   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
   NewMBB->splice(NewMBB->begin(), MBB,
@@ -4841,7 +5151,7 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
 }
 
 // Split MBB before MI and return the new block (the one that contains MI).
-static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
+static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
                                            MachineBasicBlock *MBB) {
   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
   NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
@@ -4850,34 +5160,36 @@ static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
 }
 
 // Force base value Base into a register before MI.  Return the register.
-static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
+static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
                          const SystemZInstrInfo *TII) {
   if (Base.isReg())
     return Base.getReg();
 
-  MachineBasicBlock *MBB = MI->getParent();
+  MachineBasicBlock *MBB = MI.getParent();
   MachineFunction &MF = *MBB->getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
-  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
-    .addOperand(Base).addImm(0).addReg(0);
+  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
+      .addOperand(Base)
+      .addImm(0)
+      .addReg(0);
   return Reg;
 }
 
 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
 MachineBasicBlock *
-SystemZTargetLowering::emitSelect(MachineInstr *MI,
+SystemZTargetLowering::emitSelect(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const {
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
 
-  unsigned DestReg  = MI->getOperand(0).getReg();
-  unsigned TrueReg  = MI->getOperand(1).getReg();
-  unsigned FalseReg = MI->getOperand(2).getReg();
-  unsigned CCValid  = MI->getOperand(3).getImm();
-  unsigned CCMask   = MI->getOperand(4).getImm();
-  DebugLoc DL       = MI->getDebugLoc();
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned TrueReg = MI.getOperand(1).getReg();
+  unsigned FalseReg = MI.getOperand(2).getReg();
+  unsigned CCValid = MI.getOperand(3).getImm();
+  unsigned CCMask = MI.getOperand(4).getImm();
+  DebugLoc DL = MI.getDebugLoc();
 
   MachineBasicBlock *StartMBB = MBB;
   MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
@@ -4905,7 +5217,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
     .addReg(TrueReg).addMBB(StartMBB)
     .addReg(FalseReg).addMBB(FalseMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return JoinMBB;
 }
 
@@ -4913,21 +5225,21 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
 // StoreOpcode is the store to use and Invert says whether the store should
 // happen when the condition is false rather than true.  If a STORE ON
 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
-MachineBasicBlock *
-SystemZTargetLowering::emitCondStore(MachineInstr *MI,
-                                     MachineBasicBlock *MBB,
-                                     unsigned StoreOpcode, unsigned STOCOpcode,
-                                     bool Invert) const {
+MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
+                                                        MachineBasicBlock *MBB,
+                                                        unsigned StoreOpcode,
+                                                        unsigned STOCOpcode,
+                                                        bool Invert) const {
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
 
-  unsigned SrcReg     = MI->getOperand(0).getReg();
-  MachineOperand Base = MI->getOperand(1);
-  int64_t Disp        = MI->getOperand(2).getImm();
-  unsigned IndexReg   = MI->getOperand(3).getReg();
-  unsigned CCValid    = MI->getOperand(4).getImm();
-  unsigned CCMask     = MI->getOperand(5).getImm();
-  DebugLoc DL         = MI->getDebugLoc();
+  unsigned SrcReg = MI.getOperand(0).getReg();
+  MachineOperand Base = MI.getOperand(1);
+  int64_t Disp = MI.getOperand(2).getImm();
+  unsigned IndexReg = MI.getOperand(3).getReg();
+  unsigned CCValid = MI.getOperand(4).getImm();
+  unsigned CCMask = MI.getOperand(5).getImm();
+  DebugLoc DL = MI.getDebugLoc();
 
   StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
 
@@ -4940,7 +5252,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
     BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
       .addReg(SrcReg).addOperand(Base).addImm(Disp)
       .addImm(CCValid).addImm(CCMask);
-    MI->eraseFromParent();
+    MI.eraseFromParent();
     return MBB;
   }
 
@@ -4969,7 +5281,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
     .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
   MBB->addSuccessor(JoinMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return JoinMBB;
 }
 
@@ -4980,12 +5292,9 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
 // is one of the operands.  Invert says whether the field should be
 // inverted after performing BinOpcode (e.g. for NAND).
-MachineBasicBlock *
-SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
-                                            MachineBasicBlock *MBB,
-                                            unsigned BinOpcode,
-                                            unsigned BitSize,
-                                            bool Invert) const {
+MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
+    unsigned BitSize, bool Invert) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@@ -4994,15 +5303,15 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
 
   // Extract the operands.  Base can be a register or a frame index.
   // Src2 can be a register or immediate.
-  unsigned Dest        = MI->getOperand(0).getReg();
-  MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
-  int64_t Disp         = MI->getOperand(2).getImm();
-  MachineOperand Src2  = earlyUseOperand(MI->getOperand(3));
-  unsigned BitShift    = (IsSubWord ? MI->getOperand(4).getReg() : 0);
-  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
-  DebugLoc DL          = MI->getDebugLoc();
+  unsigned Dest = MI.getOperand(0).getReg();
+  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+  int64_t Disp = MI.getOperand(2).getImm();
+  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
+  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
+  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+  DebugLoc DL = MI.getDebugLoc();
   if (IsSubWord)
-    BitSize = MI->getOperand(6).getImm();
+    BitSize = MI.getOperand(6).getImm();
 
   // Subword operations use 32-bit registers.
   const TargetRegisterClass *RC = (BitSize <= 32 ?
@@ -5090,7 +5399,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
   MBB->addSuccessor(LoopMBB);
   MBB->addSuccessor(DoneMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return DoneMBB;
 }
 
@@ -5100,12 +5409,9 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
 // minimum or maximum value.  KeepOldMask is the BRC condition-code mask
 // for when the current field should be kept.  BitSize is the width of
 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
-MachineBasicBlock *
-SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
-                                            MachineBasicBlock *MBB,
-                                            unsigned CompareOpcode,
-                                            unsigned KeepOldMask,
-                                            unsigned BitSize) const {
+MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
+    unsigned KeepOldMask, unsigned BitSize) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@@ -5113,15 +5419,15 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
   bool IsSubWord = (BitSize < 32);
 
   // Extract the operands.  Base can be a register or a frame index.
-  unsigned Dest        = MI->getOperand(0).getReg();
-  MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
-  int64_t  Disp        = MI->getOperand(2).getImm();
-  unsigned Src2        = MI->getOperand(3).getReg();
-  unsigned BitShift    = (IsSubWord ? MI->getOperand(4).getReg() : 0);
-  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
-  DebugLoc DL          = MI->getDebugLoc();
+  unsigned Dest = MI.getOperand(0).getReg();
+  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+  int64_t Disp = MI.getOperand(2).getImm();
+  unsigned Src2 = MI.getOperand(3).getReg();
+  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
+  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+  DebugLoc DL = MI.getDebugLoc();
   if (IsSubWord)
-    BitSize = MI->getOperand(6).getImm();
+    BitSize = MI.getOperand(6).getImm();
 
   // Subword operations use 32-bit registers.
   const TargetRegisterClass *RC = (BitSize <= 32 ?
@@ -5209,30 +5515,31 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
   MBB->addSuccessor(LoopMBB);
   MBB->addSuccessor(DoneMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return DoneMBB;
 }
 
 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
 // instruction MI.
 MachineBasicBlock *
-SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
+SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const {
+
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   // Extract the operands.  Base can be a register or a frame index.
-  unsigned Dest        = MI->getOperand(0).getReg();
-  MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
-  int64_t  Disp        = MI->getOperand(2).getImm();
-  unsigned OrigCmpVal  = MI->getOperand(3).getReg();
-  unsigned OrigSwapVal = MI->getOperand(4).getReg();
-  unsigned BitShift    = MI->getOperand(5).getReg();
-  unsigned NegBitShift = MI->getOperand(6).getReg();
-  int64_t  BitSize     = MI->getOperand(7).getImm();
-  DebugLoc DL          = MI->getDebugLoc();
+  unsigned Dest = MI.getOperand(0).getReg();
+  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+  int64_t Disp = MI.getOperand(2).getImm();
+  unsigned OrigCmpVal = MI.getOperand(3).getReg();
+  unsigned OrigSwapVal = MI.getOperand(4).getReg();
+  unsigned BitShift = MI.getOperand(5).getReg();
+  unsigned NegBitShift = MI.getOperand(6).getReg();
+  int64_t BitSize = MI.getOperand(7).getImm();
+  DebugLoc DL = MI.getDebugLoc();
 
   const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
 
@@ -5323,7 +5630,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
   MBB->addSuccessor(LoopMBB);
   MBB->addSuccessor(DoneMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return DoneMBB;
 }
 
@@ -5331,18 +5638,18 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
 // if the high register of the GR128 value must be cleared or false if
 // it's "don't care".  SubReg is subreg_l32 when extending a GR32
 // and subreg_l64 when extending a GR64.
-MachineBasicBlock *
-SystemZTargetLowering::emitExt128(MachineInstr *MI,
-                                  MachineBasicBlock *MBB,
-                                  bool ClearEven, unsigned SubReg) const {
+MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
+                                                     MachineBasicBlock *MBB,
+                                                     bool ClearEven,
+                                                     unsigned SubReg) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();
 
-  unsigned Dest  = MI->getOperand(0).getReg();
-  unsigned Src   = MI->getOperand(1).getReg();
+  unsigned Dest = MI.getOperand(0).getReg();
+  unsigned Src = MI.getOperand(1).getReg();
   unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
 
   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
@@ -5359,25 +5666,23 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
     .addReg(In128).addReg(Src).addImm(SubReg);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return MBB;
 }
 
-MachineBasicBlock *
-SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
-                                         MachineBasicBlock *MBB,
-                                         unsigned Opcode) const {
+MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();
 
-  MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
-  uint64_t       DestDisp = MI->getOperand(1).getImm();
-  MachineOperand SrcBase  = earlyUseOperand(MI->getOperand(2));
-  uint64_t       SrcDisp  = MI->getOperand(3).getImm();
-  uint64_t       Length   = MI->getOperand(4).getImm();
+  MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
+  uint64_t DestDisp = MI.getOperand(1).getImm();
+  MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
+  uint64_t SrcDisp = MI.getOperand(3).getImm();
+  uint64_t Length = MI.getOperand(4).getImm();
 
   // When generating more than one CLC, all but the last will need to
   // branch to the end when a difference is found.
@@ -5385,10 +5690,10 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
                                splitBlockAfter(MI, MBB) : nullptr);
 
   // Check for the loop form, in which operand 5 is the trip count.
-  if (MI->getNumExplicitOperands() > 5) {
+  if (MI.getNumExplicitOperands() > 5) {
     bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
 
-    uint64_t StartCountReg = MI->getOperand(5).getReg();
+    uint64_t StartCountReg = MI.getOperand(5).getReg();
     uint64_t StartSrcReg   = forceReg(MI, SrcBase, TII);
     uint64_t StartDestReg  = (HaveSingleBase ? StartSrcReg :
                               forceReg(MI, DestBase, TII));
@@ -5491,15 +5796,19 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
     // Apply them using LAY if so.
     if (!isUInt<12>(DestDisp)) {
       unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
-      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
-        .addOperand(DestBase).addImm(DestDisp).addReg(0);
+      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+          .addOperand(DestBase)
+          .addImm(DestDisp)
+          .addReg(0);
       DestBase = MachineOperand::CreateReg(Reg, false);
       DestDisp = 0;
     }
     if (!isUInt<12>(SrcDisp)) {
       unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
-      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
-        .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
+      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+          .addOperand(SrcBase)
+          .addImm(SrcDisp)
+          .addReg(0);
       SrcBase = MachineOperand::CreateReg(Reg, false);
       SrcDisp = 0;
     }
@@ -5527,26 +5836,24 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
     MBB->addLiveIn(SystemZ::CC);
   }
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return MBB;
 }
 
 // Decompose string pseudo-instruction MI into a loop that continually performs
 // Opcode until CC != 3.
-MachineBasicBlock *
-SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
-                                         MachineBasicBlock *MBB,
-                                         unsigned Opcode) const {
+MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();
 
-  uint64_t End1Reg   = MI->getOperand(0).getReg();
-  uint64_t Start1Reg = MI->getOperand(1).getReg();
-  uint64_t Start2Reg = MI->getOperand(2).getReg();
-  uint64_t CharReg   = MI->getOperand(3).getReg();
+  uint64_t End1Reg = MI.getOperand(0).getReg();
+  uint64_t Start1Reg = MI.getOperand(1).getReg();
+  uint64_t Start2Reg = MI.getOperand(2).getReg();
+  uint64_t CharReg = MI.getOperand(3).getReg();
 
   const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
   uint64_t This1Reg = MRI.createVirtualRegister(RC);
@@ -5589,26 +5896,24 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
 
   DoneMBB->addLiveIn(SystemZ::CC);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return DoneMBB;
 }
 
 // Update TBEGIN instruction with final opcode and register clobbers.
-MachineBasicBlock *
-SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
-                                            MachineBasicBlock *MBB,
-                                            unsigned Opcode,
-                                            bool NoFloat) const {
+MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
+    bool NoFloat) const {
   MachineFunction &MF = *MBB->getParent();
   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
   const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
 
   // Update opcode.
-  MI->setDesc(TII->get(Opcode));
+  MI.setDesc(TII->get(Opcode));
 
   // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
   // Make sure to add the corresponding GRSM bits if they are missing.
-  uint64_t Control = MI->getOperand(2).getImm();
+  uint64_t Control = MI.getOperand(2).getImm();
   static const unsigned GPRControlBit[16] = {
     0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
     0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
@@ -5616,13 +5921,13 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
   Control |= GPRControlBit[15];
   if (TFI->hasFP(MF))
     Control |= GPRControlBit[11];
-  MI->getOperand(2).setImm(Control);
+  MI.getOperand(2).setImm(Control);
 
   // Add GPR clobbers.
   for (int I = 0; I < 16; I++) {
     if ((Control & GPRControlBit[I]) == 0) {
       unsigned Reg = SystemZMC::GR64Regs[I];
-      MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+      MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
     }
   }
 
@@ -5631,12 +5936,12 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
     if (Subtarget.hasVector()) {
       for (int I = 0; I < 32; I++) {
         unsigned Reg = SystemZMC::VR128Regs[I];
-        MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
       }
     } else {
       for (int I = 0; I < 16; I++) {
         unsigned Reg = SystemZMC::FP64Regs[I];
-        MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
       }
     }
   }
@@ -5644,17 +5949,15 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
   return MBB;
 }
 
-MachineBasicBlock *
-SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI,
-                                          MachineBasicBlock *MBB,
-                                          unsigned Opcode) const {
+MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
   MachineFunction &MF = *MBB->getParent();
   MachineRegisterInfo *MRI = &MF.getRegInfo();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();
 
-  unsigned SrcReg = MI->getOperand(0).getReg();
+  unsigned SrcReg = MI.getOperand(0).getReg();
 
   // Create new virtual register of the same class as source.
   const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
@@ -5664,14 +5967,14 @@ SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI,
   // well.
   BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
     .addReg(SrcReg);
-  MI->eraseFromParent();
+  MI.eraseFromParent();
 
   return MBB;
 }
 
-MachineBasicBlock *SystemZTargetLowering::
-EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
-  switch (MI->getOpcode()) {
+MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
+    MachineInstr &MI, MachineBasicBlock *MBB) const {
+  switch (MI.getOpcode()) {
   case SystemZ::Select32Mux:
   case SystemZ::Select32:
   case SystemZ::SelectF32:
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 391636e5467f..b1de8936beed 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -146,6 +146,9 @@ enum NodeType : unsigned {
   // Perform a serialization operation.  (BCR 15,0 or BCR 14,0.)
   SERIALIZE,
 
+  // Compiler barrier only; generate a no-op.
+  MEMBARRIER,
+
   // Transaction begin.  The first operand is the chain, the second
   // the TDB pointer, and the third the immediate control field.
   // Returns chain and glue.
@@ -275,6 +278,12 @@ enum NodeType : unsigned {
   VSTRC_CC,
   VSTRCZ_CC,
 
+  // Test Data Class.
+  //
+  // Operand 0: the value to test
+  // Operand 1: the bit mask
+  TDC,
+
   // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
   // ATOMIC_LOAD_<op>.
   //
@@ -308,6 +317,19 @@ enum NodeType : unsigned {
   // Operand 5: the width of the field in bits (8 or 16)
   ATOMIC_CMP_SWAPW,
 
+  // Byte swapping load.
+  //
+  // Operand 0: the address to load from
+  // Operand 1: the type of load (i16, i32, i64)
+  LRV,
+
+  // Byte swapping store.
+  //
+  // Operand 0: the value to store
+  // Operand 1: the address to store to
+  // Operand 2: the type of store (i16, i32, i64)
+  STRV,
+
   // Prefetch from the second operand using the 4-bit control code in
   // the first operand.  The code is 1 for a load prefetch and 2 for
   // a store prefetch.
@@ -423,16 +445,23 @@ public:
     return SystemZ::R7D;
   }
 
-  MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
-                                                 MachineBasicBlock *BB) const
-    override;
+  /// Override to support customized stack guard loading.
+  bool useLoadStackGuardNode() const override {
+    return true;
+  }
+  void insertSSPDeclarations(Module &M) const override {
+  }
+
+  MachineBasicBlock *
+  EmitInstrWithCustomInserter(MachineInstr &MI,
+                              MachineBasicBlock *BB) const override;
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
   bool allowTruncateForTailCall(Type *, Type *) const override;
   bool mayBeEmittedAsTailCall(CallInst *CI) const override;
   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                bool isVarArg,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
-                               SDLoc DL, SelectionDAG &DAG,
+                               const SDLoc &DL, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &InVals) const override;
   SDValue LowerCall(CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;
@@ -443,12 +472,20 @@ public:
                       LLVMContext &Context) const override;
   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
-                      const SmallVectorImpl<SDValue> &OutVals,
-                      SDLoc DL, SelectionDAG &DAG) const override;
-  SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
+                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
+                      SelectionDAG &DAG) const override;
+  SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
                                       SelectionDAG &DAG) const override;
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+  ISD::NodeType getExtendForAtomicOps() const override {
+    return ISD::ANY_EXTEND;
+  }
+
+  bool supportSwiftError() const override {
+    return true;
+  }
+
 private:
   const SystemZSubtarget &Subtarget;
 
@@ -461,15 +498,19 @@ private:
   SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
                             SelectionDAG &DAG, unsigned Opcode,
                             SDValue GOTOffset) const;
+  SDValue lowerThreadPointer(const SDLoc &DL, SelectionDAG &DAG) const;
   SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                 SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(BlockAddressSDNode *Node,
                             SelectionDAG &DAG) const;
   SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
+  SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
@@ -477,6 +518,7 @@ private:
   SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
@@ -498,11 +540,19 @@ private:
                                  unsigned UnpackHigh) const;
   SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
 
-  SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
+  SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
                          unsigned Index, DAGCombinerInfo &DCI,
                          bool Force) const;
-  SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
+  SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op,
                                  DAGCombinerInfo &DCI) const;
+  SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
 
   // If the last instruction before MBBI in MBB was some form of COMPARE,
   // try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -514,40 +564,33 @@ private:
                                   MachineBasicBlock *Target) const;
 
   // Implement EmitInstrWithCustomInserter for individual operation types.
-  MachineBasicBlock *emitSelect(MachineInstr *MI,
-                                MachineBasicBlock *BB) const;
-  MachineBasicBlock *emitCondStore(MachineInstr *MI,
-                                   MachineBasicBlock *BB,
+  MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
+  MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned StoreOpcode, unsigned STOCOpcode,
                                    bool Invert) const;
-  MachineBasicBlock *emitExt128(MachineInstr *MI,
-                                MachineBasicBlock *MBB,
+  MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool ClearEven, unsigned SubReg) const;
-  MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI,
+  MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI,
                                           MachineBasicBlock *BB,
                                           unsigned BinOpcode, unsigned BitSize,
                                           bool Invert = false) const;
-  MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI,
+  MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr &MI,
                                           MachineBasicBlock *MBB,
                                           unsigned CompareOpcode,
                                           unsigned KeepOldMask,
                                           unsigned BitSize) const;
-  MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
+  MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;
-  MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
-                                       MachineBasicBlock *BB,
+  MachineBasicBlock *emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *BB,
                                        unsigned Opcode) const;
-  MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
-                                       MachineBasicBlock *BB,
+  MachineBasicBlock *emitStringWrapper(MachineInstr &MI, MachineBasicBlock *BB,
                                        unsigned Opcode) const;
-  MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+  MachineBasicBlock *emitTransactionBegin(MachineInstr &MI,
                                           MachineBasicBlock *MBB,
-                                          unsigned Opcode,
-                                          bool NoFloat) const;
-  MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI,
+                                          unsigned Opcode, bool NoFloat) const;
+  MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode) const;
-
 };
 } // end namespace llvm
 
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index 5a1c874dfa36..2cb8aba1b322 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -29,7 +29,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
   MachineFunction &MF = *MI->getParent()->getParent();
   MachineFrameInfo *MFFrame = MF.getFrameInfo();
   const MCInstrDesc &MCID = MI->getDesc();
-  unsigned Flags = 0;
+  auto Flags = MachineMemOperand::MONone;
   if (MCID.mayLoad())
     Flags |= MachineMemOperand::MOLoad;
   if (MCID.mayStore())
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 0cb267290cc1..8b32047e08e3 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -37,6 +37,10 @@ let hasSideEffects = 0 in {
   def LER : UnaryRR <"le", 0x38,   null_frag, FP32,  FP32>;
   def LDR : UnaryRR <"ld", 0x28,   null_frag, FP64,  FP64>;
   def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>;
+
+  // For z13 we prefer LDR over LER to avoid partial register dependencies.
+  let isCodeGenOnly = 1 in
+    def LDR32 : UnaryRR<"ld", 0x28, null_frag, FP32, FP32>;
 }
 
 // Moves between two floating-point registers that also set the condition
@@ -443,6 +447,13 @@ let Defs = [CC], CCValues = 0xF in {
   def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
 }
 
+// Test Data Class.
+let Defs = [CC], CCValues = 0xC in {
+  def TCEB : TestRXE<"tceb", 0xED10, z_tdc, FP32>;
+  def TCDB : TestRXE<"tcdb", 0xED11, z_tdc, FP64>;
+  def TCXB : TestRXE<"tcxb", 0xED12, z_tdc, FP128>;
+}
+
 //===----------------------------------------------------------------------===//
 // Peepholes
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 01f4cdec05cb..973894d5c001 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -158,6 +158,17 @@ def getThreeOperandOpcode : InstrMapping {
 //
 //===----------------------------------------------------------------------===//
 
+class InstI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<2, outs, ins, asmstr, pattern> {
+  field bits<16> Inst;
+  field bits<16> SoftFail = 0;
+
+  bits<8> I1;
+
+  let Inst{15-8} = op;
+  let Inst{7-0}  = I1;
+}
+
 class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   : InstSystemZ<4, outs, ins, asmstr, pattern> {
   field bits<32> Inst;
@@ -172,6 +183,24 @@ class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   let Inst{15-0}  = I2;
 }
 
+class InstRIEa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+  field bits<48> SoftFail = 0;
+
+  bits<4> R1;
+  bits<16> I2;
+  bits<4> M3;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-36} = R1;
+  let Inst{35-32} = 0;
+  let Inst{31-16} = I2;
+  let Inst{15-12} = M3;
+  let Inst{11-8}  = 0;
+  let Inst{7-0}   = op{7-0};
+}
+
 class InstRIEb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   : InstSystemZ<6, outs, ins, asmstr, pattern> {
   field bits<48> Inst;
@@ -260,6 +289,24 @@ class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   let Inst{31-0}  = I2;
 }
 
+class InstRIS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+  field bits<48> SoftFail = 0;
+
+  bits<4> R1;
+  bits<8> I2;
+  bits<4> M3;
+  bits<16> BD4;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-36} = R1;
+  let Inst{35-32} = M3;
+  let Inst{31-16} = BD4;
+  let Inst{15-8}  = I2;
+  let Inst{7-0}   = op{7-0};
+}
+
 class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   : InstSystemZ<2, outs, ins, asmstr, pattern> {
   field bits<16> Inst;
@@ -320,6 +367,41 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   let Inst{3-0}   = R2;
 }
 
+class InstRRFc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+  field bits<32> SoftFail = 0;
+
+  bits<4> R1;
+  bits<4> R2;
+  bits<4> M3;
+
+  let Inst{31-16} = op;
+  let Inst{15-12} = M3;
+  let Inst{11-8}  = 0;
+  let Inst{7-4}   = R1;
+  let Inst{3-0}   = R2;
+}
+
+class InstRRS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+  field bits<48> SoftFail = 0;
+
+  bits<4> R1;
+  bits<4> R2;
+  bits<4> M3;
+  bits<16> BD4;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-36} = R1;
+  let Inst{35-32} = R2;
+  let Inst{31-16} = BD4;
+  let Inst{15-12} = M3;
+  let Inst{11-8}  = 0;
+  let Inst{7-0}   = op{7-0};
+}
+
 class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   : InstSystemZ<4, outs, ins, asmstr, pattern> {
   field bits<32> Inst;
@@ -919,6 +1001,10 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
 //   Compare:
 //     Two input operands and an implicit CC output operand.
 //
+//   Test:
+//     Two input operands and an implicit CC output operand.  The second
+//     input operand is an "address" operand used as a test class mask.
+//
 //   Ternary:
 //     One register output operand and three input operands.
 //
@@ -974,12 +1060,30 @@ class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
   let DisableEncoding = "$R1src";
 }
 
-class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
-  : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2),
+class LoadMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+                     AddressingMode mode = bdaddr12only>
+  : InstRS<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
+           mnemonic#"\t$R1, $R3, $BD2", []> {
+  let mayLoad = 1;
+}
+
+class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+                      AddressingMode mode = bdaddr20only>
+  : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
             mnemonic#"\t$R1, $R3, $BD2", []> {
   let mayLoad = 1;
 }
 
+multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode,
+                              bits<16> rsyOpcode, RegisterOperand cls> {
+  let DispKey = mnemonic ## #cls in {
+    let DispSize = "12" in
+      def "" : LoadMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
+    let DispSize = "20" in
+      def Y  : LoadMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>;
+  }
+}
+
 class LoadMultipleVRSa<string mnemonic, bits<16> opcode>
   : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2),
              mnemonic#"\t$V1, $V3, $BD2", []> {
@@ -1055,12 +1159,30 @@ class StoreLengthVRSb<string mnemonic, bits<16> opcode,
   let AccessBytes = bytes;
 }
 
-class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
-  : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2),
+class StoreMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+                      AddressingMode mode = bdaddr12only>
+  : InstRS<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
+           mnemonic#"\t$R1, $R3, $BD2", []> {
+  let mayStore = 1;
+}
+
+class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+                       AddressingMode mode = bdaddr20only>
+  : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
             mnemonic#"\t$R1, $R3, $BD2", []> {
   let mayStore = 1;
 }
 
+multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode,
+                               bits<16> rsyOpcode, RegisterOperand cls> {
+  let DispKey = mnemonic ## #cls in {
+    let DispSize = "12" in
+      def "" : StoreMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
+    let DispSize = "20" in
+      def Y  : StoreMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>;
+  }
+}
+
 class StoreMultipleVRSa<string mnemonic, bits<16> opcode>
   : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2),
              mnemonic#"\t$V1, $V3, $BD2", []> {
@@ -1186,6 +1308,15 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
   let R4 = 0;
 }
 
+class CondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+                   Immediate imm>
+  : InstRIEd<opcode, (outs cls:$R1),
+                     (ins imm:$I2, cond4:$valid, cond4:$R3),
+             mnemonic#"$R3\t$R1, $I2", []>,
+    Requires<[FeatureLoadStoreOnCond2]> {
+  let CCMaskLast = 1;
+}
+
 // Like CondUnaryRRF, but used for the raw assembly form.  The condition-code
 // mask is the third operand rather than being part of the mnemonic.
 class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
@@ -1198,6 +1329,16 @@ class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
   let R4 = 0;
 }
 
+class AsmCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+                   Immediate imm>
+  : InstRIEd<opcode, (outs cls:$R1),
+                     (ins cls:$R1src, imm:$I2, imm32zx4:$R3),
+             mnemonic#"\t$R1, $I2, $R3", []>,
+    Requires<[FeatureLoadStoreOnCond2]> {
+  let Constraints = "$R1 = $R1src";
+  let DisableEncoding = "$R1src";
+}
+
 // Like CondUnaryRRF, but with a fixed CC mask.
 class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
                         RegisterOperand cls2, bits<4> ccmask>
@@ -1210,6 +1351,17 @@ class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
   let R4 = 0;
 }
 
+class FixedCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+                   Immediate imm, bits<4> ccmask>
+  : InstRIEd<opcode, (outs cls:$R1),
+                     (ins cls:$R1src, imm:$I2),
+             mnemonic#"\t$R1, $I2", []>,
+    Requires<[FeatureLoadStoreOnCond2]> {
+  let Constraints = "$R1 = $R1src";
+  let DisableEncoding = "$R1src";
+  let R3 = ccmask;
+}
+
 class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
               RegisterOperand cls, Immediate imm>
   : InstRI<opcode, (outs cls:$R1), (ins imm:$I2),
@@ -1391,9 +1543,9 @@ class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
 
 class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
                 RegisterOperand cls1, RegisterOperand cls2>
-  : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2),
+  : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3),
             mnemonic#"r\t$R1, $R3, $R2",
-            [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> {
+            [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> {
   let OpKey = mnemonic ## cls1;
   let OpType = "reg";
   let R4 = 0;
@@ -1874,6 +2026,14 @@ class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
   let M5 = 0;
 }
 
+class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+              RegisterOperand cls>
+  : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+            mnemonic#"\t$R1, $XBD2",
+            [(operator cls:$R1, bdxaddr12only:$XBD2)]> {
+  let M3 = 0;
+}
+
 class TernaryRRD<string mnemonic, bits<16> opcode,
                  SDPatternOperator operator, RegisterOperand cls>
   : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2),
@@ -1885,6 +2045,40 @@ class TernaryRRD<string mnemonic, bits<16> opcode,
   let DisableEncoding = "$R1src";
 }
 
+class TernaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+                bits<5> bytes, AddressingMode mode = bdaddr12only>
+  : InstRS<opcode, (outs cls:$R1),
+          (ins cls:$R1src, imm32zx4:$R3, mode:$BD2),
+           mnemonic#"\t$R1, $R3, $BD2", []> {
+
+  let Constraints = "$R1 = $R1src";
+  let DisableEncoding = "$R1src";
+  let mayLoad = 1;
+  let AccessBytes = bytes;
+}
+
+class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+                bits<5> bytes, AddressingMode mode = bdaddr20only>
+  : InstRSY<opcode, (outs cls:$R1),
+           (ins cls:$R1src, imm32zx4:$R3, mode:$BD2),
+            mnemonic#"\t$R1, $R3, $BD2", []> {
+
+  let Constraints = "$R1 = $R1src";
+  let DisableEncoding = "$R1src";
+  let mayLoad = 1;
+  let AccessBytes = bytes;
+}
+
+multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
+                         RegisterOperand cls, bits<5> bytes> {
+  let DispKey = mnemonic ## #cls in {
+    let DispSize = "12" in
+      def "" : TernaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
+    let DispSize = "20" in
+      def Y  : TernaryRSY<mnemonic#"y", rsyOpcode, cls, bytes, bdaddr20pair>;
+  }
+}
+
 class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
                  RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
   : InstRXF<opcode, (outs cls:$R1),
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index e6b5fc8e6235..4084e93e5acb 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -15,6 +15,7 @@
 #include "SystemZInstrBuilder.h"
 #include "SystemZTargetMachine.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 
 using namespace llvm;
@@ -54,7 +55,7 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
 
   // Get two load or store instructions.  Use the original instruction for one
   // of them (arbitrarily the second here) and create a clone for the other.
-  MachineInstr *EarlierMI = MF.CloneMachineInstr(MI);
+  MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI);
   MBB->insert(MI, EarlierMI);
 
   // Set up the two 64-bit registers.
@@ -69,8 +70,8 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
   MachineOperand &LowOffsetOp = MI->getOperand(2);
   LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
 
- // Clear the kill flags for the base and index registers in the first
- // instruction.
+  // Clear the kill flags for the base and index registers in the first
+  // instruction.
   EarlierMI->getOperand(1).setIsKill(false);
   EarlierMI->getOperand(3).setIsKill(false);
 
@@ -105,59 +106,89 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
 // and HighOpcode takes an unsigned 32-bit operand.  In those cases,
 // MI has the same kind of operand as LowOpcode, so needs to be converted
 // if HighOpcode is used.
-void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
+void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode,
                                       unsigned HighOpcode,
                                       bool ConvertHigh) const {
-  unsigned Reg = MI->getOperand(0).getReg();
+  unsigned Reg = MI.getOperand(0).getReg();
   bool IsHigh = isHighReg(Reg);
-  MI->setDesc(get(IsHigh ? HighOpcode : LowOpcode));
+  MI.setDesc(get(IsHigh ? HighOpcode : LowOpcode));
   if (IsHigh && ConvertHigh)
-    MI->getOperand(1).setImm(uint32_t(MI->getOperand(1).getImm()));
+    MI.getOperand(1).setImm(uint32_t(MI.getOperand(1).getImm()));
 }
 
 // MI is a three-operand RIE-style pseudo instruction.  Replace it with
 // LowOpcodeK if the registers are both low GR32s, otherwise use a move
 // followed by HighOpcode or LowOpcode, depending on whether the target
 // is a high or low GR32.
-void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
                                        unsigned LowOpcodeK,
                                        unsigned HighOpcode) const {
-  unsigned DestReg = MI->getOperand(0).getReg();
-  unsigned SrcReg = MI->getOperand(1).getReg();
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned SrcReg = MI.getOperand(1).getReg();
   bool DestIsHigh = isHighReg(DestReg);
   bool SrcIsHigh = isHighReg(SrcReg);
   if (!DestIsHigh && !SrcIsHigh)
-    MI->setDesc(get(LowOpcodeK));
+    MI.setDesc(get(LowOpcodeK));
   else {
-    emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
-                  DestReg, SrcReg, SystemZ::LR, 32,
-                  MI->getOperand(1).isKill());
-    MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
-    MI->getOperand(1).setReg(DestReg);
-    MI->tieOperands(0, 1);
+    emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
+                  SystemZ::LR, 32, MI.getOperand(1).isKill());
+    MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
+    MI.getOperand(1).setReg(DestReg);
+    MI.tieOperands(0, 1);
   }
 }
 
 // MI is an RXY-style pseudo instruction.  Replace it with LowOpcode
 // if the first operand is a low GR32 and HighOpcode if the first operand
 // is a high GR32.
-void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
                                        unsigned HighOpcode) const {
-  unsigned Reg = MI->getOperand(0).getReg();
+  unsigned Reg = MI.getOperand(0).getReg();
   unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode,
-                                       MI->getOperand(2).getImm());
-  MI->setDesc(get(Opcode));
+                                       MI.getOperand(2).getImm());
+  MI.setDesc(get(Opcode));
 }
 
 // MI is an RR-style pseudo instruction that zero-extends the low Size bits
 // of one GRX32 into another.  Replace it with LowOpcode if both operands
 // are low registers, otherwise use RISB[LH]G.
-void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
                                         unsigned Size) const {
-  emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
-                MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
-                LowOpcode, Size, MI->getOperand(1).isKill());
-  MI->eraseFromParent();
+  emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
+                MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
+                Size, MI.getOperand(1).isKill());
+  MI.eraseFromParent();
+}
+
+void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const unsigned Reg = MI->getOperand(0).getReg();
+
+  // Conveniently, all 4 instructions are cloned from LOAD_STACK_GUARD,
+  // so they already have operand 0 set to reg.
+
+  // ear <reg>, %a0
+  MachineInstr *Ear1MI = MF.CloneMachineInstr(MI);
+  MBB->insert(MI, Ear1MI);
+  Ear1MI->setDesc(get(SystemZ::EAR));
+  MachineInstrBuilder(MF, Ear1MI).addImm(0);
+
+  // sllg <reg>, <reg>, 32
+  MachineInstr *SllgMI = MF.CloneMachineInstr(MI);
+  MBB->insert(MI, SllgMI);
+  SllgMI->setDesc(get(SystemZ::SLLG));
+  MachineInstrBuilder(MF, SllgMI).addReg(Reg).addReg(0).addImm(32);
+
+  // ear <reg>, %a1
+  MachineInstr *Ear2MI = MF.CloneMachineInstr(MI);
+  MBB->insert(MI, Ear2MI);
+  Ear2MI->setDesc(get(SystemZ::EAR));
+  MachineInstrBuilder(MF, Ear2MI).addImm(1);
+
+  // lg <reg>, 40(<reg>)
+  MI->setDesc(get(SystemZ::LG));
+  MachineInstrBuilder(MF, MI).addReg(Reg).addImm(40).addReg(0);
 }
 
 // Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
@@ -167,7 +198,7 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
 // KillSrc is true if this move is the last use of SrcReg.
 void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
-                                     DebugLoc DL, unsigned DestReg,
+                                     const DebugLoc &DL, unsigned DestReg,
                                      unsigned SrcReg, unsigned LowLowOpcode,
                                      unsigned Size, bool KillSrc) const {
   unsigned Opcode;
@@ -196,45 +227,41 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
 // Return 0 otherwise.
 //
 // Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
-static int isSimpleMove(const MachineInstr *MI, int &FrameIndex,
+static int isSimpleMove(const MachineInstr &MI, int &FrameIndex,
                         unsigned Flag) {
-  const MCInstrDesc &MCID = MI->getDesc();
-  if ((MCID.TSFlags & Flag) &&
-      MI->getOperand(1).isFI() &&
-      MI->getOperand(2).getImm() == 0 &&
-      MI->getOperand(3).getReg() == 0) {
-    FrameIndex = MI->getOperand(1).getIndex();
-    return MI->getOperand(0).getReg();
+  const MCInstrDesc &MCID = MI.getDesc();
+  if ((MCID.TSFlags & Flag) && MI.getOperand(1).isFI() &&
+      MI.getOperand(2).getImm() == 0 && MI.getOperand(3).getReg() == 0) {
+    FrameIndex = MI.getOperand(1).getIndex();
+    return MI.getOperand(0).getReg();
   }
   return 0;
 }
 
-unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                                int &FrameIndex) const {
   return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad);
 }
 
-unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
   return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore);
 }
 
-bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI,
+bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr &MI,
                                        int &DestFrameIndex,
                                        int &SrcFrameIndex) const {
   // Check for MVC 0(Length,FI1),0(FI2)
-  const MachineFrameInfo *MFI = MI->getParent()->getParent()->getFrameInfo();
-  if (MI->getOpcode() != SystemZ::MVC ||
-      !MI->getOperand(0).isFI() ||
-      MI->getOperand(1).getImm() != 0 ||
-      !MI->getOperand(3).isFI() ||
-      MI->getOperand(4).getImm() != 0)
+  const MachineFrameInfo *MFI = MI.getParent()->getParent()->getFrameInfo();
+  if (MI.getOpcode() != SystemZ::MVC || !MI.getOperand(0).isFI() ||
+      MI.getOperand(1).getImm() != 0 || !MI.getOperand(3).isFI() ||
+      MI.getOperand(4).getImm() != 0)
     return false;
 
   // Check that Length covers the full slots.
-  int64_t Length = MI->getOperand(2).getImm();
-  unsigned FI1 = MI->getOperand(0).getIndex();
-  unsigned FI2 = MI->getOperand(3).getIndex();
+  int64_t Length = MI.getOperand(2).getImm();
+  unsigned FI1 = MI.getOperand(0).getIndex();
+  unsigned FI2 = MI.getOperand(3).getIndex();
   if (MFI->getObjectSize(FI1) != Length ||
       MFI->getObjectSize(FI2) != Length)
     return false;
@@ -244,7 +271,7 @@ bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI,
   return true;
 }
 
-bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                      MachineBasicBlock *&TBB,
                                      MachineBasicBlock *&FBB,
                                      SmallVectorImpl<MachineOperand> &Cond,
@@ -261,7 +288,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
     // Working from the bottom, when we see a non-terminator instruction, we're
     // done.
-    if (!isUnpredicatedTerminator(I))
+    if (!isUnpredicatedTerminator(*I))
       break;
 
     // A terminator that isn't a branch can't easily be handled by this
@@ -270,7 +297,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       return true;
 
     // Can't handle indirect branches.
-    SystemZII::Branch Branch(getBranchInfo(I));
+    SystemZII::Branch Branch(getBranchInfo(*I));
     if (!Branch.Target->isMBB())
       return true;
 
@@ -347,7 +374,7 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
       continue;
     if (!I->isBranch())
       break;
-    if (!getBranchInfo(I).Target->isMBB())
+    if (!getBranchInfo(*I).Target->isMBB())
       break;
     // Remove the branch.
     I->eraseFromParent();
@@ -365,11 +392,11 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   return false;
 }
 
-unsigned
-SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                               MachineBasicBlock *FBB,
-                               ArrayRef<MachineOperand> Cond,
-                               DebugLoc DL) const {
+unsigned SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+                                        MachineBasicBlock *TBB,
+                                        MachineBasicBlock *FBB,
+                                        ArrayRef<MachineOperand> Cond,
+                                        const DebugLoc &DL) const {
   // In this function we output 32-bit branches, which should always
   // have enough range.  They can be shortened and relaxed by later code
   // in the pipeline, if desired.
@@ -402,17 +429,16 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   return Count;
 }
 
-bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
-                                      unsigned &SrcReg, unsigned &SrcReg2,
-                                      int &Mask, int &Value) const {
-  assert(MI->isCompare() && "Caller should have checked for a comparison");
+bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+                                      unsigned &SrcReg2, int &Mask,
+                                      int &Value) const {
+  assert(MI.isCompare() && "Caller should have checked for a comparison");
 
-  if (MI->getNumExplicitOperands() == 2 &&
-      MI->getOperand(0).isReg() &&
-      MI->getOperand(1).isImm()) {
-    SrcReg = MI->getOperand(0).getReg();
+  if (MI.getNumExplicitOperands() == 2 && MI.getOperand(0).isReg() &&
+      MI.getOperand(1).isImm()) {
+    SrcReg = MI.getOperand(0).getReg();
     SrcReg2 = 0;
-    Value = MI->getOperand(1).getImm();
+    Value = MI.getOperand(1).getImm();
     Mask = ~0;
     return true;
   }
@@ -445,7 +471,7 @@ static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
 // the result of an IPM sequence whose input CC survives until Compare,
 // and whether Compare is therefore redundant.  Delete it and return
 // true if so.
-static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
+static bool removeIPMBasedCompare(MachineInstr &Compare, unsigned SrcReg,
                                   const MachineRegisterInfo *MRI,
                                   const TargetRegisterInfo *TRI) {
   MachineInstr *LGFR = nullptr;
@@ -466,16 +492,16 @@ static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
     return false;
 
   // Check that there are no assignments to CC between the IPM and Compare,
-  if (IPM->getParent() != Compare->getParent())
+  if (IPM->getParent() != Compare.getParent())
     return false;
-  MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare;
+  MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare.getIterator();
   for (++MBBI; MBBI != MBBE; ++MBBI) {
-    MachineInstr *MI = MBBI;
-    if (MI->modifiesRegister(SystemZ::CC, TRI))
+    MachineInstr &MI = *MBBI;
+    if (MI.modifiesRegister(SystemZ::CC, TRI))
       return false;
   }
 
-  Compare->eraseFromParent();
+  Compare.eraseFromParent();
   if (LGFR)
     eraseIfDead(LGFR, MRI);
   eraseIfDead(RLL, MRI);
@@ -485,13 +511,11 @@ static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
   return true;
 }
 
-bool
-SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
-                                       unsigned SrcReg, unsigned SrcReg2,
-                                       int Mask, int Value,
-                                       const MachineRegisterInfo *MRI) const {
+bool SystemZInstrInfo::optimizeCompareInstr(
+    MachineInstr &Compare, unsigned SrcReg, unsigned SrcReg2, int Mask,
+    int Value, const MachineRegisterInfo *MRI) const {
   assert(!SrcReg2 && "Only optimizing constant comparisons so far");
-  bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
+  bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0;
   return Value == 0 && !IsLogical &&
          removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
 }
@@ -506,15 +530,43 @@ static unsigned getConditionalMove(unsigned Opcode) {
   }
 }
 
-bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const {
-  unsigned Opcode = MI->getOpcode();
-  return STI.hasLoadStoreOnCond() && getConditionalMove(Opcode);
+static unsigned getConditionalLoadImmediate(unsigned Opcode) {
+  switch (Opcode) {
+  case SystemZ::LHI:  return SystemZ::LOCHI;
+  case SystemZ::LGHI: return SystemZ::LOCGHI;
+  default:           return 0;
+  }
+}
+
+bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const {
+  unsigned Opcode = MI.getOpcode();
+  if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode))
+    return true;
+  if (STI.hasLoadStoreOnCond2() && getConditionalLoadImmediate(Opcode))
+    return true;
+  if (Opcode == SystemZ::Return ||
+      Opcode == SystemZ::Trap ||
+      Opcode == SystemZ::CallJG ||
+      Opcode == SystemZ::CallBR)
+    return true;
+  return false;
 }
 
 bool SystemZInstrInfo::
 isProfitableToIfCvt(MachineBasicBlock &MBB,
                     unsigned NumCycles, unsigned ExtraPredCycles,
                     BranchProbability Probability) const {
+  // Avoid using conditional returns at the end of a loop (since then
+  // we'd need to emit an unconditional branch to the beginning anyway,
+  // making the loop body longer).  This doesn't apply for low-probability
+  // loops (eg. compare-and-swap retry), so just decide based on branch
+  // probability instead of looping structure.
+  // However, since Compare and Trap instructions cost the same as a regular
+  // Compare instruction, we should allow the if conversion to convert this
+  // into a Conditional Compare regardless of the branch probability.
+  if (MBB.getLastNonDebugInstr()->getOpcode() != SystemZ::Trap &&
+      MBB.succ_empty() && Probability < BranchProbability(1, 8))
+    return false;
   // For now only convert single instructions.
   return NumCycles == 1;
 }
@@ -530,27 +582,82 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
 }
 
 bool SystemZInstrInfo::
-PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
+isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                          BranchProbability Probability) const {
+  // For now only duplicate single instructions.
+  return NumCycles == 1;
+}
+
+bool SystemZInstrInfo::PredicateInstruction(
+    MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
   assert(Pred.size() == 2 && "Invalid condition");
   unsigned CCValid = Pred[0].getImm();
   unsigned CCMask = Pred[1].getImm();
   assert(CCMask > 0 && CCMask < 15 && "Invalid predicate");
-  unsigned Opcode = MI->getOpcode();
+  unsigned Opcode = MI.getOpcode();
   if (STI.hasLoadStoreOnCond()) {
     if (unsigned CondOpcode = getConditionalMove(Opcode)) {
-      MI->setDesc(get(CondOpcode));
-      MachineInstrBuilder(*MI->getParent()->getParent(), MI)
-        .addImm(CCValid).addImm(CCMask)
-        .addReg(SystemZ::CC, RegState::Implicit);
+      MI.setDesc(get(CondOpcode));
+      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+          .addImm(CCValid)
+          .addImm(CCMask)
+          .addReg(SystemZ::CC, RegState::Implicit);
       return true;
     }
   }
+  if (STI.hasLoadStoreOnCond2()) {
+    if (unsigned CondOpcode = getConditionalLoadImmediate(Opcode)) {
+      MI.setDesc(get(CondOpcode));
+      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+          .addImm(CCValid)
+          .addImm(CCMask)
+          .addReg(SystemZ::CC, RegState::Implicit);
+      return true;
+    }
+  }
+  if (Opcode == SystemZ::Trap) {
+    MI.setDesc(get(SystemZ::CondTrap));
+    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+      .addImm(CCValid).addImm(CCMask)
+      .addReg(SystemZ::CC, RegState::Implicit);
+    return true;
+  }
+  if (Opcode == SystemZ::Return) {
+    MI.setDesc(get(SystemZ::CondReturn));
+    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+      .addImm(CCValid).addImm(CCMask)
+      .addReg(SystemZ::CC, RegState::Implicit);
+    return true;
+  }
+  if (Opcode == SystemZ::CallJG) {
+    MachineOperand FirstOp = MI.getOperand(0);
+    const uint32_t *RegMask = MI.getOperand(1).getRegMask();
+    MI.RemoveOperand(1);
+    MI.RemoveOperand(0);
+    MI.setDesc(get(SystemZ::CallBRCL));
+    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+      .addImm(CCValid).addImm(CCMask)
+      .addOperand(FirstOp)
+      .addRegMask(RegMask)
+      .addReg(SystemZ::CC, RegState::Implicit);
+    return true;
+  }
+  if (Opcode == SystemZ::CallBR) {
+    const uint32_t *RegMask = MI.getOperand(0).getRegMask();
+    MI.RemoveOperand(0);
+    MI.setDesc(get(SystemZ::CallBCR));
+    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+      .addImm(CCValid).addImm(CCMask)
+      .addRegMask(RegMask)
+      .addReg(SystemZ::CC, RegState::Implicit);
+    return true;
+  }
   return false;
 }
 
 void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
-                                   DebugLoc DL, unsigned DestReg,
+                                   const DebugLoc &DL, unsigned DestReg,
                                    unsigned SrcReg, bool KillSrc) const {
   // Split 128-bit GPR moves into two 64-bit moves.  This handles ADDR128 too.
   if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
@@ -571,7 +678,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
     Opcode = SystemZ::LGR;
   else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
-    Opcode = SystemZ::LER;
+    // For z13 we prefer LDR over LER to avoid partial register dependencies.
+    Opcode = STI.hasVector() ? SystemZ::LDR32 : SystemZ::LER;
   else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg))
     Opcode = SystemZ::LDR;
   else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
@@ -654,6 +762,14 @@ static LogicOp interpretAndImmediate(unsigned Opcode) {
   }
 }
 
+static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) {
+  if (OldMI->registerDefIsDead(SystemZ::CC)) {
+    MachineOperand *CCDef = NewMI->findRegisterDefOperand(SystemZ::CC);
+    if (CCDef != nullptr)
+      CCDef->setIsDead(true);
+  }
+}
+
 // Used to return from convertToThreeAddress after replacing two-address
 // instruction OldMI with three-address instruction NewMI.
 static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
@@ -664,31 +780,29 @@ static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
     for (unsigned I = 1; I < NumOps; ++I) {
       MachineOperand &Op = OldMI->getOperand(I);
       if (Op.isReg() && Op.isKill())
-        LV->replaceKillInstruction(Op.getReg(), OldMI, NewMI);
+        LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI);
     }
   }
+  transferDeadCC(OldMI, NewMI);
   return NewMI;
 }
 
-MachineInstr *
-SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
-                                        MachineBasicBlock::iterator &MBBI,
-                                        LiveVariables *LV) const {
-  MachineInstr *MI = MBBI;
-  MachineBasicBlock *MBB = MI->getParent();
+MachineInstr *SystemZInstrInfo::convertToThreeAddress(
+    MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
+  MachineBasicBlock *MBB = MI.getParent();
   MachineFunction *MF = MBB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
 
-  unsigned Opcode = MI->getOpcode();
-  unsigned NumOps = MI->getNumOperands();
+  unsigned Opcode = MI.getOpcode();
+  unsigned NumOps = MI.getNumOperands();
 
   // Try to convert something like SLL into SLLK, if supported.
   // We prefer to keep the two-operand form where possible both
   // because it tends to be shorter and because some instructions
   // have memory forms that can be used during spilling.
   if (STI.hasDistinctOps()) {
-    MachineOperand &Dest = MI->getOperand(0);
-    MachineOperand &Src = MI->getOperand(1);
+    MachineOperand &Dest = MI.getOperand(0);
+    MachineOperand &Src = MI.getOperand(1);
     unsigned DestReg = Dest.getReg();
     unsigned SrcReg = Src.getReg();
     // AHIMux is only really a three-operand instruction when both operands
@@ -707,23 +821,23 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       // Create three address instruction without adding the implicit
       // operands. Those will instead be copied over from the original
       // instruction by the loop below.
-      MachineInstrBuilder MIB(*MF,
-                              MF->CreateMachineInstr(get(ThreeOperandOpcode),
-                                    MI->getDebugLoc(), /*NoImplicit=*/true));
+      MachineInstrBuilder MIB(
+          *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
+                                      /*NoImplicit=*/true));
       MIB.addOperand(Dest);
       // Keep the kill state, but drop the tied flag.
       MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
       // Keep the remaining operands as-is.
       for (unsigned I = 2; I < NumOps; ++I)
-        MIB.addOperand(MI->getOperand(I));
+        MIB.addOperand(MI.getOperand(I));
       MBB->insert(MI, MIB);
-      return finishConvertToThreeAddress(MI, MIB, LV);
+      return finishConvertToThreeAddress(&MI, MIB, LV);
     }
   }
 
   // Try to convert an AND into an RISBG-type instruction.
   if (LogicOp And = interpretAndImmediate(Opcode)) {
-    uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB;
+    uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB;
     // AND IMMEDIATE leaves the other bits of the register unchanged.
     Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
     unsigned Start, End;
@@ -739,36 +853,55 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
         Start &= 31;
         End &= 31;
       }
-      MachineOperand &Dest = MI->getOperand(0);
-      MachineOperand &Src = MI->getOperand(1);
+      MachineOperand &Dest = MI.getOperand(0);
+      MachineOperand &Src = MI.getOperand(1);
       MachineInstrBuilder MIB =
-        BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode))
-        .addOperand(Dest).addReg(0)
-        .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg())
-        .addImm(Start).addImm(End + 128).addImm(0);
-      return finishConvertToThreeAddress(MI, MIB, LV);
+          BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpcode))
+              .addOperand(Dest)
+              .addReg(0)
+              .addReg(Src.getReg(), getKillRegState(Src.isKill()),
+                      Src.getSubReg())
+              .addImm(Start)
+              .addImm(End + 128)
+              .addImm(0);
+      return finishConvertToThreeAddress(&MI, MIB, LV);
     }
   }
   return nullptr;
 }
 
 MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
-    MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
-    MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
+    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+    MachineBasicBlock::iterator InsertPt, int FrameIndex,
+    LiveIntervals *LIS) const {
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   unsigned Size = MFI->getObjectSize(FrameIndex);
-  unsigned Opcode = MI->getOpcode();
+  unsigned Opcode = MI.getOpcode();
 
   if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
-    if ((Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
-        isInt<8>(MI->getOperand(2).getImm()) &&
-        !MI->getOperand(3).getReg()) {
-      // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
-      return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
-                     get(SystemZ::AGSI))
-          .addFrameIndex(FrameIndex)
-          .addImm(0)
-          .addImm(MI->getOperand(2).getImm());
+    if (LIS != nullptr && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
+        isInt<8>(MI.getOperand(2).getImm()) && !MI.getOperand(3).getReg()) {
+
+      // Check CC liveness, since new instruction introduces a dead
+      // def of CC.
+      MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
+      LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit);
+      ++CCUnit;
+      assert (!CCUnit.isValid() && "CC only has one reg unit.");
+      SlotIndex MISlot =
+          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+      if (!CCLiveRange.liveAt(MISlot)) {
+        // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
+        MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt,
+                                        MI.getDebugLoc(), get(SystemZ::AGSI))
+                                    .addFrameIndex(FrameIndex)
+                                    .addImm(0)
+                                    .addImm(MI.getOperand(2).getImm());
+        BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true);
+        CCLiveRange.createDeadDef(MISlot, LIS->getVNInfoAllocator());
+        return BuiltMI;
+      }
     }
     return nullptr;
   }
@@ -778,20 +911,23 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
     return nullptr;
 
   unsigned OpNum = Ops[0];
-  assert(Size == MF.getRegInfo()
-         .getRegClass(MI->getOperand(OpNum).getReg())->getSize() &&
+  assert(Size ==
+             MF.getRegInfo()
+                 .getRegClass(MI.getOperand(OpNum).getReg())
+                 ->getSize() &&
          "Invalid size combination");
 
-  if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) &&
-      OpNum == 0 &&
-      isInt<8>(MI->getOperand(2).getImm())) {
+  if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 &&
+      isInt<8>(MI.getOperand(2).getImm())) {
     // A(G)HI %reg, CONST -> A(G)SI %mem, CONST
     Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI);
-    return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
-                   get(Opcode))
-        .addFrameIndex(FrameIndex)
-        .addImm(0)
-        .addImm(MI->getOperand(2).getImm());
+    MachineInstr *BuiltMI =
+        BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(Opcode))
+            .addFrameIndex(FrameIndex)
+            .addImm(0)
+            .addImm(MI.getOperand(2).getImm());
+    transferDeadCC(&MI, BuiltMI);
+    return BuiltMI;
   }
 
   if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
@@ -801,9 +937,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
     // source register instead.
     if (OpNum == 0) {
       unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD;
-      return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+      return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
                      get(StoreOpcode))
-          .addOperand(MI->getOperand(1))
+          .addOperand(MI.getOperand(1))
           .addFrameIndex(FrameIndex)
           .addImm(0)
           .addReg(0);
@@ -812,8 +948,8 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
     // destination register instead.
     if (OpNum == 1) {
       unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD;
-      unsigned Dest = MI->getOperand(0).getReg();
-      return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+      unsigned Dest = MI.getOperand(0).getReg();
+      return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
                      get(LoadOpcode), Dest)
           .addFrameIndex(FrameIndex)
           .addImm(0)
@@ -834,26 +970,26 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
   // might be equal.  We don't worry about that case here, because spill slot
   // coloring happens later, and because we have special code to remove
   // MVCs that turn out to be redundant.
-  if (OpNum == 0 && MI->hasOneMemOperand()) {
-    MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (OpNum == 0 && MI.hasOneMemOperand()) {
+    MachineMemOperand *MMO = *MI.memoperands_begin();
     if (MMO->getSize() == Size && !MMO->isVolatile()) {
       // Handle conversion of loads.
-      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) {
-        return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+      if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXLoad)) {
+        return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
                        get(SystemZ::MVC))
             .addFrameIndex(FrameIndex)
             .addImm(0)
             .addImm(Size)
-            .addOperand(MI->getOperand(1))
-            .addImm(MI->getOperand(2).getImm())
+            .addOperand(MI.getOperand(1))
+            .addImm(MI.getOperand(2).getImm())
             .addMemOperand(MMO);
       }
       // Handle conversion of stores.
-      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) {
-        return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+      if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) {
+        return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
                        get(SystemZ::MVC))
-            .addOperand(MI->getOperand(1))
-            .addImm(MI->getOperand(2).getImm())
+            .addOperand(MI.getOperand(1))
+            .addImm(MI.getOperand(2).getImm())
             .addImm(Size)
             .addFrameIndex(FrameIndex)
             .addImm(0)
@@ -866,7 +1002,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
   // into <INSN>.
   int MemOpcode = SystemZ::getMemOpcode(Opcode);
   if (MemOpcode >= 0) {
-    unsigned NumOps = MI->getNumExplicitOperands();
+    unsigned NumOps = MI.getNumExplicitOperands();
     if (OpNum == NumOps - 1) {
       const MCInstrDesc &MemDesc = get(MemOpcode);
       uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
@@ -874,12 +1010,13 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
       assert(AccessBytes <= Size && "Access outside the frame index");
       uint64_t Offset = Size - AccessBytes;
       MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
-                                        MI->getDebugLoc(), get(MemOpcode));
+                                        MI.getDebugLoc(), get(MemOpcode));
       for (unsigned I = 0; I < OpNum; ++I)
-        MIB.addOperand(MI->getOperand(I));
+        MIB.addOperand(MI.getOperand(I));
       MIB.addFrameIndex(FrameIndex).addImm(Offset);
       if (MemDesc.TSFlags & SystemZII::HasIndex)
         MIB.addReg(0);
+      transferDeadCC(&MI, MIB);
       return MIB;
     }
   }
@@ -888,14 +1025,14 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
 }
 
 MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
-    MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
-    MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
+    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+    MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+    LiveIntervals *LIS) const {
   return nullptr;
 }
 
-bool
-SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
-  switch (MI->getOpcode()) {
+bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
   case SystemZ::L128:
     splitMove(MI, SystemZ::LG);
     return true;
@@ -1033,13 +1170,13 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
     return true;
 
   case SystemZ::RISBMux: {
-    bool DestIsHigh = isHighReg(MI->getOperand(0).getReg());
-    bool SrcIsHigh = isHighReg(MI->getOperand(2).getReg());
+    bool DestIsHigh = isHighReg(MI.getOperand(0).getReg());
+    bool SrcIsHigh = isHighReg(MI.getOperand(2).getReg());
     if (SrcIsHigh == DestIsHigh)
-      MI->setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL));
+      MI.setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL));
     else {
-      MI->setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH));
-      MI->getOperand(5).setImm(MI->getOperand(5).getImm() ^ 32);
+      MI.setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH));
+      MI.getOperand(5).setImm(MI.getOperand(5).getImm() ^ 32);
     }
     return true;
   }
@@ -1048,62 +1185,65 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
     splitAdjDynAlloc(MI);
     return true;
 
+  case TargetOpcode::LOAD_STACK_GUARD:
+    expandLoadStackGuard(&MI);
+    return true;
+
   default:
     return false;
   }
 }
 
-uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const {
-  if (MI->getOpcode() == TargetOpcode::INLINEASM) {
-    const MachineFunction *MF = MI->getParent()->getParent();
-    const char *AsmStr = MI->getOperand(0).getSymbolName();
+uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+  if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+    const MachineFunction *MF = MI.getParent()->getParent();
+    const char *AsmStr = MI.getOperand(0).getSymbolName();
     return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
   }
-  return MI->getDesc().getSize();
+  return MI.getDesc().getSize();
 }
 
 SystemZII::Branch
-SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const {
-  switch (MI->getOpcode()) {
+SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
   case SystemZ::BR:
   case SystemZ::J:
   case SystemZ::JG:
     return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY,
-                             SystemZ::CCMASK_ANY, &MI->getOperand(0));
+                             SystemZ::CCMASK_ANY, &MI.getOperand(0));
 
   case SystemZ::BRC:
   case SystemZ::BRCL:
-    return SystemZII::Branch(SystemZII::BranchNormal,
-                             MI->getOperand(0).getImm(),
-                             MI->getOperand(1).getImm(), &MI->getOperand(2));
+    return SystemZII::Branch(SystemZII::BranchNormal, MI.getOperand(0).getImm(),
+                             MI.getOperand(1).getImm(), &MI.getOperand(2));
 
   case SystemZ::BRCT:
     return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP,
-                             SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
+                             SystemZ::CCMASK_CMP_NE, &MI.getOperand(2));
 
   case SystemZ::BRCTG:
     return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP,
-                             SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
+                             SystemZ::CCMASK_CMP_NE, &MI.getOperand(2));
 
   case SystemZ::CIJ:
   case SystemZ::CRJ:
     return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP,
-                             MI->getOperand(2).getImm(), &MI->getOperand(3));
+                             MI.getOperand(2).getImm(), &MI.getOperand(3));
 
   case SystemZ::CLIJ:
   case SystemZ::CLRJ:
     return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP,
-                             MI->getOperand(2).getImm(), &MI->getOperand(3));
+                             MI.getOperand(2).getImm(), &MI.getOperand(3));
 
   case SystemZ::CGIJ:
   case SystemZ::CGRJ:
     return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP,
-                             MI->getOperand(2).getImm(), &MI->getOperand(3));
+                             MI.getOperand(2).getImm(), &MI.getOperand(3));
 
   case SystemZ::CLGIJ:
   case SystemZ::CLGRJ:
     return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP,
-                             MI->getOperand(2).getImm(), &MI->getOperand(3));
+                             MI.getOperand(2).getImm(), &MI.getOperand(3));
 
   default:
     llvm_unreachable("Unrecognized branch opcode");
@@ -1250,28 +1390,107 @@ bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize,
   return false;
 }
 
-unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode,
-                                               const MachineInstr *MI) const {
+unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
+                                           SystemZII::FusedCompareType Type,
+                                           const MachineInstr *MI) const {
   switch (Opcode) {
-  case SystemZ::CR:
-    return SystemZ::CRJ;
-  case SystemZ::CGR:
-    return SystemZ::CGRJ;
   case SystemZ::CHI:
-    return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CIJ : 0;
   case SystemZ::CGHI:
-    return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CGIJ : 0;
-  case SystemZ::CLR:
-    return SystemZ::CLRJ;
-  case SystemZ::CLGR:
-    return SystemZ::CLGRJ;
+    if (!(MI && isInt<8>(MI->getOperand(1).getImm())))
+      return 0;
+    break;
   case SystemZ::CLFI:
-    return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLIJ : 0;
   case SystemZ::CLGFI:
-    return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLGIJ : 0;
-  default:
-    return 0;
+    if (!(MI && isUInt<8>(MI->getOperand(1).getImm())))
+      return 0;
   }
+  switch (Type) {
+  case SystemZII::CompareAndBranch:
+    switch (Opcode) {
+    case SystemZ::CR:
+      return SystemZ::CRJ;
+    case SystemZ::CGR:
+      return SystemZ::CGRJ;
+    case SystemZ::CHI:
+      return SystemZ::CIJ;
+    case SystemZ::CGHI:
+      return SystemZ::CGIJ;
+    case SystemZ::CLR:
+      return SystemZ::CLRJ;
+    case SystemZ::CLGR:
+      return SystemZ::CLGRJ;
+    case SystemZ::CLFI:
+      return SystemZ::CLIJ;
+    case SystemZ::CLGFI:
+      return SystemZ::CLGIJ;
+    default:
+      return 0;
+    }
+  case SystemZII::CompareAndReturn:
+    switch (Opcode) {
+    case SystemZ::CR:
+      return SystemZ::CRBReturn;
+    case SystemZ::CGR:
+      return SystemZ::CGRBReturn;
+    case SystemZ::CHI:
+      return SystemZ::CIBReturn;
+    case SystemZ::CGHI:
+      return SystemZ::CGIBReturn;
+    case SystemZ::CLR:
+      return SystemZ::CLRBReturn;
+    case SystemZ::CLGR:
+      return SystemZ::CLGRBReturn;
+    case SystemZ::CLFI:
+      return SystemZ::CLIBReturn;
+    case SystemZ::CLGFI:
+      return SystemZ::CLGIBReturn;
+    default:
+      return 0;
+    }
+  case SystemZII::CompareAndSibcall:
+    switch (Opcode) {
+    case SystemZ::CR:
+      return SystemZ::CRBCall;
+    case SystemZ::CGR:
+      return SystemZ::CGRBCall;
+    case SystemZ::CHI:
+      return SystemZ::CIBCall;
+    case SystemZ::CGHI:
+      return SystemZ::CGIBCall;
+    case SystemZ::CLR:
+      return SystemZ::CLRBCall;
+    case SystemZ::CLGR:
+      return SystemZ::CLGRBCall;
+    case SystemZ::CLFI:
+      return SystemZ::CLIBCall;
+    case SystemZ::CLGFI:
+      return SystemZ::CLGIBCall;
+    default:
+      return 0;
+    }
+  case SystemZII::CompareAndTrap:
+    switch (Opcode) {
+    case SystemZ::CR:
+      return SystemZ::CRT;
+    case SystemZ::CGR:
+      return SystemZ::CGRT;
+    case SystemZ::CHI:
+      return SystemZ::CIT;
+    case SystemZ::CGHI:
+      return SystemZ::CGIT;
+    case SystemZ::CLR:
+      return SystemZ::CLRT;
+    case SystemZ::CLGR:
+      return SystemZ::CLGRT;
+    case SystemZ::CLFI:
+      return SystemZ::CLFIT;
+    case SystemZ::CLGFI:
+      return SystemZ::CLGIT;
+    default:
+      return 0;
+    }
+  }
+  return 0;
 }
 
 void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index d9094ba93658..010010b89dc8 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -111,6 +111,22 @@ struct Branch {
          const MachineOperand *target)
     : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {}
 };
+// Kinds of fused compares in compare-and-* instructions.  Together with type
+// of the converted compare, this identifies the compare-and-*
+// instruction.
+enum FusedCompareType {
+  // Relative branch - CRJ etc.
+  CompareAndBranch,
+
+  // Indirect branch, used for return - CRBReturn etc.
+  CompareAndReturn,
+
+  // Indirect branch, used for sibcall - CRBCall etc.
+  CompareAndSibcall,
+
+  // Trap
+  CompareAndTrap
+};
 } // end namespace SystemZII
 
 class SystemZSubtarget;
@@ -120,16 +136,17 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
 
   void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
   void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
-  void expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
-                      unsigned HighOpcode, bool ConvertHigh) const;
-  void expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+  void expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode,
+                      bool ConvertHigh) const;
+  void expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
                        unsigned LowOpcodeK, unsigned HighOpcode) const;
-  void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+  void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
                        unsigned HighOpcode) const;
-  void expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+  void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
                         unsigned Size) const;
+  void expandLoadStackGuard(MachineInstr *MI) const;
   void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-                     DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+                     const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                      unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
   virtual void anchor();
   
@@ -137,26 +154,26 @@ public:
   explicit SystemZInstrInfo(SystemZSubtarget &STI);
 
   // Override TargetInstrInfo.
-  unsigned isLoadFromStackSlot(const MachineInstr *MI,
+  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
-  unsigned isStoreToStackSlot(const MachineInstr *MI,
+  unsigned isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
-  bool isStackSlotCopy(const MachineInstr *MI, int &DestFrameIndex,
+  bool isStackSlotCopy(const MachineInstr &MI, int &DestFrameIndex,
                        int &SrcFrameIndex) const override;
-  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
   unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
   unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        DebugLoc DL) const override;
-  bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+                        const DebugLoc &DL) const override;
+  bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                       unsigned &SrcReg2, int &Mask, int &Value) const override;
-  bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+  bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
                             unsigned SrcReg2, int Mask, int Value,
                             const MachineRegisterInfo *MRI) const override;
-  bool isPredicable(MachineInstr *MI) const override;
+  bool isPredicable(MachineInstr &MI) const override;
   bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
                            unsigned ExtraPredCycles,
                            BranchProbability Probability) const override;
@@ -165,10 +182,12 @@ public:
                            MachineBasicBlock &FMBB,
                            unsigned NumCyclesF, unsigned ExtraPredCyclesF,
                            BranchProbability Probability) const override;
-  bool PredicateInstruction(MachineInstr *MI,
+  bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                            BranchProbability Probability) const override;
+  bool PredicateInstruction(MachineInstr &MI,
                             ArrayRef<MachineOperand> Pred) const override;
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-                   DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+                   const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                    bool KillSrc) const override;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
@@ -181,17 +200,18 @@ public:
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const override;
   MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
-                                      MachineBasicBlock::iterator &MBBI,
+                                      MachineInstr &MI,
                                       LiveVariables *LV) const override;
-  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
-                                      ArrayRef<unsigned> Ops,
-                                      MachineBasicBlock::iterator InsertPt,
-                                      int FrameIndex) const override;
-  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
-                                      ArrayRef<unsigned> Ops,
-                                      MachineBasicBlock::iterator InsertPt,
-                                      MachineInstr *LoadMI) const override;
-  bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override;
+  MachineInstr *
+  foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
+                        ArrayRef<unsigned> Ops,
+                        MachineBasicBlock::iterator InsertPt, int FrameIndex,
+                        LiveIntervals *LIS = nullptr) const override;
+  MachineInstr *foldMemoryOperandImpl(
+      MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+      MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+      LiveIntervals *LIS = nullptr) const override;
+  bool expandPostRAPseudo(MachineInstr &MBBI) const override;
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
     override;
 
@@ -199,14 +219,14 @@ public:
   const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
 
   // Return the size in bytes of MI.
-  uint64_t getInstSizeInBytes(const MachineInstr *MI) const;
+  uint64_t getInstSizeInBytes(const MachineInstr &MI) const;
 
   // Return true if MI is a conditional or unconditional branch.
   // When returning true, set Cond to the mask of condition-code
   // values on which the instruction will branch, and set Target
   // to the operand that contains the branch target.  This target
   // can be a register or a basic block.
-  SystemZII::Branch getBranchInfo(const MachineInstr *MI) const;
+  SystemZII::Branch getBranchInfo(const MachineInstr &MI) const;
 
   // Get the load and store opcodes for a given register class.
   void getLoadStoreOpcodes(const TargetRegisterClass *RC,
@@ -229,11 +249,12 @@ public:
   bool isRxSBGMask(uint64_t Mask, unsigned BitSize,
                    unsigned &Start, unsigned &End) const;
 
-  // If Opcode is a COMPARE opcode for which an associated COMPARE AND
-  // BRANCH exists, return the opcode for the latter, otherwise return 0.
+  // If Opcode is a COMPARE opcode for which an associated fused COMPARE AND *
+  // operation exists, return the opcode for the latter, otherwise return 0.
   // MI, if nonnull, is the compare instruction.
-  unsigned getCompareAndBranch(unsigned Opcode,
-                               const MachineInstr *MI = nullptr) const;
+  unsigned getFusedCompare(unsigned Opcode,
+                           SystemZII::FusedCompareType Type,
+                           const MachineInstr *MI = nullptr) const;
 
   // Emit code before MBBI in MI to move immediate value Value into
   // physical register Reg.
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index d5dabc2cd6ab..c510ca774be3 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -36,6 +36,22 @@ let hasSideEffects = 0 in {
 let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
   def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
 
+// A conditional return instruction (bcr <cond>, %r14).
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in
+  def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+
+// Fused compare and conditional returns.
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in {
+  def CRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+  def CGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+  def CIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
+  def CGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
+  def CLRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+  def CLGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+  def CLIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
+  def CLGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
+}
+
 // Unconditional branches.  R1 is the condition-code mask (all 1s).
 let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
   let isIndirectBranch = 1 in
@@ -51,6 +67,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
   def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg\t$I2", []>;
 }
 
+// FIXME: This trap instruction should be marked as isTerminator, but there is
+// currently a general bug that allows non-terminators to be placed between
+// terminators. Temporarily leave this unmarked until the bug is fixed.
+let isBarrier = 1, hasCtrlDep = 1 in {
+  def Trap : Alias<4, (outs), (ins), [(trap)]>;
+}
+
+let isTerminator = 1, hasCtrlDep = 1, Uses = [CC] in {
+  def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>;
+}
+
 // Conditional branches.  It's easier for LLVM to handle these branches
 // in their raw BRC/BRCL form, with the 4-bit condition-code mask being
 // the first operand.  It seems friendlier to use mnemonic forms like
@@ -62,15 +89,25 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
                      [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>;
     def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1,
                                            brtarget32:$I2), "jg$R1\t$I2", []>;
+    let isIndirectBranch = 1 in
+      def BCR : InstRR<0x07, (outs), (ins cond4:$valid, cond4:$R1, GR64:$R2),
+                       "b${R1}r\t$R2", []>;
   }
   def AsmBRC : InstRI<0xA74, (outs), (ins imm32zx4:$R1, brtarget16:$I2),
                       "brc\t$R1, $I2", []>;
   def AsmBRCL : InstRIL<0xC04, (outs), (ins imm32zx4:$R1, brtarget32:$I2),
                         "brcl\t$R1, $I2", []>;
-  def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2),
-                      "bcr\t$R1, $R2", []>;
+  let isIndirectBranch = 1 in {
+    def AsmBC : InstRX<0x47, (outs), (ins imm32zx4:$R1, bdxaddr12only:$XBD2),
+                       "bc\t$R1, $XBD2", []>;
+    def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2),
+                        "bcr\t$R1, $R2", []>;
+  }
 }
 
+def AsmNop  : InstAlias<"nop\t$XBD", (AsmBC 0, bdxaddr12only:$XBD), 0>;
+def AsmNopR : InstAlias<"nopr\t$R", (AsmBCR 0, GR64:$R), 0>;
+
 // Fused compare-and-branch instructions.  As for normal branches,
 // we handle these instructions internally in their raw CRJ-like form,
 // but use assembly macros like CRJE when writing them out.
@@ -83,38 +120,83 @@ multiclass CompareBranches<Operand ccmask, string pos1, string pos2> {
   let isBranch = 1, isTerminator = 1, Defs = [CC] in {
     def RJ  : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
                                             brtarget16:$RI4),
-                       "crj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+                       "crj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
     def GRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
                                             brtarget16:$RI4),
-                       "cgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+                       "cgrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
     def IJ  : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3,
                                             brtarget16:$RI4),
-                       "cij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+                       "cij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
     def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3,
                                             brtarget16:$RI4),
-                       "cgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+                       "cgij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
     def LRJ  : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
                                              brtarget16:$RI4),
-                        "clrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+                        "clrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
     def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
                                              brtarget16:$RI4),
-                        "clgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+                        "clgrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
     def LIJ  : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3,
                                              brtarget16:$RI4),
-                        "clij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+                        "clij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
     def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3,
                                              brtarget16:$RI4),
-                        "clgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+                        "clgij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
+    let isIndirectBranch = 1 in {
+      def RB  : InstRRS<0xECF6, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
+                                             bdaddr12only:$BD4),
+                        "crb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
+      def GRB : InstRRS<0xECE4, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
+                                             bdaddr12only:$BD4),
+                        "cgrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
+      def IB  : InstRIS<0xECFE, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3,
+                                             bdaddr12only:$BD4),
+                        "cib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
+      def GIB : InstRIS<0xECFC, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3,
+                                             bdaddr12only:$BD4),
+                        "cgib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
+      def LRB  : InstRRS<0xECF7, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
+                                              bdaddr12only:$BD4),
+                         "clrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
+      def LGRB : InstRRS<0xECE5, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
+                                              bdaddr12only:$BD4),
+                         "clgrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
+      def LIB  : InstRIS<0xECFF, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3,
+                                              bdaddr12only:$BD4),
+                         "clib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
+      def LGIB : InstRIS<0xECFD, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3,
+                                              bdaddr12only:$BD4),
+                         "clgib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
+    }
+  }
+
+  let isTerminator = 1, hasCtrlDep = 1 in {
+    def RT   : InstRRFc<0xB972, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3),
+                        "crt"##pos1##"\t$R1, $R2"##pos2, []>;
+    def GRT  : InstRRFc<0xB960, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3),
+                        "cgrt"##pos1##"\t$R1, $R2"##pos2, []>;
+    def LRT  : InstRRFc<0xB973, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3),
+                        "clrt"##pos1##"\t$R1, $R2"##pos2, []>;
+    def LGRT : InstRRFc<0xB961, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3),
+                        "clgrt"##pos1##"\t$R1, $R2"##pos2, []>;
+    def IT   : InstRIEa<0xEC72, (outs), (ins GR32:$R1, imm32sx16:$I2, ccmask:$M3),
+                         "cit"##pos1##"\t$R1, $I2"##pos2, []>;
+    def GIT  : InstRIEa<0xEC70, (outs), (ins GR64:$R1, imm32sx16:$I2, ccmask:$M3),
+                         "cgit"##pos1##"\t$R1, $I2"##pos2, []>;
+    def LFIT : InstRIEa<0xEC73, (outs), (ins GR32:$R1, imm32zx16:$I2, ccmask:$M3),
+                         "clfit"##pos1##"\t$R1, $I2"##pos2, []>;
+    def LGIT : InstRIEa<0xEC71, (outs), (ins GR64:$R1, imm32zx16:$I2, ccmask:$M3),
+                         "clgit"##pos1##"\t$R1, $I2"##pos2, []>;
   }
 }
 let isCodeGenOnly = 1 in
   defm C : CompareBranches<cond4, "$M3", "">;
-defm AsmC : CompareBranches<imm32zx4, "", "$M3, ">;
+defm AsmC : CompareBranches<imm32zx4, "", ", $M3">;
 
 // Define AsmParser mnemonics for each general condition-code mask
 // (integer or floating-point)
-multiclass CondExtendedMnemonic<bits<4> ccmask, string name> {
-  let R1 = ccmask in {
+multiclass CondExtendedMnemonicA<bits<4> ccmask, string name> {
+  let isBranch = 1, isTerminator = 1, R1 = ccmask in {
     def J : InstRI<0xA74, (outs), (ins brtarget16:$I2),
                    "j"##name##"\t$I2", []>;
     def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2),
@@ -123,25 +205,36 @@ multiclass CondExtendedMnemonic<bits<4> ccmask, string name> {
   }
   def LOCR  : FixedCondUnaryRRF<"locr"##name,  0xB9F2, GR32, GR32, ccmask>;
   def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>;
+  def LOCHI : FixedCondUnaryRIE<"lochi"##name,  0xEC42, GR64, imm32sx16,
+                                ccmask>;
+  def LOCGHI: FixedCondUnaryRIE<"locghi"##name, 0xEC46, GR64, imm64sx16,
+                                ccmask>;
   def LOC   : FixedCondUnaryRSY<"loc"##name,   0xEBF2, GR32, ccmask, 4>;
   def LOCG  : FixedCondUnaryRSY<"locg"##name,  0xEBE2, GR64, ccmask, 8>;
   def STOC  : FixedCondStoreRSY<"stoc"##name,  0xEBF3, GR32, ccmask, 4>;
   def STOCG : FixedCondStoreRSY<"stocg"##name, 0xEBE3, GR64, ccmask, 8>;
 }
-defm AsmO   : CondExtendedMnemonic<1,  "o">;
-defm AsmH   : CondExtendedMnemonic<2,  "h">;
-defm AsmNLE : CondExtendedMnemonic<3,  "nle">;
-defm AsmL   : CondExtendedMnemonic<4,  "l">;
-defm AsmNHE : CondExtendedMnemonic<5,  "nhe">;
-defm AsmLH  : CondExtendedMnemonic<6,  "lh">;
-defm AsmNE  : CondExtendedMnemonic<7,  "ne">;
-defm AsmE   : CondExtendedMnemonic<8,  "e">;
-defm AsmNLH : CondExtendedMnemonic<9,  "nlh">;
-defm AsmHE  : CondExtendedMnemonic<10, "he">;
-defm AsmNL  : CondExtendedMnemonic<11, "nl">;
-defm AsmLE  : CondExtendedMnemonic<12, "le">;
-defm AsmNH  : CondExtendedMnemonic<13, "nh">;
-defm AsmNO  : CondExtendedMnemonic<14, "no">;
+
+multiclass CondExtendedMnemonic<bits<4> ccmask, string name1, string name2>
+  : CondExtendedMnemonicA<ccmask, name1> {
+  let isAsmParserOnly = 1 in
+    defm Alt : CondExtendedMnemonicA<ccmask, name2>;
+}
+
+defm AsmO   : CondExtendedMnemonicA<1,  "o">;
+defm AsmH   : CondExtendedMnemonic<2,  "h", "p">;
+defm AsmNLE : CondExtendedMnemonicA<3,  "nle">;
+defm AsmL   : CondExtendedMnemonic<4,  "l", "m">;
+defm AsmNHE : CondExtendedMnemonicA<5,  "nhe">;
+defm AsmLH  : CondExtendedMnemonicA<6,  "lh">;
+defm AsmNE  : CondExtendedMnemonic<7,  "ne", "nz">;
+defm AsmE   : CondExtendedMnemonic<8,  "e", "z">;
+defm AsmNLH : CondExtendedMnemonicA<9,  "nlh">;
+defm AsmHE  : CondExtendedMnemonicA<10, "he">;
+defm AsmNL  : CondExtendedMnemonic<11, "nl", "nm">;
+defm AsmLE  : CondExtendedMnemonicA<12, "le">;
+defm AsmNH  : CondExtendedMnemonic<13, "nh", "np">;
+defm AsmNO  : CondExtendedMnemonicA<14, "no">;
 
 // Define AsmParser mnemonics for each integer condition-code mask.
 // This is like the list above, except that condition 3 is not possible
@@ -151,31 +244,76 @@ defm AsmNO  : CondExtendedMnemonic<14, "no">;
 // We don't make one of the two names an alias of the other because
 // we need the custom parsing routines to select the correct register class.
 multiclass IntCondExtendedMnemonicA<bits<4> ccmask, string name> {
-  let M3 = ccmask in {
-    def CR  : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2,
-                                            brtarget16:$RI4),
-                       "crj"##name##"\t$R1, $R2, $RI4", []>;
-    def CGR : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2,
-                                            brtarget16:$RI4),
-                       "cgrj"##name##"\t$R1, $R2, $RI4", []>;
-    def CI  : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2,
-                                            brtarget16:$RI4),
-                       "cij"##name##"\t$R1, $I2, $RI4", []>;
-    def CGI : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2,
-                                            brtarget16:$RI4),
-                       "cgij"##name##"\t$R1, $I2, $RI4", []>;
-    def CLR  : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2,
-                                            brtarget16:$RI4),
-                        "clrj"##name##"\t$R1, $R2, $RI4", []>;
-    def CLGR : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2,
+  let isBranch = 1, isTerminator = 1, M3 = ccmask in {
+    def CRJ  : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2,
+                                             brtarget16:$RI4),
+                        "crj"##name##"\t$R1, $R2, $RI4", []>;
+    def CGRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2,
+                                             brtarget16:$RI4),
+                        "cgrj"##name##"\t$R1, $R2, $RI4", []>;
+    def CIJ  : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2,
                                              brtarget16:$RI4),
-                        "clgrj"##name##"\t$R1, $R2, $RI4", []>;
-    def CLI  : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2,
+                        "cij"##name##"\t$R1, $I2, $RI4", []>;
+    def CGIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2,
                                              brtarget16:$RI4),
-                        "clij"##name##"\t$R1, $I2, $RI4", []>;
-    def CLGI : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2,
+                        "cgij"##name##"\t$R1, $I2, $RI4", []>;
+    def CLRJ  : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2,
                                              brtarget16:$RI4),
-                        "clgij"##name##"\t$R1, $I2, $RI4", []>;
+                         "clrj"##name##"\t$R1, $R2, $RI4", []>;
+    def CLGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2,
+                                              brtarget16:$RI4),
+                         "clgrj"##name##"\t$R1, $R2, $RI4", []>;
+    def CLIJ  : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2,
+                                              brtarget16:$RI4),
+                         "clij"##name##"\t$R1, $I2, $RI4", []>;
+    def CLGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2,
+                                              brtarget16:$RI4),
+                         "clgij"##name##"\t$R1, $I2, $RI4", []>;
+    let isIndirectBranch = 1 in {
+      def CRB  : InstRRS<0xECF6, (outs), (ins GR32:$R1, GR32:$R2,
+                                              bdaddr12only:$BD4),
+                         "crb"##name##"\t$R1, $R2, $BD4", []>;
+      def CGRB : InstRRS<0xECE4, (outs), (ins GR64:$R1, GR64:$R2,
+                                              bdaddr12only:$BD4),
+                         "cgrb"##name##"\t$R1, $R2, $BD4", []>;
+      def CIB  : InstRIS<0xECFE, (outs), (ins GR32:$R1, imm32sx8:$I2,
+                                              bdaddr12only:$BD4),
+                         "cib"##name##"\t$R1, $I2, $BD4", []>;
+      def CGIB : InstRIS<0xECFC, (outs), (ins GR64:$R1, imm64sx8:$I2,
+                                              bdaddr12only:$BD4),
+                         "cgib"##name##"\t$R1, $I2, $BD4", []>;
+      def CLRB  : InstRRS<0xECF7, (outs), (ins GR32:$R1, GR32:$R2,
+                                              bdaddr12only:$BD4),
+                          "clrb"##name##"\t$R1, $R2, $BD4", []>;
+      def CLGRB : InstRRS<0xECE5, (outs), (ins GR64:$R1, GR64:$R2,
+                                               bdaddr12only:$BD4),
+                          "clgrb"##name##"\t$R1, $R2, $BD4", []>;
+      def CLIB  : InstRIS<0xECFF, (outs), (ins GR32:$R1, imm32zx8:$I2,
+                                               bdaddr12only:$BD4),
+                          "clib"##name##"\t$R1, $I2, $BD4", []>;
+      def CLGIB : InstRIS<0xECFD, (outs), (ins GR64:$R1, imm64zx8:$I2,
+                                               bdaddr12only:$BD4),
+                          "clgib"##name##"\t$R1, $I2, $BD4", []>;
+    }
+  }
+
+  let hasCtrlDep = 1, isTerminator = 1, M3 = ccmask in {
+      def CRT   : InstRRFc<0xB972, (outs), (ins GR32:$R1, GR32:$R2),
+                          "crt"##name##"\t$R1, $R2", []>;
+      def CGRT  : InstRRFc<0xB960, (outs), (ins GR64:$R1, GR64:$R2),
+                          "cgrt"##name##"\t$R1, $R2", []>;
+      def CLRT  : InstRRFc<0xB973, (outs), (ins GR32:$R1, GR32:$R2),
+                          "clrt"##name##"\t$R1, $R2", []>;
+      def CLGRT : InstRRFc<0xB961, (outs), (ins GR64:$R1, GR64:$R2),
+                          "clgrt"##name##"\t$R1, $R2", []>;
+      def CIT   : InstRIEa<0xEC72, (outs), (ins GR32:$R1, imm32sx16:$I2),
+                           "cit"##name##"\t$R1, $I2", []>;
+      def CGIT  : InstRIEa<0xEC70, (outs), (ins GR64:$R1, imm32sx16:$I2),
+                           "cgit"##name##"\t$R1, $I2", []>;
+      def CLFIT : InstRIEa<0xEC73, (outs), (ins GR32:$R1, imm32zx16:$I2),
+                           "clfit"##name##"\t$R1, $I2", []>;
+      def CLGIT : InstRIEa<0xEC71, (outs), (ins GR64:$R1, imm32zx16:$I2),
+                           "clgit"##name##"\t$R1, $I2", []>;
   }
 }
 multiclass IntCondExtendedMnemonic<bits<4> ccmask, string name1, string name2>
@@ -249,6 +387,26 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
     def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
 }
 
+let CCMaskFirst = 1, isCall = 1, isTerminator = 1, isReturn = 1 in {
+  def CallBRCL : Alias<6, (outs), (ins cond4:$valid, cond4:$R1,
+                                   pcrel32:$I2), []>;
+
+  let Uses = [R1D] in
+    def CallBCR : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+}
+
+// Fused compare and conditional sibling calls.
+let isCall = 1, isTerminator = 1, isReturn = 1, Uses = [R1D] in {
+  def CRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+  def CGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+  def CIBCall : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
+  def CGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
+  def CLRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+  def CLGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+  def CLIBCall : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
+  def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
+}
+
 // TLS calls.  These will be lowered into a call to __tls_get_offset,
 // with an extra relocation specifying the TLS symbol.
 let isCall = 1, Defs = [R14D, CC] in {
@@ -261,12 +419,14 @@ let isCall = 1, Defs = [R14D, CC] in {
 // Define the general form of the call instructions for the asm parser.
 // These instructions don't hard-code %r14 as the return address register.
 // Allow an optional TLS marker symbol to generate TLS call relocations.
-def BRAS  : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2),
-                   "bras\t$R1, $I2", []>;
-def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2),
-                    "brasl\t$R1, $I2", []>;
-def BASR  : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
-                   "basr\t$R1, $R2", []>;
+let isCall = 1, Defs = [CC] in {
+  def BRAS  : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2),
+                     "bras\t$R1, $I2", []>;
+  def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2),
+                      "brasl\t$R1, $I2", []>;
+  def BASR  : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
+                     "basr\t$R1, $R2", []>;
+}
 
 //===----------------------------------------------------------------------===//
 // Move instructions
@@ -294,6 +454,14 @@ let Uses = [CC] in {
   def AsmLOCR  : AsmCondUnaryRRF<"loc",  0xB9F2, GR32, GR32>;
   def AsmLOCGR : AsmCondUnaryRRF<"locg", 0xB9E2, GR64, GR64>;
 }
+let isCodeGenOnly = 1, Uses = [CC] in {
+  def LOCHI  : CondUnaryRIE<"lochi",  0xEC42, GR32, imm32sx16>;
+  def LOCGHI : CondUnaryRIE<"locghi", 0xEC46, GR64, imm64sx16>;
+}
+let Uses = [CC] in {
+  def AsmLOCHI  : AsmCondUnaryRIE<"lochi",  0xEC42, GR32, imm32sx16>;
+  def AsmLOCGHI : AsmCondUnaryRIE<"locghi", 0xEC46, GR64, imm64sx16>;
+}
 
 // Immediate moves.
 let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
@@ -546,10 +714,14 @@ def  : StoreGR64PC<STRL, aligned_truncstorei32>;
 //===----------------------------------------------------------------------===//
 
 // Multi-register loads.
+defm LM : LoadMultipleRSPair<"lm", 0x98, 0xEB98, GR32>;
 def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>;
+def LMH : LoadMultipleRSY<"lmh", 0xEB96, GRH32>;
 
 // Multi-register stores.
+defm STM : StoreMultipleRSPair<"stm", 0x90, 0xEB90, GR32>;
 def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>;
+def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>;
 
 //===----------------------------------------------------------------------===//
 // Byte swaps
@@ -563,13 +735,14 @@ let hasSideEffects = 0 in {
 
 // Byte-swapping loads.  Unlike normal loads, these instructions are
 // allowed to access storage more than once.
-def LRV  : UnaryRXY<"lrv",  0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>;
-def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>;
+def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>;
+def LRV  : UnaryRXY<"lrv",  0xE31E, z_lrv,  GR32, 4>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>;
 
 // Likewise byte-swapping stores.
-def STRV  : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>;
-def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>,
-                     GR64, 8>;
+def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>;
+def STRV  : StoreRXY<"strv",  0xE33E, z_strv,  GR32, 4>;
+def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;
 
 //===----------------------------------------------------------------------===//
 // Load address instructions
@@ -657,6 +830,11 @@ defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>;
 defm : InsertMem<"inserti8", IC,  GR64, azextloadi8, bdxaddr12pair>;
 defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>;
 
+let Defs = [CC] in {
+  defm ICM : TernaryRSPair<"icm", 0xBF, 0xEB81, GR32, 0>;
+  def ICMH : TernaryRSY<"icmh", 0xEB80, GRH32, 0>;
+}
+
 // Insertions of a 16-bit immediate, leaving other bits unaffected.
 // We don't have or_as_insert equivalents of these operations because
 // OI is available instead.
@@ -812,7 +990,7 @@ defm : ZXB<subc, GR64, SLGFR>;
 let Defs = [CC], Uses = [CC] in {
   // Subtraction of a register.
   def SLBR  : BinaryRRE<"slb",  0xB999, sube, GR32, GR32>;
-  def SLGBR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>;
+  def SLBGR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>;
 
   // Subtraction of memory.
   def SLB  : BinaryRXY<"slb",  0xE399, sube, GR32, load, 4>;
@@ -865,7 +1043,7 @@ let Defs = [CC] in {
   // ANDs of memory.
   let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
     defm N  : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
-    def  NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; 
+    def  NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
   }
 
   // AND to memory
@@ -1030,6 +1208,7 @@ def DLG  : BinaryRXY<"dlg",  0xE387, z_udivrem64, GR128, load, 8>;
 // Shift left.
 let hasSideEffects = 0 in {
   defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+  defm SLA : BinaryRSAndK<"sla", 0x8B, 0xEBDD, null_frag, GR32>;
   def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
 }
 
@@ -1208,6 +1387,9 @@ let Defs = [CC] in {
   defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>;
 }
 
+def TML : InstAlias<"tml\t$R, $I", (TMLL GR32:$R, imm32ll16:$I), 0>;
+def TMH : InstAlias<"tmh\t$R, $I", (TMLH GR32:$R, imm32lh16:$I), 0>;
+
 //===----------------------------------------------------------------------===//
 // Prefetch
 //===----------------------------------------------------------------------===//
@@ -1224,6 +1406,10 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
 let hasSideEffects = 1 in
 def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
 
+// A pseudo instruction that serves as a compiler barrier.
+let hasSideEffects = 1 in
+def MemBarrier : Pseudo<(outs), (ins), [(z_membarrier)]>;
+
 let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
   def LAA   : LoadAndOpRSY<"laa",   0xEBF8, atomic_load_add_32, GR32>;
   def LAAG  : LoadAndOpRSY<"laag",  0xEBE8, atomic_load_add_64, GR64>;
@@ -1466,6 +1652,10 @@ let mayLoad = 1, Defs = [CC] in
   defm SRST : StringRRE<"srst", 0xb25e, z_search_string>;
 
 // Other instructions for inline assembly
+let hasSideEffects = 1, Defs = [CC], isCall = 1 in
+  def SVC : InstI<0x0A, (outs), (ins imm32zx8:$I1),
+                  "svc\t$I1",
+                  []>;
 let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
   def STCK : InstS<0xB205, (outs), (ins bdaddr12only:$BD2),
                        "stck\t$BD2",
@@ -1483,6 +1673,12 @@ let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
                        "stfle\t$BD2",
                        []>;
 
+let hasSideEffects = 1 in {
+  def EX   : InstRX<0x44, (outs), (ins GR64:$R1, bdxaddr12only:$XBD2),
+                  "ex\t$R1, $XBD2", []>;
+  def EXRL : InstRIL<0xC60, (outs), (ins GR64:$R1, pcrel32:$I2),
+                     "exrl\t$R1, $I2", []>;
+}
 
 
 //===----------------------------------------------------------------------===//
@@ -1515,6 +1711,42 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid,
                (i32 63)),
           (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
 
+// Avoid generating 2 XOR instructions. (xor (and x, y), y) is
+// equivalent to (and (xor x, -1), y)
+def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
+                          (XGR GR64:$y, (NGR GR64:$y, GR64:$x))>;
+
+// Shift/rotate instructions only use the last 6 bits of the second operand
+// register, so we can safely use NILL (16 fewer bits than NILF) to only AND the
+// last 16 bits.
+// Complexity is added so that we match this before we match NILF on the AND
+// operation alone.
+let AddedComplexity = 4 in {
+  def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+}
+
 // Peepholes for turning scalar operations into block operations.
 defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
                       XCSequence, 1>;
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
index 24165be29ae7..2cdf2f9bf990 100644
--- a/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -64,6 +64,9 @@ void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
+  if (skipFunction(*F.getFunction()))
+    return false;
+
   TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
   MF = &F;
 
@@ -92,9 +95,9 @@ bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node,
     switch (I->getOpcode()) {
       case SystemZ::TLS_LDCALL:
         if (TLSBaseAddrReg)
-          I = ReplaceTLSCall(I, TLSBaseAddrReg);
+          I = ReplaceTLSCall(&*I, TLSBaseAddrReg);
         else
-          I = SetRegister(I, &TLSBaseAddrReg);
+          I = SetRegister(&*I, &TLSBaseAddrReg);
         Changed = true;
         break;
       default:
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 8dab44e7f8af..a24d47d2d16b 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -58,7 +58,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -139,12 +138,16 @@ public:
   }
 
   bool runOnMachineFunction(MachineFunction &F) override;
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::AllVRegsAllocated);
+  }
 
 private:
   void skipNonTerminators(BlockPosition &Position, MBBInfo &Block);
   void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
                       bool AssumeRelaxed);
-  TerminatorInfo describeTerminator(MachineInstr *MI);
+  TerminatorInfo describeTerminator(MachineInstr &MI);
   uint64_t initMBBInfo();
   bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
   bool mustRelaxABranch();
@@ -207,11 +210,11 @@ void SystemZLongBranch::skipTerminator(BlockPosition &Position,
 }
 
 // Return a description of terminator instruction MI.
-TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
+TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) {
   TerminatorInfo Terminator;
   Terminator.Size = TII->getInstSizeInBytes(MI);
-  if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) {
-    switch (MI->getOpcode()) {
+  if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
+    switch (MI.getOpcode()) {
     case SystemZ::J:
       // Relaxes to JG, which is 2 bytes longer.
       Terminator.ExtraRelaxSize = 2;
@@ -248,7 +251,7 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
     default:
       llvm_unreachable("Unrecognized branch instruction");
     }
-    Terminator.Branch = MI;
+    Terminator.Branch = &MI;
     Terminator.TargetBlock =
       TII->getBranchInfo(MI).Target->getMBB()->getNumber();
   }
@@ -280,7 +283,7 @@ uint64_t SystemZLongBranch::initMBBInfo() {
     MachineBasicBlock::iterator MI = MBB->begin();
     MachineBasicBlock::iterator End = MBB->end();
     while (MI != End && !MI->isTerminator()) {
-      Block.Size += TII->getInstSizeInBytes(MI);
+      Block.Size += TII->getInstSizeInBytes(*MI);
       ++MI;
     }
     skipNonTerminators(Position, Block);
@@ -289,7 +292,7 @@ uint64_t SystemZLongBranch::initMBBInfo() {
     while (MI != End) {
       if (!MI->isDebugValue()) {
         assert(MI->isTerminator() && "Terminator followed by non-terminator");
-        Terminators.push_back(describeTerminator(MI));
+        Terminators.push_back(describeTerminator(*MI));
         skipTerminator(Position, Terminators.back(), false);
         ++Block.NumTerminators;
       }
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index f4a517bd54df..4f64f4c65f1d 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -22,14 +22,15 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
   unsigned VarArgsFirstFPR;
   unsigned VarArgsFrameIndex;
   unsigned RegSaveFrameIndex;
+  int FramePointerSaveIndex;
   bool ManipulatesSP;
   unsigned NumLocalDynamics;
 
 public:
   explicit SystemZMachineFunctionInfo(MachineFunction &MF)
     : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
-      VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false),
-      NumLocalDynamics(0) {}
+      VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0),
+      ManipulatesSP(false), NumLocalDynamics(0) {}
 
   // Get and set the first call-saved GPR that should be saved and restored
   // by this function.  This is 0 if no GPRs need to be saved or restored.
@@ -59,6 +60,10 @@ public:
   unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
   void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; }
 
+  // Get and set the frame index of where the old frame pointer is stored.
+  int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+  void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
   // Get and set whether the function directly manipulates the stack pointer,
   // e.g. through STACKSAVE or STACKRESTORE.
   bool getManipulatesSP() const { return ManipulatesSP; }
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 9af90d492cf8..17b076d88a34 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -451,11 +451,11 @@ def PCRelTLS32 : PCRelTLSAsmOperand<"32">;
 // and multiplied by 2.
 def brtarget16 : PCRelOperand<OtherVT, PCRel16> {
   let EncoderMethod = "getPC16DBLEncoding";
-  let DecoderMethod = "decodePC16DBLOperand";
+  let DecoderMethod = "decodePC16DBLBranchOperand";
 }
 def brtarget32 : PCRelOperand<OtherVT, PCRel32> {
   let EncoderMethod = "getPC32DBLEncoding";
-  let DecoderMethod = "decodePC32DBLOperand";
+  let DecoderMethod = "decodePC32DBLBranchOperand";
 }
 
 // Variants of brtarget16/32 with an optional additional TLS symbol.
@@ -464,12 +464,12 @@ def tlssym : Operand<i64> { }
 def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> {
   let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym);
   let EncoderMethod = "getPC16DBLTLSEncoding";
-  let DecoderMethod = "decodePC16DBLOperand";
+  let DecoderMethod = "decodePC16DBLBranchOperand";
 }
 def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> {
   let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym);
   let EncoderMethod = "getPC32DBLTLSEncoding";
-  let DecoderMethod = "decodePC32DBLOperand";
+  let DecoderMethod = "decodePC32DBLBranchOperand";
 }
 
 // A PC-relative offset of a global value.  The offset is sign-extended
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 3c95a1e11b45..8d031f1ea05d 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -79,6 +79,14 @@ def SDT_ZI32Intrinsic       : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
 def SDT_ZPrefetch           : SDTypeProfile<0, 2,
                                             [SDTCisVT<0, i32>,
                                              SDTCisPtrTy<1>]>;
+def SDT_ZLoadBSwap          : SDTypeProfile<1, 2,
+                                            [SDTCisInt<0>,
+                                             SDTCisPtrTy<1>,
+                                             SDTCisVT<2, OtherVT>]>;
+def SDT_ZStoreBSwap         : SDTypeProfile<0, 3,
+                                            [SDTCisInt<0>,
+                                             SDTCisPtrTy<1>,
+                                             SDTCisVT<2, OtherVT>]>;
 def SDT_ZTBegin             : SDTypeProfile<0, 2,
                                             [SDTCisPtrTy<0>,
                                              SDTCisVT<1, i32>]>;
@@ -137,6 +145,7 @@ def SDT_ZVecQuaternaryInt   : SDTypeProfile<1, 4,
                                              SDTCisSameAs<0, 2>,
                                              SDTCisSameAs<0, 3>,
                                              SDTCisVT<4, i32>]>;
+def SDT_ZTest               : SDTypeProfile<0, 2, [SDTCisVT<1, i64>]>;
 
 //===----------------------------------------------------------------------===//
 // Node definitions
@@ -188,6 +197,15 @@ def z_udivrem64         : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
 
 def z_serialize         : SDNode<"SystemZISD::SERIALIZE", SDTNone,
                                  [SDNPHasChain, SDNPMayStore]>;
+def z_membarrier        : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
+                                 [SDNPHasChain, SDNPSideEffect]>;
+
+def z_loadbswap        : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
+                                 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def z_storebswap       : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
+                                 [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def z_tdc               : SDNode<"SystemZISD::TDC", SDT_ZTest, [SDNPOutGlue]>;
 
 // Defined because the index is an i32 rather than a pointer.
 def z_vector_insert     : SDNode<"ISD::INSERT_VECTOR_ELT",
@@ -329,6 +347,17 @@ def z_vsrl              : SDNode<"ISD::SRL", SDT_ZVecBinary>;
 // Pattern fragments
 //===----------------------------------------------------------------------===//
 
+def z_lrvh  : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
+def z_lrv   : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
+def z_lrvg  : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;
+
+def z_strvh : PatFrag<(ops node:$src, node:$addr),
+                      (z_storebswap node:$src, node:$addr, i16)>;
+def z_strv  : PatFrag<(ops node:$src, node:$addr),
+                      (z_storebswap node:$src, node:$addr, i32)>;
+def z_strvg : PatFrag<(ops node:$src, node:$addr),
+                      (z_storebswap node:$src, node:$addr, i64)>;
+
 // Signed and unsigned comparisons.
 def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
   unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index 32fbe5ae9ef9..9adc0189e650 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -29,6 +29,11 @@ def FeatureLoadStoreOnCond : SystemZFeature<
   "Assume that the load/store-on-condition facility is installed"
 >;
 
+def FeatureLoadStoreOnCond2 : SystemZFeature<
+  "load-store-on-cond-2", "LoadStoreOnCond2",
+  "Assume that the load/store-on-condition facility 2 is installed"
+>;
+
 def FeatureHighWord : SystemZFeature<
   "high-word", "HighWord",
   "Assume that the high-word facility is installed"
@@ -92,5 +97,6 @@ def : Processor<"z13", NoItineraries,
                 [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
                  FeatureFPExtension, FeaturePopulationCount,
                  FeatureFastSerialization, FeatureInterlockedAccess1,
+                 FeatureMiscellaneousExtensions,
                  FeatureTransactionalExecution, FeatureProcessorAssist,
-                 FeatureVector]>;
+                 FeatureVector, FeatureLoadStoreOnCond2]>;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 6fd24e3df625..b5e5fd4bfc4f 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -24,12 +24,20 @@ SystemZRegisterInfo::SystemZRegisterInfo()
 
 const MCPhysReg *
 SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  if (MF->getSubtarget().getTargetLowering()->supportSwiftError() &&
+      MF->getFunction()->getAttributes().hasAttrSomewhere(
+          Attribute::SwiftError))
+    return CSR_SystemZ_SwiftError_SaveList;
   return CSR_SystemZ_SaveList;
 }
 
 const uint32_t *
 SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                           CallingConv::ID CC) const {
+  if (MF.getSubtarget().getTargetLowering()->supportSwiftError() &&
+      MF.getFunction()->getAttributes().hasAttrSomewhere(
+          Attribute::SwiftError))
+    return CSR_SystemZ_SwiftError_RegMask;
   return CSR_SystemZ_RegMask;
 }
 
@@ -84,8 +92,14 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   // accepts the offset exists.
   unsigned Opcode = MI->getOpcode();
   unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset);
-  if (OpcodeForOffset)
+  if (OpcodeForOffset) {
+    if (OpcodeForOffset == SystemZ::LE &&
+        MF.getSubtarget<SystemZSubtarget>().hasVector()) {
+      // If LE is ok for offset, use LDE instead on z13.
+      OpcodeForOffset = SystemZ::LDE32;
+    }
     MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+  }
   else {
     // Create an anchor point that is in range.  Start at 0xffff so that
     // can use LLILH to load the immediate.
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index a0db5a9c188f..e41c06c98af2 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -33,6 +33,15 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
 public:
   SystemZRegisterInfo();
 
+  /// getPointerRegClass - Return the register class to use to hold pointers.
+  /// This is currently only used by LOAD_STACK_GUARD, which requires a non-%r0
+  /// register, hence ADDR64.
+  const TargetRegisterClass *
+  getPointerRegClass(const MachineFunction &MF,
+                     unsigned Kind=0) const override {
+    return &SystemZ::ADDR64BitRegClass;
+  }
+
   // Override TargetRegisterInfo.h.
   bool requiresRegisterScavenging(const MachineFunction &MF) const override {
     return true;
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 178aa3817311..657482504045 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
 // address Dest.  Sequence is the opcode to use for straight-line code
 // (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
 // Return the chain for the completed operation.
-static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
+static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
                           unsigned Loop, SDValue Chain, SDValue Dst,
                           SDValue Src, uint64_t Size) {
   EVT PtrVT = Src.getValueType();
@@ -46,12 +46,10 @@ static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
                      DAG.getConstant(Size, DL, PtrVT));
 }
 
-SDValue SystemZSelectionDAGInfo::
-EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                        SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
-                        bool IsVolatile, bool AlwaysInline,
-                        MachinePointerInfo DstPtrInfo,
-                        MachinePointerInfo SrcPtrInfo) const {
+SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
   if (IsVolatile)
     return SDValue();
 
@@ -64,24 +62,21 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
 // Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
 // Chain, Dst, ByteVal and Size.  These cases are expected to use
 // MVI, MVHHI, MVHI and MVGHI respectively.
-static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                            SDValue Dst, uint64_t ByteVal, uint64_t Size,
-                           unsigned Align,
-                           MachinePointerInfo DstPtrInfo) {
+                           unsigned Align, MachinePointerInfo DstPtrInfo) {
   uint64_t StoreVal = ByteVal;
   for (unsigned I = 1; I < Size; ++I)
     StoreVal |= ByteVal << (I * 8);
-  return DAG.getStore(Chain, DL,
-                      DAG.getConstant(StoreVal, DL,
-                                      MVT::getIntegerVT(Size * 8)),
-                      Dst, DstPtrInfo, false, false, Align);
+  return DAG.getStore(
+      Chain, DL, DAG.getConstant(StoreVal, DL, MVT::getIntegerVT(Size * 8)),
+      Dst, DstPtrInfo, Align);
 }
 
-SDValue SystemZSelectionDAGInfo::
-EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                        SDValue Dst, SDValue Byte, SDValue Size,
-                        unsigned Align, bool IsVolatile,
-                        MachinePointerInfo DstPtrInfo) const {
+SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst,
+    SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile,
+    MachinePointerInfo DstPtrInfo) const {
   EVT PtrVT = Dst.getValueType();
 
   if (IsVolatile)
@@ -116,15 +111,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
     } else {
       // Handle one and two bytes using STC.
       if (Bytes <= 2) {
-        SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
-                                      false, false, Align);
+        SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
         if (Bytes == 1)
           return Chain1;
         SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
                                    DAG.getConstant(1, DL, PtrVT));
-        SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
-                                      DstPtrInfo.getWithOffset(1),
-                                      false, false, 1);
+        SDValue Chain2 =
+            DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1),
+                         /* Alignment = */ 1);
         return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
       }
     }
@@ -138,8 +132,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
 
     // Copy the byte to the first location and then use MVC to copy
     // it to the rest.
-    Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
-                         false, false, Align);
+    Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
     SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
                                    DAG.getConstant(1, DL, PtrVT));
     return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
@@ -150,7 +143,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
 
 // Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
 // deciding whether to use a loop or straight-line code.
-static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                        SDValue Src1, SDValue Src2, uint64_t Size) {
   SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
   EVT PtrVT = Src1.getValueType();
@@ -174,7 +167,8 @@ static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
 // less than zero if CC == 1 and greater than zero if CC >= 2.
 // The sequence starts with IPM, which puts CC into bits 29 and 28
 // of an integer and clears bits 30 and 31.
-static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
+static SDValue addIPMSequence(const SDLoc &DL, SDValue Glue,
+                              SelectionDAG &DAG) {
   SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
   SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
                             DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
@@ -183,11 +177,10 @@ static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
   return ROTL;
 }
 
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                        SDValue Src1, SDValue Src2, SDValue Size,
-                        MachinePointerInfo Op1PtrInfo,
-                        MachinePointerInfo Op2PtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
+    SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
+    MachinePointerInfo Op2PtrInfo) const {
   if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
     uint64_t Bytes = CSize->getZExtValue();
     assert(Bytes > 0 && "Caller should have handled 0-size case");
@@ -198,10 +191,9 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
   return std::make_pair(SDValue(), SDValue());
 }
 
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                        SDValue Src, SDValue Char, SDValue Length,
-                        MachinePointerInfo SrcPtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src,
+    SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const {
   // Use SRST to find the character.  End is its address on success.
   EVT PtrVT = Src.getValueType();
   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
@@ -226,22 +218,20 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
   return std::make_pair(End, Chain);
 }
 
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                        SDValue Dest, SDValue Src,
-                        MachinePointerInfo DestPtrInfo,
-                        MachinePointerInfo SrcPtrInfo, bool isStpcpy) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcpy(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest,
+    SDValue Src, MachinePointerInfo DestPtrInfo, MachinePointerInfo SrcPtrInfo,
+    bool isStpcpy) const {
   SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other);
   SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src,
                                 DAG.getConstant(0, DL, MVT::i32));
   return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1));
 }
 
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                        SDValue Src1, SDValue Src2,
-                        MachinePointerInfo Op1PtrInfo,
-                        MachinePointerInfo Op2PtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
+    SDValue Src2, MachinePointerInfo Op1PtrInfo,
+    MachinePointerInfo Op2PtrInfo) const {
   SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
   SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
                                DAG.getConstant(0, DL, MVT::i32));
@@ -255,7 +245,8 @@ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
 // and the second being the out chain.
 //
 // This can be used for strlen by setting Limit to 0.
-static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
+static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG,
+                                                    const SDLoc &DL,
                                                     SDValue Chain, SDValue Src,
                                                     SDValue Limit) {
   EVT PtrVT = Src.getValueType();
@@ -265,19 +256,18 @@ static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
   Chain = End.getValue(1);
   SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src);
   return std::make_pair(Len, Chain);
-}    
+}
 
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                        SDValue Src, MachinePointerInfo SrcPtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrlen(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src,
+    MachinePointerInfo SrcPtrInfo) const {
   EVT PtrVT = Src.getValueType();
   return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT));
 }
 
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                         SDValue Src, SDValue MaxLength,
-                         MachinePointerInfo SrcPtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrnlen(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src,
+    SDValue MaxLength, MachinePointerInfo SrcPtrInfo) const {
   EVT PtrVT = Src.getValueType();
   MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT);
   SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength);
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 246fa3e5e656..93cd970c30c6 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -7,66 +7,64 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
+// This file defines the SystemZ subclass for SelectionDAGTargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
 
-#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 
 namespace llvm {
 
 class SystemZTargetMachine;
 
-class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
+class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo {
 public:
   explicit SystemZSelectionDAGInfo() = default;
 
-  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                                  SDValue Dst, SDValue Src,
-                                  SDValue Size, unsigned Align,
-                                  bool IsVolatile, bool AlwaysInline,
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL,
+                                  SDValue Chain, SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align, bool IsVolatile,
+                                  bool AlwaysInline,
                                   MachinePointerInfo DstPtrInfo,
                                   MachinePointerInfo SrcPtrInfo) const override;
 
-  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL,
+  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
                                   SDValue Chain, SDValue Dst, SDValue Byte,
                                   SDValue Size, unsigned Align, bool IsVolatile,
                                   MachinePointerInfo DstPtrInfo) const override;
 
   std::pair<SDValue, SDValue>
-  EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+  EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                           SDValue Src1, SDValue Src2, SDValue Size,
                           MachinePointerInfo Op1PtrInfo,
                           MachinePointerInfo Op2PtrInfo) const override;
 
   std::pair<SDValue, SDValue>
-  EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+  EmitTargetCodeForMemchr(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                           SDValue Src, SDValue Char, SDValue Length,
                           MachinePointerInfo SrcPtrInfo) const override;
 
-  std::pair<SDValue, SDValue>
-  EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
-                          SDValue Dest, SDValue Src,
-                          MachinePointerInfo DestPtrInfo,
-                          MachinePointerInfo SrcPtrInfo,
-                          bool isStpcpy) const override;
+  std::pair<SDValue, SDValue> EmitTargetCodeForStrcpy(
+      SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest,
+      SDValue Src, MachinePointerInfo DestPtrInfo,
+      MachinePointerInfo SrcPtrInfo, bool isStpcpy) const override;
 
   std::pair<SDValue, SDValue>
-  EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+  EmitTargetCodeForStrcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                           SDValue Src1, SDValue Src2,
                           MachinePointerInfo Op1PtrInfo,
                           MachinePointerInfo Op2PtrInfo) const override;
 
   std::pair<SDValue, SDValue>
-  EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+  EmitTargetCodeForStrlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                           SDValue Src,
                           MachinePointerInfo SrcPtrInfo) const override;
 
   std::pair<SDValue, SDValue>
-  EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+  EmitTargetCodeForStrnlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                            SDValue Src, SDValue MaxLength,
                            MachinePointerInfo SrcPtrInfo) const override;
 };
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 846edd51341a..7f26a3519e50 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -35,6 +35,10 @@ public:
 
   bool processBlock(MachineBasicBlock &MBB);
   bool runOnMachineFunction(MachineFunction &F) override;
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::AllVRegsAllocated);
+  }
 
 private:
   bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH);
@@ -68,18 +72,20 @@ static void tieOpsIfNeeded(MachineInstr &MI) {
 
 // MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH
 // are the halfword immediate loads for the same word.  Try to use one of them
-// instead of IIxF. 
-bool SystemZShortenInst::shortenIIF(MachineInstr &MI,
-                                    unsigned LLIxL, unsigned LLIxH) {
+// instead of IIxF.
+bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL,
+                                    unsigned LLIxH) {
   unsigned Reg = MI.getOperand(0).getReg();
   // The new opcode will clear the other half of the GR64 reg, so
   // cancel if that is live.
-  unsigned thisSubRegIdx = (SystemZ::GRH32BitRegClass.contains(Reg) ?
-			    SystemZ::subreg_h32 : SystemZ::subreg_l32);
-  unsigned otherSubRegIdx = (thisSubRegIdx == SystemZ::subreg_l32 ?
-			     SystemZ::subreg_h32 : SystemZ::subreg_l32);
-  unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx,
-						 &SystemZ::GR64BitRegClass);
+  unsigned thisSubRegIdx =
+      (SystemZ::GRH32BitRegClass.contains(Reg) ? SystemZ::subreg_h32
+                                               : SystemZ::subreg_l32);
+  unsigned otherSubRegIdx =
+      (thisSubRegIdx == SystemZ::subreg_l32 ? SystemZ::subreg_h32
+                                            : SystemZ::subreg_l32);
+  unsigned GR64BitReg =
+      TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass);
   unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx);
   if (LiveRegs.contains(OtherReg))
     return false;
@@ -135,11 +141,10 @@ bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
 
 // Calls shortenOn001 if CCLive is false. CC def operand is added in
 // case of success.
-bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI,
-					   unsigned Opcode) {
+bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, unsigned Opcode) {
   if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) {
     MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
-      .addReg(SystemZ::CC, RegState::ImplicitDefine);
+      .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
     return true;
   }
   return false;
@@ -177,7 +182,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
 
   // Set up the set of live registers at the end of MBB (live out)
   LiveRegs.clear();
-  LiveRegs.addLiveOuts(&MBB);
+  LiveRegs.addLiveOuts(MBB);
 
   // Iterate backwards through the block looking for instructions to change.
   for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
@@ -264,6 +269,9 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
 }
 
 bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
+  if (skipFunction(*F.getFunction()))
+    return false;
+
   const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>();
   TII = ST.getInstrInfo();
   TRI = ST.getRegisterInfo();
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 0b49fcdd8f78..67d5e0179fe2 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -40,21 +40,11 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
       HasPopulationCount(false), HasFastSerialization(false),
       HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
       HasTransactionalExecution(false), HasProcessorAssist(false),
-      HasVector(false), TargetTriple(TT),
+      HasVector(false), HasLoadStoreOnCond2(false), TargetTriple(TT),
       InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
       TSInfo(), FrameLowering() {}
 
-// Return true if GV binds locally under reloc model RM.
-static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
-  // For non-PIC, all symbols bind locally.
-  if (RM == Reloc::Static)
-    return true;
-
-  return GV->hasLocalLinkage() || !GV->hasDefaultVisibility();
-}
-
 bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
-                                       Reloc::Model RM,
                                        CodeModel::Model CM) const {
   // PC32DBL accesses require the low bit to be clear.  Note that a zero
   // value selects the default alignment and is therefore OK.
@@ -63,7 +53,7 @@ bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
 
   // For the small model, all locally-binding symbols are in range.
   if (CM == CodeModel::Small)
-    return bindsLocally(GV, RM);
+    return TLInfo.getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
 
   // For Medium and above, assume that the symbol is not within the 4GB range.
   // Taking the address of locally-defined text would be OK, but that
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index f7eaf01cb77e..6007f6fc9c4c 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -45,6 +45,7 @@ protected:
   bool HasTransactionalExecution;
   bool HasProcessorAssist;
   bool HasVector;
+  bool HasLoadStoreOnCond2;
 
 private:
   Triple TargetTriple;
@@ -69,7 +70,7 @@ public:
   const SystemZTargetLowering *getTargetLowering() const override {
     return &TLInfo;
   }
-  const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
+  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
     return &TSInfo;
   }
 
@@ -85,6 +86,9 @@ public:
   // Return true if the target has the load/store-on-condition facility.
   bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; }
 
+  // Return true if the target has the load/store-on-condition facility 2.
+  bool hasLoadStoreOnCond2() const { return HasLoadStoreOnCond2; }
+
   // Return true if the target has the high-word facility.
   bool hasHighWord() const { return HasHighWord; }
 
@@ -116,8 +120,7 @@ public:
 
   // Return true if GV can be accessed using LARL for reloc model RM
   // and code model CM.
-  bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
-                       CodeModel::Model CM) const;
+  bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
 
   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
 };
diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp
new file mode 100644
index 000000000000..96a9ef82c125
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTDC.cpp
@@ -0,0 +1,382 @@
+//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for instructions that can be replaced by a Test Data Class
+// instruction, and replaces them when profitable.
+//
+// Roughly, the following rules are recognized:
+//
+// 1: fcmp pred X, 0 -> tdc X, mask
+// 2: fcmp pred X, +-inf -> tdc X, mask
+// 3: fcmp pred X, +-minnorm -> tdc X, mask
+// 4: tdc (fabs X), mask -> tdc X, newmask
+// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
+// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
+// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
+// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
+// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
+// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
+//
+// The pass works in 4 steps:
+//
+// 1. All fcmp and icmp instructions in a function are checked for a match
+//    with rules 1-3 and 5-7.  Their TDC equivalents are stored in
+//    the ConvertedInsts mapping.  If the operand of a fcmp instruction is
+//    a fabs, it's also folded according to rule 4.
+// 2. All and/or/xor i1 instructions whose both operands have been already
+//    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used
+//    as a queue of instructions to check.
+// 3. All mapped instructions that are considered worthy of conversion (ie.
+//    replacing them will actually simplify the final code) are replaced
+//    with a call to the s390.tdc intrinsic.
+// 4. All intermediate results of replaced instructions are removed if unused.
+//
+// Instructions that match rules 1-3 are considered unworthy of conversion
+// on their own (since a comparison instruction is superior), but are mapped
+// in the hopes of folding the result using rules 4 and 8-10 (likely removing
+// the original comparison in the process).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include <deque>
+#include <set>
+
+using namespace llvm;
+
+namespace llvm {
+  void initializeSystemZTDCPassPass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZTDCPass : public FunctionPass {
+public:
+  static char ID;
+  SystemZTDCPass() : FunctionPass(ID) {
+    initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+private:
+  // Maps seen instructions that can be mapped to a TDC, values are
+  // (TDC operand, TDC mask, worthy flag) triples.
+  MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
+  // The queue of and/or/xor i1 instructions to be potentially folded.
+  std::vector<BinaryOperator *> LogicOpsWorklist;
+  // Instructions matched while folding, to be removed at the end if unused.
+  std::set<Instruction *> PossibleJunk;
+
+  // Tries to convert a fcmp instruction.
+  void convertFCmp(CmpInst &I);
+
+  // Tries to convert an icmp instruction.
+  void convertICmp(CmpInst &I);
+
+  // Tries to convert an i1 and/or/xor instruction, whose both operands
+  // have been already converted.
+  void convertLogicOp(BinaryOperator &I);
+
+  // Marks an instruction as converted - adds it to ConvertedInsts and adds
+  // any and/or/xor i1 users to the queue.
+  void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
+    ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
+    auto &M = *I->getFunction()->getParent();
+    auto &Ctx = M.getContext();
+    for (auto *U : I->users()) {
+      auto *LI = dyn_cast<BinaryOperator>(U);
+      if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
+          (LI->getOpcode() == Instruction::And ||
+           LI->getOpcode() == Instruction::Or ||
+           LI->getOpcode() == Instruction::Xor)) {
+        LogicOpsWorklist.push_back(LI);
+      }
+    }
+  }
+};
+
+} // end anonymous namespace
+
+char SystemZTDCPass::ID = 0;
+INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
+                "SystemZ Test Data Class optimization", false, false)
+
+FunctionPass *llvm::createSystemZTDCPass() {
+  return new SystemZTDCPass();
+}
+
+void SystemZTDCPass::convertFCmp(CmpInst &I) {
+  Value *Op0 = I.getOperand(0);
+  auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
+  auto Pred = I.getPredicate();
+  // Only comparisons with consts are interesting.
+  if (!Const)
+    return;
+  // Compute the smallest normal number (and its negation).
+  auto &Sem = Op0->getType()->getFltSemantics();
+  APFloat Smallest = APFloat::getSmallestNormalized(Sem);
+  APFloat NegSmallest = Smallest;
+  NegSmallest.changeSign();
+  // Check if Const is one of our recognized consts.
+  int WhichConst;
+  if (Const->isZero()) {
+    // All comparisons with 0 can be converted.
+    WhichConst = 0;
+  } else if (Const->isInfinity()) {
+    // Likewise for infinities.
+    WhichConst = Const->isNegative() ? 2 : 1;
+  } else if (Const->isExactlyValue(Smallest)) {
+    // For Smallest, we cannot do EQ separately from GT.
+    if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
+        (Pred & CmpInst::FCMP_OGE) != 0)
+      return;
+    WhichConst = 3;
+  } else if (Const->isExactlyValue(NegSmallest)) {
+    // Likewise for NegSmallest, we cannot do EQ separately from LT.
+    if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
+        (Pred & CmpInst::FCMP_OLE) != 0)
+      return;
+    WhichConst = 4;
+  } else {
+    // Not one of our special constants.
+    return;
+  }
+  // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
+  static const int Masks[][4] = {
+    { // 0
+      SystemZ::TDCMASK_ZERO,              // eq
+      SystemZ::TDCMASK_POSITIVE,          // gt
+      SystemZ::TDCMASK_NEGATIVE,          // lt
+      SystemZ::TDCMASK_NAN,               // un
+    },
+    { // inf
+      SystemZ::TDCMASK_INFINITY_PLUS,     // eq
+      0,                                  // gt
+      (SystemZ::TDCMASK_ZERO |
+       SystemZ::TDCMASK_NEGATIVE |
+       SystemZ::TDCMASK_NORMAL_PLUS |
+       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
+      SystemZ::TDCMASK_NAN,               // un
+    },
+    { // -inf
+      SystemZ::TDCMASK_INFINITY_MINUS,    // eq
+      (SystemZ::TDCMASK_ZERO |
+       SystemZ::TDCMASK_POSITIVE |
+       SystemZ::TDCMASK_NORMAL_MINUS |
+       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+      0,                                  // lt
+      SystemZ::TDCMASK_NAN,               // un
+    },
+    { // minnorm
+      0,                                  // eq (unsupported)
+      (SystemZ::TDCMASK_NORMAL_PLUS |
+       SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
+      (SystemZ::TDCMASK_ZERO |
+       SystemZ::TDCMASK_NEGATIVE |
+       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
+      SystemZ::TDCMASK_NAN,               // un
+    },
+    { // -minnorm
+      0,                                  // eq (unsupported)
+      (SystemZ::TDCMASK_ZERO |
+       SystemZ::TDCMASK_POSITIVE |
+       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+      (SystemZ::TDCMASK_NORMAL_MINUS |
+       SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
+      SystemZ::TDCMASK_NAN,               // un
+    }
+  };
+  // Construct the mask as a combination of the partial masks.
+  int Mask = 0;
+  if (Pred & CmpInst::FCMP_OEQ)
+    Mask |= Masks[WhichConst][0];
+  if (Pred & CmpInst::FCMP_OGT)
+    Mask |= Masks[WhichConst][1];
+  if (Pred & CmpInst::FCMP_OLT)
+    Mask |= Masks[WhichConst][2];
+  if (Pred & CmpInst::FCMP_UNO)
+    Mask |= Masks[WhichConst][3];
+  // A lone fcmp is unworthy of tdc conversion on its own, but may become
+  // worthy if combined with fabs.
+  bool Worthy = false;
+  if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
+    Function *F = CI->getCalledFunction();
+    if (F && F->getIntrinsicID() == Intrinsic::fabs) {
+      // Fold with fabs - adjust the mask appropriately.
+      Mask &= SystemZ::TDCMASK_PLUS;
+      Mask |= Mask >> 1;
+      Op0 = CI->getArgOperand(0);
+      // A combination of fcmp with fabs is a win, unless the constant
+      // involved is 0 (which is handled by later passes).
+      Worthy = WhichConst != 0;
+      PossibleJunk.insert(CI);
+    }
+  }
+  converted(&I, Op0, Mask, Worthy);
+}
+
+void SystemZTDCPass::convertICmp(CmpInst &I) {
+  Value *Op0 = I.getOperand(0);
+  auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
+  auto Pred = I.getPredicate();
+  // All our icmp rules involve comparisons with consts.
+  if (!Const)
+    return;
+  if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
+    // Check for icmp+bitcast used for signbit.
+    if (!Cast->getSrcTy()->isFloatTy() &&
+        !Cast->getSrcTy()->isDoubleTy() &&
+        !Cast->getSrcTy()->isFP128Ty())
+      return;
+    Value *V = Cast->getOperand(0);
+    int Mask;
+    if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
+      // icmp slt (bitcast X), 0 - set if sign bit true
+      Mask = SystemZ::TDCMASK_MINUS;
+    } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
+      // icmp sgt (bitcast X), -1 - set if sign bit false
+      Mask = SystemZ::TDCMASK_PLUS;
+    } else {
+      // Not a sign bit check.
+      return;
+    }
+    PossibleJunk.insert(Cast);
+    converted(&I, V, Mask, true);
+  } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
+    // Check if this is a pre-existing call of our tdc intrinsic.
+    Function *F = CI->getCalledFunction();
+    if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
+      return;
+    if (!Const->isZero())
+      return;
+    Value *V = CI->getArgOperand(0);
+    auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+    // Bail if the mask is not a constant.
+    if (!MaskC)
+      return;
+    int Mask = MaskC->getZExtValue();
+    Mask &= SystemZ::TDCMASK_ALL;
+    if (Pred == CmpInst::ICMP_NE) {
+      // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
+    } else if (Pred == CmpInst::ICMP_EQ) {
+      // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
+      Mask ^= SystemZ::TDCMASK_ALL;
+    } else {
+      // An unknown comparison - ignore.
+      return;
+    }
+    PossibleJunk.insert(CI);
+    converted(&I, V, Mask, false);
+  }
+}
+
+void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
+  Value *Op0, *Op1;
+  int Mask0, Mask1;
+  bool Worthy0, Worthy1;
+  std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
+  std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
+  if (Op0 != Op1)
+    return;
+  int Mask;
+  switch (I.getOpcode()) {
+    case Instruction::And:
+      Mask = Mask0 & Mask1;
+      break;
+    case Instruction::Or:
+      Mask = Mask0 | Mask1;
+      break;
+    case Instruction::Xor:
+      Mask = Mask0 ^ Mask1;
+      break;
+    default:
+      llvm_unreachable("Unknown op in convertLogicOp");
+  }
+  converted(&I, Op0, Mask, true);
+}
+
+bool SystemZTDCPass::runOnFunction(Function &F) {
+  ConvertedInsts.clear();
+  LogicOpsWorklist.clear();
+  PossibleJunk.clear();
+
+  // Look for icmp+fcmp instructions.
+  for (auto &I : instructions(F)) {
+    if (I.getOpcode() == Instruction::FCmp)
+      convertFCmp(cast<CmpInst>(I));
+    else if (I.getOpcode() == Instruction::ICmp)
+      convertICmp(cast<CmpInst>(I));
+  }
+
+  // If none found, bail already.
+  if (ConvertedInsts.empty())
+    return false;
+
+  // Process the queue of logic instructions.
+  while (!LogicOpsWorklist.empty()) {
+    BinaryOperator *Op = LogicOpsWorklist.back();
+    LogicOpsWorklist.pop_back();
+    // If both operands mapped, and the instruction itself not yet mapped,
+    // convert it.
+    if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
+        ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
+        !ConvertedInsts.count(Op))
+      convertLogicOp(*Op);
+  }
+
+  // Time to actually replace the instructions.  Do it in the reverse order
+  // of finding them, since there's a good chance the earlier ones will be
+  // unused (due to being folded into later ones).
+  Module &M = *F.getParent();
+  auto &Ctx = M.getContext();
+  Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+  bool MadeChange = false;
+  for (auto &It : reverse(ConvertedInsts)) {
+    Instruction *I = It.first;
+    Value *V;
+    int Mask;
+    bool Worthy;
+    std::tie(V, Mask, Worthy) = It.second;
+    if (!I->user_empty()) {
+      // If used and unworthy of conversion, skip it.
+      if (!Worthy)
+        continue;
+      // Call the intrinsic, compare result with 0.
+      Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
+                                                 V->getType());
+      IRBuilder<> IRB(I);
+      Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
+      Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
+      Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
+      I->replaceAllUsesWith(ICmp);
+    }
+    // If unused, or used and converted, remove it.
+    I->eraseFromParent();
+    MadeChange = true;
+  }
+
+  if (!MadeChange)
+    return false;
+
+  // We've actually done something - now clear misc accumulated junk (fabs,
+  // bitcast).
+  for (auto *I : PossibleJunk)
+    if (I->user_empty())
+      I->eraseFromParent();
+
+  return true;
+}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f305e85f6cfe..85a3f6f4a8be 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -10,6 +10,7 @@
 #include "SystemZTargetMachine.h"
 #include "SystemZTargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -79,13 +80,22 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
   return Ret;
 }
 
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
+  // Static code is suitable for use in a dynamic executable; there is no
+  // separate DynamicNoPIC model.
+  if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
+    return Reloc::Static;
+  return *RM;
+}
+
 SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
                                            StringRef CPU, StringRef FS,
                                            const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
+                                           Optional<Reloc::Model> RM,
+                                           CodeModel::Model CM,
                                            CodeGenOpt::Level OL)
     : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
-                        RM, CM, OL),
+                        getEffectiveRelocModel(RM), CM, OL),
       TLOF(make_unique<TargetLoweringObjectFileELF>()),
       Subtarget(TT, CPU, FS, *this) {
   initAsmInfo();
@@ -112,6 +122,9 @@ public:
 } // end anonymous namespace
 
 void SystemZPassConfig::addIRPasses() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createSystemZTDCPass());
+
   TargetPassConfig::addIRPasses();
 }
 
@@ -125,8 +138,7 @@ bool SystemZPassConfig::addInstSelector() {
 }
 
 void SystemZPassConfig::addPreSched2() {
-  if (getOptLevel() != CodeGenOpt::None &&
-      getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond())
+  if (getOptLevel() != CodeGenOpt::None)
     addPass(&IfConverterID);
 }
 
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 1a8f1f7f3aaa..69cf9bc6e525 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -29,7 +29,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
 public:
   SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
                        StringRef FS, const TargetOptions &Options,
-                       Reloc::Model RM, CodeModel::Model CM,
+                       Optional<Reloc::Model> RM, CodeModel::Model CM,
                        CodeGenOpt::Level OL);
   ~SystemZTargetMachine() override;
 
diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile
deleted file mode 100644
index 0be80eb4e6ad..000000000000
--- a/lib/Target/SystemZ/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/SystemZ/TargetInfo/Makefile --------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common