33 files changed, 1093 insertions, 416 deletions
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 93c4ce4b5ccc..607266d552a6 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -1304,14 +1304,23 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
   if (getParser().parseExpression(Expr))
     return MatchOperand_NoMatch;
 
+  auto isOutOfRangeConstant = [&](const MCExpr *E) -> bool {
+    if (auto *CE = dyn_cast<MCConstantExpr>(E)) {
+      int64_t Value = CE->getValue();
+      if ((Value & 1) || Value < MinVal || Value > MaxVal)
+        return true;
+    }
+    return false;
+  };
+
   // For consistency with the GNU assembler, treat immediates as offsets
   // from ".".
   if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
-    int64_t Value = CE->getValue();
-    if ((Value & 1) || Value < MinVal || Value > MaxVal) {
+    if (isOutOfRangeConstant(CE)) {
       Error(StartLoc, "offset out of range");
       return MatchOperand_ParseFail;
     }
+    int64_t Value = CE->getValue();
     MCSymbol *Sym = Ctx.createTempSymbol();
     Out.EmitLabel(Sym);
     const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
@@ -1319,6 +1328,15 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
     Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx);
   }
 
+  // For consistency with the GNU assembler, conservatively assume that a
+  // constant offset must by itself be within the given size range.
+  if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr))
+    if (isOutOfRangeConstant(BE->getLHS()) ||
+        isOutOfRangeConstant(BE->getRHS())) {
+      Error(StartLoc, "offset out of range");
+      return MatchOperand_ParseFail;
+    }
+
   // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol.
   const MCExpr *Sym = nullptr;
   if (AllowTLS && getLexer().is(AsmToken::Colon)) {
@@ -1371,6 +1389,6 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
 }
 
 // Force static initialization.
-extern "C" void LLVMInitializeSystemZAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmParser() {
   RegisterMCAsmParser<SystemZAsmParser> X(getTheSystemZTarget());
 }
diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 70c26db33ced..e42aa14fe589 100644
--- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -34,7 +34,6 @@ public:
 
   DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
-                              raw_ostream &VStream,
                               raw_ostream &CStream) const override;
 };
 
@@ -46,7 +45,7 @@ static MCDisassembler *createSystemZDisassembler(const Target &T,
   return new SystemZDisassembler(STI, Ctx);
 }
 
-extern "C" void LLVMInitializeSystemZDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZDisassembler() {
   // Register the disassembler.
   TargetRegistry::RegisterMCDisassembler(getTheSystemZTarget(),
                                          createSystemZDisassembler);
@@ -449,7 +448,6 @@ static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
 DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                  ArrayRef<uint8_t> Bytes,
                                                  uint64_t Address,
-                                                 raw_ostream &OS,
                                                  raw_ostream &CS) const {
   // Get the first two bytes of the instruction.
   Size = 0;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index 91cb35dd72f2..5893b227c08c 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -41,8 +41,12 @@ void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
 
 void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
                                       raw_ostream &O) {
-  if (MO.isReg())
-    O << '%' << getRegisterName(MO.getReg());
+  if (MO.isReg()) {
+    if (!MO.getReg())
+      O << '0';
+    else
+      O << '%' << getRegisterName(MO.getReg());
+  }
   else if (MO.isImm())
     O << MO.getImm();
   else if (MO.isExpr())
@@ -51,10 +55,10 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
     llvm_unreachable("Invalid operand");
 }
 
-void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                   StringRef Annot,
-                                   const MCSubtargetInfo &STI) {
-  printInstruction(MI, O);
+void SystemZInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+                                   StringRef Annot, const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  printInstruction(MI, Address, O);
   printAnnotation(O, Annot);
 }
 
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index 4235d4e21792..5628e9252f03 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -27,7 +27,7 @@ public:
     : MCInstPrinter(MAI, MII, MRI) {}
 
   // Automatically generated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
+  void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
   static const char *getRegisterName(unsigned RegNo);
 
   // Print an address with the given base, displacement and index.
@@ -40,8 +40,8 @@ public:
 
   // Override MCInstPrinter.
   void printRegName(raw_ostream &O, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
+  void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+                 const MCSubtargetInfo &STI, raw_ostream &O) override;
 
 private:
   // Print various types of operand.
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 3c0300cfd8f0..eb2112674a12 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -147,7 +147,8 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) {
 }
 
 static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
-                                         const Triple &TT) {
+                                         const Triple &TT,
+                                         const MCTargetOptions &Options) {
   MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
   MCCFIInstruction Inst =
       MCCFIInstruction::createDefCfa(nullptr,
@@ -182,7 +183,7 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
   return new SystemZInstPrinter(MAI, MII, MRI);
 }
 
-extern "C" void LLVMInitializeSystemZTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() {
   // Register the MCAsmInfo.
   TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(),
                                     createSystemZMCAsmInfo);
diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h
index 88cf589a3f10..0808160f627c 100644
--- a/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/llvm/lib/Target/SystemZ/SystemZ.h
@@ -55,7 +55,7 @@ const unsigned CCMASK_ARITH          = CCMASK_ANY;
 
 // Condition-code mask assignments for logical operations.
 const unsigned CCMASK_LOGICAL_ZERO     = CCMASK_0 | CCMASK_2;
-const unsigned CCMASK_LOGICAL_NONZERO  = CCMASK_1 | CCMASK_2;
+const unsigned CCMASK_LOGICAL_NONZERO  = CCMASK_1 | CCMASK_3;
 const unsigned CCMASK_LOGICAL_CARRY    = CCMASK_2 | CCMASK_3;
 const unsigned CCMASK_LOGICAL_NOCARRY  = CCMASK_0 | CCMASK_1;
 const unsigned CCMASK_LOGICAL_BORROW   = CCMASK_LOGICAL_NOCARRY;
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 10023e9e169c..67c4aa08f90d 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -16,11 +16,13 @@
 #include "SystemZConstantPoolValue.h"
 #include "SystemZMCInstLower.h"
 #include "TargetInfo/SystemZTargetInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/TargetRegistry.h"
 
@@ -543,9 +545,9 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
   else {
     MCSymbol *DotSym = OutContext.createTempSymbol();
     const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
+    OutStreamer.EmitLabel(DotSym);
     OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BRCLAsm)
                                   .addImm(0).addExpr(Dot), STI);
-    OutStreamer.EmitLabel(DotSym);
     return 6;
   }
 }
@@ -553,8 +555,17 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
 void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                          SystemZMCInstLower &Lower) {
   MCContext &Ctx = MF->getContext();
-  if (MF->getFunction().getFnAttribute("mnop-mcount")
-                       .getValueAsString() == "true") {
+  if (MF->getFunction().hasFnAttribute("mrecord-mcount")) {
+    MCSymbol *DotSym = OutContext.createTempSymbol();
+    OutStreamer->PushSection();
+    OutStreamer->SwitchSection(
+        Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC));
+    OutStreamer->EmitSymbolValue(DotSym, 8);
+    OutStreamer->PopSection();
+    OutStreamer->EmitLabel(DotSym);
+  }
+
+  if (MF->getFunction().hasFnAttribute("mnop-mcount")) {
     EmitNop(Ctx, *OutStreamer, 6, getSubtargetInfo());
     return;
   }
@@ -572,7 +583,11 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
 
   unsigned NumNOPBytes = MI.getOperand(1).getImm();
 
-  SM.recordStackMap(MI);
+  auto &Ctx = OutStreamer->getContext();
+  MCSymbol *MILabel = Ctx.createTempSymbol();
+  OutStreamer->EmitLabel(MILabel);
+
+  SM.recordStackMap(*MILabel, MI);
   assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!");
 
   // Scan ahead to trim the shadow.
@@ -601,7 +616,11 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
 // [<def>], <id>, <numBytes>, <target>, <numArgs>
 void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                         SystemZMCInstLower &Lower) {
-  SM.recordPatchPoint(MI);
+  auto &Ctx = OutStreamer->getContext();
+  MCSymbol *MILabel = Ctx.createTempSymbol();
+  OutStreamer->EmitLabel(MILabel);
+
+  SM.recordPatchPoint(*MILabel, MI);
   PatchPointOpers Opers(&MI);
 
   unsigned EncodedBytes = 0;
@@ -705,6 +724,6 @@ void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
 }
 
 // Force static initialization.
-extern "C" void LLVMInitializeSystemZAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmPrinter() {
   RegisterAsmPrinter<SystemZAsmPrinter> X(getTheSystemZTarget());
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 82f29b6361f1..4432adc6a269 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -124,6 +124,13 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
   return true;
 }
 
+inline bool CC_SystemZ_GHC_Error(unsigned &, MVT &, MVT &,
+                                 CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+                                 CCState &) { // All parameters are unused.
+  report_fatal_error("No registers left in GHC calling convention");
+  return false; // NOTE(review): presumably never reached -- confirm.
+}
+
 } // end namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index bbd51546ac9f..b1b7ad47671f 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -58,9 +58,34 @@ def RetCC_SystemZ : CallingConv<[
 ]>;
 
 //===----------------------------------------------------------------------===//
+// z/Linux argument calling conventions for GHC
+//===----------------------------------------------------------------------===//
+def CC_SystemZ_GHC : CallingConv<[
+  // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, R8, SpLim
+  CCIfType<[i64], CCAssignToReg<[R7D, R8D, R10D, R11D, R12D, R13D,
+                                 R6D, R2D, R3D, R4D, R5D, R9D]>>,
+
+  // Pass in STG registers: F1, ..., F6
+  CCIfType<[f32], CCAssignToReg<[F8S, F9S, F10S, F11S, F0S, F1S]>>,
+
+  // Pass in STG registers: D1, ..., D6
+  CCIfType<[f64], CCAssignToReg<[F12D, F13D, F14D, F15D, F2D, F3D]>>,
+
+  // Pass in STG registers: XMM1, ..., XMM6 (vector types, fixed args only)
+  CCIfSubtarget<"hasVector()", // Rule applies only if hasVector() holds.
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+             CCIfFixed<CCAssignToReg<[V16, V17, V18, V19, V20, V21]>>>>,
+
+  // Fail otherwise: CC_SystemZ_GHC_Error handles anything left unassigned.
+  CCCustom<"CC_SystemZ_GHC_Error">
+]>;
+
+//===----------------------------------------------------------------------===//
 // z/Linux argument calling conventions
 //===----------------------------------------------------------------------===//
 def CC_SystemZ : CallingConv<[
+  CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_SystemZ_GHC>>,
+
   // Promote i32 to i64 if it has an explicit extension type.
   // The convention is that true integer arguments that are smaller
   // than 64 bits should be marked as extended, but structures that
@@ -128,3 +153,5 @@ def CSR_SystemZ_AllRegs : CalleeSavedRegs<(add (sequence "R%dD", 2, 15),
 def CSR_SystemZ_AllRegs_Vector : CalleeSavedRegs<(add (sequence "R%dD", 2, 15),
                                                       (sequence "V%d", 0, 31))>;
 
+def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
+
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 946eb2ba7c79..2f0cf0317029 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -87,6 +88,8 @@ private:
                             SmallVectorImpl<MachineInstr *> &CCUsers);
   bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare,
                             SmallVectorImpl<MachineInstr *> &CCUsers);
+  bool convertToLogical(MachineInstr &MI, MachineInstr &Compare,
+                        SmallVectorImpl<MachineInstr *> &CCUsers);
   bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare,
                              SmallVectorImpl<MachineInstr *> &CCUsers,
                              unsigned ConvOpc = 0);
@@ -103,14 +106,6 @@ char SystemZElimCompare::ID = 0;
 
 } // end anonymous namespace
 
-// Return true if CC is live out of MBB.
-static bool isCCLiveOut(MachineBasicBlock &MBB) {
-  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
-    if ((*SI)->isLiveIn(SystemZ::CC))
-      return true;
-  return false;
-}
-
 // Returns true if MI is an instruction whose output equals the value in Reg.
 static bool preservesValueOf(MachineInstr &MI, unsigned Reg) {
   switch (MI.getOpcode()) {
@@ -302,9 +297,60 @@ bool SystemZElimCompare::convertToLoadAndTest(
   MIB.setMemRefs(MI.memoperands());
   MI.eraseFromParent();
 
+  // Mark instruction as not raising an FP exception if applicable.  We already
+  // verified earlier that this move is valid.
+  if (!Compare.mayRaiseFPException())
+    MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept);
+
+  return true;
+}
+
+// Try to replace the arithmetic add MI with its equivalent "logical" opcode
+// so that Compare can be eliminated.  This helps EQ/NE comparisons where the
+// "nsw" flag is missing, since the "logical" opcode always sets CC to reflect
+// the result being zero or non-zero.
+bool SystemZElimCompare::convertToLogical(
+    MachineInstr &MI, MachineInstr &Compare,
+    SmallVectorImpl<MachineInstr *> &CCUsers) {
+
+  unsigned LogicalOpc;
+  switch (MI.getOpcode()) {
+  case SystemZ::AR:   LogicalOpc = SystemZ::ALR;   break;
+  case SystemZ::ARK:  LogicalOpc = SystemZ::ALRK;  break;
+  case SystemZ::AGR:  LogicalOpc = SystemZ::ALGR;  break;
+  case SystemZ::AGRK: LogicalOpc = SystemZ::ALGRK; break;
+  case SystemZ::A:    LogicalOpc = SystemZ::AL;    break;
+  case SystemZ::AY:   LogicalOpc = SystemZ::ALY;   break;
+  case SystemZ::AG:   LogicalOpc = SystemZ::ALG;   break;
+  default: return false; // MI has no "logical" counterpart.
+  }
+  if (!adjustCCMasksForInstr(MI, Compare, CCUsers, LogicalOpc))
+    return false;
+
+  // The operands are expected to be identical for both opcodes, so it is
+  // enough to swap the descriptor and clear the dead flag on CC.
+  MI.setDesc(TII->get(LogicalOpc));
+  MI.clearRegisterDeads(SystemZ::CC);
   return true;
 }
 
+#ifndef NDEBUG
+// Debug-build helper used in assertions: returns true iff Opcode is one of
+// the add-with-immediate opcodes listed in the table below.
+static bool isAddWithImmediate(unsigned Opcode) {
+  static const unsigned AddImmOpcodes[] = {
+      SystemZ::AHI, SystemZ::AHIK, SystemZ::AGHI, SystemZ::AGHIK,
+      SystemZ::AFI, SystemZ::AIH,  SystemZ::AGFI};
+
+  // A linear scan is fine: the table is tiny and this code only exists in
+  // builds with assertions enabled.
+  for (unsigned Candidate : AddImmOpcodes)
+    if (Candidate == Opcode)
+      return true;
+  return false;
+}
+#endif
+
 // The CC users in CCUsers are testing the result of a comparison of some
 // value X against zero and we know that any CC value produced by MI would
 // also reflect the value of X.  ConvOpc may be used to pass the transfomed
@@ -315,65 +361,116 @@ bool SystemZElimCompare::adjustCCMasksForInstr(
     MachineInstr &MI, MachineInstr &Compare,
     SmallVectorImpl<MachineInstr *> &CCUsers,
     unsigned ConvOpc) {
+  unsigned CompareFlags = Compare.getDesc().TSFlags;
+  unsigned CompareCCValues = SystemZII::getCCValues(CompareFlags);
   int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode());
   const MCInstrDesc &Desc = TII->get(Opcode);
   unsigned MIFlags = Desc.TSFlags;
 
-  // See which compare-style condition codes are available.
-  unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags);
+  // If Compare may raise an FP exception, we can only eliminate it
+  // if MI itself would have already raised the exception.
+  if (Compare.mayRaiseFPException()) {
+    // If the caller will change MI to use ConvOpc, only test whether
+    // ConvOpc is suitable; it is on the caller to set the MI flag.
+    if (ConvOpc && !Desc.mayRaiseFPException())
+      return false;
+    // If the caller will not change MI, we test the MI flag here.
+    if (!ConvOpc && !MI.mayRaiseFPException())
+      return false;
+  }
 
+  // See which compare-style condition codes are available.
+  unsigned CCValues = SystemZII::getCCValues(MIFlags);
+  unsigned ReusableCCMask = CCValues;
   // For unsigned comparisons with zero, only equality makes sense.
-  unsigned CompareFlags = Compare.getDesc().TSFlags;
   if (CompareFlags & SystemZII::IsLogical)
     ReusableCCMask &= SystemZ::CCMASK_CMP_EQ;
-
+  unsigned OFImplies = 0;
+  bool LogicalMI = false;
+  bool MIEquivalentToCmp = false;
+  if (MI.getFlag(MachineInstr::NoSWrap) &&
+      (MIFlags & SystemZII::CCIfNoSignedWrap)) {
+    // If MI has the NSW flag set in combination with the
+    // SystemZII::CCIfNoSignedWrap flag, all CCValues are valid.
+  }
+  else if ((MIFlags & SystemZII::CCIfNoSignedWrap) &&
+           MI.getOperand(2).isImm()) {
+    // Signed addition of immediate. If adding a positive immediate
+    // overflows, the result must be less than zero. If adding a negative
+    // immediate overflows, the result must be larger than zero (except in
+    // the special case of adding the minimum value of the result range, in
+    // which case we cannot predict whether the result is larger than or
+    // equal to zero).
+    assert(isAddWithImmediate(Opcode) && "Expected an add with immediate.");
+    assert(!MI.mayLoadOrStore() && "Expected an immediate term.");
+    int64_t RHS = MI.getOperand(2).getImm();
+    if (SystemZ::GRX32BitRegClass.contains(MI.getOperand(0).getReg()) &&
+        RHS == INT32_MIN)
+      return false;
+    OFImplies = (RHS > 0 ? SystemZ::CCMASK_CMP_LT : SystemZ::CCMASK_CMP_GT);
+  }
+  else if ((MIFlags & SystemZII::IsLogical) && CCValues) {
+    // Use CCMASK_CMP_EQ to match with CCUsers. On success their CC masks will
+    // be converted to CCMASK_LOGICAL_ZERO or CCMASK_LOGICAL_NONZERO.
+    LogicalMI = true;
+    ReusableCCMask = SystemZ::CCMASK_CMP_EQ;
+  }
+  else {
+    ReusableCCMask &= SystemZII::getCompareZeroCCMask(MIFlags);
+    assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
+    MIEquivalentToCmp =
+      ReusableCCMask == CCValues && CCValues == CompareCCValues;
+  }
   if (ReusableCCMask == 0)
     return false;
 
-  unsigned CCValues = SystemZII::getCCValues(MIFlags);
-  assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
-
-  bool MIEquivalentToCmp =
-    (ReusableCCMask == CCValues &&
-     CCValues == SystemZII::getCCValues(CompareFlags));
-
   if (!MIEquivalentToCmp) {
     // Now check whether these flags are enough for all users.
     SmallVector<MachineOperand *, 4> AlterMasks;
     for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) {
-      MachineInstr *MI = CCUsers[I];
+      MachineInstr *CCUserMI = CCUsers[I];
 
       // Fail if this isn't a use of CC that we understand.
-      unsigned Flags = MI->getDesc().TSFlags;
+      unsigned Flags = CCUserMI->getDesc().TSFlags;
       unsigned FirstOpNum;
       if (Flags & SystemZII::CCMaskFirst)
         FirstOpNum = 0;
       else if (Flags & SystemZII::CCMaskLast)
-        FirstOpNum = MI->getNumExplicitOperands() - 2;
+        FirstOpNum = CCUserMI->getNumExplicitOperands() - 2;
       else
         return false;
 
       // Check whether the instruction predicate treats all CC values
       // outside of ReusableCCMask in the same way.  In that case it
       // doesn't matter what those CC values mean.
-      unsigned CCValid = MI->getOperand(FirstOpNum).getImm();
-      unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm();
+      unsigned CCValid = CCUserMI->getOperand(FirstOpNum).getImm();
+      unsigned CCMask = CCUserMI->getOperand(FirstOpNum + 1).getImm();
+      assert(CCValid == CompareCCValues && (CCMask & ~CCValid) == 0 &&
+             "Corrupt CC operands of CCUser.");
       unsigned OutValid = ~ReusableCCMask & CCValid;
       unsigned OutMask = ~ReusableCCMask & CCMask;
       if (OutMask != 0 && OutMask != OutValid)
         return false;
 
-      AlterMasks.push_back(&MI->getOperand(FirstOpNum));
-      AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1));
+      AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum));
+      AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum + 1));
     }
 
     // All users are OK.  Adjust the masks for MI.
     for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) {
       AlterMasks[I]->setImm(CCValues);
       unsigned CCMask = AlterMasks[I + 1]->getImm();
-      if (CCMask & ~ReusableCCMask)
-        AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) |
-                                  (CCValues & ~ReusableCCMask));
+      if (LogicalMI) {
+        // Translate the CCMask into its "logical" value.
+        CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ?
+                  SystemZ::CCMASK_LOGICAL_ZERO : SystemZ::CCMASK_LOGICAL_NONZERO);
+        CCMask &= CCValues; // Logical subtracts never set CC=0.
+      } else {
+        if (CCMask & ~ReusableCCMask)
+          CCMask = (CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask);
+        CCMask |= (CCMask & OFImplies) ? SystemZ::CCMASK_ARITH_OVERFLOW : 0;
+      }
+      AlterMasks[I + 1]->setImm(CCMask);
     }
   }
 
@@ -450,7 +547,9 @@ bool SystemZElimCompare::optimizeCompareZero(
       }
       // Try to eliminate Compare by reusing a CC result from MI.
       if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) ||
-          (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) {
+          (!CCRefs.Def &&
+           (adjustCCMasksForInstr(MI, Compare, CCUsers) ||
+            convertToLogical(MI, Compare, CCUsers)))) {
         EliminatedComparisons += 1;
         return true;
       }
@@ -461,6 +560,12 @@ bool SystemZElimCompare::optimizeCompareZero(
     CCRefs |= getRegReferences(MI, SystemZ::CC);
     if (CCRefs.Use && CCRefs.Def)
       break;
+    // Eliminating a Compare that may raise an FP exception will move
+    // raising the exception to some earlier MI.  We cannot do this if
+    // there is anything in between that might change exception flags.
+    if (Compare.mayRaiseFPException() &&
+        (MI.isCall() || MI.hasUnmodeledSideEffects()))
+      break;
   }
 
   // Also do a forward search to handle cases where an instruction after the
@@ -595,7 +700,9 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
   // Walk backwards through the block looking for comparisons, recording
   // all CC users as we go.  The subroutines can delete Compare and
   // instructions before it.
-  bool CompleteCCUsers = !isCCLiveOut(MBB);
+  LivePhysRegs LiveRegs(*TRI);
+  LiveRegs.addLiveOuts(MBB);
+  bool CompleteCCUsers = !LiveRegs.contains(SystemZ::CC);
   SmallVector<MachineInstr *, 4> CCUsers;
   MachineBasicBlock::iterator MBBI = MBB.end();
   while (MBBI != MBB.begin()) {
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 0b8b6880accc..3cdf6bf98ee0 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -21,8 +21,8 @@
 using namespace llvm;
 
 namespace {
-// The ABI-defined register save slots, relative to the incoming stack
-// pointer.
+// The ABI-defined register save slots, relative to the CFA (i.e.
+// incoming stack pointer + SystemZMC::CallFrameSize).
 static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
   { SystemZ::R2D,  0x10 },
   { SystemZ::R3D,  0x18 },
@@ -47,18 +47,125 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
 
 SystemZFrameLowering::SystemZFrameLowering()
     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8),
-                          -SystemZMC::CallFrameSize, Align(8),
-                          false /* StackRealignable */) {
+                          0, Align(8), false /* StackRealignable */),
+      RegSpillOffsets(0) {
+  // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not
+  // equal to the incoming stack pointer, but to incoming stack pointer plus
+  // 160.  Instead of using a Local Area Offset, the Register save area will
+  // be occupied by fixed frame objects, and all offsets are actually
+  // relative to CFA.
+
   // Create a mapping from register number to save slot offset.
+  // These offsets are relative to the start of the register save area.
   RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
   for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
     RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
 }
 
-const TargetFrameLowering::SpillSlot *
-SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
-  NumEntries = array_lengthof(SpillOffsetTable);
-  return SpillOffsetTable;
+static bool usePackedStack(MachineFunction &MF) {
+  // Packed layout: attribute set, and not vararg/GHC/frame-address-taken.
+  const Function &F = MF.getFunction();
+  const bool WantsPacked = F.hasFnAttribute("packed-stack");
+  const bool UsesBackChain = F.hasFnAttribute("backchain");
+  if (WantsPacked && UsesBackChain)
+    report_fatal_error("packed-stack with backchain is currently unsupported.");
+  return WantsPacked && !UsesBackChain && !F.isVarArg() &&
+         F.getCallingConv() != CallingConv::GHC &&
+         !MF.getFrameInfo().isFrameAddressTaken();
+}
+
+// Assign fixed frame indices to all callee-saved registers in CSI and record
+// the GPR spill/restore ranges in the function info.  Always returns true.
+bool SystemZFrameLowering::
+assignCalleeSavedSpillSlots(MachineFunction &MF,
+                            const TargetRegisterInfo *TRI,
+                            std::vector<CalleeSavedInfo> &CSI) const {
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  if (CSI.empty())
+    return true; // Early exit if no callee saved registers are modified!
+
+  unsigned LowGPR = 0;
+  unsigned HighGPR = SystemZ::R15D;
+  int StartSPOffset = SystemZMC::CallFrameSize;
+  int CurrOffset;
+  if (!usePackedStack(MF)) {
+    for (auto &CS : CSI) {
+      unsigned Reg = CS.getReg();
+      int Offset = RegSpillOffsets[Reg];
+      if (Offset) {
+        if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
+          LowGPR = Reg;
+          StartSPOffset = Offset;
+        }
+        Offset -= SystemZMC::CallFrameSize;
+        int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
+        CS.setFrameIdx(FrameIdx);
+      } else
+        CS.setFrameIdx(INT32_MAX);
+    }
+
+    // Save the range of call-saved registers, for use by the
+    // prologue/epilogue inserters.
+    ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
+    if (MF.getFunction().isVarArg()) {
+      // Also save the GPR varargs, if any.  R6D is call-saved, so would
+      // already be included, but we also need to handle the call-clobbered
+      // argument registers.
+      unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+      if (FirstGPR < SystemZ::NumArgGPRs) {
+        unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
+        int Offset = RegSpillOffsets[Reg];
+        if (StartSPOffset > Offset) {
+          LowGPR = Reg; StartSPOffset = Offset;
+        }
+      }
+    }
+    ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
+
+    CurrOffset = -SystemZMC::CallFrameSize;
+  } else {
+    // Packed stack: put all the GPRs at the top of the Register save area.
+    uint32_t LowGR64Num = UINT32_MAX;
+    for (auto &CS : CSI) {
+      unsigned Reg = CS.getReg();
+      if (SystemZ::GR64BitRegClass.contains(Reg)) {
+        unsigned GR64Num = SystemZMC::getFirstReg(Reg);
+        int Offset = -8 * (15 - GR64Num + 1);
+        if (LowGR64Num > GR64Num) {
+          LowGR64Num = GR64Num;
+          StartSPOffset = SystemZMC::CallFrameSize + Offset;
+        }
+        int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
+        CS.setFrameIdx(FrameIdx);
+      } else
+        CS.setFrameIdx(INT32_MAX);
+    }
+    if (LowGR64Num < UINT32_MAX)
+      LowGPR = SystemZMC::GR64Regs[LowGR64Num];
+
+    // Save the range of call-saved registers, for use by the
+    // prologue/epilogue inserters.
+    ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
+    ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
+
+    CurrOffset = LowGPR ? -(SystemZMC::CallFrameSize - StartSPOffset) : 0;
+  }
+
+  // Create fixed stack objects for the remaining registers.
+  for (auto &CS : CSI) {
+    if (CS.getFrameIdx() != INT32_MAX)
+      continue;
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(CS.getReg());
+    unsigned Size = TRI->getSpillSize(*RC);
+    CurrOffset -= Size;
+    assert(CurrOffset % 8 == 0 &&
+           "8-byte alignment required for all register save slots");
+    int FrameIdx = MFFrame.CreateFixedSpillStackObject(Size, CurrOffset);
+    CS.setFrameIdx(FrameIdx);
+  }
+
+  return true;
 }
 
 void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -141,53 +248,21 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   bool IsVarArg = MF.getFunction().isVarArg();
   DebugLoc DL;
 
-  // Scan the call-saved GPRs and find the bounds of the register spill area.
-  unsigned LowGPR = 0;
-  unsigned HighGPR = SystemZ::R15D;
-  unsigned StartOffset = -1U;
-  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
-    unsigned Reg = CSI[I].getReg();
-    if (SystemZ::GR64BitRegClass.contains(Reg)) {
-      unsigned Offset = RegSpillOffsets[Reg];
-      assert(Offset && "Unexpected GPR save");
-      if (StartOffset > Offset) {
-        LowGPR = Reg;
-        StartOffset = Offset;
-      }
-    }
-  }
-
-  // Save the range of call-saved registers, for use by the epilogue inserter.
-  ZFI->setLowSavedGPR(LowGPR);
-  ZFI->setHighSavedGPR(HighGPR);
-
-  // Include the GPR varargs, if any.  R6D is call-saved, so would
-  // be included by the loop above, but we also need to handle the
-  // call-clobbered argument registers.
-  if (IsVarArg) {
-    unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
-    if (FirstGPR < SystemZ::NumArgGPRs) {
-      unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
-      unsigned Offset = RegSpillOffsets[Reg];
-      if (StartOffset > Offset) {
-        LowGPR = Reg; StartOffset = Offset;
-      }
-    }
-  }
-
   // Save GPRs
-  if (LowGPR) {
-    assert(LowGPR != HighGPR && "Should be saving %r15 and something else");
+  SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
+  if (SpillGPRs.LowGPR) {
+    assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
+           "Should be saving %r15 and something else");
 
     // Build an STMG instruction.
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
 
     // Add the explicit register operands.
-    addSavedGPR(MBB, MIB, LowGPR, false);
-    addSavedGPR(MBB, MIB, HighGPR, false);
+    addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
+    addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);
 
     // Add the address.
-    MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+    MIB.addReg(SystemZ::R15D).addImm(SpillGPRs.GPROffset);
 
     // Make sure all call-saved GPRs are included as operands and are
     // marked as live on entry.
@@ -248,30 +323,29 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   // Restore call-saved GPRs (but not call-clobbered varargs, which at
   // this point might hold return values).
-  unsigned LowGPR = ZFI->getLowSavedGPR();
-  unsigned HighGPR = ZFI->getHighSavedGPR();
-  unsigned StartOffset = RegSpillOffsets[LowGPR];
-  if (LowGPR) {
+  SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
+  if (RestoreGPRs.LowGPR) {
     // If we saved any of %r2-%r5 as varargs, we should also be saving
     // and restoring %r6.  If we're saving %r6 or above, we should be
     // restoring it too.
-    assert(LowGPR != HighGPR && "Should be loading %r15 and something else");
+    assert(RestoreGPRs.LowGPR != RestoreGPRs.HighGPR &&
+           "Should be loading %r15 and something else");
 
     // Build an LMG instruction.
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
 
     // Add the explicit register operands.
-    MIB.addReg(LowGPR, RegState::Define);
-    MIB.addReg(HighGPR, RegState::Define);
+    MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
+    MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);
 
     // Add the address.
     MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D);
-    MIB.addImm(StartOffset);
+    MIB.addImm(RestoreGPRs.GPROffset);
 
     // Do a second scan adding regs as being defined by instruction
     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
       unsigned Reg = CSI[I].getReg();
-      if (Reg != LowGPR && Reg != HighGPR &&
+      if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR &&
           SystemZ::GR64BitRegClass.contains(Reg))
         MIB.addReg(Reg, RegState::ImplicitDefine);
     }
@@ -284,16 +358,20 @@ void SystemZFrameLowering::
 processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                     RegScavenger *RS) const {
   MachineFrameInfo &MFFrame = MF.getFrameInfo();
+
+  if (!usePackedStack(MF))
+    // Always create the full incoming register save area.
+    getOrCreateFramePointerSaveIndex(MF);
+
   // Get the size of our stack frame to be allocated ...
   uint64_t StackSize = (MFFrame.estimateStackSize(MF) +
                         SystemZMC::CallFrameSize);
   // ... and the maximum offset we may need to reach into the
   // caller's frame to access the save area or stack arguments.
-  int64_t MaxArgOffset = SystemZMC::CallFrameSize;
+  int64_t MaxArgOffset = 0;
   for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I)
     if (MFFrame.getObjectOffset(I) >= 0) {
-      int64_t ArgOffset = SystemZMC::CallFrameSize +
-                          MFFrame.getObjectOffset(I) +
+      int64_t ArgOffset = MFFrame.getObjectOffset(I) +
                           MFFrame.getObjectSize(I);
       MaxArgOffset = std::max(MaxArgOffset, ArgOffset);
     }
@@ -351,6 +429,23 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
   const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo();
   bool HasFP = hasFP(MF);
 
+  // In the GHC calling convention, C stack space, including the ABI-defined
+  // 160-byte base area, is (de)allocated by GHC itself.  This stack space may
+  // be used by LLVM as spill slots for the tail-recursive GHC functions, so
+  // do not allocate any stack space here.
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC) {
+    if (MFFrame.getStackSize() > 2048 * sizeof(long)) {
+      report_fatal_error(
+          "Pre allocated stack space for GHC function is too small");
+    }
+    if (HasFP) {
+      report_fatal_error(
+          "In GHC calling convention a frame pointer is not supported");
+    }
+    MFFrame.setStackSize(MFFrame.getStackSize() + SystemZMC::CallFrameSize);
+    return;
+  }
+
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc DL;
@@ -358,7 +453,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
   // The current offset of the stack pointer from the CFA.
   int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
 
-  if (ZFI->getLowSavedGPR()) {
+  if (ZFI->getSpillGPRRegs().LowGPR) {
     // Skip over the GPR saves.
     if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG)
       ++MBBI;
@@ -369,7 +464,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
     for (auto &Save : CSI) {
       unsigned Reg = Save.getReg();
       if (SystemZ::GR64BitRegClass.contains(Reg)) {
-        int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
+        int FI = Save.getFrameIdx();
+        int64_t Offset = MFFrame.getObjectOffset(FI);
         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
         BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
@@ -382,10 +478,19 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
   // We need to allocate the ABI-defined 160-byte base area whenever
   // we allocate stack space for our own use and whenever we call another
   // function.
-  if (StackSize || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls()) {
+  bool HasStackObject = false;
+  for (unsigned i = 0, e = MFFrame.getObjectIndexEnd(); i != e; ++i)
+    if (!MFFrame.isDeadObjectIndex(i)) {
+      HasStackObject = true;
+      break;
+    }
+  if (HasStackObject || MFFrame.hasCalls())
     StackSize += SystemZMC::CallFrameSize;
-    MFFrame.setStackSize(StackSize);
-  }
+  // Don't allocate the incoming reg save area.
+  StackSize = StackSize > SystemZMC::CallFrameSize
+                  ? StackSize - SystemZMC::CallFrameSize
+                  : 0;
+  MFFrame.setStackSize(StackSize);
 
   if (StackSize) {
     // Determine if we want to store a backchain.
@@ -410,7 +515,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
 
     if (StoreBackchain)
       BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
-        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0);
+        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0)
+        .addReg(0);
   }
 
   if (HasFP) {
@@ -478,11 +584,15 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
   SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
   MachineFrameInfo &MFFrame = MF.getFrameInfo();
 
+  // See SystemZFrameLowering::emitPrologue
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    return;
+
   // Skip the return instruction.
   assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
 
   uint64_t StackSize = MFFrame.getStackSize();
-  if (ZFI->getLowSavedGPR()) {
+  if (ZFI->getRestoreGPRRegs().LowGPR) {
     --MBBI;
     unsigned Opcode = MBBI->getOpcode();
     if (Opcode != SystemZ::LMG)
@@ -527,6 +637,16 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
   return true;
 }
 
+int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+                                                 int FI,
+                                                 unsigned &FrameReg) const {
+  // Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so
+  // add that difference to the offset from the TargetFrameLowering version.
+  int64_t Offset =
+    TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg);
+  return Offset + SystemZMC::CallFrameSize; // NOTE: sum narrows to int.
+}
+
 MachineBasicBlock::iterator SystemZFrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF,
                               MachineBasicBlock &MBB,
@@ -543,3 +663,15 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
     llvm_unreachable("Unexpected call frame instruction");
   }
 }
+
+int SystemZFrameLowering::
+getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  int FI = ZFI->getFramePointerSaveIndex();
+  if (!FI) { // 0 means no save slot has been recorded yet.
+    MachineFrameInfo &MFFrame = MF.getFrameInfo();
+    FI = MFFrame.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+    ZFI->setFramePointerSaveIndex(FI); // Remember it for later calls.
+  }
+  return FI;
+}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 71ef3e4dc240..4189a92b8294 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -24,8 +24,10 @@ public:
 
   // Override TargetFrameLowering.
   bool isFPCloseToIncomingSP() const override { return false; }
-  const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const
-    override;
+  bool
+  assignCalleeSavedSpillSlots(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const override;
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS) const override;
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
@@ -43,6 +45,8 @@ public:
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
+  int getFrameIndexReference(const MachineFunction &MF, int FI,
+                             unsigned &FrameReg) const override;
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const override;
@@ -52,6 +56,9 @@ public:
   unsigned getRegSpillOffset(unsigned Reg) const {
     return RegSpillOffsets[Reg];
   }
+
+  // Get or create the frame index of where the old frame pointer is stored.
+  int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
 };
 } // end namespace llvm
 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 751034c2d41a..3927a977e6fc 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -347,9 +347,12 @@ public:
 
   bool runOnMachineFunction(MachineFunction &MF) override {
     const Function &F = MF.getFunction();
-    if (F.getFnAttribute("mnop-mcount").getValueAsString() == "true" &&
-        F.getFnAttribute("fentry-call").getValueAsString() != "true")
-      report_fatal_error("mnop-mcount only supported with fentry-call");
+    if (F.getFnAttribute("fentry-call").getValueAsString() != "true") {
+      if (F.hasFnAttribute("mnop-mcount"))
+        report_fatal_error("mnop-mcount only supported with fentry-call");
+      if (F.hasFnAttribute("mrecord-mcount"))
+        report_fatal_error("mrecord-mcount only supported with fentry-call");
+    }
 
     Subtarget = &MF.getSubtarget<SystemZSubtarget>();
     return SelectionDAGISel::runOnMachineFunction(MF);
@@ -1494,8 +1497,9 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
             if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
                 ChildOpcode == ISD::XOR)
               break;
-          // Check whether this expression matches OR-with-complement.
-          if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) {
+          // Check whether this expression matches OR-with-complement
+          // (or matches an alternate pattern for NXOR).
+          if (ChildOpcode == ISD::XOR) {
             auto Op0 = Node->getOperand(0);
             if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1)))
               if (Op0Op1->getZExtValue() == (uint64_t)-1)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e0ca9da93561..c73905d3357a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -19,8 +19,9 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsS390.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/KnownBits.h"
 #include <cctype>
@@ -32,12 +33,16 @@ using namespace llvm;
 namespace {
 // Represents information about a comparison.
 struct Comparison {
-  Comparison(SDValue Op0In, SDValue Op1In)
-    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
+  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
+    : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
+      Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
 
   // The operands to the comparison.
   SDValue Op0, Op1;
 
+  // Chain if this is a strict floating-point comparison.
+  SDValue Chain;
+
   // The opcode that should be used to compare Op0 and Op1.
   unsigned Opcode;
 
@@ -132,6 +137,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     if (isTypeLegal(VT)) {
       // Lower SET_CC into an IPM-based sequence.
       setOperationAction(ISD::SETCC, VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
 
       // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
       setOperationAction(ISD::SELECT, VT, Expand);
@@ -212,6 +219,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
       if (Subtarget.hasFPExtension())
         setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
+
+      // And similarly for STRICT_[SU]INT_TO_FP.
+      setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
+      if (Subtarget.hasFPExtension())
+        setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
     }
   }
 
@@ -251,6 +263,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   if (!Subtarget.hasFPExtension()) {
     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
   }
 
   // We have native support for a 64-bit CTLZ, via FLOGR.
@@ -373,6 +387,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
       // and inverting the result as necessary.
       setOperationAction(ISD::SETCC, VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+      if (Subtarget.hasVectorEnhancements1())
+        setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
     }
   }
 
@@ -392,6 +409,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
   }
 
   if (Subtarget.hasVectorEnhancements2()) {
@@ -408,6 +429,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
   }
 
   // Handle floating-point types.
@@ -558,16 +583,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                      MVT::v4f32, MVT::v2f64 }) {
       setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
       setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
+      setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
+      setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
     }
   }
 
-  // We have fused multiply-addition for f32 and f64 but not f128.
-  setOperationAction(ISD::FMA, MVT::f32,  Legal);
-  setOperationAction(ISD::FMA, MVT::f64,  Legal);
-  if (Subtarget.hasVectorEnhancements1())
-    setOperationAction(ISD::FMA, MVT::f128, Legal);
-  else
+  // We only have fused f128 multiply-addition on vector registers.
+  if (!Subtarget.hasVectorEnhancements1()) {
     setOperationAction(ISD::FMA, MVT::f128, Expand);
+    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
+  }
 
   // We don't have a copysign instruction on vector registers.
   if (Subtarget.hasVectorEnhancements1())
@@ -612,7 +637,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
+  setTargetDAGCombine(ISD::STRICT_FP_ROUND);
   setTargetDAGCombine(ISD::FP_EXTEND);
+  setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
   setTargetDAGCombine(ISD::BSWAP);
   setTargetDAGCombine(ISD::SDIV);
   setTargetDAGCombine(ISD::UDIV);
@@ -634,6 +661,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   // than "STC;MVC".  Handle the choice in target-specific code instead.
   MaxStoresPerMemset = 0;
   MaxStoresPerMemsetOptSize = 0;
+
+  // Default to having -disable-strictnode-mutation on
+  IsStrictFPEnabled = true;
 }
 
 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -643,7 +673,8 @@ EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
   return VT.changeVectorElementTypeToInteger();
 }
 
-bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
+    const MachineFunction &MF, EVT VT) const {
   VT = VT.getScalarType();
 
   if (!VT.isSimple())
@@ -1406,7 +1437,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
 
     // ...and a similar frame index for the caller-allocated save area
     // that will be used to store the incoming registers.
-    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
+    int64_t RegSaveOffset = -SystemZMC::CallFrameSize;
     unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
     FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
 
@@ -1675,6 +1706,9 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   if (RetLocs.empty())
     return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
 
+  if (CallConv == CallingConv::GHC)
+    report_fatal_error("GHC functions return void only");
+
   // Copy the result values into the output registers.
   SDValue Glue;
   SmallVector<SDValue, 4> RetOps;
@@ -2161,6 +2195,10 @@ static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
 // negation to set CC, so avoiding separate LOAD AND TEST and
 // LOAD (NEGATIVE/COMPLEMENT) instructions.
 static void adjustForFNeg(Comparison &C) {
+  // This optimization is invalid for strict comparisons, since FNEG
+  // does not raise any exceptions.
+  if (C.Chain)
+    return;
   auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
   if (C1 && C1->isZero()) {
     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
@@ -2448,7 +2486,7 @@ static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                   SDValue Call, unsigned CCValid, uint64_t CC,
                                   ISD::CondCode Cond) {
-  Comparison C(Call, SDValue());
+  Comparison C(Call, SDValue(), SDValue());
   C.Opcode = Opcode;
   C.CCValid = CCValid;
   if (Cond == ISD::SETEQ)
@@ -2479,8 +2517,11 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
 
 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
-                         ISD::CondCode Cond, const SDLoc &DL) {
+                         ISD::CondCode Cond, const SDLoc &DL,
+                         SDValue Chain = SDValue(),
+                         bool IsSignaling = false) {
   if (CmpOp1.getOpcode() == ISD::Constant) {
+    assert(!Chain);
     uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
     unsigned Opcode, CCValid;
     if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
@@ -2492,13 +2533,19 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
         isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
       return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
   }
-  Comparison C(CmpOp0, CmpOp1);
+  Comparison C(CmpOp0, CmpOp1, Chain);
   C.CCMask = CCMaskForCondCode(Cond);
   if (C.Op0.getValueType().isFloatingPoint()) {
     C.CCValid = SystemZ::CCMASK_FCMP;
-    C.Opcode = SystemZISD::FCMP;
+    if (!C.Chain)
+      C.Opcode = SystemZISD::FCMP;
+    else if (!IsSignaling)
+      C.Opcode = SystemZISD::STRICT_FCMP;
+    else
+      C.Opcode = SystemZISD::STRICT_FCMPS;
     adjustForFNeg(C);
   } else {
+    assert(!C.Chain);
     C.CCValid = SystemZ::CCMASK_ICMP;
     C.Opcode = SystemZISD::ICMP;
     // Choose the type of comparison.  Equality and inequality tests can
@@ -2556,6 +2603,10 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
     return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
                        DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
   }
+  if (C.Chain) {
+    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+    return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
+  }
   return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
 }
 
@@ -2600,24 +2651,51 @@ static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
 }
 
 // Return the SystemISD vector comparison operation for CC, or 0 if it cannot
-// be done directly.  IsFP is true if CC is for a floating-point rather than
-// integer comparison.
-static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
+// be done directly.  Mode is CmpMode::Int for integer comparisons, CmpMode::FP
+// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
+// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
+// floating-point comparisons.
+enum class CmpMode { Int, FP, StrictFP, SignalingFP };
+static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
   switch (CC) {
   case ISD::SETOEQ:
   case ISD::SETEQ:
-    return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
+    switch (Mode) {
+    case CmpMode::Int:         return SystemZISD::VICMPE;
+    case CmpMode::FP:          return SystemZISD::VFCMPE;
+    case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPE;
+    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
+    }
+    llvm_unreachable("Bad mode");
 
   case ISD::SETOGE:
   case ISD::SETGE:
-    return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
+    switch (Mode) {
+    case CmpMode::Int:         return 0;
+    case CmpMode::FP:          return SystemZISD::VFCMPHE;
+    case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPHE;
+    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
+    }
+    llvm_unreachable("Bad mode");
 
   case ISD::SETOGT:
   case ISD::SETGT:
-    return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
+    switch (Mode) {
+    case CmpMode::Int:         return SystemZISD::VICMPH;
+    case CmpMode::FP:          return SystemZISD::VFCMPH;
+    case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPH;
+    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
+    }
+    llvm_unreachable("Bad mode");
 
   case ISD::SETUGT:
-    return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
+    switch (Mode) {
+    case CmpMode::Int:         return SystemZISD::VICMPHL;
+    case CmpMode::FP:          return 0;
+    case CmpMode::StrictFP:    return 0;
+    case CmpMode::SignalingFP: return 0;
+    }
+    llvm_unreachable("Bad mode");
 
   default:
     return 0;
@@ -2626,17 +2704,16 @@ static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
 
 // Return the SystemZISD vector comparison operation for CC or its inverse,
 // or 0 if neither can be done directly.  Indicate in Invert whether the
-// result is for the inverse of CC.  IsFP is true if CC is for a
-// floating-point rather than integer comparison.
-static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
+// result is for the inverse of CC.  Mode is as above.
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
                                             bool &Invert) {
-  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
     Invert = false;
     return Opcode;
   }
 
-  CC = ISD::getSetCCInverse(CC, !IsFP);
-  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
+  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
     Invert = true;
     return Opcode;
   }
@@ -2645,44 +2722,73 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
 }
 
 // Return a v2f64 that contains the extended form of elements Start and Start+1
-// of v4f32 value Op.
+// of v4f32 value Op.  If Chain is nonnull, return the strict form.
 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
-                                  SDValue Op) {
+                                  SDValue Op, SDValue Chain) {
   int Mask[] = { Start, -1, Start + 1, -1 };
   Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
+  if (Chain) {
+    SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
+    return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
+  }
   return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
 }
 
 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
-// producing a result of type VT.
+// producing a result of type VT.  If Chain is nonnull, return the strict form.
 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                                             const SDLoc &DL, EVT VT,
                                             SDValue CmpOp0,
-                                            SDValue CmpOp1) const {
+                                            SDValue CmpOp1,
+                                            SDValue Chain) const {
   // There is no hardware support for v4f32 (unless we have the vector
   // enhancements facility 1), so extend the vector into two v2f64s
   // and compare those.
   if (CmpOp0.getValueType() == MVT::v4f32 &&
       !Subtarget.hasVectorEnhancements1()) {
-    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
-    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
-    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
-    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
+    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
+    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
+    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
+    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
+    if (Chain) {
+      SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
+      SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
+      SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
+      SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
+      SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
+                            H1.getValue(1), L1.getValue(1),
+                            HRes.getValue(1), LRes.getValue(1) };
+      SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+      SDValue Ops[2] = { Res, NewChain };
+      return DAG.getMergeValues(Ops, DL);
+    }
     SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
     SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
     return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
   }
+  if (Chain) {
+    SDVTList VTs = DAG.getVTList(VT, MVT::Other);
+    return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
+  }
   return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
 }
 
 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
-// an integer mask of type VT.
+// an integer mask of type VT.  If Chain is nonnull, we have a strict
+// floating-point comparison.  If in addition IsSignaling is true, we have
+// a strict signaling floating-point comparison.
 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
                                                 const SDLoc &DL, EVT VT,
                                                 ISD::CondCode CC,
                                                 SDValue CmpOp0,
-                                                SDValue CmpOp1) const {
+                                                SDValue CmpOp1,
+                                                SDValue Chain,
+                                                bool IsSignaling) const {
   bool IsFP = CmpOp0.getValueType().isFloatingPoint();
+  assert (!Chain || IsFP);
+  assert (!IsSignaling || Chain);
+  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
+                 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
   bool Invert = false;
   SDValue Cmp;
   switch (CC) {
@@ -2692,9 +2798,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
     LLVM_FALLTHROUGH;
   case ISD::SETO: {
     assert(IsFP && "Unexpected integer comparison");
-    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
-    SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
+    SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+                              DL, VT, CmpOp1, CmpOp0, Chain);
+    SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
+                              DL, VT, CmpOp0, CmpOp1, Chain);
     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
+    if (Chain)
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                          LT.getValue(1), GE.getValue(1));
     break;
   }
 
@@ -2704,9 +2815,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
     LLVM_FALLTHROUGH;
   case ISD::SETONE: {
     assert(IsFP && "Unexpected integer comparison");
-    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
-    SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
+    SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+                              DL, VT, CmpOp1, CmpOp0, Chain);
+    SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+                              DL, VT, CmpOp0, CmpOp1, Chain);
     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
+    if (Chain)
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                          LT.getValue(1), GT.getValue(1));
     break;
   }
 
@@ -2714,15 +2830,17 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
     // matter whether we try the inversion or the swap first, since
     // there are no cases where both work.
   default:
-    if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
-      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
+    if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
+      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
     else {
       CC = ISD::getSetCCSwappedOperands(CC);
-      if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
-        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
+      if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
+        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
       else
         llvm_unreachable("Unhandled comparison");
     }
+    if (Chain)
+      Chain = Cmp.getValue(1);
     break;
   }
   if (Invert) {
@@ -2730,6 +2848,10 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
       DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
     Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
   }
+  if (Chain && Chain.getNode() != Cmp.getNode()) {
+    SDValue Ops[2] = { Cmp, Chain };
+    Cmp = DAG.getMergeValues(Ops, DL);
+  }
   return Cmp;
 }
 
@@ -2748,6 +2870,29 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
   return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
 }
 
+// Lower a strict FP SETCC; IsSignaling selects the signaling comparison.
+SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
+                                                  SelectionDAG &DAG,
+                                                  bool IsSignaling) const {
+  SDLoc DL(Op);
+  SDValue InChain = Op.getOperand(0);
+  SDValue LHS = Op.getOperand(1);
+  SDValue RHS = Op.getOperand(2);
+  ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get();
+  EVT ResVT = Op.getNode()->getValueType(0);
+  // Vectors: return whichever result of the lowered node Op refers to.
+  if (ResVT.isVector())
+    return lowerVectorSETCC(DAG, DL, ResVT, Cond, LHS, RHS, InChain,
+                            IsSignaling).getValue(Op.getResNo());
+  // Scalars: merge the SETCC value with result 1 of the compare node.
+  Comparison C(getCmp(DAG, LHS, RHS, Cond, DL, InChain, IsSignaling));
+  SDValue CCReg = emitCmp(DAG, DL, C);
+  CCReg->setFlags(Op->getFlags());
+  SDValue Merged[2] = { emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask),
+                        CCReg.getValue(1) };
+  return DAG.getMergeValues(Merged, DL);
+}
+
 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   SDValue CmpOp0   = Op.getOperand(2);
@@ -2828,17 +2973,26 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
 
   SDValue Result;
   if (Subtarget.isPC32DBLSymbol(GV, CM)) {
-    // Assign anchors at 1<<12 byte boundaries.
-    uint64_t Anchor = Offset & ~uint64_t(0xfff);
-    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
-    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
-
-    // The offset can be folded into the address if it is aligned to a halfword.
-    Offset -= Anchor;
-    if (Offset != 0 && (Offset & 1) == 0) {
-      SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
-      Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
-      Offset = 0;
+    if (isInt<32>(Offset)) {
+      // Assign anchors at 1<<12 byte boundaries.
+      uint64_t Anchor = Offset & ~uint64_t(0xfff);
+      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
+      Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+
+      // The offset can be folded into the address if it is aligned to a
+      // halfword.
+      Offset -= Anchor;
+      if (Offset != 0 && (Offset & 1) == 0) {
+        SDValue Full =
+          DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
+        Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
+        Offset = 0;
+      }
+    } else {
+      // Conservatively load a constant offset greater than 32 bits into a
+      // register below.
+      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
+      Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
     }
   } else {
     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
@@ -2865,6 +3019,10 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
   SDValue Chain = DAG.getEntryNode();
   SDValue Glue;
 
+  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
+      CallingConv::GHC)
+    report_fatal_error("In GHC calling convention TLS is not supported");
+
   // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
   Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
@@ -2931,6 +3089,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
 
+  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
+      CallingConv::GHC)
+    report_fatal_error("In GHC calling convention TLS is not supported");
+
   SDValue TP = lowerThreadPointer(DL, DAG);
 
   // Get the offset of GA from the thread pointer, based on the TLS model.
@@ -3060,14 +3222,10 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
 
-  // If the back chain frame index has not been allocated yet, do so.
-  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
-  int BackChainIdx = FI->getFramePointerSaveIndex();
-  if (!BackChainIdx) {
-    // By definition, the frame address is the address of the back chain.
-    BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
-    FI->setFramePointerSaveIndex(BackChainIdx);
-  }
+  // By definition, the frame address is the address of the back chain.
+  auto *TFL =
+      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
   SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
 
   // FIXME The frontend should detect this case.
@@ -3585,7 +3743,7 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
 
   // Get the known-zero mask for the operand.
   KnownBits Known = DAG.computeKnownBits(Op);
-  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
+  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
   if (NumSignificantBits == 0)
     return DAG.getConstant(0, DL, VT);
 
@@ -3861,6 +4019,9 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                               SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    report_fatal_error("Variable-sized stack allocations are not supported "
+                       "in GHC calling convention");
   return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                             SystemZ::R15D, Op.getValueType());
 }
@@ -3871,6 +4032,10 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
   bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
 
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    report_fatal_error("Variable-sized stack allocations are not supported "
+                       "in GHC calling convention");
+
   SDValue Chain = Op.getOperand(0);
   SDValue NewSP = Op.getOperand(1);
   SDValue Backchain;
@@ -4935,6 +5100,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
     return lowerSELECT_CC(Op, DAG);
   case ISD::SETCC:
     return lowerSETCC(Op, DAG);
+  case ISD::STRICT_FSETCC:
+    return lowerSTRICT_FSETCC(Op, DAG, false);
+  case ISD::STRICT_FSETCCS:
+    return lowerSTRICT_FSETCC(Op, DAG, true);
   case ISD::GlobalAddress:
     return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
   case ISD::GlobalTLSAddress:
@@ -5140,6 +5309,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(IABS);
     OPCODE(ICMP);
     OPCODE(FCMP);
+    OPCODE(STRICT_FCMP);
+    OPCODE(STRICT_FCMPS);
     OPCODE(TM);
     OPCODE(BR_CCMASK);
     OPCODE(SELECT_CCMASK);
@@ -5202,14 +5373,22 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(VICMPHS);
     OPCODE(VICMPHLS);
     OPCODE(VFCMPE);
+    OPCODE(STRICT_VFCMPE);
+    OPCODE(STRICT_VFCMPES);
     OPCODE(VFCMPH);
+    OPCODE(STRICT_VFCMPH);
+    OPCODE(STRICT_VFCMPHS);
     OPCODE(VFCMPHE);
+    OPCODE(STRICT_VFCMPHE);
+    OPCODE(STRICT_VFCMPHES);
     OPCODE(VFCMPES);
     OPCODE(VFCMPHS);
     OPCODE(VFCMPHES);
     OPCODE(VFTCI);
     OPCODE(VEXTEND);
+    OPCODE(STRICT_VEXTEND);
     OPCODE(VROUND);
+    OPCODE(STRICT_VROUND);
     OPCODE(VTM);
     OPCODE(VFAE_CC);
     OPCODE(VFAEZ_CC);
@@ -5732,6 +5911,19 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS(
   return SDValue();
 }
 
+// Return the chain operand (operand 0) that N1 and N2 have in common, or a
+// null SDValue when their chain operands differ.
+static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
+  const SDValue FirstChain = N1->getOperand(0);
+
+  // Only the trivial case of both nodes taking the same chain is handled.
+  // FIXME - we could handle more complex cases via TokenFactor,
+  // assuming we can verify that this would not create a cycle.
+  if (N2->getOperand(0) != FirstChain)
+    return SDValue();
+  return FirstChain;
+}
+
 SDValue SystemZTargetLowering::combineFP_ROUND(
     SDNode *N, DAGCombinerInfo &DCI) const {
 
@@ -5744,8 +5936,9 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
   // (extract_vector_elt (VROUND X) 2)
   //
   // This is a special case since the target doesn't really support v2f32s.
+  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
   SelectionDAG &DAG = DCI.DAG;
-  SDValue Op0 = N->getOperand(0);
+  SDValue Op0 = N->getOperand(OpNo);
   if (N->getValueType(0) == MVT::f32 &&
       Op0.hasOneUse() &&
       Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5761,20 +5954,34 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
           U->getOperand(1).getOpcode() == ISD::Constant &&
           cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
         SDValue OtherRound = SDValue(*U->use_begin(), 0);
-        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
-            OtherRound.getOperand(0) == SDValue(U, 0) &&
+        if (OtherRound.getOpcode() == N->getOpcode() &&
+            OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
             OtherRound.getValueType() == MVT::f32) {
-          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
-                                       MVT::v4f32, Vec);
+          SDValue VRound, Chain;
+          if (N->isStrictFPOpcode()) {
+            Chain = MergeInputChains(N, OtherRound.getNode());
+            if (!Chain)
+              continue;
+            VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
+                                 {MVT::v4f32, MVT::Other}, {Chain, Vec});
+            Chain = VRound.getValue(1);
+          } else
+            VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+                                 MVT::v4f32, Vec);
           DCI.AddToWorklist(VRound.getNode());
           SDValue Extract1 =
             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                         VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
           DCI.AddToWorklist(Extract1.getNode());
           DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+          if (Chain)
+            DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
           SDValue Extract0 =
             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                         VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+          if (Chain)
+            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+                               N->getVTList(), Extract0, Chain);
           return Extract0;
         }
       }
@@ -5795,8 +6002,9 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
   // (extract_vector_elt (VEXTEND X) 1)
   //
   // This is a special case since the target doesn't really support v2f32s.
+  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
   SelectionDAG &DAG = DCI.DAG;
-  SDValue Op0 = N->getOperand(0);
+  SDValue Op0 = N->getOperand(OpNo);
   if (N->getValueType(0) == MVT::f64 &&
       Op0.hasOneUse() &&
       Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5812,20 +6020,34 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
           U->getOperand(1).getOpcode() == ISD::Constant &&
           cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
         SDValue OtherExtend = SDValue(*U->use_begin(), 0);
-        if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
-            OtherExtend.getOperand(0) == SDValue(U, 0) &&
+        if (OtherExtend.getOpcode() == N->getOpcode() &&
+            OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
             OtherExtend.getValueType() == MVT::f64) {
-          SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
-                                        MVT::v2f64, Vec);
+          SDValue VExtend, Chain;
+          if (N->isStrictFPOpcode()) {
+            Chain = MergeInputChains(N, OtherExtend.getNode());
+            if (!Chain)
+              continue;
+            VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
+                                  {MVT::v2f64, MVT::Other}, {Chain, Vec});
+            Chain = VExtend.getValue(1);
+          } else
+            VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+                                  MVT::v2f64, Vec);
           DCI.AddToWorklist(VExtend.getNode());
           SDValue Extract1 =
             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
                         VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
           DCI.AddToWorklist(Extract1.getNode());
           DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+          if (Chain)
+            DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
           SDValue Extract0 =
             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
                         VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+          if (Chain)
+            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+                               N->getVTList(), Extract0, Chain);
           return Extract0;
         }
       }
@@ -6165,7 +6387,9 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::VECTOR_SHUFFLE:     return combineVECTOR_SHUFFLE(N, DCI);
   case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+  case ISD::STRICT_FP_ROUND:
   case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
+  case ISD::STRICT_FP_EXTEND:
   case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
   case ISD::BSWAP:              return combineBSWAP(N, DCI);
   case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
@@ -7523,7 +7747,8 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
   // Replace pseudo with a normal load-and-test that models the def as
   // well.
   BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
-    .addReg(SrcReg);
+    .addReg(SrcReg)
+    .setMIFlags(MI.getFlags());
   MI.eraseFromParent();
 
   return MBB;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 23cdcc72bc42..defcaa6eb6eb 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -245,7 +245,7 @@ enum NodeType : unsigned {
   VICMPHS,
   VICMPHLS,
 
-  // Compare floating-point vector operands 0 and 1 to preoduce the usual 0/-1
+  // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
   // vector result.  VFCMPE is for "ordered and equal", VFCMPH for "ordered and
   // greater than" and VFCMPHE for "ordered and greater than or equal to".
   VFCMPE,
@@ -290,6 +290,24 @@ enum NodeType : unsigned {
   // Operand 1: the bit mask
   TDC,
 
+  // Strict variants of scalar floating-point comparisons.
+  // Quiet and signaling versions.
+  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+  STRICT_FCMPS,
+
+  // Strict variants of vector floating-point comparisons.
+  // Quiet and signaling versions.
+  STRICT_VFCMPE,
+  STRICT_VFCMPH,
+  STRICT_VFCMPHE,
+  STRICT_VFCMPES,
+  STRICT_VFCMPHS,
+  STRICT_VFCMPHES,
+
+  // Strict variants of VEXTEND and VROUND.
+  STRICT_VEXTEND,
+  STRICT_VROUND,
+
   // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
   // ATOMIC_LOAD_<op>.
   //
@@ -404,7 +422,8 @@ public:
   bool isCheapToSpeculateCtlz() const override { return true; }
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                          EVT) const override;
-  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT VT) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
@@ -530,11 +549,15 @@ private:
   // Implement LowerOperation for individual opcodes.
   SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                        const SDLoc &DL, EVT VT,
-                       SDValue CmpOp0, SDValue CmpOp1) const;
+                       SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const;
   SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL,
                            EVT VT, ISD::CondCode CC,
-                           SDValue CmpOp0, SDValue CmpOp1) const;
+                           SDValue CmpOp0, SDValue CmpOp1,
+                           SDValue Chain = SDValue(),
+                           bool IsSignaling = false) const;
   SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG,
+                             bool IsSignaling) const;
   SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 9c95e8aec940..6d03274fe8a6 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -221,13 +221,13 @@ let Predicates = [FeatureNoVectorEnhancements1] in {
 
 // Convert a signed integer register value to a floating-point one.
 let Uses = [FPC], mayRaiseFPException = 1 in {
-  def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32,  GR32>;
-  def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64,  GR32>;
-  def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
+  def CEFBR : UnaryRRE<"cefbr", 0xB394, any_sint_to_fp, FP32,  GR32>;
+  def CDFBR : UnaryRRE<"cdfbr", 0xB395, any_sint_to_fp, FP64,  GR32>;
+  def CXFBR : UnaryRRE<"cxfbr", 0xB396, any_sint_to_fp, FP128, GR32>;
 
-  def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32,  GR64>;
-  def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64,  GR64>;
-  def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+  def CEGBR : UnaryRRE<"cegbr", 0xB3A4, any_sint_to_fp, FP32,  GR64>;
+  def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, any_sint_to_fp, FP64,  GR64>;
+  def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, any_sint_to_fp, FP128, GR64>;
 }
 
 // The FP extension feature provides versions of the above that allow
@@ -254,13 +254,13 @@ let Predicates = [FeatureFPExtension] in {
     def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
   }
 
-  def : Pat<(f32  (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
-  def : Pat<(f64  (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
-  def : Pat<(f128 (uint_to_fp GR32:$src)), (CXLFBR 0, GR32:$src, 0)>;
+  def : Pat<(f32  (any_uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
+  def : Pat<(f64  (any_uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
+  def : Pat<(f128 (any_uint_to_fp GR32:$src)), (CXLFBR 0, GR32:$src, 0)>;
 
-  def : Pat<(f32  (uint_to_fp GR64:$src)), (CELGBR 0, GR64:$src, 0)>;
-  def : Pat<(f64  (uint_to_fp GR64:$src)), (CDLGBR 0, GR64:$src, 0)>;
-  def : Pat<(f128 (uint_to_fp GR64:$src)), (CXLGBR 0, GR64:$src, 0)>;
+  def : Pat<(f32  (any_uint_to_fp GR64:$src)), (CELGBR 0, GR64:$src, 0)>;
+  def : Pat<(f64  (any_uint_to_fp GR64:$src)), (CDLGBR 0, GR64:$src, 0)>;
+  def : Pat<(f128 (any_uint_to_fp GR64:$src)), (CXLGBR 0, GR64:$src, 0)>;
 }
 
 // Convert a floating-point register value to a signed integer value,
@@ -467,16 +467,16 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
 // f64 multiplication of two FP32 registers.
 let Uses = [FPC], mayRaiseFPException = 1 in
   def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
-def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
-                    (f64 (fpextend FP32:$src2))),
+def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)),
+                    (f64 (any_fpextend FP32:$src2))),
           (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                 FP32:$src1, subreg_h32), FP32:$src2)>;
 
 // f64 multiplication of an FP32 register and an f32 memory.
 let Uses = [FPC], mayRaiseFPException = 1 in
   def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
-def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
-                    (f64 (extloadf32 bdxaddr12only:$addr))),
+def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)),
+                    (f64 (any_extloadf32 bdxaddr12only:$addr))),
           (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
                 bdxaddr12only:$addr)>;
 
@@ -484,8 +484,8 @@ def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
 let Uses = [FPC], mayRaiseFPException = 1 in
   def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
 let Predicates = [FeatureNoVectorEnhancements1] in
-  def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
-                      (f128 (fpextend FP64:$src2))),
+  def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)),
+                      (f128 (any_fpextend FP64:$src2))),
             (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
                                   FP64:$src1, subreg_h64), FP64:$src2)>;
 
@@ -493,8 +493,8 @@ let Predicates = [FeatureNoVectorEnhancements1] in
 let Uses = [FPC], mayRaiseFPException = 1 in
   def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
 let Predicates = [FeatureNoVectorEnhancements1] in
-  def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
-                      (f128 (extloadf64 bdxaddr12only:$addr))),
+  def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)),
+                      (f128 (any_extloadf64 bdxaddr12only:$addr))),
             (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
                   bdxaddr12only:$addr)>;
 
@@ -537,19 +537,19 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
 //===----------------------------------------------------------------------===//
 
 let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
-  def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32,  FP32>;
-  def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64,  FP64>;
-  def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
+  def CEBR : CompareRRE<"cebr", 0xB309, z_any_fcmp, FP32,  FP32>;
+  def CDBR : CompareRRE<"cdbr", 0xB319, z_any_fcmp, FP64,  FP64>;
+  def CXBR : CompareRRE<"cxbr", 0xB349, z_any_fcmp, FP128, FP128>;
 
-  def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>;
-  def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
+  def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, load, 4>;
+  def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, load, 8>;
 
-  def KEBR : CompareRRE<"kebr", 0xB308, null_frag, FP32,  FP32>;
-  def KDBR : CompareRRE<"kdbr", 0xB318, null_frag, FP64,  FP64>;
-  def KXBR : CompareRRE<"kxbr", 0xB348, null_frag, FP128, FP128>;
+  def KEBR : CompareRRE<"kebr", 0xB308, z_strict_fcmps, FP32,  FP32>;
+  def KDBR : CompareRRE<"kdbr", 0xB318, z_strict_fcmps, FP64,  FP64>;
+  def KXBR : CompareRRE<"kxbr", 0xB348, z_strict_fcmps, FP128, FP128>;
 
-  def KEB : CompareRXE<"keb", 0xED08, null_frag, FP32, load, 4>;
-  def KDB : CompareRXE<"kdb", 0xED18, null_frag, FP64, load, 8>;
+  def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, load, 4>;
+  def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, load, 8>;
 }
 
 // Test Data Class.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index c9dbe3da686d..f064d33ac2f3 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -75,8 +75,9 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
   // SystemZ::CCMASK_*.
   bits<4> CCValues = 0;
 
-  // The subset of CCValues that have the same meaning as they would after
-  // a comparison of the first operand against zero.
+  // The subset of CCValues that have the same meaning as they would after a
+  // comparison of the first operand against zero. "Logical" instructions
+  // leave this blank as they set CC in a different way.
   bits<4> CompareZeroCCMask = 0;
 
   // True if the instruction is conditional and if the CC mask operand
@@ -87,9 +88,16 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
   bit CCMaskLast = 0;
 
   // True if the instruction is the "logical" rather than "arithmetic" form,
-  // in cases where a distinction exists.
+  // in cases where a distinction exists. Except for logical compares, if the
+  // instruction sets this flag along with a non-zero CCValues field, it is
+  // assumed to set CC to either CCMASK_LOGICAL_ZERO or
+  // CCMASK_LOGICAL_NONZERO.
   bit IsLogical = 0;
 
+  // True if the (add or sub) instruction sets CC like a compare of the
+  // result against zero, but only if the 'nsw' flag is set.
+  bit CCIfNoSignedWrap = 0;
+
   let TSFlags{0}     = SimpleBDXLoad;
   let TSFlags{1}     = SimpleBDXStore;
   let TSFlags{2}     = Has20BitOffset;
@@ -101,6 +109,7 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
   let TSFlags{18}    = CCMaskFirst;
   let TSFlags{19}    = CCMaskLast;
   let TSFlags{20}    = IsLogical;
+  let TSFlags{21}    = CCIfNoSignedWrap;
 }
 
 //===----------------------------------------------------------------------===//
@@ -3200,6 +3209,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
   let Constraints = "$R1 = $R1src";
   let DisableEncoding = "$R1src";
   let CCMaskLast = 1;
+  let NumOpsKey = !subst("loc", "sel", mnemonic);
+  let NumOpsValue = "2";
 }
 
 // Like CondBinaryRRF, but used for the raw assembly form.  The condition-code
@@ -3239,6 +3250,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
              [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
                                               cond4:$valid, cond4:$M4))]> {
   let CCMaskLast = 1;
+  let NumOpsKey = mnemonic;
+  let NumOpsValue = "3";
 }
 
 // Like CondBinaryRRFa, but used for the raw assembly form.  The condition-code
@@ -4789,7 +4802,8 @@ class TestBinarySILPseudo<SDPatternOperator operator, ImmOpWithPattern imm>
 
 // Like CondBinaryRRF, but expanded after RA depending on the choice of
 // register.
-class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
+class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1,
+                          RegisterOperand cls2>
   : Pseudo<(outs cls1:$R1),
            (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3),
            [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls1:$R1src,
@@ -4797,17 +4811,21 @@ class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
   let Constraints = "$R1 = $R1src";
   let DisableEncoding = "$R1src";
   let CCMaskLast = 1;
+  let NumOpsKey = !subst("loc", "sel", mnemonic);
+  let NumOpsValue = "2";
 }
 
 // Like CondBinaryRRFa, but expanded after RA depending on the choice of
 // register.
-class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
-                           RegisterOperand cls3>
+class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1,
+                           RegisterOperand cls2, RegisterOperand cls3>
   : Pseudo<(outs cls1:$R1),
            (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
            [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
                                             cond4:$valid, cond4:$M4))]> {
   let CCMaskLast = 1;
+  let NumOpsKey = mnemonic;
+  let NumOpsValue = "3";
 }
 
 // Like CondBinaryRIE, but expanded after RA depending on the choice of
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index bc783608d45b..97c8fa7aa32e 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -765,8 +765,8 @@ bool SystemZInstrInfo::PredicateInstruction(
 
 void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
-                                   const DebugLoc &DL, unsigned DestReg,
-                                   unsigned SrcReg, bool KillSrc) const {
+                                   const DebugLoc &DL, MCRegister DestReg,
+                                   MCRegister SrcReg, bool KillSrc) const {
   // Split 128-bit GPR moves into two 64-bit moves. Add implicit uses of the
   // super register in case one of the subregs is undefined.
   // This handles ADDR128 too.
@@ -791,12 +791,12 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Move 128-bit floating-point values between VR128 and FP128.
   if (SystemZ::VR128BitRegClass.contains(DestReg) &&
       SystemZ::FP128BitRegClass.contains(SrcReg)) {
-    unsigned SrcRegHi =
-      RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
-                             SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
-    unsigned SrcRegLo =
-      RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
-                             SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
+    MCRegister SrcRegHi =
+        RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
+                               SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
+    MCRegister SrcRegLo =
+        RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
+                               SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
 
     BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg)
       .addReg(SrcRegHi, getKillRegState(KillSrc))
@@ -805,12 +805,12 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   }
   if (SystemZ::FP128BitRegClass.contains(DestReg) &&
       SystemZ::VR128BitRegClass.contains(SrcReg)) {
-    unsigned DestRegHi =
-      RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
-                             SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
-    unsigned DestRegLo =
-      RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
-                             SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
+    MCRegister DestRegHi =
+        RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
+                               SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
+    MCRegister DestRegLo =
+        RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
+                               SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
 
     if (DestRegHi != SrcReg)
       copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false);
@@ -945,6 +945,12 @@ static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) {
   }
 }
 
+static void transferMIFlag(MachineInstr *OldMI, MachineInstr *NewMI,
+                           MachineInstr::MIFlag Flag) {
+  const bool FlagIsSet = OldMI->getFlag(Flag); // Only ever set, never cleared.
+  if (FlagIsSet)
+    NewMI->setFlag(Flag);
+}
+
 MachineInstr *SystemZInstrInfo::convertToThreeAddress(
     MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
   MachineBasicBlock *MBB = MI.getParent();
@@ -1050,6 +1056,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
             .addImm(0)
             .addImm(MI.getOperand(2).getImm());
     transferDeadCC(&MI, BuiltMI);
+    transferMIFlag(&MI, BuiltMI, MachineInstr::NoSWrap);
     return BuiltMI;
   }
 
@@ -1200,6 +1207,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
       if (MemDesc.TSFlags & SystemZII::HasIndex)
         MIB.addReg(0);
       transferDeadCC(&MI, MIB);
+      transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
       return MIB;
     }
   }
@@ -1748,6 +1756,28 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
   BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
 }
 
+bool SystemZInstrInfo::verifyInstruction(const MachineInstr &MI,
+                                         StringRef &ErrInfo) const {
+  // Only operands described by the MCInstrDesc are checked; the loop stops
+  // at whichever of the two operand counts is reached first.
+  const MCInstrDesc &Desc = MI.getDesc();
+  const unsigned NumMIOps = MI.getNumOperands();
+  const unsigned NumDescOps = Desc.getNumOperands();
+  for (unsigned Idx = 0; Idx != NumMIOps && Idx < NumDescOps; ++Idx) {
+    const MCOperandInfo &Info = Desc.OpInfo[Idx];
+    if (Info.OperandType != MCOI::OPERAND_MEMORY)
+      continue;
+    // An addressing-mode operand must be a register (or frame index) when
+    // the descriptor names a register class, and an immediate otherwise.
+    const MachineOperand &MO = MI.getOperand(Idx);
+    if (Info.RegClass != -1 ? !(MO.isReg() || MO.isFI()) : !MO.isImm()) {
+      ErrInfo = "Addressing mode operands corrupt!";
+      return false;
+    }
+  }
+  return true;
+}
+
 bool SystemZInstrInfo::
 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                 const MachineInstr &MIb) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 6dc6e72aa52a..8391970c7d9d 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -46,7 +46,8 @@ enum {
   CompareZeroCCMaskShift = 14,
   CCMaskFirst            = (1 << 18),
   CCMaskLast             = (1 << 19),
-  IsLogical              = (1 << 20)
+  IsLogical              = (1 << 20),
+  CCIfNoSignedWrap       = (1 << 21)
 };
 
 static inline unsigned getAccessSize(unsigned int Flags) {
@@ -242,7 +243,7 @@ public:
   bool PredicateInstruction(MachineInstr &MI,
                             ArrayRef<MachineOperand> Pred) const override;
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-                   const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc) const override;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
@@ -322,6 +323,10 @@ public:
                      MachineBasicBlock::iterator MBBI,
                      unsigned Reg, uint64_t Value) const;
 
+  // Perform target-specific instruction verification.
+  bool verifyInstruction(const MachineInstr &MI,
+                         StringRef &ErrInfo) const override;
+
   // Sometimes, it is possible for the target to tell, even without
   // aliasing information, that two MIs access different memory
   // addresses. This function returns true if two MIs access different
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 8b334756611a..9579dcc0d1b6 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -492,7 +492,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
   let isCommutable = 1 in {
     // Expands to SELR or SELFHR or a branch-and-move sequence,
     // depending on the choice of registers.
-    def  SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>;
+    def  SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>;
     defm SELFHR  : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
     defm SELR    : CondBinaryRRFaPair<"selr",   0xB9F0, GR32, GR32, GR32>;
     defm SELGR   : CondBinaryRRFaPair<"selgr",  0xB9E3, GR64, GR64, GR64>;
@@ -525,7 +525,7 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
   let isCommutable = 1 in {
     // Expands to LOCR or LOCFHR or a branch-and-move sequence,
     // depending on the choice of registers.
-    def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>;
+    def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>;
     defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
   }
 
@@ -915,7 +915,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
 //===----------------------------------------------------------------------===//
 
 // Addition producing a signed overflow flag.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+let Defs = [CC], CCValues = 0xF, CCIfNoSignedWrap = 1 in {
   // Addition of a register.
   let isCommutable = 1 in {
     defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>;
@@ -957,7 +957,7 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
 defm : SXB<z_sadd, GR64, AGFR>;
 
 // Addition producing a carry.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xF, IsLogical = 1 in {
   // Addition of a register.
   let isCommutable = 1 in {
     defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>;
@@ -997,7 +997,7 @@ let Defs = [CC] in {
 defm : ZXB<z_uadd, GR64, ALGFR>;
 
 // Addition producing and using a carry.
-let Defs = [CC], Uses = [CC] in {
+let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in {
   // Addition of a register.
   def ALCR  : BinaryRRE<"alcr",  0xB998, z_addcarry, GR32, GR32>;
   def ALCGR : BinaryRRE<"alcgr", 0xB988, z_addcarry, GR64, GR64>;
@@ -1017,7 +1017,8 @@ def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>,
 //===----------------------------------------------------------------------===//
 
 // Subtraction producing a signed overflow flag.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8,
+    CCIfNoSignedWrap = 1 in {
   // Subtraction of a register.
   defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>;
   def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
@@ -1066,7 +1067,7 @@ def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2),
           (SGR GR64:$src1, (LLILF imm64lf32n:$src2))>;
 
 // Subtraction producing a carry.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0x7, IsLogical = 1 in {
   // Subtraction of a register.
   defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>;
   def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
@@ -1104,7 +1105,7 @@ def : Pat<(add GR64:$src1, imm64zx32n:$src2),
           (SLGFI GR64:$src1, imm64zx32n:$src2)>;
 
 // Subtraction producing and using a carry.
-let Defs = [CC], Uses = [CC] in {
+let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in {
   // Subtraction of a register.
   def SLBR  : BinaryRRE<"slbr",  0xB999, z_subcarry, GR32, GR32>;
   def SLBGR : BinaryRRE<"slbgr", 0xB989, z_subcarry, GR64, GR64>;
@@ -2069,7 +2070,7 @@ let Predicates = [FeatureProcessorAssist] in {
     def PPA : SideEffectTernaryRRFc<"ppa", 0xB2E8, GR64, GR64, imm32zx4>;
   def : Pat<(int_s390_ppa_txassist GR32:$src),
             (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
-                 0, 1)>;
+                 zero_reg, 1)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 02364bbda5c1..c945122ee577 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1034,7 +1034,7 @@ let Predicates = [FeatureVector] in {
     def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
     def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
   }
-  def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
+  def : FPConversion<VCDGB, any_sint_to_fp, v128db, v128g, 0, 0>;
   let Predicates = [FeatureVectorEnhancements2] in {
     let Uses = [FPC], mayRaiseFPException = 1 in {
       let isAsmParserOnly = 1 in
@@ -1042,7 +1042,7 @@ let Predicates = [FeatureVector] in {
       def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>;
       def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>;
     }
-    def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>;
+    def : FPConversion<VCEFB, any_sint_to_fp, v128sb, v128f, 0, 0>;
   }
 
   // Convert from logical.
@@ -1051,7 +1051,7 @@ let Predicates = [FeatureVector] in {
     def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
     def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
   }
-  def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
+  def : FPConversion<VCDLGB, any_uint_to_fp, v128db, v128g, 0, 0>;
   let Predicates = [FeatureVectorEnhancements2] in {
     let Uses = [FPC], mayRaiseFPException = 1 in {
       let isAsmParserOnly = 1 in
@@ -1059,7 +1059,7 @@ let Predicates = [FeatureVector] in {
       def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>;
       def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>;
     }
-    def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>;
+    def : FPConversion<VCELFB, any_uint_to_fp, v128sb, v128f, 0, 0>;
   }
 
   // Convert to fixed.
@@ -1134,7 +1134,7 @@ let Predicates = [FeatureVector] in {
   // Load lengthened.
   let Uses = [FPC], mayRaiseFPException = 1 in {
     def VLDE  : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
-    def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
+    def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>;
     def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
   }
   let Predicates = [FeatureVectorEnhancements1] in {
@@ -1156,7 +1156,7 @@ let Predicates = [FeatureVector] in {
     def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
     def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
   }
-  def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+  def : Pat<(v4f32 (z_any_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
   def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
   let Predicates = [FeatureVectorEnhancements1] in {
     let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1175,7 +1175,7 @@ let Predicates = [FeatureVector] in {
   // Maximum.
   multiclass VectorMax<Instruction insn, TypedReg tr> {
     def : FPMinMax<insn, any_fmaxnum, tr, 4>;
-    def : FPMinMax<insn, fmaximum, tr, 1>;
+    def : FPMinMax<insn, any_fmaximum, tr, 1>;
   }
   let Predicates = [FeatureVectorEnhancements1] in {
     let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1201,7 +1201,7 @@ let Predicates = [FeatureVector] in {
   // Minimum.
   multiclass VectorMin<Instruction insn, TypedReg tr> {
     def : FPMinMax<insn, any_fminnum, tr, 4>;
-    def : FPMinMax<insn, fminimum, tr, 1>;
+    def : FPMinMax<insn, any_fminimum, tr, 1>;
   }
   let Predicates = [FeatureVectorEnhancements1] in {
     let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1364,32 +1364,32 @@ let Predicates = [FeatureVector] in {
   // Compare scalar.
   let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
     def WFC   : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
-    def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
+    def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>;
     let Predicates = [FeatureVectorEnhancements1] in {
-      def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>;
-      def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>;
+      def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>;
+      def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>;
     }
   }
 
   // Compare and signal scalar.
   let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
     def WFK   : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
-    def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
+    def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3>;
     let Predicates = [FeatureVectorEnhancements1] in {
-      def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>;
-      def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>;
+      def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2>;
+      def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>;
     }
   }
 
   // Compare equal.
   let Uses = [FPC], mayRaiseFPException = 1 in {
     def  VFCE   : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
-    defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+    defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_any_vfcmpe, z_vfcmpes,
                                   v128g, v128db, 3, 0>;
     defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
                                   v64g, v64db, 3, 8>;
     let Predicates = [FeatureVectorEnhancements1] in {
-      defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+      defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_any_vfcmpe, z_vfcmpes,
                                     v128f, v128sb, 2, 0>;
       defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
                                     v32f, v32sb, 2, 8>;
@@ -1401,11 +1401,11 @@ let Predicates = [FeatureVector] in {
   // Compare and signal equal.
   let Uses = [FPC], mayRaiseFPException = 1,
       Predicates = [FeatureVectorEnhancements1] in {
-    defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
+    defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, z_strict_vfcmpes, null_frag,
                                   v128g, v128db, 3, 4>;
     defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
                                   v64g, v64db, 3, 12>;
-    defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag,
+    defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, z_strict_vfcmpes, null_frag,
                                   v128f, v128sb, 2, 4>;
     defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag,
                                   v32f, v32sb, 2, 12>;
@@ -1416,12 +1416,12 @@ let Predicates = [FeatureVector] in {
   // Compare high.
   let Uses = [FPC], mayRaiseFPException = 1 in {
     def  VFCH   : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
-    defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
+    defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_any_vfcmph, z_vfcmphs,
                                   v128g, v128db, 3, 0>;
     defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
                                   v64g, v64db, 3, 8>;
     let Predicates = [FeatureVectorEnhancements1] in {
-      defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
+      defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_any_vfcmph, z_vfcmphs,
                                     v128f, v128sb, 2, 0>;
       defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
                                     v32f, v32sb, 2, 8>;
@@ -1433,11 +1433,11 @@ let Predicates = [FeatureVector] in {
   // Compare and signal high.
   let Uses = [FPC], mayRaiseFPException = 1,
       Predicates = [FeatureVectorEnhancements1] in {
-    defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
+    defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, z_strict_vfcmphs, null_frag,
                                   v128g, v128db, 3, 4>;
     defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
                                   v64g, v64db, 3, 12>;
-    defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag,
+    defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, z_strict_vfcmphs, null_frag,
                                   v128f, v128sb, 2, 4>;
     defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag,
                                   v32f, v32sb, 2, 12>;
@@ -1448,12 +1448,12 @@ let Predicates = [FeatureVector] in {
   // Compare high or equal.
   let Uses = [FPC], mayRaiseFPException = 1 in {
     def  VFCHE   : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
-    defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+    defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_any_vfcmphe, z_vfcmphes,
                                    v128g, v128db, 3, 0>;
     defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
                                    v64g, v64db, 3, 8>;
     let Predicates = [FeatureVectorEnhancements1] in {
-      defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+      defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_any_vfcmphe, z_vfcmphes,
                                      v128f, v128sb, 2, 0>;
       defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
                                      v32f, v32sb, 2, 8>;
@@ -1465,11 +1465,11 @@ let Predicates = [FeatureVector] in {
   // Compare and signal high or equal.
   let Uses = [FPC], mayRaiseFPException = 1,
       Predicates = [FeatureVectorEnhancements1] in {
-    defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
+    defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, z_strict_vfcmphes, null_frag,
                                    v128g, v128db, 3, 4>;
     defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
                                    v64g, v64db, 3, 12>;
-    defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag,
+    defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, z_strict_vfcmphes, null_frag,
                                    v128f, v128sb, 2, 4>;
     defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag,
                                    v32f, v32sb, 2, 12>;
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index 724111229569..b1964321c78a 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -162,7 +162,7 @@ private:
   void relaxBranches();
 
   const SystemZInstrInfo *TII = nullptr;
-  MachineFunction *MF;
+  MachineFunction *MF = nullptr;
   SmallVector<MBBInfo, 16> MBBs;
   SmallVector<TerminatorInfo, 16> Terminators;
 };
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 9eec3f37bc28..d1f6511ceea3 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -13,10 +13,22 @@
 
 namespace llvm {
 
+namespace SystemZ {
+// Holds the low and high GPRs to be saved/restored, as well as the offset
+// into the register save area of the low register.
+struct GPRRegs {
+  unsigned LowGPR = 0;
+  unsigned HighGPR = 0;
+  unsigned GPROffset = 0;
+  GPRRegs() {}
+};
+} // end namespace SystemZ
+
 class SystemZMachineFunctionInfo : public MachineFunctionInfo {
   virtual void anchor();
-  unsigned LowSavedGPR;
-  unsigned HighSavedGPR;
+
+  SystemZ::GPRRegs SpillGPRRegs;
+  SystemZ::GPRRegs RestoreGPRRegs;
   unsigned VarArgsFirstGPR;
   unsigned VarArgsFirstFPR;
   unsigned VarArgsFrameIndex;
@@ -27,19 +39,29 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
 
 public:
   explicit SystemZMachineFunctionInfo(MachineFunction &MF)
-    : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
-      VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0),
-      ManipulatesSP(false), NumLocalDynamics(0) {}
-
-  // Get and set the first call-saved GPR that should be saved and restored
-  // by this function.  This is 0 if no GPRs need to be saved or restored.
-  unsigned getLowSavedGPR() const { return LowSavedGPR; }
-  void setLowSavedGPR(unsigned Reg) { LowSavedGPR = Reg; }
-
-  // Get and set the last call-saved GPR that should be saved and restored
-  // by this function.
-  unsigned getHighSavedGPR() const { return HighSavedGPR; }
-  void setHighSavedGPR(unsigned Reg) { HighSavedGPR = Reg; }
+    : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
+      RegSaveFrameIndex(0), FramePointerSaveIndex(0), ManipulatesSP(false),
+      NumLocalDynamics(0) {}
+
+  // Get and set the first and last call-saved GPR that should be saved by
+  // this function and the SP offset for the STMG.  These are 0 if no GPRs
+  // need to be saved or restored.
+  SystemZ::GPRRegs getSpillGPRRegs() const { return SpillGPRRegs; }
+  void setSpillGPRRegs(unsigned Low, unsigned High, unsigned Offs) {
+    SpillGPRRegs.LowGPR = Low;
+    SpillGPRRegs.HighGPR = High;
+    SpillGPRRegs.GPROffset = Offs;
+  }
+
+  // Get and set the first and last call-saved GPR that should be restored by
+  // this function and the SP offset for the LMG.  These are 0 if no GPRs
+  // need to be saved or restored.
+  SystemZ::GPRRegs getRestoreGPRRegs() const { return RestoreGPRRegs; }
+  void setRestoreGPRRegs(unsigned Low, unsigned High, unsigned Offs) {
+    RestoreGPRRegs.LowGPR = Low;
+    RestoreGPRRegs.HighGPR = High;
+    RestoreGPRRegs.GPROffset = Offs;
+  }
 
   // Get and set the number of fixed (as opposed to variable) arguments
   // that are passed in GPRs to this function.
diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td
index b2bab68a6274..bd40f6d7bf40 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -25,6 +25,7 @@ class ImmediateOp<ValueType vt, string asmop> : Operand<vt> {
   let PrintMethod = "print"##asmop##"Operand";
   let DecoderMethod = "decode"##asmop##"Operand";
   let ParserMatchClass = !cast<AsmOperandClass>(asmop);
+  let OperandType = "OPERAND_IMMEDIATE";
 }
 
 class ImmOpWithPattern<ValueType vt, string asmop, code pred, SDNodeXForm xform,
@@ -63,13 +64,15 @@ class PCRelTLSAsmOperand<string size>
 
 // Constructs an operand for a PC-relative address with address type VT.
 // ASMOP is the associated asm operand.
-class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
-  let PrintMethod = "printPCRelOperand";
-  let ParserMatchClass = asmop;
-}
-class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
-  let PrintMethod = "printPCRelTLSOperand";
-  let ParserMatchClass = asmop;
+let OperandType = "OPERAND_PCREL" in {
+  class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
+    let PrintMethod = "printPCRelOperand";
+    let ParserMatchClass = asmop;
+  }
+  class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
+    let PrintMethod = "printPCRelTLSOperand";
+    let ParserMatchClass = asmop;
+  }
 }
 
 // Constructs both a DAG pattern and instruction operand for a PC-relative
@@ -105,6 +108,7 @@ class AddressOperand<string bitsize, string dispsize, string length,
   let EncoderMethod = "get"##format##dispsize##length##"Encoding";
   let DecoderMethod =
     "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
+  let OperandType = "OPERAND_MEMORY";
   let MIOperandInfo = operands;
   let ParserMatchClass =
     !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
@@ -508,7 +512,8 @@ defm imm64zx48 : Immediate<i64, [{
   return isUInt<64>(N->getZExtValue());
 }], UIMM48, "U48Imm">;
 
-def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
+let OperandType = "OPERAND_IMMEDIATE" in
+  def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
 
 //===----------------------------------------------------------------------===//
 // Floating-point immediates
@@ -657,4 +662,5 @@ def bdvaddr12only     : BDVMode<            "64", "12">;
 def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>,
             Operand<i32> {
   let PrintMethod = "printCond4Operand";
+  let OperandType = "OPERAND_IMMEDIATE";
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 6fe383e64b74..a6a72903e573 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -258,6 +258,10 @@ def z_pcrel_offset      : SDNode<"SystemZISD::PCREL_OFFSET",
 def z_iabs              : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>;
 def z_icmp              : SDNode<"SystemZISD::ICMP", SDT_ZICmp>;
 def z_fcmp              : SDNode<"SystemZISD::FCMP", SDT_ZCmp>;
+def z_strict_fcmp       : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp,
+                                 [SDNPHasChain]>;
+def z_strict_fcmps      : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp,
+                                 [SDNPHasChain]>;
 def z_tm                : SDNode<"SystemZISD::TM", SDT_ZICmp>;
 def z_br_ccmask_1       : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
                                  [SDNPHasChain]>;
@@ -328,13 +332,29 @@ def z_vicmpes           : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinaryCC>;
 def z_vicmphs           : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinaryCC>;
 def z_vicmphls          : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinaryCC>;
 def z_vfcmpe            : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
+def z_strict_vfcmpe     : SDNode<"SystemZISD::STRICT_VFCMPE",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
+def z_strict_vfcmpes    : SDNode<"SystemZISD::STRICT_VFCMPES",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
 def z_vfcmph            : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
+def z_strict_vfcmph     : SDNode<"SystemZISD::STRICT_VFCMPH",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
+def z_strict_vfcmphs    : SDNode<"SystemZISD::STRICT_VFCMPHS",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
 def z_vfcmphe           : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
+def z_strict_vfcmphe    : SDNode<"SystemZISD::STRICT_VFCMPHE",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
+def z_strict_vfcmphes   : SDNode<"SystemZISD::STRICT_VFCMPHES",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
 def z_vfcmpes           : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>;
 def z_vfcmphs           : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>;
 def z_vfcmphes          : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>;
 def z_vextend           : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
+def z_strict_vextend    : SDNode<"SystemZISD::STRICT_VEXTEND",
+                                 SDT_ZVecUnaryConv, [SDNPHasChain]>;
 def z_vround            : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
+def z_strict_vround     : SDNode<"SystemZISD::STRICT_VROUND",
+                                 SDT_ZVecUnaryConv, [SDNPHasChain]>;
 def z_vtm               : SDNode<"SystemZISD::VTM", SDT_ZCmp>;
 def z_vfae_cc           : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>;
 def z_vfaez_cc          : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>;
@@ -707,6 +727,26 @@ def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
 // Floating-point negative absolute.
 def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
 
+// Strict floating-point fragments.
+def z_any_fcmp    : PatFrags<(ops node:$lhs, node:$rhs),
+                             [(z_strict_fcmp node:$lhs, node:$rhs),
+                              (z_fcmp node:$lhs, node:$rhs)]>;
+def z_any_vfcmpe  : PatFrags<(ops node:$lhs, node:$rhs),
+                             [(z_strict_vfcmpe node:$lhs, node:$rhs),
+                              (z_vfcmpe node:$lhs, node:$rhs)]>;
+def z_any_vfcmph  : PatFrags<(ops node:$lhs, node:$rhs),
+                             [(z_strict_vfcmph node:$lhs, node:$rhs),
+                              (z_vfcmph node:$lhs, node:$rhs)]>;
+def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs),
+                             [(z_strict_vfcmphe node:$lhs, node:$rhs),
+                              (z_vfcmphe node:$lhs, node:$rhs)]>;
+def z_any_vextend : PatFrags<(ops node:$src),
+                             [(z_strict_vextend node:$src),
+                              (z_vextend node:$src)]>;
+def z_any_vround  : PatFrags<(ops node:$src),
+                             [(z_strict_vround node:$src),
+                              (z_vround node:$src)]>;
+
 // Create a unary operator that loads from memory and then performs
 // the given operation on it.
 class loadu<SDPatternOperator operator, SDPatternOperator load = load>
diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td
index 65300fb47627..501a69488397 100644
--- a/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -148,9 +148,9 @@ multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
 // registers in CLS against zero.  The instruction has separate R1 and R2
 // operands, but they must be the same when the instruction is used like this.
 multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
-  def : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
+  def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
   // The sign of the zero makes no difference.
-  def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
+  def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
 }
 
 // Use INSN for performing binary operation OPERATION of type VT
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 39ace5594b7f..0d5e7af92523 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -87,6 +87,52 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
   bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
       VirtReg, Order, Hints, MF, VRM, Matrix);
 
+  if (VRM != nullptr) {
+    // Add any two address hints after any copy hints.
+    SmallSet<unsigned, 4> TwoAddrHints;
+    for (auto &Use : MRI->reg_nodbg_instructions(VirtReg))
+      if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) {
+        const MachineOperand *VRRegMO = nullptr;
+        const MachineOperand *OtherMO = nullptr;
+        const MachineOperand *CommuMO = nullptr;
+        if (VirtReg == Use.getOperand(0).getReg()) {
+          VRRegMO = &Use.getOperand(0);
+          OtherMO = &Use.getOperand(1);
+          if (Use.isCommutable())
+            CommuMO = &Use.getOperand(2);
+        } else if (VirtReg == Use.getOperand(1).getReg()) {
+          VRRegMO = &Use.getOperand(1);
+          OtherMO = &Use.getOperand(0);
+        } else if (VirtReg == Use.getOperand(2).getReg() &&
+                   Use.isCommutable()) {
+          VRRegMO = &Use.getOperand(2);
+          OtherMO = &Use.getOperand(0);
+        } else
+          continue;
+
+        auto tryAddHint = [&](const MachineOperand *MO) -> void {
+          Register Reg = MO->getReg();
+          Register PhysReg =
+            Register::isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
+          if (PhysReg) {
+            if (MO->getSubReg())
+              PhysReg = getSubReg(PhysReg, MO->getSubReg());
+            if (VRRegMO->getSubReg())
+              PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(),
+                                            MRI->getRegClass(VirtReg));
+            if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
+              TwoAddrHints.insert(PhysReg);
+          }
+        };
+        tryAddHint(OtherMO);
+        if (CommuMO)
+          tryAddHint(CommuMO);
+      }
+    for (MCPhysReg OrderReg : Order)
+      if (TwoAddrHints.count(OrderReg))
+        Hints.push_back(OrderReg);
+  }
+
   if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) {
     SmallVector<unsigned, 8> Worklist;
     SmallSet<unsigned, 4> DoneRegs;
@@ -143,58 +189,14 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
     }
   }
 
-  if (VRM == nullptr)
-    return BaseImplRetVal;
-
-  // Add any two address hints after any copy hints.
-  SmallSet<unsigned, 4> TwoAddrHints;
-  for (auto &Use : MRI->reg_nodbg_instructions(VirtReg))
-    if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) {
-      const MachineOperand *VRRegMO = nullptr;
-      const MachineOperand *OtherMO = nullptr;
-      const MachineOperand *CommuMO = nullptr;
-      if (VirtReg == Use.getOperand(0).getReg()) {
-        VRRegMO = &Use.getOperand(0);
-        OtherMO = &Use.getOperand(1);
-        if (Use.isCommutable())
-          CommuMO = &Use.getOperand(2);
-      } else if (VirtReg == Use.getOperand(1).getReg()) {
-        VRRegMO = &Use.getOperand(1);
-        OtherMO = &Use.getOperand(0);
-      } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) {
-        VRRegMO = &Use.getOperand(2);
-        OtherMO = &Use.getOperand(0);
-      } else
-        continue;
-
-      auto tryAddHint = [&](const MachineOperand *MO) -> void {
-        Register Reg = MO->getReg();
-        Register PhysReg =
-            Register::isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
-        if (PhysReg) {
-          if (MO->getSubReg())
-            PhysReg = getSubReg(PhysReg, MO->getSubReg());
-          if (VRRegMO->getSubReg())
-            PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(),
-                                          MRI->getRegClass(VirtReg));
-          if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
-            TwoAddrHints.insert(PhysReg);
-        }
-      };
-      tryAddHint(OtherMO);
-      if (CommuMO)
-        tryAddHint(CommuMO);
-    }
-  for (MCPhysReg OrderReg : Order)
-    if (TwoAddrHints.count(OrderReg))
-      Hints.push_back(OrderReg);
-
   return BaseImplRetVal;
 }
 
 const MCPhysReg *
 SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
+  if (MF->getFunction().getCallingConv() == CallingConv::GHC)
+    return CSR_SystemZ_NoRegs_SaveList;
   if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
     return Subtarget.hasVector()? CSR_SystemZ_AllRegs_Vector_SaveList
                                 : CSR_SystemZ_AllRegs_SaveList;
@@ -209,6 +211,8 @@ const uint32_t *
 SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                           CallingConv::ID CC) const {
   const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  if (CC == CallingConv::GHC)
+    return CSR_SystemZ_NoRegs_RegMask;
   if (CC == CallingConv::AnyReg)
     return Subtarget.hasVector()? CSR_SystemZ_AllRegs_Vector_RegMask
                                 : CSR_SystemZ_AllRegs_RegMask;
diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 2aca22c9082a..f6184cec795a 100644
--- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -46,7 +46,6 @@ private:
   bool shortenOn001(MachineInstr &MI, unsigned Opcode);
   bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
   bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
-  bool shortenSelect(MachineInstr &MI, unsigned Opcode);
 
   const SystemZInstrInfo *TII;
   const TargetRegisterInfo *TRI;
@@ -176,23 +175,6 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
   return false;
 }
 
-// MI is a three-operand select instruction.  If one of the sources match
-// the destination, convert to the equivalent load-on-condition.
-bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) {
-  if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
-    MI.setDesc(TII->get(Opcode));
-    MI.tieOperands(0, 1);
-    return true;
-  }
-  if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
-    TII->commuteInstruction(MI, false, 1, 2);
-    MI.setDesc(TII->get(Opcode));
-    MI.tieOperands(0, 1);
-    return true;
-  }
-  return false;
-}
-
 // Process all instructions in MBB.  Return true if something changed.
 bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
@@ -213,18 +195,6 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
       Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
       break;
 
-    case SystemZ::SELR:
-      Changed |= shortenSelect(MI, SystemZ::LOCR);
-      break;
-
-    case SystemZ::SELFHR:
-      Changed |= shortenSelect(MI, SystemZ::LOCFHR);
-      break;
-
-    case SystemZ::SELGR:
-      Changed |= shortenSelect(MI, SystemZ::LOCGR);
-      break;
-
     case SystemZ::WFADB:
       Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
       break;
@@ -313,6 +283,14 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
       Changed |= shortenOn01(MI, SystemZ::CEBR);
       break;
 
+    case SystemZ::WFKDB:
+      Changed |= shortenOn01(MI, SystemZ::KDBR);
+      break;
+
+    case SystemZ::WFKSB:
+      Changed |= shortenOn01(MI, SystemZ::KEBR);
+      break;
+
     case SystemZ::VL32:
       // For z13 we prefer LDE over LE to avoid partial register dependencies.
       Changed |= shortenOn0(MI, SystemZ::LDE32);
diff --git a/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
index 478848c30701..f103812eb096 100644
--- a/llvm/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
@@ -50,6 +50,7 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsS390.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include <deque>
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 20865037fe38..dfcdb5356485 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -29,7 +29,7 @@
 
 using namespace llvm;
 
-extern "C" void LLVMInitializeSystemZTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
   // Register the target.
   RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget());
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 11c99aa11174..acec3c533585 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -62,7 +62,7 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
   return 4 * TTI::TCC_Basic;
 }
 
-int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
-                                  const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+                                      const APInt &Imm, Type *Ty) {
   assert(Ty->isIntegerTy());
 
@@ -180,8 +180,8 @@ int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
   return SystemZTTIImpl::getIntImmCost(Imm, Ty);
 }
 
-int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
-                                  const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+                                        const APInt &Imm, Type *Ty) {
   assert(Ty->isIntegerTy());
 
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -259,7 +259,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       }
       if (isa<StoreInst>(&I)) {
         Type *MemAccessTy = I.getOperand(0)->getType();
-        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
+        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
       }
     }
 
@@ -348,11 +348,10 @@ static unsigned getNumVectorRegs(Type *Ty) {
 }
 
 int SystemZTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty,
-    TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
-    TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo,
-    ArrayRef<const Value *> Args) {
+    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) {
 
   // TODO: return a good value for BB-VECTORIZER that includes the
   // immediate loads, which we do not want to count for the loop
@@ -508,7 +507,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
 
   // Fallback to the default implementation.
   return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
-                                       Opd1PropInfo, Opd2PropInfo, Args);
+                                       Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }
 
 int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -995,7 +994,7 @@ static bool isBswapIntrinsicCall(const Value *V) {
 }
 
 int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
-                                    unsigned Alignment, unsigned AddressSpace,
+                                    MaybeAlign Alignment, unsigned AddressSpace,
                                     const Instruction *I) {
   assert(!Src->isVoidTy() && "Invalid type");
 
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 3ba80b31439f..bc4d066881c1 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -40,9 +40,9 @@ public:
 
   int getIntImmCost(const APInt &Imm, Type *Ty);
 
-  int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
-  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                    Type *Ty);
+  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+                          Type *Ty);
 
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
 
@@ -75,7 +75,8 @@ public:
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
   unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
   unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
@@ -87,7 +88,7 @@ public:
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
 
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
diff --git a/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index 713a55ee8400..36291e079882 100644
--- a/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -16,7 +16,7 @@ Target &llvm::getTheSystemZTarget() {
   return TheSystemZTarget;
 }
 
-extern "C" void LLVMInitializeSystemZTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetInfo() {
   RegisterTarget<Triple::systemz, /*HasJIT=*/true> X(
       getTheSystemZTarget(), "systemz", "SystemZ", "SystemZ");
 }