aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/SystemZ
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/SystemZ')
-rw-r--r--lib/Target/SystemZ/AsmParser/Makefile16
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp5
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt1
-rw-r--r--lib/Target/SystemZ/Disassembler/Makefile16
-rw-r--r--lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp57
-rw-r--r--lib/Target/SystemZ/InstPrinter/Makefile16
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp3
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp7
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp22
-rw-r--r--lib/Target/SystemZ/Makefile28
-rw-r--r--lib/Target/SystemZ/README.txt30
-rw-r--r--lib/Target/SystemZ/SystemZ.h41
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp200
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.cpp4
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.h50
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.td15
-rw-r--r--lib/Target/SystemZ/SystemZElimCompare.cpp214
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp26
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.h7
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp266
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp1043
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h97
-rw-r--r--lib/Target/SystemZ/SystemZInstrBuilder.h2
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td11
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td206
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp607
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h85
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td356
-rw-r--r--lib/Target/SystemZ/SystemZLDCleanup.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZLongBranch.cpp19
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.h9
-rw-r--r--lib/Target/SystemZ/SystemZOperands.td8
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td29
-rw-r--r--lib/Target/SystemZ/SystemZProcessors.td8
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp16
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h9
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp102
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.h36
-rw-r--r--lib/Target/SystemZ/SystemZShortenInst.cpp34
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp14
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h9
-rw-r--r--lib/Target/SystemZ/SystemZTDC.cpp382
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp20
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h2
-rw-r--r--lib/Target/SystemZ/TargetInfo/Makefile15
46 files changed, 2991 insertions, 1175 deletions
diff --git a/lib/Target/SystemZ/AsmParser/Makefile b/lib/Target/SystemZ/AsmParser/Makefile
deleted file mode 100644
index 623ae2c4e3ef..000000000000
--- a/lib/Target/SystemZ/AsmParser/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/SystemZ/AsmParser/Makefile ---------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZAsmParser
-
-# Hack: we need to include 'main' SystemZ target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 9c995bf42b0b..3923614c89d3 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -13,9 +13,9 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -391,6 +391,9 @@ public:
: MCTargetAsmParser(Options, sti), Parser(parser) {
MCAsmParserExtension::Initialize(Parser);
+ // Alias the .word directive to .short.
+ parser.addAliasForDirective(".word", ".short");
+
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 336f037bb733..4b849ad6491b 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -30,6 +30,7 @@ add_llvm_target(SystemZCodeGen
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
SystemZTargetTransformInfo.cpp
+ SystemZTDC.cpp
)
add_subdirectory(AsmParser)
diff --git a/lib/Target/SystemZ/Disassembler/Makefile b/lib/Target/SystemZ/Disassembler/Makefile
deleted file mode 100644
index efc4cc8e9cb0..000000000000
--- a/lib/Target/SystemZ/Disassembler/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===-- lib/Target/SystemZ/Disassembler/Makefile -----------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZDisassembler
-
-# Hack: we need to include 'main' x86 target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index bf67b75d5337..20e015b42d21 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZ.h"
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -46,6 +46,34 @@ extern "C" void LLVMInitializeSystemZDisassembler() {
createSystemZDisassembler);
}
+/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
+/// immediate Value in the MCInst.
+///
+/// @param Value - The immediate Value, has had any PC adjustment made by
+/// the caller.
+/// @param isBranch - If the instruction is a branch instruction
+/// @param Address - The starting address of the instruction
+/// @param Offset - The byte offset to this immediate in the instruction
+/// @param Width - The byte width of this immediate in the instruction
+///
+/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
+/// called then that function is called to get any symbolic information for the
+/// immediate in the instruction using the Address, Offset and Width. If that
+/// returns non-zero then the symbolic information it returns is used to create
+/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
+/// returns zero and isBranch is true then a symbol look up for immediate Value
+/// is done and if a symbol is found an MCExpr is created with that, else
+/// an MCExpr with the immediate Value is created. This function returns true
+/// if it adds an operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Width, MCInst &MI,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
+ Offset, Width);
+}
+
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
const unsigned *Regs, unsigned Size) {
assert(RegNo < Size && "Invalid register");
@@ -206,22 +234,35 @@ static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm,
template<unsigned N>
static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address) {
+ uint64_t Address,
+ bool isBranch,
+ const void *Decoder) {
assert(isUInt<N>(Imm) && "Invalid PC-relative offset");
- Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) * 2 + Address));
+ uint64_t Value = SignExtend64<N>(Imm) * 2 + Address;
+
+ if (!tryAddingSymbolicOperand(Value, isBranch, Address, 2, N / 8,
+ Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(Value));
+
return MCDisassembler::Success;
}
-static DecodeStatus decodePC16DBLOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address,
- const void *Decoder) {
- return decodePCDBLOperand<16>(Inst, Imm, Address);
+static DecodeStatus decodePC16DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<16>(Inst, Imm, Address, true, Decoder);
+}
+
+static DecodeStatus decodePC32DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<32>(Inst, Imm, Address, true, Decoder);
}
static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
const void *Decoder) {
- return decodePCDBLOperand<32>(Inst, Imm, Address);
+ return decodePCDBLOperand<32>(Inst, Imm, Address, false, Decoder);
}
static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field,
diff --git a/lib/Target/SystemZ/InstPrinter/Makefile b/lib/Target/SystemZ/InstPrinter/Makefile
deleted file mode 100644
index 3ba8126735a5..000000000000
--- a/lib/Target/SystemZ/InstPrinter/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/SystemZ/AsmPrinter/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZAsmPrinter
-
-# Hack: we need to include 'main' mips target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/MCTargetDesc/Makefile b/lib/Target/SystemZ/MCTargetDesc/Makefile
deleted file mode 100644
index 08f1a9d51fb5..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 57eebe19c044..c4d546cb7dff 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -58,7 +58,8 @@ public:
const MCAsmLayout &Layout) const override {
return false;
}
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override {
llvm_unreachable("SystemZ does do not have assembler relaxation");
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index ee1af023769e..368c95f7bac2 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -24,8 +24,8 @@ public:
protected:
// Override MCELFObjectTargetWriter.
- unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel) const override;
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
};
} // end anonymous namespace
@@ -106,7 +106,8 @@ static unsigned getPLTReloc(unsigned Kind) {
llvm_unreachable("Unsupported absolute address");
}
-unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
+unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 2115d4480eef..e16ba9e15317 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -10,7 +10,6 @@
#include "SystemZMCTargetDesc.h"
#include "InstPrinter/SystemZInstPrinter.h"
#include "SystemZMCAsmInfo.h"
-#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -159,17 +158,8 @@ createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
return createSystemZMCSubtargetInfoImpl(TT, CPU, FS);
}
-static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT,
- Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
-
- // Static code is suitable for use in a dynamic executable; there is no
- // separate DynamicNoPIC model.
- if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC)
- RM = Reloc::Static;
-
+static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
+ CodeModel::Model &CM) {
// For SystemZ we define the models as follows:
//
// Small: BRASL can call any function and will use a stub if necessary.
@@ -203,8 +193,6 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT,
CM = CodeModel::Small;
else if (CM == CodeModel::JITDefault)
CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
- X->initMCCodeGenInfo(RM, CM, OL);
- return X;
}
static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
@@ -220,9 +208,9 @@ extern "C" void LLVMInitializeSystemZTargetMC() {
TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget,
createSystemZMCAsmInfo);
- // Register the MCCodeGenInfo.
- TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget,
- createSystemZMCCodeGenInfo);
+ // Register the adjustCodeGenOpts.
+ TargetRegistry::registerMCAdjustCodeGenOpts(TheSystemZTarget,
+ adjustCodeGenOpts);
// Register the MCCodeEmitter.
TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget,
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
deleted file mode 100644
index 732c31725538..000000000000
--- a/lib/Target/SystemZ/Makefile
+++ /dev/null
@@ -1,28 +0,0 @@
-##===- lib/Target/SystemZ/Makefile -------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMSystemZCodeGen
-TARGET = SystemZ
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = SystemZGenRegisterInfo.inc \
- SystemZGenAsmWriter.inc \
- SystemZGenAsmMatcher.inc \
- SystemZGenDisassemblerTables.inc \
- SystemZGenInstrInfo.inc \
- SystemZGenDAGISel.inc \
- SystemZGenSubtargetInfo.inc \
- SystemZGenCallingConv.inc \
- SystemZGenMCCodeEmitter.inc
-
-DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index cd367d60bab7..86a1322c9e23 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -7,13 +7,6 @@ for later architectures at some point.
--
-SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand() is passed "m" for all
-inline asm memory constraints; it doesn't get to see the original constraint.
-This means that it must conservatively treat all inline asm constraints
-as the most restricted type, "R".
-
---
-
If an inline asm ties an i32 "r" result to an i64 input, the input
will be treated as an i32, leaving the upper bits uninitialised.
For example:
@@ -43,15 +36,6 @@ We don't use the BRANCH ON INDEX instructions.
--
-We might want to use BRANCH ON CONDITION for conditional indirect calls
-and conditional returns.
-
---
-
-We don't use the TEST DATA CLASS instructions.
-
---
-
We only use MVC, XC and CLC for constant-length block operations.
We could extend them to variable-length operations too,
using EXECUTE RELATIVE LONG.
@@ -79,11 +63,6 @@ via a register.)
--
-We don't use the halfword forms of LOAD REVERSED and STORE REVERSED
-(LRVH and STRVH).
-
---
-
We don't use ICM or STCM.
--
@@ -123,7 +102,7 @@ ought to be implemented as:
ngr %r2, %r0
br %r14
-but two-address optimisations reverse the order of the AND and force:
+but two-address optimizations reverse the order of the AND and force:
lhi %r0, 1
ngr %r0, %r2
@@ -166,3 +145,10 @@ If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG.
We might want to model all access registers and use them to spill
32-bit values.
+
+--
+
+We might want to use the 'overflow' condition of eg. AR to support
+llvm.sadd.with.overflow.i32 and related instructions - the generated code
+for signed overflow check is currently quite bad. This would improve
+the results of using -ftrapv.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index cafe2c5948c4..c8ea9641fb62 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -87,6 +87,11 @@ const unsigned CCMASK_VCMP_MIXED = CCMASK_1;
const unsigned CCMASK_VCMP_NONE = CCMASK_3;
const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3;
+// Condition-code mask assignments for Test Data Class.
+const unsigned CCMASK_TDC_NOMATCH = CCMASK_0;
+const unsigned CCMASK_TDC_MATCH = CCMASK_1;
+const unsigned CCMASK_TDC = CCMASK_TDC_NOMATCH | CCMASK_TDC_MATCH;
+
// The position of the low CC bit in an IPM result.
const unsigned IPM_CC = 28;
@@ -94,6 +99,41 @@ const unsigned IPM_CC = 28;
const unsigned PFD_READ = 1;
const unsigned PFD_WRITE = 2;
+// Mask assignments for TDC
+const unsigned TDCMASK_ZERO_PLUS = 0x800;
+const unsigned TDCMASK_ZERO_MINUS = 0x400;
+const unsigned TDCMASK_NORMAL_PLUS = 0x200;
+const unsigned TDCMASK_NORMAL_MINUS = 0x100;
+const unsigned TDCMASK_SUBNORMAL_PLUS = 0x080;
+const unsigned TDCMASK_SUBNORMAL_MINUS = 0x040;
+const unsigned TDCMASK_INFINITY_PLUS = 0x020;
+const unsigned TDCMASK_INFINITY_MINUS = 0x010;
+const unsigned TDCMASK_QNAN_PLUS = 0x008;
+const unsigned TDCMASK_QNAN_MINUS = 0x004;
+const unsigned TDCMASK_SNAN_PLUS = 0x002;
+const unsigned TDCMASK_SNAN_MINUS = 0x001;
+
+const unsigned TDCMASK_ZERO = TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS;
+const unsigned TDCMASK_POSITIVE = TDCMASK_NORMAL_PLUS |
+ TDCMASK_SUBNORMAL_PLUS |
+ TDCMASK_INFINITY_PLUS;
+const unsigned TDCMASK_NEGATIVE = TDCMASK_NORMAL_MINUS |
+ TDCMASK_SUBNORMAL_MINUS |
+ TDCMASK_INFINITY_MINUS;
+const unsigned TDCMASK_NAN = TDCMASK_QNAN_PLUS |
+ TDCMASK_QNAN_MINUS |
+ TDCMASK_SNAN_PLUS |
+ TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_PLUS = TDCMASK_POSITIVE |
+ TDCMASK_ZERO_PLUS |
+ TDCMASK_QNAN_PLUS |
+ TDCMASK_SNAN_PLUS;
+const unsigned TDCMASK_MINUS = TDCMASK_NEGATIVE |
+ TDCMASK_ZERO_MINUS |
+ TDCMASK_QNAN_MINUS |
+ TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_ALL = TDCMASK_PLUS | TDCMASK_MINUS;
+
// Number of bits in a vector register.
const unsigned VectorBits = 128;
@@ -138,6 +178,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZTDCPass();
} // end namespace llvm
#endif
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 75273114d62f..9c0f327ff744 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -109,6 +109,85 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D);
break;
+ case SystemZ::CondReturn:
+ LoweredMI = MCInstBuilder(SystemZ::BCR)
+ .addImm(MI->getOperand(0).getImm())
+ .addImm(MI->getOperand(1).getImm())
+ .addReg(SystemZ::R14D);
+ break;
+
+ case SystemZ::CRBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CGRBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CGRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CIBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CGIBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CGIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLRBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CLRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLGRBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CLGRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLIBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CLIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLGIBReturn:
+ LoweredMI = MCInstBuilder(SystemZ::CLGIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R14D)
+ .addImm(0);
+ break;
+
case SystemZ::CallBRASL:
LoweredMI = MCInstBuilder(SystemZ::BRASL)
.addReg(SystemZ::R14D)
@@ -126,10 +205,96 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
break;
+ case SystemZ::CallBRCL:
+ LoweredMI = MCInstBuilder(SystemZ::BRCL)
+ .addImm(MI->getOperand(0).getImm())
+ .addImm(MI->getOperand(1).getImm())
+ .addExpr(Lower.getExpr(MI->getOperand(2), MCSymbolRefExpr::VK_PLT));
+ break;
+
case SystemZ::CallBR:
LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
break;
+ case SystemZ::CallBCR:
+ LoweredMI = MCInstBuilder(SystemZ::BCR)
+ .addImm(MI->getOperand(0).getImm())
+ .addImm(MI->getOperand(1).getImm())
+ .addReg(SystemZ::R1D);
+ break;
+
+ case SystemZ::CRBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CGRBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CGRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CIBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CGIBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CGIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLRBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CLRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLGRBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CLGRB)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLIBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CLIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
+ case SystemZ::CLGIBCall:
+ LoweredMI = MCInstBuilder(SystemZ::CLGIB)
+ .addReg(MI->getOperand(0).getReg())
+ .addImm(MI->getOperand(1).getImm())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(SystemZ::R1D)
+ .addImm(0);
+ break;
+
case SystemZ::TLS_GDCALL:
LoweredMI = MCInstBuilder(SystemZ::BRASL)
.addReg(SystemZ::R14D)
@@ -260,6 +425,41 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(15).addReg(SystemZ::R0D);
break;
+ // Emit nothing here but a comment if we can.
+ case SystemZ::MemBarrier:
+ OutStreamer->emitRawComment("MEMBARRIER");
+ return;
+
+ // We want to emit "j .+2" for traps, jumping to the relative immediate field
+ // of the jump instruction, which is an illegal instruction. We cannot emit a
+ // "." symbol, so create and emit a temp label before the instruction and use
+ // that instead.
+ case SystemZ::Trap: {
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(DotSym);
+
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
+ const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
+ LoweredMI = MCInstBuilder(SystemZ::J)
+ .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext));
+ }
+ break;
+
+ // Conditional traps will create a branch on condition instruction that jumps
+ // to the relative immediate field of the jump instruction. (eg. "jo .+2")
+ case SystemZ::CondTrap: {
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(DotSym);
+
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
+ const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
+ LoweredMI = MCInstBuilder(SystemZ::BRC)
+ .addImm(MI->getOperand(0).getImm())
+ .addImm(MI->getOperand(1).getImm())
+ .addExpr(MCBinaryExpr::createAdd(Expr, ConstExpr, OutContext));
+ }
+ break;
+
default:
Lower.lower(MI, LoweredMI);
break;
diff --git a/lib/Target/SystemZ/SystemZCallingConv.cpp b/lib/Target/SystemZ/SystemZCallingConv.cpp
index cc9c84b6a058..72da51f74b10 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -12,10 +12,10 @@
using namespace llvm;
-const unsigned SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = {
+const MCPhysReg SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = {
SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D
};
-const unsigned SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = {
+const MCPhysReg SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = {
SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
};
diff --git a/lib/Target/SystemZ/SystemZCallingConv.h b/lib/Target/SystemZ/SystemZCallingConv.h
index bff0706618aa..b5523e586f4c 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/lib/Target/SystemZ/SystemZCallingConv.h
@@ -12,14 +12,15 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/MC/MCRegisterInfo.h"
namespace llvm {
namespace SystemZ {
const unsigned NumArgGPRs = 5;
- extern const unsigned ArgGPRs[NumArgGPRs];
+ extern const MCPhysReg ArgGPRs[NumArgGPRs];
const unsigned NumArgFPRs = 4;
- extern const unsigned ArgFPRs[NumArgFPRs];
+ extern const MCPhysReg ArgFPRs[NumArgFPRs];
} // end namespace SystemZ
class SystemZCCState : public CCState {
@@ -79,6 +80,51 @@ public:
bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; }
};
+// Handle i128 argument types. These need to be passed by implicit
+// reference. This could be as simple as the following .td line:
+// CCIfType<[i128], CCPassIndirect<i64>>,
+// except that i128 is not a legal type, and therefore gets split by
+// common code into a pair of i64 arguments.
+inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // ArgFlags.isSplit() is true on the first part of a i128 argument;
+ // PendingMembers.empty() is false on all subsequent parts.
+ if (!ArgFlags.isSplit() && PendingMembers.empty())
+ return false;
+
+ // Push a pending Indirect value location for each part.
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::Indirect;
+ PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT,
+ LocVT, LocInfo));
+ if (!ArgFlags.isSplitEnd())
+ return true;
+
+ // OK, we've collected all parts in the pending list. Allocate
+ // the location (register or stack slot) for the indirect pointer.
+ // (This duplicates the usual i64 calling convention rules.)
+ unsigned Reg = State.AllocateReg(SystemZ::ArgGPRs);
+ unsigned Offset = Reg ? 0 : State.AllocateStack(8, 8);
+
+ // Use that same location for all the pending parts.
+ for (auto &It : PendingMembers) {
+ if (Reg)
+ It.convertToReg(Reg);
+ else
+ It.convertToMem(Offset);
+ State.addLoc(It);
+ }
+
+ PendingMembers.clear();
+
+ return true;
+}
+
} // end namespace llvm
#endif
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index bdd1b1598adb..2bf5ac29865f 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -33,6 +33,9 @@ def RetCC_SystemZ : CallingConv<[
// Promote i32 to i64 if it has an explicit extension type.
CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+ // A SwiftError is returned in R9.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>,
+
// ABI-compliant code returns 64-bit integers in R2. Make the other
// call-clobbered argument registers available for code that doesn't
// care about the ABI. (R6 is an argument register too, but is
@@ -65,8 +68,17 @@ def CC_SystemZ : CallingConv<[
// are smaller than 64 bits shouldn't.
CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+ // A SwiftSelf is passed in callee-saved R10.
+ CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>,
+
+ // A SwiftError is passed in callee-saved R9.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>,
+
// Force long double values to the stack and pass i64 pointers to them.
CCIfType<[f128], CCPassIndirect<i64>>,
+ // Same for i128 values. These are already split into two i64 here,
+ // so we have to use a custom handler.
+ CCIfType<[i64], CCCustom<"CC_SystemZ_I128Indirect">>,
// The first 5 integer arguments are passed in R2-R6. Note that R6
// is call-saved.
@@ -105,3 +117,6 @@ def CC_SystemZ : CallingConv<[
//===----------------------------------------------------------------------===//
def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15),
(sequence "F%dD", 8, 15))>;
+
+// R9 is used to return SwiftError; remove it from CSR.
+def CSR_SystemZ_SwiftError : CalleeSavedRegs<(sub CSR_SystemZ, R9D)>;
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index 4818ed015522..27350b88554d 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -65,18 +64,22 @@ public:
bool processBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
private:
- Reference getRegReferences(MachineInstr *MI, unsigned Reg);
- bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
+ Reference getRegReferences(MachineInstr &MI, unsigned Reg);
+ bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
- bool convertToLoadAndTest(MachineInstr *MI);
- bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
+ bool convertToLoadAndTest(MachineInstr &MI);
+ bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
- bool optimizeCompareZero(MachineInstr *Compare,
+ bool optimizeCompareZero(MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
- bool fuseCompareAndBranch(MachineInstr *Compare,
- SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool fuseCompareOperations(MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
const SystemZInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -98,14 +101,12 @@ static bool isCCLiveOut(MachineBasicBlock &MBB) {
}
// Return true if any CC result of MI would reflect the value of Reg.
-static bool resultTests(MachineInstr *MI, unsigned Reg) {
- if (MI->getNumOperands() > 0 &&
- MI->getOperand(0).isReg() &&
- MI->getOperand(0).isDef() &&
- MI->getOperand(0).getReg() == Reg)
+static bool resultTests(MachineInstr &MI, unsigned Reg) {
+ if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg)
return true;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
case SystemZ::LR:
case SystemZ::LGR:
case SystemZ::LGFR:
@@ -118,7 +119,7 @@ static bool resultTests(MachineInstr *MI, unsigned Reg) {
case SystemZ::LTEBR:
case SystemZ::LTDBR:
case SystemZ::LTXBR:
- if (MI->getOperand(1).getReg() == Reg)
+ if (MI.getOperand(1).getReg() == Reg)
return true;
}
@@ -126,10 +127,10 @@ static bool resultTests(MachineInstr *MI, unsigned Reg) {
}
// Describe the references to Reg or any of its aliases in MI.
-Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
+Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
Reference Ref;
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
if (MO.isReg()) {
if (unsigned MOReg = MO.getReg()) {
if (TRI->regsOverlap(MOReg, Reg)) {
@@ -146,23 +147,23 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
// Return true if this is a load and test which can be optimized the
// same way as compare instruction.
-static bool isLoadAndTestAsCmp(MachineInstr *MI) {
+static bool isLoadAndTestAsCmp(MachineInstr &MI) {
// If we during isel used a load-and-test as a compare with 0, the
// def operand is dead.
- return ((MI->getOpcode() == SystemZ::LTEBR ||
- MI->getOpcode() == SystemZ::LTDBR ||
- MI->getOpcode() == SystemZ::LTXBR) &&
- MI->getOperand(0).isDead());
+ return (MI.getOpcode() == SystemZ::LTEBR ||
+ MI.getOpcode() == SystemZ::LTDBR ||
+ MI.getOpcode() == SystemZ::LTXBR) &&
+ MI.getOperand(0).isDead();
}
// Return the source register of Compare, which is the unknown value
// being tested.
-static unsigned getCompareSourceReg(MachineInstr *Compare) {
+static unsigned getCompareSourceReg(MachineInstr &Compare) {
unsigned reg = 0;
- if (Compare->isCompare())
- reg = Compare->getOperand(0).getReg();
+ if (Compare.isCompare())
+ reg = Compare.getOperand(0).getReg();
else if (isLoadAndTestAsCmp(Compare))
- reg = Compare->getOperand(1).getReg();
+ reg = Compare.getOperand(1).getReg();
assert (reg);
return reg;
@@ -171,11 +172,11 @@ static unsigned getCompareSourceReg(MachineInstr *Compare) {
// Compare compares the result of MI against zero. If MI is an addition
// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
// and convert the branch to a BRCT(G). Return true on success.
-bool
-SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
- SmallVectorImpl<MachineInstr *> &CCUsers) {
+bool SystemZElimCompare::convertToBRCT(
+ MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
// Check whether we have an addition of -1.
- unsigned Opcode = MI->getOpcode();
+ unsigned Opcode = MI.getOpcode();
unsigned BRCT;
if (Opcode == SystemZ::AHI)
BRCT = SystemZ::BRCT;
@@ -183,7 +184,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
BRCT = SystemZ::BRCTG;
else
return false;
- if (MI->getOperand(2).getImm() != -1)
+ if (MI.getOperand(2).getImm() != -1)
return false;
// Check whether we have a single JLH.
@@ -201,7 +202,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
unsigned SrcReg = getCompareSourceReg(Compare);
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
for (++MBBI; MBBI != MBBE; ++MBBI)
- if (getRegReferences(MBBI, SrcReg))
+ if (getRegReferences(*MBBI, SrcReg))
return false;
// The transformation is OK. Rebuild Branch as a BRCT(G).
@@ -210,24 +211,24 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
Branch->RemoveOperand(0);
Branch->setDesc(TII->get(BRCT));
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addOperand(Target)
- .addReg(SystemZ::CC, RegState::ImplicitDefine);
- MI->eraseFromParent();
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addOperand(Target)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+ MI.eraseFromParent();
return true;
}
// If MI is a load instruction, try to convert it into a LOAD AND TEST.
// Return true on success.
-bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) {
- unsigned Opcode = TII->getLoadAndTest(MI->getOpcode());
+bool SystemZElimCompare::convertToLoadAndTest(MachineInstr &MI) {
+ unsigned Opcode = TII->getLoadAndTest(MI.getOpcode());
if (!Opcode)
return false;
- MI->setDesc(TII->get(Opcode));
- MachineInstrBuilder(*MI->getParent()->getParent(), MI)
- .addReg(SystemZ::CC, RegState::ImplicitDefine);
+ MI.setDesc(TII->get(Opcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine);
return true;
}
@@ -236,10 +237,10 @@ bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) {
// would also reflect the value of X. Try to adjust CCUsers so that
// they test the result of MI directly, returning true on success.
// Leave everything unchanged on failure.
-bool SystemZElimCompare::
-adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
- SmallVectorImpl<MachineInstr *> &CCUsers) {
- int Opcode = MI->getOpcode();
+bool SystemZElimCompare::adjustCCMasksForInstr(
+ MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+ int Opcode = MI.getOpcode();
const MCInstrDesc &Desc = TII->get(Opcode);
unsigned MIFlags = Desc.TSFlags;
@@ -247,7 +248,7 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags);
// For unsigned comparisons with zero, only equality makes sense.
- unsigned CompareFlags = Compare->getDesc().TSFlags;
+ unsigned CompareFlags = Compare.getDesc().TSFlags;
if (CompareFlags & SystemZII::IsLogical)
ReusableCCMask &= SystemZ::CCMASK_CMP_EQ;
@@ -296,9 +297,9 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
}
// CC is now live after MI.
- int CCDef = MI->findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI);
+ int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI);
assert(CCDef >= 0 && "Couldn't find CC set");
- MI->getOperand(CCDef).setIsDead(false);
+ MI.getOperand(CCDef).setIsDead(false);
// Clear any intervening kills of CC.
MachineBasicBlock::iterator MBBI = MI, MBBE = Compare;
@@ -309,8 +310,8 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
}
// Return true if Compare is a comparison against zero.
-static bool isCompareZero(MachineInstr *Compare) {
- switch (Compare->getOpcode()) {
+static bool isCompareZero(MachineInstr &Compare) {
+ switch (Compare.getOpcode()) {
case SystemZ::LTEBRCompare:
case SystemZ::LTDBRCompare:
case SystemZ::LTXBRCompare:
@@ -321,9 +322,8 @@ static bool isCompareZero(MachineInstr *Compare) {
if (isLoadAndTestAsCmp(Compare))
return true;
- return (Compare->getNumExplicitOperands() == 2 &&
- Compare->getOperand(1).isImm() &&
- Compare->getOperand(1).getImm() == 0);
+ return Compare.getNumExplicitOperands() == 2 &&
+ Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
}
}
@@ -331,21 +331,20 @@ static bool isCompareZero(MachineInstr *Compare) {
// a value against zero. Return true on success and if Compare should be
// deleted as dead. CCUsers is the list of instructions that use the CC
// value produced by Compare.
-bool SystemZElimCompare::
-optimizeCompareZero(MachineInstr *Compare,
- SmallVectorImpl<MachineInstr *> &CCUsers) {
+bool SystemZElimCompare::optimizeCompareZero(
+ MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
if (!isCompareZero(Compare))
return false;
// Search back for CC results that are based on the first operand.
unsigned SrcReg = getCompareSourceReg(Compare);
- MachineBasicBlock &MBB = *Compare->getParent();
+ MachineBasicBlock &MBB = *Compare.getParent();
MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin();
Reference CCRefs;
Reference SrcRefs;
while (MBBI != MBBE) {
--MBBI;
- MachineInstr *MI = MBBI;
+ MachineInstr &MI = *MBBI;
if (resultTests(MI, SrcReg)) {
// Try to remove both MI and Compare by converting a branch to BRCT(G).
// We don't care in this case whether CC is modified between MI and
@@ -373,54 +372,85 @@ optimizeCompareZero(MachineInstr *Compare,
// Try to fuse comparison instruction Compare into a later branch.
// Return true on success and if Compare is therefore redundant.
-bool SystemZElimCompare::
-fuseCompareAndBranch(MachineInstr *Compare,
- SmallVectorImpl<MachineInstr *> &CCUsers) {
- // See whether we have a comparison that can be fused.
- unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
- Compare);
- if (!FusedOpcode)
- return false;
-
+bool SystemZElimCompare::fuseCompareOperations(
+ MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
// See whether we have a single branch with which to fuse.
if (CCUsers.size() != 1)
return false;
MachineInstr *Branch = CCUsers[0];
- if (Branch->getOpcode() != SystemZ::BRC)
+ SystemZII::FusedCompareType Type;
+ switch (Branch->getOpcode()) {
+ case SystemZ::BRC:
+ Type = SystemZII::CompareAndBranch;
+ break;
+ case SystemZ::CondReturn:
+ Type = SystemZII::CompareAndReturn;
+ break;
+ case SystemZ::CallBCR:
+ Type = SystemZII::CompareAndSibcall;
+ break;
+ case SystemZ::CondTrap:
+ Type = SystemZII::CompareAndTrap;
+ break;
+ default:
+ return false;
+ }
+
+ // See whether we have a comparison that can be fused.
+ unsigned FusedOpcode =
+ TII->getFusedCompare(Compare.getOpcode(), Type, &Compare);
+ if (!FusedOpcode)
return false;
// Make sure that the operands are available at the branch.
- unsigned SrcReg = Compare->getOperand(0).getReg();
- unsigned SrcReg2 = (Compare->getOperand(1).isReg() ?
- Compare->getOperand(1).getReg() : 0);
+ unsigned SrcReg = Compare.getOperand(0).getReg();
+ unsigned SrcReg2 =
+ Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
for (++MBBI; MBBI != MBBE; ++MBBI)
if (MBBI->modifiesRegister(SrcReg, TRI) ||
(SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
return false;
- // Read the branch mask and target.
+ // Read the branch mask, target (if applicable), regmask (if applicable).
MachineOperand CCMask(MBBI->getOperand(1));
- MachineOperand Target(MBBI->getOperand(2));
assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
"Invalid condition-code mask for integer comparison");
+ // This is only valid for CompareAndBranch.
+ MachineOperand Target(MBBI->getOperand(
+ Type == SystemZII::CompareAndBranch ? 2 : 0));
+ const uint32_t *RegMask;
+ if (Type == SystemZII::CompareAndSibcall)
+ RegMask = MBBI->getOperand(2).getRegMask();
// Clear out all current operands.
int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
- assert(CCUse >= 0 && "BRC must use CC");
+ assert(CCUse >= 0 && "BRC/BCR must use CC");
Branch->RemoveOperand(CCUse);
- Branch->RemoveOperand(2);
+ // Remove target (branch) or regmask (sibcall).
+ if (Type == SystemZII::CompareAndBranch ||
+ Type == SystemZII::CompareAndSibcall)
+ Branch->RemoveOperand(2);
Branch->RemoveOperand(1);
Branch->RemoveOperand(0);
// Rebuild Branch as a fused compare and branch.
Branch->setDesc(TII->get(FusedOpcode));
- MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
- .addOperand(Compare->getOperand(0))
- .addOperand(Compare->getOperand(1))
- .addOperand(CCMask)
- .addOperand(Target)
- .addReg(SystemZ::CC, RegState::ImplicitDefine);
+ MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
+ MIB.addOperand(Compare.getOperand(0))
+ .addOperand(Compare.getOperand(1))
+ .addOperand(CCMask);
+
+ if (Type == SystemZII::CompareAndBranch) {
+ // Only conditional branches define CC, as they may be converted back
+ // to a non-fused branch because of a long displacement. Conditional
+ // returns don't have that problem.
+ MIB.addOperand(Target)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+ }
+
+ if (Type == SystemZII::CompareAndSibcall)
+ MIB.addRegMask(RegMask);
// Clear any intervening kills of SrcReg and SrcReg2.
MBBI = Compare;
@@ -445,29 +475,31 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
SmallVector<MachineInstr *, 4> CCUsers;
MachineBasicBlock::iterator MBBI = MBB.end();
while (MBBI != MBB.begin()) {
- MachineInstr *MI = --MBBI;
- if (CompleteCCUsers &&
- (MI->isCompare() || isLoadAndTestAsCmp(MI)) &&
+ MachineInstr &MI = *--MBBI;
+ if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) &&
(optimizeCompareZero(MI, CCUsers) ||
- fuseCompareAndBranch(MI, CCUsers))) {
+ fuseCompareOperations(MI, CCUsers))) {
++MBBI;
- MI->eraseFromParent();
+ MI.eraseFromParent();
Changed = true;
CCUsers.clear();
continue;
}
- if (MI->definesRegister(SystemZ::CC)) {
+ if (MI.definesRegister(SystemZ::CC)) {
CCUsers.clear();
CompleteCCUsers = true;
}
- if (MI->readsRegister(SystemZ::CC) && CompleteCCUsers)
- CCUsers.push_back(MI);
+ if (MI.readsRegister(SystemZ::CC) && CompleteCCUsers)
+ CCUsers.push_back(&MI);
}
return Changed;
}
bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
+ if (skipFunction(*F.getFunction()))
+ return false;
+
TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
TRI = &TII->getRegisterInfo();
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index e1b20d0536d1..ccaed49475ca 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -81,6 +81,12 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
SavedRegs.set(SystemZ::ArgGPRs[I]);
+ // If there are any landing pads, entering them will modify r6/r7.
+ if (!MF.getMMI().getLandingPads().empty()) {
+ SavedRegs.set(SystemZ::R6D);
+ SavedRegs.set(SystemZ::R7D);
+ }
+
// If the function requires a frame pointer, record that the hard
// frame pointer will be clobbered.
if (HasFP)
@@ -258,7 +264,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// Do a second scan adding regs as being defined by instruction
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
unsigned Reg = CSI[I].getReg();
- if (Reg != LowGPR && Reg != HighGPR)
+ if (Reg != LowGPR && Reg != HighGPR &&
+ SystemZ::GR64BitRegClass.contains(Reg))
MIB.addReg(Reg, RegState::ImplicitDefine);
}
}
@@ -353,6 +360,15 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
uint64_t StackSize = getAllocatedStackSize(MF);
if (StackSize) {
+ // Determine if we want to store a backchain.
+ bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+
+ // If we need backchain, save current stack pointer. R1 is free at this
+ // point.
+ if (StoreBackchain)
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
+
// Allocate StackSize bytes.
int64_t Delta = -int64_t(StackSize);
emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
@@ -363,6 +379,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
SPOffsetFromCFA += Delta;
+
+ if (StoreBackchain)
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0);
}
if (HasFP) {
@@ -511,7 +531,7 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return true;
}
-void SystemZFrameLowering::
+MachineBasicBlock::iterator SystemZFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
@@ -520,7 +540,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
case SystemZ::ADJCALLSTACKUP:
assert(hasReservedCallFrame(MF) &&
"ADJSTACKDOWN and ADJSTACKUP should be no-ops");
- MBB.erase(MI);
+ return MBB.erase(MI);
break;
default:
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 46bb6b7a7573..d43a176ad874 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -46,10 +46,9 @@ public:
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const
- override;
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
// Return the number of bytes in the callee-allocated part of the frame.
uint64_t getAllocatedStackSize(const MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index a9093094d884..cd7fcc3070a4 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -113,7 +113,8 @@ static uint64_t allOnes(unsigned int Count) {
// (and (rotl Input, Rotate), Mask)
//
// otherwise. The output value has BitSize bits, although Input may be
-// narrower (in which case the upper bits are don't care).
+// narrower (in which case the upper bits are don't care), or wider (in which
+// case the result will be truncated as part of the operation).
struct RxSBGOperands {
RxSBGOperands(unsigned Op, SDValue N)
: Opcode(Op), BitSize(N.getValueType().getSizeInBits()),
@@ -279,18 +280,18 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
bool expandRxSBG(RxSBGOperands &RxSBG) const;
// Return an undefined value of type VT.
- SDValue getUNDEF(SDLoc DL, EVT VT) const;
+ SDValue getUNDEF(const SDLoc &DL, EVT VT) const;
// Convert N to VT, if it isn't already.
- SDValue convertTo(SDLoc DL, EVT VT, SDValue N) const;
+ SDValue convertTo(const SDLoc &DL, EVT VT, SDValue N) const;
// Try to implement AND or shift node N using RISBG with the zero flag set.
// Return the selected node on success, otherwise return null.
- SDNode *tryRISBGZero(SDNode *N);
+ bool tryRISBGZero(SDNode *N);
// Try to use RISBG or Opcode to implement OR or XOR node N.
// Return the selected node on success, otherwise return null.
- SDNode *tryRxSBG(SDNode *N, unsigned Opcode);
+ bool tryRxSBG(SDNode *N, unsigned Opcode);
// If Op0 is null, then Node is a constant that can be loaded using:
//
@@ -299,14 +300,14 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// If Op0 is nonnull, then Node can be implemented using:
//
// (Opcode (Opcode Op0 UpperVal) LowerVal)
- SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
- uint64_t UpperVal, uint64_t LowerVal);
+ void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
+ uint64_t UpperVal, uint64_t LowerVal);
// Try to use gather instruction Opcode to implement vector insertion N.
- SDNode *tryGather(SDNode *N, unsigned Opcode);
+ bool tryGather(SDNode *N, unsigned Opcode);
// Try to use scatter instruction Opcode to implement store Store.
- SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode);
+ bool tryScatter(StoreSDNode *Store, unsigned Opcode);
// Return true if Load and Store are loads and stores of the same size
// and are guaranteed not to overlap. Such operations can be implemented
@@ -343,7 +344,7 @@ public:
}
// Override SelectionDAGISel.
- SDNode *Select(SDNode *Node) override;
+ void Select(SDNode *Node) override;
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
@@ -554,6 +555,10 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
expandDisp(AM, true, SDValue(),
cast<ConstantSDNode>(Addr)->getSExtValue()))
;
+ // Also see if it's a bare ADJDYNALLOC.
+ else if (Addr.getOpcode() == SystemZISD::ADJDYNALLOC &&
+ expandAdjDynAlloc(AM, true, SDValue()))
+ ;
else
// Otherwise try expanding each component.
while (expandAddress(AM, true) ||
@@ -741,6 +746,16 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
SDValue N = RxSBG.Input;
unsigned Opcode = N.getOpcode();
switch (Opcode) {
+ case ISD::TRUNCATE: {
+ if (RxSBG.Opcode == SystemZ::RNSBG)
+ return false;
+ uint64_t BitSize = N.getValueType().getSizeInBits();
+ uint64_t Mask = allOnes(BitSize);
+ if (!refineRxSBGMask(RxSBG, Mask))
+ return false;
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
case ISD::AND: {
if (RxSBG.Opcode == SystemZ::RNSBG)
return false;
@@ -888,12 +903,13 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
}
}
-SDValue SystemZDAGToDAGISel::getUNDEF(SDLoc DL, EVT VT) const {
+SDValue SystemZDAGToDAGISel::getUNDEF(const SDLoc &DL, EVT VT) const {
SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
return SDValue(N, 0);
}
-SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
+SDValue SystemZDAGToDAGISel::convertTo(const SDLoc &DL, EVT VT,
+ SDValue N) const {
if (N.getValueType() == MVT::i32 && VT == MVT::i64)
return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32,
DL, VT, getUNDEF(DL, MVT::i64), N);
@@ -903,23 +919,27 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
return N;
}
-SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
+bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (!VT.isInteger() || VT.getSizeInBits() > 64)
- return nullptr;
+ return false;
RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
unsigned Count = 0;
while (expandRxSBG(RISBG))
- if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND)
+ // The widening or narrowing is expected to be free.
+ // Counting widening or narrowing as a saved operation will result in
+ // preferring an R*SBG over a simple shift/logical instruction.
+ if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND &&
+ RISBG.Input.getOpcode() != ISD::TRUNCATE)
Count += 1;
if (Count == 0)
- return nullptr;
+ return false;
if (Count == 1) {
// Prefer to use normal shift instructions over RISBG, since they can handle
// all cases and are sometimes shorter.
if (N->getOpcode() != ISD::AND)
- return nullptr;
+ return false;
// Prefer register extensions like LLC over RISBG. Also prefer to start
// out with normal ANDs if one instruction would be enough. We can convert
@@ -934,9 +954,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
if (MaskN->getZExtValue() != RISBG.Mask) {
SDValue NewMask = CurDAG->getConstant(RISBG.Mask, DL, VT);
N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask);
- return SelectCode(N);
+ SelectCode(N);
+ return true;
}
- return nullptr;
+ return false;
}
}
@@ -952,8 +973,11 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
SDValue In = convertTo(DL, VT, RISBG.Input);
- N = CurDAG->getMachineNode(OpCode, DL, VT, In);
- return convertTo(DL, VT, SDValue(N, 0)).getNode();
+ SDValue New = convertTo(
+ DL, VT, SDValue(CurDAG->getMachineNode(OpCode, DL, VT, In), 0));
+ ReplaceUses(N, New.getNode());
+ CurDAG->RemoveDeadNode(N);
+ return true;
}
unsigned Opcode = SystemZ::RISBG;
@@ -974,15 +998,18 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32),
CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32)
};
- N = CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops);
- return convertTo(DL, VT, SDValue(N, 0)).getNode();
+ SDValue New = convertTo(
+ DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0));
+ ReplaceUses(N, New.getNode());
+ CurDAG->RemoveDeadNode(N);
+ return true;
}
-SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
+bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (!VT.isInteger() || VT.getSizeInBits() > 64)
- return nullptr;
+ return false;
// Try treating each operand of N as the second operand of the RxSBG
// and see which goes deepest.
RxSBGOperands RxSBG[] = {
@@ -992,12 +1019,16 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
unsigned Count[] = { 0, 0 };
for (unsigned I = 0; I < 2; ++I)
while (expandRxSBG(RxSBG[I]))
- if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND)
+ // The widening or narrowing is expected to be free.
+ // Counting widening or narrowing as a saved operation will result in
+ // preferring an R*SBG over a simple shift/logical instruction.
+ if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND &&
+ RxSBG[I].Input.getOpcode() != ISD::TRUNCATE)
Count[I] += 1;
// Do nothing if neither operand is suitable.
if (Count[0] == 0 && Count[1] == 0)
- return nullptr;
+ return false;
// Pick the deepest second operand.
unsigned I = Count[0] > Count[1] ? 0 : 1;
@@ -1007,7 +1038,7 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0)
if (auto *Load = dyn_cast<LoadSDNode>(Op0.getNode()))
if (Load->getMemoryVT() == MVT::i8)
- return nullptr;
+ return false;
// See whether we can avoid an AND in the first operand by converting
// ROSBG to RISBG.
@@ -1025,47 +1056,70 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32),
CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32)
};
- N = CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops);
- return convertTo(DL, VT, SDValue(N, 0)).getNode();
+ SDValue New = convertTo(
+ DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops), 0));
+ ReplaceNode(N, New.getNode());
+ return true;
}
-SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
- SDValue Op0, uint64_t UpperVal,
- uint64_t LowerVal) {
+void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
+ SDValue Op0, uint64_t UpperVal,
+ uint64_t LowerVal) {
EVT VT = Node->getValueType(0);
SDLoc DL(Node);
SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT);
if (Op0.getNode())
Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper);
- Upper = SDValue(Select(Upper.getNode()), 0);
+
+ {
+ // When we haven't passed in Op0, Upper will be a constant. In order to
+ // prevent folding back to the large immediate in `Or = getNode(...)` we run
+ // SelectCode first and end up with an opaque machine node. This means that
+ // we need to use a handle to keep track of Upper in case it gets CSE'd by
+ // SelectCode.
+ //
+ // Note that in the case where Op0 is passed in we could just call
+ // SelectCode(Upper) later, along with the SelectCode(Or), and avoid needing
+ // the handle at all, but it's fine to do it here.
+ //
+ // TODO: This is a pretty hacky way to do this. Can we do something that
+ // doesn't require a two paragraph explanation?
+ HandleSDNode Handle(Upper);
+ SelectCode(Upper.getNode());
+ Upper = Handle.getValue();
+ }
SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT);
SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower);
- return Or.getNode();
+
+ ReplaceUses(Node, Or.getNode());
+ CurDAG->RemoveDeadNode(Node);
+
+ SelectCode(Or.getNode());
}
-SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
+bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
SDValue ElemV = N->getOperand(2);
auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
if (!ElemN)
- return 0;
+ return false;
unsigned Elem = ElemN->getZExtValue();
EVT VT = N->getValueType(0);
if (Elem >= VT.getVectorNumElements())
- return 0;
+ return false;
auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
if (!Load || !Load->hasOneUse())
- return 0;
+ return false;
if (Load->getMemoryVT().getSizeInBits() !=
Load->getValueType(0).getSizeInBits())
- return 0;
+ return false;
SDValue Base, Disp, Index;
if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) ||
Index.getValueType() != VT.changeVectorElementTypeToInteger())
- return 0;
+ return false;
SDLoc DL(Load);
SDValue Ops[] = {
@@ -1074,39 +1128,41 @@ SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
};
SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);
ReplaceUses(SDValue(Load, 1), SDValue(Res, 1));
- return Res;
+ ReplaceNode(N, Res);
+ return true;
}
-SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
+bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
SDValue Value = Store->getValue();
if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return 0;
+ return false;
if (Store->getMemoryVT().getSizeInBits() !=
Value.getValueType().getSizeInBits())
- return 0;
+ return false;
SDValue ElemV = Value.getOperand(1);
auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
if (!ElemN)
- return 0;
+ return false;
SDValue Vec = Value.getOperand(0);
EVT VT = Vec.getValueType();
unsigned Elem = ElemN->getZExtValue();
if (Elem >= VT.getVectorNumElements())
- return 0;
+ return false;
SDValue Base, Disp, Index;
if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) ||
Index.getValueType() != VT.changeVectorElementTypeToInteger())
- return 0;
+ return false;
SDLoc DL(Store);
SDValue Ops[] = {
Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32),
Store->getChain()
};
- return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ ReplaceNode(Store, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
+ return true;
}
bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
@@ -1167,7 +1223,7 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
}
-SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
+void SystemZDAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -1175,43 +1231,47 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
if (Node->isMachineOpcode()) {
DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
Node->setNodeId(-1);
- return nullptr;
+ return;
}
unsigned Opcode = Node->getOpcode();
- SDNode *ResNode = nullptr;
switch (Opcode) {
case ISD::OR:
if (Node->getOperand(1).getOpcode() != ISD::Constant)
- ResNode = tryRxSBG(Node, SystemZ::ROSBG);
+ if (tryRxSBG(Node, SystemZ::ROSBG))
+ return;
goto or_xor;
case ISD::XOR:
if (Node->getOperand(1).getOpcode() != ISD::Constant)
- ResNode = tryRxSBG(Node, SystemZ::RXSBG);
+ if (tryRxSBG(Node, SystemZ::RXSBG))
+ return;
// Fall through.
or_xor:
// If this is a 64-bit operation in which both 32-bit halves are nonzero,
// split the operation into two.
- if (!ResNode && Node->getValueType(0) == MVT::i64)
+ if (Node->getValueType(0) == MVT::i64)
if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
uint64_t Val = Op1->getZExtValue();
- if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val))
- Node = splitLargeImmediate(Opcode, Node, Node->getOperand(0),
- Val - uint32_t(Val), uint32_t(Val));
+ if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
+ splitLargeImmediate(Opcode, Node, Node->getOperand(0),
+ Val - uint32_t(Val), uint32_t(Val));
+ return;
+ }
}
break;
case ISD::AND:
if (Node->getOperand(1).getOpcode() != ISD::Constant)
- ResNode = tryRxSBG(Node, SystemZ::RNSBG);
+ if (tryRxSBG(Node, SystemZ::RNSBG))
+ return;
// Fall through.
case ISD::ROTL:
case ISD::SHL:
case ISD::SRL:
case ISD::ZERO_EXTEND:
- if (!ResNode)
- ResNode = tryRISBGZero(Node);
+ if (tryRISBGZero(Node))
+ return;
break;
case ISD::Constant:
@@ -1219,9 +1279,11 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// LLIHF and LGFI, split it into two 32-bit pieces.
if (Node->getValueType(0) == MVT::i64) {
uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue();
- if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val))
- Node = splitLargeImmediate(ISD::OR, Node, SDValue(),
- Val - uint32_t(Val), uint32_t(Val));
+ if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) {
+ splitLargeImmediate(ISD::OR, Node, SDValue(), Val - uint32_t(Val),
+ uint32_t(Val));
+ return;
+ }
}
break;
@@ -1249,63 +1311,75 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
case ISD::INSERT_VECTOR_ELT: {
EVT VT = Node->getValueType(0);
unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
- if (ElemBitSize == 32)
- ResNode = tryGather(Node, SystemZ::VGEF);
- else if (ElemBitSize == 64)
- ResNode = tryGather(Node, SystemZ::VGEG);
+ if (ElemBitSize == 32) {
+ if (tryGather(Node, SystemZ::VGEF))
+ return;
+ } else if (ElemBitSize == 64) {
+ if (tryGather(Node, SystemZ::VGEG))
+ return;
+ }
break;
}
case ISD::STORE: {
auto *Store = cast<StoreSDNode>(Node);
unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits();
- if (ElemBitSize == 32)
- ResNode = tryScatter(Store, SystemZ::VSCEF);
- else if (ElemBitSize == 64)
- ResNode = tryScatter(Store, SystemZ::VSCEG);
+ if (ElemBitSize == 32) {
+ if (tryScatter(Store, SystemZ::VSCEF))
+ return;
+ } else if (ElemBitSize == 64) {
+ if (tryScatter(Store, SystemZ::VSCEG))
+ return;
+ }
break;
}
}
- // Select the default instruction
- if (!ResNode)
- ResNode = SelectCode(Node);
-
- DEBUG(errs() << "=> ";
- if (ResNode == nullptr || ResNode == Node)
- Node->dump(CurDAG);
- else
- ResNode->dump(CurDAG);
- errs() << "\n";
- );
- return ResNode;
+ SelectCode(Node);
}
bool SystemZDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
+ SystemZAddressingMode::AddrForm Form;
+ SystemZAddressingMode::DispRange DispRange;
+ SDValue Base, Disp, Index;
+
switch(ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
case InlineAsm::Constraint_i:
- case InlineAsm::Constraint_m:
case InlineAsm::Constraint_Q:
+ // Accept an address with a short displacement, but no index.
+ Form = SystemZAddressingMode::FormBD;
+ DispRange = SystemZAddressingMode::Disp12Only;
+ break;
case InlineAsm::Constraint_R:
+ // Accept an address with a short displacement and an index.
+ Form = SystemZAddressingMode::FormBDXNormal;
+ DispRange = SystemZAddressingMode::Disp12Only;
+ break;
case InlineAsm::Constraint_S:
+ // Accept an address with a long displacement, but no index.
+ Form = SystemZAddressingMode::FormBD;
+ DispRange = SystemZAddressingMode::Disp20Only;
+ break;
case InlineAsm::Constraint_T:
- // Accept addresses with short displacements, which are compatible
- // with Q, R, S and T. But keep the index operand for future expansion.
- SDValue Base, Disp, Index;
- if (selectBDXAddr(SystemZAddressingMode::FormBD,
- SystemZAddressingMode::Disp12Only,
- Op, Base, Disp, Index)) {
- OutOps.push_back(Base);
- OutOps.push_back(Disp);
- OutOps.push_back(Index);
- return false;
- }
+ case InlineAsm::Constraint_m:
+ // Accept an address with a long displacement and an index.
+ // m works the same as T, as this is the most general case.
+ Form = SystemZAddressingMode::FormBDXNormal;
+ DispRange = SystemZAddressingMode::Disp20Only;
break;
}
+
+ if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Disp);
+ OutOps.push_back(Index);
+ return false;
+ }
+
return true;
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index b0a612764636..14991bbbd365 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -184,8 +184,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// No special instructions for these.
setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
// Use *MUL_LOHI where possible instead of MULH*.
@@ -216,6 +214,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ // Traps are legal, as we will convert them to "j .+2".
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
// z10 has instructions for signed but not unsigned FP conversion.
// Handle unsigned 32-bit types as signed 64-bit types.
if (!Subtarget.hasFPExtension()) {
@@ -253,6 +256,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// We need to handle dynamic allocations specially because of the
// 160-byte area at the bottom of the stack.
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
// Use custom expanders so that we can force the function to use
// a frame pointer.
@@ -310,8 +314,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Legal);
setOperationAction(ISD::CTLZ, VT, Legal);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
// Convert a GPR scalar to a vector by inserting it into element 0.
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
@@ -437,6 +439,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::ROTL);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -799,7 +806,7 @@ static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
-static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
CCValAssign &VA, SDValue Chain,
SDValue Value) {
// If the argument has been promoted from a smaller type, insert an
@@ -813,16 +820,12 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
if (VA.isExtInLoc())
Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
- else if (VA.getLocInfo() == CCValAssign::Indirect)
- Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
- MachinePointerInfo(), false, false, false, 0);
else if (VA.getLocInfo() == CCValAssign::BCvt) {
// If this is a short vector argument loaded from the stack,
// extend from i64 to full vector size and then bitcast.
assert(VA.getLocVT() == MVT::i64);
assert(VA.getValVT().isVector());
- Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
- Value, DAG.getUNDEF(MVT::i64));
+ Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
} else
assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
@@ -832,7 +835,7 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA. Return a copy of Value converted to
// VA.getValVT(). The caller is responsible for handling indirect values.
-static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
CCValAssign &VA, SDValue Value) {
switch (VA.getLocInfo()) {
case CCValAssign::SExt:
@@ -856,11 +859,10 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
}
}
-SDValue SystemZTargetLowering::
-LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+SDValue SystemZTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -868,6 +870,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
MF.getInfo<SystemZMachineFunctionInfo>();
auto *TFL =
static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Detect unsupported vector argument types.
if (Subtarget.hasVector())
@@ -930,19 +933,34 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
// Create the SelectionDAG nodes corresponding to a load
// from this parameter. Unpromoted ints and floats are
// passed as right-justified 8-byte values.
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getIntPtrConstant(4, DL));
ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(MF, FI), false,
- false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, FI));
}
// Convert the value of the argument register into the value that's
// being passed.
- InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
+ if (VA.getLocInfo() == CCValAssign::Indirect) {
+ InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
+ MachinePointerInfo()));
+ // If the original argument was split (e.g. i128), we need
+ // to load all parts of it here (using the same address).
+ unsigned ArgIndex = Ins[I].OrigArgIndex;
+ assert (Ins[I].PartOffset == 0);
+ while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
+ CCValAssign &PartVA = ArgLocs[I + 1];
+ unsigned PartOffset = Ins[I + 1].PartOffset;
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
+ DAG.getIntPtrConstant(PartOffset, DL));
+ InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
+ MachinePointerInfo()));
+ ++I;
+ }
+ } else
+ InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
}
if (IsVarArg) {
@@ -973,8 +991,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
&SystemZ::FP64BitRegClass);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
- MachinePointerInfo::getFixedStack(MF, FI),
- false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, FI));
}
// Join the stores, which are independent of one another.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
@@ -987,9 +1004,11 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
}
static bool canUseSiblingCall(const CCState &ArgCCInfo,
- SmallVectorImpl<CCValAssign> &ArgLocs) {
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ SmallVectorImpl<ISD::OutputArg> &Outs) {
// Punt if there are any indirect or stack arguments, or if the call
- // needs the call-saved argument register R6.
+ // needs the callee-saved argument register R6, or if the call uses
+ // the callee-saved register arguments SwiftSelf and SwiftError.
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
CCValAssign &VA = ArgLocs[I];
if (VA.getLocInfo() == CCValAssign::Indirect)
@@ -999,6 +1018,8 @@ static bool canUseSiblingCall(const CCState &ArgCCInfo,
unsigned Reg = VA.getLocReg();
if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
return false;
+ if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
+ return false;
}
return true;
}
@@ -1032,7 +1053,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// We don't support GuaranteedTailCallOpt, only automatically-detected
// sibling calls.
- if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
+ if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
IsTailCall = false;
// Get a count of how many bytes are to be pushed on the stack.
@@ -1054,11 +1075,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
- SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- MemOpChains.push_back(DAG.getStore(
- Chain, DL, ArgValue, SpillSlot,
- MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+ MachinePointerInfo::getFixedStack(MF, FI)));
+ // If the original argument was split (e.g. i128), we need
+ // to store all parts of it here (and pass just one address).
+ unsigned ArgIndex = Outs[I].OrigArgIndex;
+ assert (Outs[I].PartOffset == 0);
+ while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
+ SDValue PartValue = OutVals[I + 1];
+ unsigned PartOffset = Outs[I + 1].PartOffset;
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
+ DAG.getIntPtrConstant(PartOffset, DL));
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, PartValue, Address,
+ MachinePointerInfo::getFixedStack(MF, FI)));
+ ++I;
+ }
ArgValue = SpillSlot;
} else
ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
@@ -1080,9 +1115,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.getIntPtrConstant(Offset, DL));
// Emit the store.
- MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
- MachinePointerInfo(),
- false, false, 0));
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
}
}
@@ -1180,17 +1214,23 @@ CanLowerReturn(CallingConv::ID CallConv,
if (Subtarget.hasVector())
VerifyVectorTypes(Outs);
+ // Special case that we cannot easily detect in RetCC_SystemZ since
+ // i128 is not a legal type.
+ for (auto &Out : Outs)
+ if (Out.ArgVT == MVT::i128)
+ return false;
+
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}
SDValue
-SystemZTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool IsVarArg,
+SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- SDLoc DL, SelectionDAG &DAG) const {
+ const SDLoc &DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
// Detect unsupported vector return types.
@@ -1235,8 +1275,8 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}
-SDValue SystemZTargetLowering::
-prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
+SDValue SystemZTargetLowering::prepareVolatileOrAtomicLoad(
+ SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const {
return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}
@@ -1399,6 +1439,11 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
CCValid = SystemZ::CCMASK_VCMP;
return true;
+ case Intrinsic::s390_tdc:
+ Opcode = SystemZISD::TDC;
+ CCValid = SystemZ::CCMASK_TDC;
+ return true;
+
default:
return false;
}
@@ -1538,7 +1583,7 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
-static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
if (C.ICmpType == SystemZICMP::UnsignedOnly)
return;
@@ -1558,7 +1603,8 @@ static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
-static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
// For us to make any changes, it must a comparison between a single-use
// load and a constant.
if (!C.Op0.hasOneUse() ||
@@ -1614,11 +1660,10 @@ static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
ISD::ZEXTLOAD);
if (C.Op0.getValueType() != MVT::i32 ||
Load->getExtensionType() != ExtType)
- C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
- Load->getChain(), Load->getBasePtr(),
- Load->getPointerInfo(), Load->getMemoryVT(),
- Load->isVolatile(), Load->isNonTemporal(),
- Load->isInvariant(), Load->getAlignment());
+ C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
+ Load->getBasePtr(), Load->getPointerInfo(),
+ Load->getMemoryVT(), Load->getAlignment(),
+ Load->getMemOperand()->getFlags());
// Make sure that the second operand is an i32 with the right value.
if (C.Op1.getValueType() != MVT::i32 ||
@@ -1719,7 +1764,8 @@ static unsigned reverseCCMask(unsigned CCMask) {
// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed. In that case it's better to compare the
// result of the subtraction against zero.
-static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
C.CCMask == SystemZ::CCMASK_CMP_NE) {
for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
@@ -1784,7 +1830,8 @@ static void adjustForLTGFR(Comparison &C) {
// If C compares the truncation of an extending load, try to compare
// the untruncated value instead. This exposes more opportunities to
// reuse CC.
-static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
if (C.Op0.getOpcode() == ISD::TRUNCATE &&
C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
C.Op1.getOpcode() == ISD::Constant &&
@@ -1915,7 +1962,8 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
-static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
// Check that we have a comparison with a constant.
auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
if (!ConstOp1)
@@ -2036,7 +2084,7 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
- ISD::CondCode Cond, SDLoc DL) {
+ ISD::CondCode Cond, const SDLoc &DL) {
if (CmpOp1.getOpcode() == ISD::Constant) {
uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
unsigned Opcode, CCValid;
@@ -2089,7 +2137,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
}
// Emit the comparison instruction described by C.
-static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
if (!C.Op1.getNode()) {
SDValue Op;
switch (C.Op0.getOpcode()) {
@@ -2119,9 +2167,9 @@ static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
// Implement a 32-bit *MUL_LOHI operation by extending both operands to
// 64 bits. Extend is the extension type to use. Store the high part
// in Hi and the low part in Lo.
-static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
- unsigned Extend, SDValue Op0, SDValue Op1,
- SDValue &Hi, SDValue &Lo) {
+static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
+ SDValue Op0, SDValue Op1, SDValue &Hi,
+ SDValue &Lo) {
Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
@@ -2136,10 +2184,9 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
// on the extended Op0 and (unextended) Op1. Store the even register result
// in Even and the odd register result in Odd.
-static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
- unsigned Extend, unsigned Opcode,
- SDValue Op0, SDValue Op1,
- SDValue &Even, SDValue &Odd) {
+static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ unsigned Extend, unsigned Opcode, SDValue Op0,
+ SDValue Op1, SDValue &Even, SDValue &Odd) {
SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
SDValue(In128, 0), Op1);
@@ -2151,7 +2198,7 @@ static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
// Return an i32 value that is 1 if the CC value produced by Glue is
// in the mask CCMask and 0 otherwise. CC is known to have a value
// in CCValid, so other values can be ignored.
-static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
+static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue,
unsigned CCValid, unsigned CCMask) {
IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
@@ -2220,7 +2267,7 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
// Return a v2f64 that contains the extended form of elements Start and Start+1
// of v4f32 value Op.
-static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
SDValue Op) {
int Mask[] = { Start, -1, Start + 1, -1 };
Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
@@ -2229,7 +2276,7 @@ static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
// producing a result of type VT.
-static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
+static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL,
EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
// There is no hardware support for v4f32, so extend the vector into
// two v2f64s and compare those.
@@ -2247,7 +2294,7 @@ static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT.
-static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
ISD::CondCode CC, SDValue CmpOp0,
SDValue CmpOp1) {
bool IsFP = CmpOp0.getValueType().isFloatingPoint();
@@ -2342,7 +2389,7 @@ static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
}
// Return the absolute or negative absolute of Op; IsNegative decides which.
-static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
+static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
bool IsNegative) {
Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
if (IsNegative)
@@ -2414,11 +2461,10 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
const GlobalValue *GV = Node->getGlobal();
int64_t Offset = Node->getOffset();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- Reloc::Model RM = DAG.getTarget().getRelocationModel();
CodeModel::Model CM = DAG.getTarget().getCodeModel();
SDValue Result;
- if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
+ if (Subtarget.isPC32DBLSymbol(GV, CM)) {
// Assign anchors at 1<<12 byte boundaries.
uint64_t Anchor = Offset & ~uint64_t(0xfff);
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
@@ -2435,8 +2481,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
// If there was a non-zero offset that we didn't fold, create an explicit
@@ -2495,14 +2540,9 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}
-SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
- SelectionDAG &DAG) const {
- if (DAG.getTarget().Options.EmulatedTLS)
- return LowerToTLSEmulatedModel(Node, DAG);
- SDLoc DL(Node);
- const GlobalValue *GV = Node->getGlobal();
+SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
// The high part of the thread pointer is in access register 0.
SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
@@ -2517,7 +2557,19 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
// Merge them into a single 64-bit address.
SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
DAG.getConstant(32, DL, PtrVT));
- SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
+ return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
+}
+
+SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG) const {
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(Node, DAG);
+ SDLoc DL(Node);
+ const GlobalValue *GV = Node->getGlobal();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
+
+ SDValue TP = lowerThreadPointer(DL, DAG);
// Get the offset of GA from the thread pointer, based on the TLS model.
SDValue Offset;
@@ -2530,8 +2582,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
// Call __tls_get_offset to retrieve the offset.
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
@@ -2546,8 +2597,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
// Call __tls_get_offset to retrieve the module base offset.
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
@@ -2565,8 +2615,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
DTPOffset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), DTPOffset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
break;
@@ -2577,9 +2626,9 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
SystemZII::MO_INDNTPOFF);
Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
+ Offset =
+ DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
break;
}
@@ -2591,8 +2640,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
break;
}
}
@@ -2640,6 +2688,57 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}
+SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ SDLoc DL(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // If the back chain frame index has not been allocated yet, do so.
+ SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
+ int BackChainIdx = FI->getFramePointerSaveIndex();
+ if (!BackChainIdx) {
+ // By definition, the frame address is the address of the back chain.
+ BackChainIdx = MFI->CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+ FI->setFramePointerSaveIndex(BackChainIdx);
+ }
+ SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
+
+ // FIXME The frontend should detect this case.
+ if (Depth > 0) {
+ report_fatal_error("Unsupported stack frame traversal count");
+ }
+
+ return BackChain;
+}
+
+SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ SDLoc DL(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // FIXME The frontend should detect this case.
+ if (Depth > 0) {
+ report_fatal_error("Unsupported stack frame traversal count");
+ }
+
+ // Return R14D, which has the return address. Mark it an implicit live-in.
+ unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
+}
+
SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -2715,8 +2814,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
DAG.getIntPtrConstant(Offset, DL));
MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
- MachinePointerInfo(SV, Offset),
- false, false, 0);
+ MachinePointerInfo(SV, Offset));
Offset += 8;
}
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
@@ -2740,8 +2838,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
- bool RealignOpt = !DAG.getMachineFunction().getFunction()->
- hasFnAttribute("no-realign-stack");
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool RealignOpt = !MF.getFunction()-> hasFnAttribute("no-realign-stack");
+ bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
@@ -2763,10 +2862,15 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
// Get a reference to the stack pointer.
SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
+ // If we need a backchain, save it now.
+ SDValue Backchain;
+ if (StoreBackchain)
+ Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+
// Add extra space for alignment if needed.
if (ExtraAlignSpace)
NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
- DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
// Get the new stack pointer value.
SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
@@ -2790,10 +2894,20 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
}
+ if (StoreBackchain)
+ Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+
SDValue Ops[2] = { Result, Chain };
return DAG.getMergeValues(Ops, DL);
}
+SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
+ SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
+}
+
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -3031,6 +3145,27 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
return Op;
}
+SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+ // The only fence that needs an instruction is a sequentially-consistent
+ // cross-thread fence.
+ if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
+ FenceScope == CrossThread) {
+ return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
+ Op.getOperand(0)),
+ 0);
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
// Op is an atomic load. Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
@@ -3220,8 +3355,24 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
- return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
- SystemZ::R15D, Op.getOperand(1));
+ bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue NewSP = Op.getOperand(1);
+ SDValue Backchain;
+ SDLoc DL(Op);
+
+ if (StoreBackchain) {
+ SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
+ Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+ }
+
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
+
+ if (StoreBackchain)
+ Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+
+ return Chain;
}
SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
@@ -3286,6 +3437,9 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (Id) {
+ case Intrinsic::thread_pointer:
+ return lowerThreadPointer(SDLoc(Op), DAG);
+
case Intrinsic::s390_vpdi:
return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
@@ -3553,7 +3707,7 @@ static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
// Create a node that performs P on operands Op0 and Op1, casting the
// operands to the appropriate type. The type of the result is determined by P.
-static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
+static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
const Permute &P, SDValue Op0, SDValue Op1) {
// VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
// elements of a PACK are twice as wide as the outputs.
@@ -3582,7 +3736,8 @@ static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
// VSLDI or VPERM.
-static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
+static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue *Ops,
const SmallVectorImpl<int> &Bytes) {
for (unsigned I = 0; I < 2; ++I)
Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
@@ -3600,7 +3755,7 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
else
IndexNodes[I] = DAG.getUNDEF(MVT::i32);
- SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
+ SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
}
@@ -3610,7 +3765,7 @@ struct GeneralShuffle {
GeneralShuffle(EVT vt) : VT(vt) {}
void addUndef();
void add(SDValue, unsigned);
- SDValue getNode(SelectionDAG &, SDLoc);
+ SDValue getNode(SelectionDAG &, const SDLoc &);
// The operands of the shuffle.
SmallVector<SDValue, SystemZ::VectorBytes> Ops;
@@ -3667,7 +3822,7 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
}
Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
Byte = unsigned(NewByte) % SystemZ::VectorBytes;
- } else if (Op.getOpcode() == ISD::UNDEF) {
+ } else if (Op.isUndef()) {
addUndef();
return;
} else
@@ -3689,7 +3844,7 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
}
// Return SDNodes for the completed shuffle.
-SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
+SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
if (Ops.size() == 0)
@@ -3770,37 +3925,37 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
static bool isScalarToVector(SDValue Op) {
for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
- if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
+ if (!Op.getOperand(I).isUndef())
return false;
return true;
}
// Return a vector of type VT that contains Value in the first element.
// The other elements don't matter.
-static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SDValue Value) {
// If we have a constant, replicate it to all elements and let the
// BUILD_VECTOR lowering take care of it.
if (Value.getOpcode() == ISD::Constant ||
Value.getOpcode() == ISD::ConstantFP) {
SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ return DAG.getBuildVector(VT, DL, Ops);
}
- if (Value.getOpcode() == ISD::UNDEF)
+ if (Value.isUndef())
return DAG.getUNDEF(VT);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
}
// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
// element 1. Used for cases in which replication is cheap.
-static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SDValue Op0, SDValue Op1) {
- if (Op0.getOpcode() == ISD::UNDEF) {
- if (Op1.getOpcode() == ISD::UNDEF)
+ if (Op0.isUndef()) {
+ if (Op1.isUndef())
return DAG.getUNDEF(VT);
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
}
- if (Op1.getOpcode() == ISD::UNDEF)
+ if (Op1.isUndef())
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
buildScalarToVector(DAG, DL, VT, Op0),
@@ -3809,15 +3964,15 @@ static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
// vector for them.
-static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
+static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
SDValue Op1) {
- if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
+ if (Op0.isUndef() && Op1.isUndef())
return DAG.getUNDEF(MVT::v2i64);
// If one of the two inputs is undefined then replicate the other one,
// in order to avoid using another register unnecessarily.
- if (Op0.getOpcode() == ISD::UNDEF)
+ if (Op0.isUndef())
Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
- else if (Op1.getOpcode() == ISD::UNDEF)
+ else if (Op1.isUndef())
Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
else {
Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
@@ -3834,7 +3989,7 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
unsigned BytesPerElement = ElemVT.getStoreSize();
for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
SDValue Op = BVN->getOperand(I);
- if (Op.getOpcode() != ISD::UNDEF) {
+ if (!Op.isUndef()) {
uint64_t Value;
if (Op.getOpcode() == ISD::Constant)
Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
@@ -3862,7 +4017,7 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
// an empty value.
static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
const SystemZInstrInfo *TII,
- SDLoc DL, EVT VT, uint64_t Value,
+ const SDLoc &DL, EVT VT, uint64_t Value,
unsigned BitsPerElement) {
// Signed 16-bit values can be replicated using VREPI.
int64_t SignedValue = SignExtend64(Value, BitsPerElement);
@@ -3919,7 +4074,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
GS.add(Op.getOperand(0), Elem);
FoundOne = true;
- } else if (Op.getOpcode() == ISD::UNDEF) {
+ } else if (Op.isUndef()) {
GS.addUndef();
} else {
GS.add(SDValue(), ResidueOps.size());
@@ -3937,7 +4092,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
for (auto &Op : GS.Ops) {
if (!Op.getNode()) {
- Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
+ Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
break;
}
}
@@ -3946,14 +4101,14 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
}
// Combine GPR scalar values Elems into a vector of type VT.
-static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) {
// See whether there is a single replicated value.
SDValue Single;
unsigned int NumElements = Elems.size();
unsigned int Count = 0;
for (auto Elem : Elems) {
- if (Elem.getOpcode() != ISD::UNDEF) {
+ if (!Elem.isUndef()) {
if (!Single.getNode())
Single = Elem;
else if (Elem != Single) {
@@ -3998,9 +4153,9 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
// Avoid unnecessary undefs by reusing the other operand.
- if (Op01.getOpcode() == ISD::UNDEF)
+ if (Op01.isUndef())
Op01 = Op23;
- else if (Op23.getOpcode() == ISD::UNDEF)
+ else if (Op23.isUndef())
Op23 = Op01;
// Merging identical replications is a no-op.
if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
@@ -4034,7 +4189,7 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
for (unsigned I = 0; I < NumElements; ++I)
if (!Constants[I].getNode())
Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
- Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
+ Result = DAG.getBuildVector(VT, DL, Constants);
} else {
// Otherwise try to use VLVGP to start the sequence in order to
// avoid a false dependency on any previous contents of the vector
@@ -4042,8 +4197,8 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
// is defined.
unsigned I1 = NumElements / 2 - 1;
unsigned I2 = NumElements - 1;
- bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
- bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
+ bool Def1 = !Elems[I1].isUndef();
+ bool Def2 = !Elems[I2].isUndef();
if (Def1 || Def2) {
SDValue Elem1 = Elems[Def1 ? I1 : I2];
SDValue Elem2 = Elems[Def2 ? I2 : I1];
@@ -4057,7 +4212,7 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
// Use VLVGx to insert the other elements.
for (unsigned I = 0; I < NumElements; ++I)
- if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
+ if (!Done[I] && !Elems[I].isUndef())
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
DAG.getConstant(I, DL, MVT::i32));
return Result;
@@ -4120,8 +4275,7 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
}
// See if we should use shuffles to construct the vector from other vectors.
- SDValue Res = tryBuildVectorShuffle(DAG, BVN);
- if (Res.getNode())
+ if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
return Res;
// Detect SCALAR_TO_VECTOR conversions.
@@ -4312,6 +4466,10 @@ SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
+ case ISD::FRAMEADDR:
+ return lowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR:
+ return lowerRETURNADDR(Op, DAG);
case ISD::BR_CC:
return lowerBR_CC(Op, DAG);
case ISD::SELECT_CC:
@@ -4336,6 +4494,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerVACOPY(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return lowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
case ISD::SMUL_LOHI:
return lowerSMUL_LOHI(Op, DAG);
case ISD::UMUL_LOHI:
@@ -4348,12 +4508,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerOR(Op, DAG);
case ISD::CTPOP:
return lowerCTPOP(Op, DAG);
- case ISD::CTLZ_ZERO_UNDEF:
- return DAG.getNode(ISD::CTLZ, SDLoc(Op),
- Op.getValueType(), Op.getOperand(0));
- case ISD::CTTZ_ZERO_UNDEF:
- return DAG.getNode(ISD::CTTZ, SDLoc(Op),
- Op.getValueType(), Op.getOperand(0));
+ case ISD::ATOMIC_FENCE:
+ return lowerATOMIC_FENCE(Op, DAG);
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
@@ -4457,6 +4613,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SEARCH_STRING);
OPCODE(IPM);
OPCODE(SERIALIZE);
+ OPCODE(MEMBARRIER);
OPCODE(TBEGIN);
OPCODE(TBEGIN_NOFLOAT);
OPCODE(TEND);
@@ -4506,6 +4663,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VISTR_CC);
OPCODE(VSTRC_CC);
OPCODE(VSTRCZ_CC);
+ OPCODE(TDC);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -4518,6 +4676,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_LOADW_UMIN);
OPCODE(ATOMIC_LOADW_UMAX);
OPCODE(ATOMIC_CMP_SWAPW);
+ OPCODE(LRV);
+ OPCODE(STRV);
OPCODE(PREFETCH);
}
return nullptr;
@@ -4535,8 +4695,9 @@ static bool canTreatAsByteVector(EVT VT) {
// of the input vector and Index is the index (based on type VecVT) that
// should be extracted. Return the new extraction if a simplification
// was possible or if Force is true.
-SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
- SDValue Op, unsigned Index,
+SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
+ EVT VecVT, SDValue Op,
+ unsigned Index,
DAGCombinerInfo &DCI,
bool Force) const {
SelectionDAG &DAG = DCI.DAG;
@@ -4639,9 +4800,8 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
// Optimize vector operations in scalar value Op on the basis that Op
// is truncated to TruncVT.
-SDValue
-SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
- DAGCombinerInfo &DCI) const {
+SDValue SystemZTargetLowering::combineTruncateExtract(
+ const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
// If we have (trunc (extract_vector_elt X, Y)), try to turn it into
// (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
// of type TruncVT.
@@ -4675,145 +4835,295 @@ SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
return SDValue();
}
-SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
+SDValue SystemZTargetLowering::combineSIGN_EXTEND(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // Convert (sext (ashr (shl X, C1), C2)) to
+ // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
+ // cheap as narrower ones.
SelectionDAG &DAG = DCI.DAG;
- unsigned Opcode = N->getOpcode();
- if (Opcode == ISD::SIGN_EXTEND) {
- // Convert (sext (ashr (shl X, C1), C2)) to
- // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
- // cheap as narrower ones.
- SDValue N0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
- auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- SDValue Inner = N0.getOperand(0);
- if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
- if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
- unsigned Extra = (VT.getSizeInBits() -
- N0.getValueType().getSizeInBits());
- unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
- unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
- EVT ShiftVT = N0.getOperand(1).getValueType();
- SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
- Inner.getOperand(0));
- SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
- DAG.getConstant(NewShlAmt, SDLoc(Inner),
- ShiftVT));
- return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
- DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
- }
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
+ auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ SDValue Inner = N0.getOperand(0);
+ if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
+ if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
+ unsigned Extra = (VT.getSizeInBits() -
+ N0.getValueType().getSizeInBits());
+ unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
+ unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
+ EVT ShiftVT = N0.getOperand(1).getValueType();
+ SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
+ Inner.getOperand(0));
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
+ DAG.getConstant(NewShlAmt, SDLoc(Inner),
+ ShiftVT));
+ return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
+ DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
}
}
}
- if (Opcode == SystemZISD::MERGE_HIGH ||
- Opcode == SystemZISD::MERGE_LOW) {
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- if (Op0.getOpcode() == ISD::BITCAST)
- Op0 = Op0.getOperand(0);
- if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
- cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
- // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
- // for v4f32.
- if (Op1 == N->getOperand(0))
- return Op1;
- // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
- EVT VT = Op1.getValueType();
- unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
- if (ElemBytes <= 4) {
- Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
- SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
- EVT InVT = VT.changeVectorElementTypeToInteger();
- EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
- SystemZ::VectorBytes / ElemBytes / 2);
- if (VT != InVT) {
- Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
- DCI.AddToWorklist(Op1.getNode());
- }
- SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
- DCI.AddToWorklist(Op.getNode());
- return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineMERGE(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ unsigned Opcode = N->getOpcode();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
+ cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+ // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
+ // for v4f32.
+ if (Op1 == N->getOperand(0))
+ return Op1;
+ // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
+ EVT VT = Op1.getValueType();
+ unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
+ if (ElemBytes <= 4) {
+ Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
+ SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
+ EVT InVT = VT.changeVectorElementTypeToInteger();
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
+ SystemZ::VectorBytes / ElemBytes / 2);
+ if (VT != InVT) {
+ Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
}
+ SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
+ DCI.AddToWorklist(Op.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
}
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSTORE(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ auto *SN = cast<StoreSDNode>(N);
+ auto &Op1 = N->getOperand(1);
+ EVT MemVT = SN->getMemoryVT();
// If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
// for the extraction to be done on a vMiN value, so that we can use VSTE.
// If X has wider elements then convert it to:
// (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
- if (Opcode == ISD::STORE) {
- auto *SN = cast<StoreSDNode>(N);
- EVT MemVT = SN->getMemoryVT();
- if (MemVT.isInteger()) {
- SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
- SN->getValue(), DCI);
- if (Value.getNode()) {
- DCI.AddToWorklist(Value.getNode());
-
- // Rewrite the store with the new form of stored value.
- return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
- SN->getBasePtr(), SN->getMemoryVT(),
- SN->getMemOperand());
- }
+ if (MemVT.isInteger()) {
+ if (SDValue Value =
+ combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
+ DCI.AddToWorklist(Value.getNode());
+
+ // Rewrite the store with the new form of stored value.
+ return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
+ SN->getBasePtr(), SN->getMemoryVT(),
+ SN->getMemOperand());
}
}
- // Try to simplify a vector extraction.
- if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
- if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- SDValue Op0 = N->getOperand(0);
- EVT VecVT = Op0.getValueType();
- return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
- IndexN->getZExtValue(), DCI, false);
+ // Combine STORE (BSWAP) into STRVH/STRV/STRVG
+ // See comment in combineBSWAP about volatile accesses.
+ if (!SN->isVolatile() &&
+ Op1.getOpcode() == ISD::BSWAP &&
+ Op1.getNode()->hasOneUse() &&
+ (Op1.getValueType() == MVT::i16 ||
+ Op1.getValueType() == MVT::i32 ||
+ Op1.getValueType() == MVT::i64)) {
+
+ SDValue BSwapOp = Op1.getOperand(0);
+
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
+
+ SDValue Ops[] = {
+ N->getOperand(0), BSwapOp, N->getOperand(2),
+ DAG.getValueType(Op1.getValueType())
+ };
+
+ return
+ DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
+ Ops, MemVT, SN->getMemOperand());
}
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // Try to simplify a vector extraction.
+ if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = Op0.getValueType();
+ return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
+ IndexN->getZExtValue(), DCI, false);
}
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineJOIN_DWORDS(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
// (join_dwords X, X) == (replicate X)
- if (Opcode == SystemZISD::JOIN_DWORDS &&
- N->getOperand(0) == N->getOperand(1))
+ if (N->getOperand(0) == N->getOperand(1))
return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
N->getOperand(0));
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineFP_ROUND(
+ SDNode *N, DAGCombinerInfo &DCI) const {
// (fround (extract_vector_elt X 0))
// (fround (extract_vector_elt X 1)) ->
// (extract_vector_elt (VROUND X) 0)
// (extract_vector_elt (VROUND X) 1)
//
// This is a special case since the target doesn't really support v2f32s.
- if (Opcode == ISD::FP_ROUND) {
- SDValue Op0 = N->getOperand(0);
- if (N->getValueType(0) == MVT::f32 &&
- Op0.hasOneUse() &&
- Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- Op0.getOperand(0).getValueType() == MVT::v2f64 &&
- Op0.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
- SDValue Vec = Op0.getOperand(0);
- for (auto *U : Vec->uses()) {
- if (U != Op0.getNode() &&
- U->hasOneUse() &&
- U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- U->getOperand(0) == Vec &&
- U->getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
- SDValue OtherRound = SDValue(*U->use_begin(), 0);
- if (OtherRound.getOpcode() == ISD::FP_ROUND &&
- OtherRound.getOperand(0) == SDValue(U, 0) &&
- OtherRound.getValueType() == MVT::f32) {
- SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
- MVT::v4f32, Vec);
- DCI.AddToWorklist(VRound.getNode());
- SDValue Extract1 =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
- VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
- DCI.AddToWorklist(Extract1.getNode());
- DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
- SDValue Extract0 =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
- VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
- return Extract0;
- }
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f32 &&
+ Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v2f64 &&
+ Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ SDValue Vec = Op0.getOperand(0);
+ for (auto *U : Vec->uses()) {
+ if (U != Op0.getNode() &&
+ U->hasOneUse() &&
+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ U->getOperand(0) == Vec &&
+ U->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
+ SDValue OtherRound = SDValue(*U->use_begin(), 0);
+ if (OtherRound.getOpcode() == ISD::FP_ROUND &&
+ OtherRound.getOperand(0) == SDValue(U, 0) &&
+ OtherRound.getValueType() == MVT::f32) {
+ SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+ MVT::v4f32, Vec);
+ DCI.AddToWorklist(VRound.getNode());
+ SDValue Extract1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
+ VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
+ DCI.AddToWorklist(Extract1.getNode());
+ DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+ SDValue Extract0 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
+ VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ return Extract0;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineBSWAP(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
+ // These loads are allowed to access memory multiple times, and so we must check
+ // that the loads are not volatile before performing the combine.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
+ N->getValueType(0) == MVT::i64) &&
+ !cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+
+ // Create the byte-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad =
+ DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away, we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSHIFTROT(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Shift/rotate instructions only use the last 6 bits of the second operand
+ // register. If the second operand is the result of an AND with an immediate
+ // value that has its last 6 bits set, we can safely remove the AND operation.
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::AND) {
+ auto *AndMask = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+
+ // The AND mask is constant
+ if (AndMask) {
+ auto AmtVal = AndMask->getZExtValue();
+
+ // Bottom 6 bits are set
+ if ((AmtVal & 0x3f) == 0x3f) {
+ SDValue AndOp = N1->getOperand(0);
+
+ // This is the only use, so remove the node
+ if (N1.hasOneUse()) {
+ // Combine the AND away
+ DCI.CombineTo(N1.getNode(), AndOp);
+
+ // Return N so it isn't rechecked
+ return SDValue(N, 0);
+
+ // The node will be reused, so create a new node for this one use
+ } else {
+ SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
+ N->getValueType(0), N->getOperand(0),
+ AndOp);
+ DCI.AddToWorklist(Replace.getNode());
+
+ return Replace;
}
}
}
}
+
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
+ case SystemZISD::MERGE_HIGH:
+ case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
+ case ISD::STORE: return combineSTORE(N, DCI);
+ case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
+ case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+ case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+ case ISD::BSWAP: return combineBSWAP(N, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL: return combineSHIFTROT(N, DCI);
+ }
+
return SDValue();
}
@@ -4831,7 +5141,7 @@ static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
+static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB) {
MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
NewMBB->splice(NewMBB->begin(), MBB,
@@ -4841,7 +5151,7 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
}
// Split MBB before MI and return the new block (the one that contains MI).
-static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
+static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB) {
MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
@@ -4850,34 +5160,36 @@ static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
}
// Force base value Base into a register before MI. Return the register.
-static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
+static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
const SystemZInstrInfo *TII) {
if (Base.isReg())
return Base.getReg();
- MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock *MBB = MI.getParent();
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
- .addOperand(Base).addImm(0).addReg(0);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
+ .addOperand(Base)
+ .addImm(0)
+ .addReg(0);
return Reg;
}
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
-SystemZTargetLowering::emitSelect(MachineInstr *MI,
+SystemZTargetLowering::emitSelect(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
- unsigned DestReg = MI->getOperand(0).getReg();
- unsigned TrueReg = MI->getOperand(1).getReg();
- unsigned FalseReg = MI->getOperand(2).getReg();
- unsigned CCValid = MI->getOperand(3).getImm();
- unsigned CCMask = MI->getOperand(4).getImm();
- DebugLoc DL = MI->getDebugLoc();
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned TrueReg = MI.getOperand(1).getReg();
+ unsigned FalseReg = MI.getOperand(2).getReg();
+ unsigned CCValid = MI.getOperand(3).getImm();
+ unsigned CCMask = MI.getOperand(4).getImm();
+ DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
@@ -4905,7 +5217,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
.addReg(TrueReg).addMBB(StartMBB)
.addReg(FalseReg).addMBB(FalseMBB);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return JoinMBB;
}
@@ -4913,21 +5225,21 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
// StoreOpcode is the store to use and Invert says whether the store should
// happen when the condition is false rather than true. If a STORE ON
// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
-MachineBasicBlock *
-SystemZTargetLowering::emitCondStore(MachineInstr *MI,
- MachineBasicBlock *MBB,
- unsigned StoreOpcode, unsigned STOCOpcode,
- bool Invert) const {
+MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ unsigned StoreOpcode,
+ unsigned STOCOpcode,
+ bool Invert) const {
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
- unsigned SrcReg = MI->getOperand(0).getReg();
- MachineOperand Base = MI->getOperand(1);
- int64_t Disp = MI->getOperand(2).getImm();
- unsigned IndexReg = MI->getOperand(3).getReg();
- unsigned CCValid = MI->getOperand(4).getImm();
- unsigned CCMask = MI->getOperand(5).getImm();
- DebugLoc DL = MI->getDebugLoc();
+ unsigned SrcReg = MI.getOperand(0).getReg();
+ MachineOperand Base = MI.getOperand(1);
+ int64_t Disp = MI.getOperand(2).getImm();
+ unsigned IndexReg = MI.getOperand(3).getReg();
+ unsigned CCValid = MI.getOperand(4).getImm();
+ unsigned CCMask = MI.getOperand(5).getImm();
+ DebugLoc DL = MI.getDebugLoc();
StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
@@ -4940,7 +5252,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
.addReg(SrcReg).addOperand(Base).addImm(Disp)
.addImm(CCValid).addImm(CCMask);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return MBB;
}
@@ -4969,7 +5281,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
.addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
MBB->addSuccessor(JoinMBB);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return JoinMBB;
}
@@ -4980,12 +5292,9 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
// is one of the operands. Invert says whether the field should be
// inverted after performing BinOpcode (e.g. for NAND).
-MachineBasicBlock *
-SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
- MachineBasicBlock *MBB,
- unsigned BinOpcode,
- unsigned BitSize,
- bool Invert) const {
+MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
+ unsigned BitSize, bool Invert) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@@ -4994,15 +5303,15 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
// Extract the operands. Base can be a register or a frame index.
// Src2 can be a register or immediate.
- unsigned Dest = MI->getOperand(0).getReg();
- MachineOperand Base = earlyUseOperand(MI->getOperand(1));
- int64_t Disp = MI->getOperand(2).getImm();
- MachineOperand Src2 = earlyUseOperand(MI->getOperand(3));
- unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
- unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
- DebugLoc DL = MI->getDebugLoc();
+ unsigned Dest = MI.getOperand(0).getReg();
+ MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+ int64_t Disp = MI.getOperand(2).getImm();
+ MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
+ unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
+ unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+ DebugLoc DL = MI.getDebugLoc();
if (IsSubWord)
- BitSize = MI->getOperand(6).getImm();
+ BitSize = MI.getOperand(6).getImm();
// Subword operations use 32-bit registers.
const TargetRegisterClass *RC = (BitSize <= 32 ?
@@ -5090,7 +5399,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return DoneMBB;
}
@@ -5100,12 +5409,9 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
// minimum or maximum value. KeepOldMask is the BRC condition-code mask
// for when the current field should be kept. BitSize is the width of
// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
-MachineBasicBlock *
-SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
- MachineBasicBlock *MBB,
- unsigned CompareOpcode,
- unsigned KeepOldMask,
- unsigned BitSize) const {
+MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
+ unsigned KeepOldMask, unsigned BitSize) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@@ -5113,15 +5419,15 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
bool IsSubWord = (BitSize < 32);
// Extract the operands. Base can be a register or a frame index.
- unsigned Dest = MI->getOperand(0).getReg();
- MachineOperand Base = earlyUseOperand(MI->getOperand(1));
- int64_t Disp = MI->getOperand(2).getImm();
- unsigned Src2 = MI->getOperand(3).getReg();
- unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
- unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
- DebugLoc DL = MI->getDebugLoc();
+ unsigned Dest = MI.getOperand(0).getReg();
+ MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+ int64_t Disp = MI.getOperand(2).getImm();
+ unsigned Src2 = MI.getOperand(3).getReg();
+ unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
+ unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+ DebugLoc DL = MI.getDebugLoc();
if (IsSubWord)
- BitSize = MI->getOperand(6).getImm();
+ BitSize = MI.getOperand(6).getImm();
// Subword operations use 32-bit registers.
const TargetRegisterClass *RC = (BitSize <= 32 ?
@@ -5209,30 +5515,31 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return DoneMBB;
}
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
// instruction MI.
MachineBasicBlock *
-SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
+SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
MachineBasicBlock *MBB) const {
+
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
// Extract the operands. Base can be a register or a frame index.
- unsigned Dest = MI->getOperand(0).getReg();
- MachineOperand Base = earlyUseOperand(MI->getOperand(1));
- int64_t Disp = MI->getOperand(2).getImm();
- unsigned OrigCmpVal = MI->getOperand(3).getReg();
- unsigned OrigSwapVal = MI->getOperand(4).getReg();
- unsigned BitShift = MI->getOperand(5).getReg();
- unsigned NegBitShift = MI->getOperand(6).getReg();
- int64_t BitSize = MI->getOperand(7).getImm();
- DebugLoc DL = MI->getDebugLoc();
+ unsigned Dest = MI.getOperand(0).getReg();
+ MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+ int64_t Disp = MI.getOperand(2).getImm();
+ unsigned OrigCmpVal = MI.getOperand(3).getReg();
+ unsigned OrigSwapVal = MI.getOperand(4).getReg();
+ unsigned BitShift = MI.getOperand(5).getReg();
+ unsigned NegBitShift = MI.getOperand(6).getReg();
+ int64_t BitSize = MI.getOperand(7).getImm();
+ DebugLoc DL = MI.getDebugLoc();
const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
@@ -5323,7 +5630,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return DoneMBB;
}
@@ -5331,18 +5638,18 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
// if the high register of the GR128 value must be cleared or false if
// it's "don't care". SubReg is subreg_l32 when extending a GR32
// and subreg_l64 when extending a GR64.
-MachineBasicBlock *
-SystemZTargetLowering::emitExt128(MachineInstr *MI,
- MachineBasicBlock *MBB,
- bool ClearEven, unsigned SubReg) const {
+MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ bool ClearEven,
+ unsigned SubReg) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
+ unsigned Dest = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
@@ -5359,25 +5666,23 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
.addReg(In128).addReg(Src).addImm(SubReg);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return MBB;
}
-MachineBasicBlock *
-SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
- MachineBasicBlock *MBB,
- unsigned Opcode) const {
+MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
- MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
- uint64_t DestDisp = MI->getOperand(1).getImm();
- MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2));
- uint64_t SrcDisp = MI->getOperand(3).getImm();
- uint64_t Length = MI->getOperand(4).getImm();
+ MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
+ uint64_t DestDisp = MI.getOperand(1).getImm();
+ MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
+ uint64_t SrcDisp = MI.getOperand(3).getImm();
+ uint64_t Length = MI.getOperand(4).getImm();
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
@@ -5385,10 +5690,10 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
splitBlockAfter(MI, MBB) : nullptr);
// Check for the loop form, in which operand 5 is the trip count.
- if (MI->getNumExplicitOperands() > 5) {
+ if (MI.getNumExplicitOperands() > 5) {
bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
- uint64_t StartCountReg = MI->getOperand(5).getReg();
+ uint64_t StartCountReg = MI.getOperand(5).getReg();
uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
forceReg(MI, DestBase, TII));
@@ -5491,15 +5796,19 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
// Apply them using LAY if so.
if (!isUInt<12>(DestDisp)) {
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .addOperand(DestBase).addImm(DestDisp).addReg(0);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(DestBase)
+ .addImm(DestDisp)
+ .addReg(0);
DestBase = MachineOperand::CreateReg(Reg, false);
DestDisp = 0;
}
if (!isUInt<12>(SrcDisp)) {
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(SrcBase)
+ .addImm(SrcDisp)
+ .addReg(0);
SrcBase = MachineOperand::CreateReg(Reg, false);
SrcDisp = 0;
}
@@ -5527,26 +5836,24 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
MBB->addLiveIn(SystemZ::CC);
}
- MI->eraseFromParent();
+ MI.eraseFromParent();
return MBB;
}
// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
-MachineBasicBlock *
-SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
- MachineBasicBlock *MBB,
- unsigned Opcode) const {
+MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
- uint64_t End1Reg = MI->getOperand(0).getReg();
- uint64_t Start1Reg = MI->getOperand(1).getReg();
- uint64_t Start2Reg = MI->getOperand(2).getReg();
- uint64_t CharReg = MI->getOperand(3).getReg();
+ uint64_t End1Reg = MI.getOperand(0).getReg();
+ uint64_t Start1Reg = MI.getOperand(1).getReg();
+ uint64_t Start2Reg = MI.getOperand(2).getReg();
+ uint64_t CharReg = MI.getOperand(3).getReg();
const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
uint64_t This1Reg = MRI.createVirtualRegister(RC);
@@ -5589,26 +5896,24 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
DoneMBB->addLiveIn(SystemZ::CC);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return DoneMBB;
}
// Update TBEGIN instruction with final opcode and register clobbers.
-MachineBasicBlock *
-SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
- MachineBasicBlock *MBB,
- unsigned Opcode,
- bool NoFloat) const {
+MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
+ bool NoFloat) const {
MachineFunction &MF = *MBB->getParent();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
// Update opcode.
- MI->setDesc(TII->get(Opcode));
+ MI.setDesc(TII->get(Opcode));
// We cannot handle a TBEGIN that clobbers the stack or frame pointer.
// Make sure to add the corresponding GRSM bits if they are missing.
- uint64_t Control = MI->getOperand(2).getImm();
+ uint64_t Control = MI.getOperand(2).getImm();
static const unsigned GPRControlBit[16] = {
0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
@@ -5616,13 +5921,13 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
Control |= GPRControlBit[15];
if (TFI->hasFP(MF))
Control |= GPRControlBit[11];
- MI->getOperand(2).setImm(Control);
+ MI.getOperand(2).setImm(Control);
// Add GPR clobbers.
for (int I = 0; I < 16; I++) {
if ((Control & GPRControlBit[I]) == 0) {
unsigned Reg = SystemZMC::GR64Regs[I];
- MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
}
}
@@ -5631,12 +5936,12 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
if (Subtarget.hasVector()) {
for (int I = 0; I < 32; I++) {
unsigned Reg = SystemZMC::VR128Regs[I];
- MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
}
} else {
for (int I = 0; I < 16; I++) {
unsigned Reg = SystemZMC::FP64Regs[I];
- MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
}
}
}
@@ -5644,17 +5949,15 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
return MBB;
}
-MachineBasicBlock *
-SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI,
- MachineBasicBlock *MBB,
- unsigned Opcode) const {
+MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
+ MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo *MRI = &MF.getRegInfo();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
- unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(0).getReg();
// Create new virtual register of the same class as source.
const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
@@ -5664,14 +5967,14 @@ SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI,
// well.
BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
.addReg(SrcReg);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return MBB;
}
-MachineBasicBlock *SystemZTargetLowering::
-EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
- switch (MI->getOpcode()) {
+MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr &MI, MachineBasicBlock *MBB) const {
+ switch (MI.getOpcode()) {
case SystemZ::Select32Mux:
case SystemZ::Select32:
case SystemZ::SelectF32:
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 391636e5467f..b1de8936beed 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -146,6 +146,9 @@ enum NodeType : unsigned {
// Perform a serialization operation. (BCR 15,0 or BCR 14,0.)
SERIALIZE,
+ // Compiler barrier only; generate a no-op.
+ MEMBARRIER,
+
// Transaction begin. The first operand is the chain, the second
// the TDB pointer, and the third the immediate control field.
// Returns chain and glue.
@@ -275,6 +278,12 @@ enum NodeType : unsigned {
VSTRC_CC,
VSTRCZ_CC,
+ // Test Data Class.
+ //
+ // Operand 0: the value to test
+ // Operand 1: the bit mask
+ TDC,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -308,6 +317,19 @@ enum NodeType : unsigned {
// Operand 5: the width of the field in bits (8 or 16)
ATOMIC_CMP_SWAPW,
+ // Byte swapping load.
+ //
+ // Operand 0: the address to load from
+ // Operand 1: the type of load (i16, i32, i64)
+ LRV,
+
+ // Byte swapping store.
+ //
+ // Operand 0: the value to store
+ // Operand 1: the address to store to
+ // Operand 2: the type of store (i16, i32, i64)
+ STRV,
+
// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
// a store prefetch.
@@ -423,16 +445,23 @@ public:
return SystemZ::R7D;
}
- MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const
- override;
+ /// Override to support customized stack guard loading.
+ bool useLoadStackGuardNode() const override {
+ return true;
+ }
+ void insertSSPDeclarations(Module &M) const override {
+ }
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *BB) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
bool allowTruncateForTailCall(Type *, Type *) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
+ const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
@@ -443,12 +472,20 @@ public:
LLVMContext &Context) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc DL, SelectionDAG &DAG) const override;
- SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
+ const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
+ SelectionDAG &DAG) const override;
+ SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ ISD::NodeType getExtendForAtomicOps() const override {
+ return ISD::ANY_EXTEND;
+ }
+
+ bool supportSwiftError() const override {
+ return true;
+ }
+
private:
const SystemZSubtarget &Subtarget;
@@ -461,15 +498,19 @@ private:
SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
SelectionDAG &DAG, unsigned Opcode,
SDValue GOTOffset) const;
+ SDValue lowerThreadPointer(const SDLoc &DL, SelectionDAG &DAG) const;
SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const;
SDValue lowerBlockAddress(BlockAddressSDNode *Node,
SelectionDAG &DAG) const;
SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const;
SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
+ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
@@ -477,6 +518,7 @@ private:
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
@@ -498,11 +540,19 @@ private:
unsigned UnpackHigh) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
- SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
+ SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
unsigned Index, DAGCombinerInfo &DCI,
bool Force) const;
- SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
+ SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op,
DAGCombinerInfo &DCI) const;
+ SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -514,40 +564,33 @@ private:
MachineBasicBlock *Target) const;
// Implement EmitInstrWithCustomInserter for individual operation types.
- MachineBasicBlock *emitSelect(MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitCondStore(MachineInstr *MI,
- MachineBasicBlock *BB,
+ MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const;
- MachineBasicBlock *emitExt128(MachineInstr *MI,
- MachineBasicBlock *MBB,
+ MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB,
bool ClearEven, unsigned SubReg) const;
- MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI,
MachineBasicBlock *BB,
unsigned BinOpcode, unsigned BitSize,
bool Invert = false) const;
- MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr &MI,
MachineBasicBlock *MBB,
unsigned CompareOpcode,
unsigned KeepOldMask,
unsigned BitSize) const;
- MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr &MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
- MachineBasicBlock *BB,
+ MachineBasicBlock *emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *BB,
unsigned Opcode) const;
- MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
- MachineBasicBlock *BB,
+ MachineBasicBlock *emitStringWrapper(MachineInstr &MI, MachineBasicBlock *BB,
unsigned Opcode) const;
- MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *emitTransactionBegin(MachineInstr &MI,
MachineBasicBlock *MBB,
- unsigned Opcode,
- bool NoFloat) const;
- MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI,
+ unsigned Opcode, bool NoFloat) const;
+ MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
MachineBasicBlock *MBB,
unsigned Opcode) const;
-
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index 5a1c874dfa36..2cb8aba1b322 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -29,7 +29,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
MachineFunction &MF = *MI->getParent()->getParent();
MachineFrameInfo *MFFrame = MF.getFrameInfo();
const MCInstrDesc &MCID = MI->getDesc();
- unsigned Flags = 0;
+ auto Flags = MachineMemOperand::MONone;
if (MCID.mayLoad())
Flags |= MachineMemOperand::MOLoad;
if (MCID.mayStore())
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 0cb267290cc1..8b32047e08e3 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -37,6 +37,10 @@ let hasSideEffects = 0 in {
def LER : UnaryRR <"le", 0x38, null_frag, FP32, FP32>;
def LDR : UnaryRR <"ld", 0x28, null_frag, FP64, FP64>;
def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>;
+
+ // For z13 we prefer LDR over LER to avoid partial register dependencies.
+ let isCodeGenOnly = 1 in
+ def LDR32 : UnaryRR<"ld", 0x28, null_frag, FP32, FP32>;
}
// Moves between two floating-point registers that also set the condition
@@ -443,6 +447,13 @@ let Defs = [CC], CCValues = 0xF in {
def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
}
+// Test Data Class.
+let Defs = [CC], CCValues = 0xC in {
+ def TCEB : TestRXE<"tceb", 0xED10, z_tdc, FP32>;
+ def TCDB : TestRXE<"tcdb", 0xED11, z_tdc, FP64>;
+ def TCXB : TestRXE<"tcxb", 0xED12, z_tdc, FP128>;
+}
+
//===----------------------------------------------------------------------===//
// Peepholes
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 01f4cdec05cb..973894d5c001 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -158,6 +158,17 @@ def getThreeOperandOpcode : InstrMapping {
//
//===----------------------------------------------------------------------===//
+class InstI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<2, outs, ins, asmstr, pattern> {
+ field bits<16> Inst;
+ field bits<16> SoftFail = 0;
+
+ bits<8> I1;
+
+ let Inst{15-8} = op;
+ let Inst{7-0} = I1;
+}
+
class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
@@ -172,6 +183,24 @@ class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-0} = I2;
}
+class InstRIEa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> I2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = 0;
+ let Inst{31-16} = I2;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
class InstRIEb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
@@ -260,6 +289,24 @@ class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{31-0} = I2;
}
+class InstRIS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<8> I2;
+ bits<4> M3;
+ bits<16> BD4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = M3;
+ let Inst{31-16} = BD4;
+ let Inst{15-8} = I2;
+ let Inst{7-0} = op{7-0};
+}
+
class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<2, outs, ins, asmstr, pattern> {
field bits<16> Inst;
@@ -320,6 +367,41 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{3-0} = R2;
}
+class InstRRFc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M3;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M3;
+ bits<16> BD4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R2;
+ let Inst{31-16} = BD4;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
@@ -919,6 +1001,10 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// Compare:
// Two input operands and an implicit CC output operand.
//
+// Test:
+// Two input operands and an implicit CC output operand. The second
+// input operand is an "address" operand used as a test class mask.
+//
// Ternary:
// One register output operand and three input operands.
//
@@ -974,12 +1060,30 @@ class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
let DisableEncoding = "$R1src";
}
-class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
- : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2),
+class LoadMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+ AddressingMode mode = bdaddr12only>
+ : InstRS<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+ let mayLoad = 1;
+}
+
+class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let mayLoad = 1;
}
+multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode,
+ bits<16> rsyOpcode, RegisterOperand cls> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : LoadMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : LoadMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>;
+ }
+}
+
class LoadMultipleVRSa<string mnemonic, bits<16> opcode>
: InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2),
mnemonic#"\t$V1, $V3, $BD2", []> {
@@ -1055,12 +1159,30 @@ class StoreLengthVRSb<string mnemonic, bits<16> opcode,
let AccessBytes = bytes;
}
-class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
- : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2),
+class StoreMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+ AddressingMode mode = bdaddr12only>
+ : InstRS<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+ let mayStore = 1;
+}
+
+class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let mayStore = 1;
}
+multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode,
+ bits<16> rsyOpcode, RegisterOperand cls> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : StoreMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : StoreMultipleRSY<mnemonic#"y", rsyOpcode, cls, bdaddr20pair>;
+ }
+}
+
class StoreMultipleVRSa<string mnemonic, bits<16> opcode>
: InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2),
mnemonic#"\t$V1, $V3, $BD2", []> {
@@ -1186,6 +1308,15 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let R4 = 0;
}
+class CondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ Immediate imm>
+ : InstRIEd<opcode, (outs cls:$R1),
+ (ins imm:$I2, cond4:$valid, cond4:$R3),
+ mnemonic#"$R3\t$R1, $I2", []>,
+ Requires<[FeatureLoadStoreOnCond2]> {
+ let CCMaskLast = 1;
+}
+
// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic.
class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
@@ -1198,6 +1329,16 @@ class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let R4 = 0;
}
+class AsmCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ Immediate imm>
+ : InstRIEd<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, imm32zx4:$R3),
+ mnemonic#"\t$R1, $I2, $R3", []>,
+ Requires<[FeatureLoadStoreOnCond2]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
// Like CondUnaryRRF, but with a fixed CC mask.
class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2, bits<4> ccmask>
@@ -1210,6 +1351,17 @@ class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let R4 = 0;
}
+class FixedCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ Immediate imm, bits<4> ccmask>
+ : InstRIEd<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm:$I2),
+ mnemonic#"\t$R1, $I2", []>,
+ Requires<[FeatureLoadStoreOnCond2]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let R3 = ccmask;
+}
+
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRI<opcode, (outs cls:$R1), (ins imm:$I2),
@@ -1391,9 +1543,9 @@ class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2),
+ : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3),
mnemonic#"r\t$R1, $R3, $R2",
- [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> {
+ [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> {
let OpKey = mnemonic ## cls1;
let OpType = "reg";
let R4 = 0;
@@ -1874,6 +2026,14 @@ class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M5 = 0;
}
+class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, bdxaddr12only:$XBD2)]> {
+ let M3 = 0;
+}
+
class TernaryRRD<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls>
: InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2),
@@ -1885,6 +2045,40 @@ class TernaryRRD<string mnemonic, bits<16> opcode,
let DisableEncoding = "$R1src";
}
+class TernaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr12only>
+ : InstRS<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm32zx4:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm32zx4:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
+
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
+ RegisterOperand cls, bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ let DispSize = "12" in
+ def "" : TernaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
+ let DispSize = "20" in
+ def Y : TernaryRSY<mnemonic#"y", rsyOpcode, cls, bytes, bdaddr20pair>;
+ }
+}
+
class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
: InstRXF<opcode, (outs cls:$R1),
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index e6b5fc8e6235..4084e93e5acb 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -15,6 +15,7 @@
#include "SystemZInstrBuilder.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@@ -54,7 +55,7 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
// Get two load or store instructions. Use the original instruction for one
// of them (arbitrarily the second here) and create a clone for the other.
- MachineInstr *EarlierMI = MF.CloneMachineInstr(MI);
+ MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI);
MBB->insert(MI, EarlierMI);
// Set up the two 64-bit registers.
@@ -69,8 +70,8 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
MachineOperand &LowOffsetOp = MI->getOperand(2);
LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
- // Clear the kill flags for the base and index registers in the first
- // instruction.
+ // Clear the kill flags for the base and index registers in the first
+ // instruction.
EarlierMI->getOperand(1).setIsKill(false);
EarlierMI->getOperand(3).setIsKill(false);
@@ -105,59 +106,89 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
// and HighOpcode takes an unsigned 32-bit operand. In those cases,
// MI has the same kind of operand as LowOpcode, so needs to be converted
// if HighOpcode is used.
-void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
+void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode,
bool ConvertHigh) const {
- unsigned Reg = MI->getOperand(0).getReg();
+ unsigned Reg = MI.getOperand(0).getReg();
bool IsHigh = isHighReg(Reg);
- MI->setDesc(get(IsHigh ? HighOpcode : LowOpcode));
+ MI.setDesc(get(IsHigh ? HighOpcode : LowOpcode));
if (IsHigh && ConvertHigh)
- MI->getOperand(1).setImm(uint32_t(MI->getOperand(1).getImm()));
+ MI.getOperand(1).setImm(uint32_t(MI.getOperand(1).getImm()));
}
// MI is a three-operand RIE-style pseudo instruction. Replace it with
// LowOpcodeK if the registers are both low GR32s, otherwise use a move
// followed by HighOpcode or LowOpcode, depending on whether the target
// is a high or low GR32.
-void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned LowOpcodeK,
unsigned HighOpcode) const {
- unsigned DestReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
bool DestIsHigh = isHighReg(DestReg);
bool SrcIsHigh = isHighReg(SrcReg);
if (!DestIsHigh && !SrcIsHigh)
- MI->setDesc(get(LowOpcodeK));
+ MI.setDesc(get(LowOpcodeK));
else {
- emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
- DestReg, SrcReg, SystemZ::LR, 32,
- MI->getOperand(1).isKill());
- MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
- MI->getOperand(1).setReg(DestReg);
- MI->tieOperands(0, 1);
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
+ SystemZ::LR, 32, MI.getOperand(1).isKill());
+ MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
+ MI.getOperand(1).setReg(DestReg);
+ MI.tieOperands(0, 1);
}
}
// MI is an RXY-style pseudo instruction. Replace it with LowOpcode
// if the first operand is a low GR32 and HighOpcode if the first operand
// is a high GR32.
-void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const {
- unsigned Reg = MI->getOperand(0).getReg();
+ unsigned Reg = MI.getOperand(0).getReg();
unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode,
- MI->getOperand(2).getImm());
- MI->setDesc(get(Opcode));
+ MI.getOperand(2).getImm());
+ MI.setDesc(get(Opcode));
}
// MI is an RR-style pseudo instruction that zero-extends the low Size bits
// of one GRX32 into another. Replace it with LowOpcode if both operands
// are low registers, otherwise use RISB[LH]G.
-void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const {
- emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
- MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
- LowOpcode, Size, MI->getOperand(1).isKill());
- MI->eraseFromParent();
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
+ MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
+ Size, MI.getOperand(1).isKill());
+ MI.eraseFromParent();
+}
+
+void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const unsigned Reg = MI->getOperand(0).getReg();
+
+ // Conveniently, all 4 instructions are cloned from LOAD_STACK_GUARD,
+ // so they already have operand 0 set to reg.
+
+ // ear <reg>, %a0
+ MachineInstr *Ear1MI = MF.CloneMachineInstr(MI);
+ MBB->insert(MI, Ear1MI);
+ Ear1MI->setDesc(get(SystemZ::EAR));
+ MachineInstrBuilder(MF, Ear1MI).addImm(0);
+
+ // sllg <reg>, <reg>, 32
+ MachineInstr *SllgMI = MF.CloneMachineInstr(MI);
+ MBB->insert(MI, SllgMI);
+ SllgMI->setDesc(get(SystemZ::SLLG));
+ MachineInstrBuilder(MF, SllgMI).addReg(Reg).addReg(0).addImm(32);
+
+ // ear <reg>, %a1
+ MachineInstr *Ear2MI = MF.CloneMachineInstr(MI);
+ MBB->insert(MI, Ear2MI);
+ Ear2MI->setDesc(get(SystemZ::EAR));
+ MachineInstrBuilder(MF, Ear2MI).addImm(1);
+
+ // lg <reg>, 40(<reg>)
+ MI->setDesc(get(SystemZ::LG));
+ MachineInstrBuilder(MF, MI).addReg(Reg).addImm(40).addReg(0);
}
// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
@@ -167,7 +198,7 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
// KillSrc is true if this move is the last use of SrcReg.
void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned DestReg,
+ const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, unsigned LowLowOpcode,
unsigned Size, bool KillSrc) const {
unsigned Opcode;
@@ -196,45 +227,41 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
// Return 0 otherwise.
//
// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
-static int isSimpleMove(const MachineInstr *MI, int &FrameIndex,
+static int isSimpleMove(const MachineInstr &MI, int &FrameIndex,
unsigned Flag) {
- const MCInstrDesc &MCID = MI->getDesc();
- if ((MCID.TSFlags & Flag) &&
- MI->getOperand(1).isFI() &&
- MI->getOperand(2).getImm() == 0 &&
- MI->getOperand(3).getReg() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ const MCInstrDesc &MCID = MI.getDesc();
+ if ((MCID.TSFlags & Flag) && MI.getOperand(1).isFI() &&
+ MI.getOperand(2).getImm() == 0 && MI.getOperand(3).getReg() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
return 0;
}
-unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad);
}
-unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore);
}
-bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI,
+bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr &MI,
int &DestFrameIndex,
int &SrcFrameIndex) const {
// Check for MVC 0(Length,FI1),0(FI2)
- const MachineFrameInfo *MFI = MI->getParent()->getParent()->getFrameInfo();
- if (MI->getOpcode() != SystemZ::MVC ||
- !MI->getOperand(0).isFI() ||
- MI->getOperand(1).getImm() != 0 ||
- !MI->getOperand(3).isFI() ||
- MI->getOperand(4).getImm() != 0)
+ const MachineFrameInfo *MFI = MI.getParent()->getParent()->getFrameInfo();
+ if (MI.getOpcode() != SystemZ::MVC || !MI.getOperand(0).isFI() ||
+ MI.getOperand(1).getImm() != 0 || !MI.getOperand(3).isFI() ||
+ MI.getOperand(4).getImm() != 0)
return false;
// Check that Length covers the full slots.
- int64_t Length = MI->getOperand(2).getImm();
- unsigned FI1 = MI->getOperand(0).getIndex();
- unsigned FI2 = MI->getOperand(3).getIndex();
+ int64_t Length = MI.getOperand(2).getImm();
+ unsigned FI1 = MI.getOperand(0).getIndex();
+ unsigned FI2 = MI.getOperand(3).getIndex();
if (MFI->getObjectSize(FI1) != Length ||
MFI->getObjectSize(FI2) != Length)
return false;
@@ -244,7 +271,7 @@ bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI,
return true;
}
-bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -261,7 +288,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Working from the bottom, when we see a non-terminator instruction, we're
// done.
- if (!isUnpredicatedTerminator(I))
+ if (!isUnpredicatedTerminator(*I))
break;
// A terminator that isn't a branch can't easily be handled by this
@@ -270,7 +297,7 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
// Can't handle indirect branches.
- SystemZII::Branch Branch(getBranchInfo(I));
+ SystemZII::Branch Branch(getBranchInfo(*I));
if (!Branch.Target->isMBB())
return true;
@@ -347,7 +374,7 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
continue;
if (!I->isBranch())
break;
- if (!getBranchInfo(I).Target->isMBB())
+ if (!getBranchInfo(*I).Target->isMBB())
break;
// Remove the branch.
I->eraseFromParent();
@@ -365,11 +392,11 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
-unsigned
-SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- ArrayRef<MachineOperand> Cond,
- DebugLoc DL) const {
+unsigned SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ const DebugLoc &DL) const {
// In this function we output 32-bit branches, which should always
// have enough range. They can be shortened and relaxed by later code
// in the pipeline, if desired.
@@ -402,17 +429,16 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return Count;
}
-bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
- unsigned &SrcReg, unsigned &SrcReg2,
- int &Mask, int &Value) const {
- assert(MI->isCompare() && "Caller should have checked for a comparison");
+bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &Mask,
+ int &Value) const {
+ assert(MI.isCompare() && "Caller should have checked for a comparison");
- if (MI->getNumExplicitOperands() == 2 &&
- MI->getOperand(0).isReg() &&
- MI->getOperand(1).isImm()) {
- SrcReg = MI->getOperand(0).getReg();
+ if (MI.getNumExplicitOperands() == 2 && MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isImm()) {
+ SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
- Value = MI->getOperand(1).getImm();
+ Value = MI.getOperand(1).getImm();
Mask = ~0;
return true;
}
@@ -445,7 +471,7 @@ static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
// the result of an IPM sequence whose input CC survives until Compare,
// and whether Compare is therefore redundant. Delete it and return
// true if so.
-static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
+static bool removeIPMBasedCompare(MachineInstr &Compare, unsigned SrcReg,
const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI) {
MachineInstr *LGFR = nullptr;
@@ -466,16 +492,16 @@ static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
return false;
// Check that there are no assignments to CC between the IPM and Compare,
- if (IPM->getParent() != Compare->getParent())
+ if (IPM->getParent() != Compare.getParent())
return false;
- MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare;
+ MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare.getIterator();
for (++MBBI; MBBI != MBBE; ++MBBI) {
- MachineInstr *MI = MBBI;
- if (MI->modifiesRegister(SystemZ::CC, TRI))
+ MachineInstr &MI = *MBBI;
+ if (MI.modifiesRegister(SystemZ::CC, TRI))
return false;
}
- Compare->eraseFromParent();
+ Compare.eraseFromParent();
if (LGFR)
eraseIfDead(LGFR, MRI);
eraseIfDead(RLL, MRI);
@@ -485,13 +511,11 @@ static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
return true;
}
-bool
-SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
- unsigned SrcReg, unsigned SrcReg2,
- int Mask, int Value,
- const MachineRegisterInfo *MRI) const {
+bool SystemZInstrInfo::optimizeCompareInstr(
+ MachineInstr &Compare, unsigned SrcReg, unsigned SrcReg2, int Mask,
+ int Value, const MachineRegisterInfo *MRI) const {
assert(!SrcReg2 && "Only optimizing constant comparisons so far");
- bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
+ bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0;
return Value == 0 && !IsLogical &&
removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
}
@@ -506,15 +530,43 @@ static unsigned getConditionalMove(unsigned Opcode) {
}
}
-bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const {
- unsigned Opcode = MI->getOpcode();
- return STI.hasLoadStoreOnCond() && getConditionalMove(Opcode);
+static unsigned getConditionalLoadImmediate(unsigned Opcode) {
+ switch (Opcode) {
+ case SystemZ::LHI: return SystemZ::LOCHI;
+ case SystemZ::LGHI: return SystemZ::LOCGHI;
+ default: return 0;
+ }
+}
+
+bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode))
+ return true;
+ if (STI.hasLoadStoreOnCond2() && getConditionalLoadImmediate(Opcode))
+ return true;
+ if (Opcode == SystemZ::Return ||
+ Opcode == SystemZ::Trap ||
+ Opcode == SystemZ::CallJG ||
+ Opcode == SystemZ::CallBR)
+ return true;
+ return false;
}
bool SystemZInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
BranchProbability Probability) const {
+ // Avoid using conditional returns at the end of a loop (since then
+ // we'd need to emit an unconditional branch to the beginning anyway,
+ // making the loop body longer). This doesn't apply for low-probability
+ // loops (eg. compare-and-swap retry), so just decide based on branch
+ // probability instead of looping structure.
+ // However, since Compare and Trap instructions cost the same as a regular
+ // Compare instruction, we should allow the if conversion to convert this
+ // into a Conditional Compare regardless of the branch probability.
+ if (MBB.getLastNonDebugInstr()->getOpcode() != SystemZ::Trap &&
+ MBB.succ_empty() && Probability < BranchProbability(1, 8))
+ return false;
// For now only convert single instructions.
return NumCycles == 1;
}
@@ -530,27 +582,82 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
}
bool SystemZInstrInfo::
-PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
+isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const {
+ // For now only duplicate single instructions.
+ return NumCycles == 1;
+}
+
+bool SystemZInstrInfo::PredicateInstruction(
+ MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
assert(Pred.size() == 2 && "Invalid condition");
unsigned CCValid = Pred[0].getImm();
unsigned CCMask = Pred[1].getImm();
assert(CCMask > 0 && CCMask < 15 && "Invalid predicate");
- unsigned Opcode = MI->getOpcode();
+ unsigned Opcode = MI.getOpcode();
if (STI.hasLoadStoreOnCond()) {
if (unsigned CondOpcode = getConditionalMove(Opcode)) {
- MI->setDesc(get(CondOpcode));
- MachineInstrBuilder(*MI->getParent()->getParent(), MI)
- .addImm(CCValid).addImm(CCMask)
- .addReg(SystemZ::CC, RegState::Implicit);
+ MI.setDesc(get(CondOpcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid)
+ .addImm(CCMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
return true;
}
}
+ if (STI.hasLoadStoreOnCond2()) {
+ if (unsigned CondOpcode = getConditionalLoadImmediate(Opcode)) {
+ MI.setDesc(get(CondOpcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid)
+ .addImm(CCMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
+ }
+ if (Opcode == SystemZ::Trap) {
+ MI.setDesc(get(SystemZ::CondTrap));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid).addImm(CCMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
+ if (Opcode == SystemZ::Return) {
+ MI.setDesc(get(SystemZ::CondReturn));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid).addImm(CCMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
+ if (Opcode == SystemZ::CallJG) {
+ MachineOperand FirstOp = MI.getOperand(0);
+ const uint32_t *RegMask = MI.getOperand(1).getRegMask();
+ MI.RemoveOperand(1);
+ MI.RemoveOperand(0);
+ MI.setDesc(get(SystemZ::CallBRCL));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid).addImm(CCMask)
+ .addOperand(FirstOp)
+ .addRegMask(RegMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
+ if (Opcode == SystemZ::CallBR) {
+ const uint32_t *RegMask = MI.getOperand(0).getRegMask();
+ MI.RemoveOperand(0);
+ MI.setDesc(get(SystemZ::CallBCR));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(CCValid).addImm(CCMask)
+ .addRegMask(RegMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
+ return true;
+ }
return false;
}
void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned DestReg,
+ const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc) const {
// Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too.
if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
@@ -571,7 +678,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LGR;
else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
- Opcode = SystemZ::LER;
+ // For z13 we prefer LDR over LER to avoid partial register dependencies.
+ Opcode = STI.hasVector() ? SystemZ::LDR32 : SystemZ::LER;
else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LDR;
else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
@@ -654,6 +762,14 @@ static LogicOp interpretAndImmediate(unsigned Opcode) {
}
}
+static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) {
+ if (OldMI->registerDefIsDead(SystemZ::CC)) {
+ MachineOperand *CCDef = NewMI->findRegisterDefOperand(SystemZ::CC);
+ if (CCDef != nullptr)
+ CCDef->setIsDead(true);
+ }
+}
+
// Used to return from convertToThreeAddress after replacing two-address
// instruction OldMI with three-address instruction NewMI.
static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
@@ -664,31 +780,29 @@ static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
for (unsigned I = 1; I < NumOps; ++I) {
MachineOperand &Op = OldMI->getOperand(I);
if (Op.isReg() && Op.isKill())
- LV->replaceKillInstruction(Op.getReg(), OldMI, NewMI);
+ LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI);
}
}
+ transferDeadCC(OldMI, NewMI);
return NewMI;
}
-MachineInstr *
-SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const {
- MachineInstr *MI = MBBI;
- MachineBasicBlock *MBB = MI->getParent();
+MachineInstr *SystemZInstrInfo::convertToThreeAddress(
+ MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
+ MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Opcode = MI->getOpcode();
- unsigned NumOps = MI->getNumOperands();
+ unsigned Opcode = MI.getOpcode();
+ unsigned NumOps = MI.getNumOperands();
// Try to convert something like SLL into SLLK, if supported.
// We prefer to keep the two-operand form where possible both
// because it tends to be shorter and because some instructions
// have memory forms that can be used during spilling.
if (STI.hasDistinctOps()) {
- MachineOperand &Dest = MI->getOperand(0);
- MachineOperand &Src = MI->getOperand(1);
+ MachineOperand &Dest = MI.getOperand(0);
+ MachineOperand &Src = MI.getOperand(1);
unsigned DestReg = Dest.getReg();
unsigned SrcReg = Src.getReg();
// AHIMux is only really a three-operand instruction when both operands
@@ -707,23 +821,23 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// Create three address instruction without adding the implicit
// operands. Those will instead be copied over from the original
// instruction by the loop below.
- MachineInstrBuilder MIB(*MF,
- MF->CreateMachineInstr(get(ThreeOperandOpcode),
- MI->getDebugLoc(), /*NoImplicit=*/true));
+ MachineInstrBuilder MIB(
+ *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
+ /*NoImplicit=*/true));
MIB.addOperand(Dest);
// Keep the kill state, but drop the tied flag.
MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
// Keep the remaining operands as-is.
for (unsigned I = 2; I < NumOps; ++I)
- MIB.addOperand(MI->getOperand(I));
+ MIB.addOperand(MI.getOperand(I));
MBB->insert(MI, MIB);
- return finishConvertToThreeAddress(MI, MIB, LV);
+ return finishConvertToThreeAddress(&MI, MIB, LV);
}
}
// Try to convert an AND into an RISBG-type instruction.
if (LogicOp And = interpretAndImmediate(Opcode)) {
- uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB;
+ uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB;
// AND IMMEDIATE leaves the other bits of the register unchanged.
Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
unsigned Start, End;
@@ -739,36 +853,55 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
Start &= 31;
End &= 31;
}
- MachineOperand &Dest = MI->getOperand(0);
- MachineOperand &Src = MI->getOperand(1);
+ MachineOperand &Dest = MI.getOperand(0);
+ MachineOperand &Src = MI.getOperand(1);
MachineInstrBuilder MIB =
- BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode))
- .addOperand(Dest).addReg(0)
- .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg())
- .addImm(Start).addImm(End + 128).addImm(0);
- return finishConvertToThreeAddress(MI, MIB, LV);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpcode))
+ .addOperand(Dest)
+ .addReg(0)
+ .addReg(Src.getReg(), getKillRegState(Src.isKill()),
+ Src.getSubReg())
+ .addImm(Start)
+ .addImm(End + 128)
+ .addImm(0);
+ return finishConvertToThreeAddress(&MI, MIB, LV);
}
}
return nullptr;
}
MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
- MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex,
+ LiveIntervals *LIS) const {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
- unsigned Opcode = MI->getOpcode();
+ unsigned Opcode = MI.getOpcode();
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
- if ((Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
- isInt<8>(MI->getOperand(2).getImm()) &&
- !MI->getOperand(3).getReg()) {
- // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
- return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
- get(SystemZ::AGSI))
- .addFrameIndex(FrameIndex)
- .addImm(0)
- .addImm(MI->getOperand(2).getImm());
+ if (LIS != nullptr && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
+ isInt<8>(MI.getOperand(2).getImm()) && !MI.getOperand(3).getReg()) {
+
+ // Check CC liveness, since new instruction introduces a dead
+ // def of CC.
+ MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
+ LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit);
+ ++CCUnit;
+ assert (!CCUnit.isValid() && "CC only has one reg unit.");
+ SlotIndex MISlot =
+ LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ if (!CCLiveRange.liveAt(MISlot)) {
+ // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
+ MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI.getDebugLoc(), get(SystemZ::AGSI))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI.getOperand(2).getImm());
+ BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true);
+ CCLiveRange.createDeadDef(MISlot, LIS->getVNInfoAllocator());
+ return BuiltMI;
+ }
}
return nullptr;
}
@@ -778,20 +911,23 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
return nullptr;
unsigned OpNum = Ops[0];
- assert(Size == MF.getRegInfo()
- .getRegClass(MI->getOperand(OpNum).getReg())->getSize() &&
+ assert(Size ==
+ MF.getRegInfo()
+ .getRegClass(MI.getOperand(OpNum).getReg())
+ ->getSize() &&
"Invalid size combination");
- if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) &&
- OpNum == 0 &&
- isInt<8>(MI->getOperand(2).getImm())) {
+ if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 &&
+ isInt<8>(MI.getOperand(2).getImm())) {
// A(G)HI %reg, CONST -> A(G)SI %mem, CONST
Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI);
- return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
- get(Opcode))
- .addFrameIndex(FrameIndex)
- .addImm(0)
- .addImm(MI->getOperand(2).getImm());
+ MachineInstr *BuiltMI =
+ BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(Opcode))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI.getOperand(2).getImm());
+ transferDeadCC(&MI, BuiltMI);
+ return BuiltMI;
}
if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
@@ -801,9 +937,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// source register instead.
if (OpNum == 0) {
unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD;
- return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(StoreOpcode))
- .addOperand(MI->getOperand(1))
+ .addOperand(MI.getOperand(1))
.addFrameIndex(FrameIndex)
.addImm(0)
.addReg(0);
@@ -812,8 +948,8 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// destination register instead.
if (OpNum == 1) {
unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD;
- unsigned Dest = MI->getOperand(0).getReg();
- return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ unsigned Dest = MI.getOperand(0).getReg();
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(LoadOpcode), Dest)
.addFrameIndex(FrameIndex)
.addImm(0)
@@ -834,26 +970,26 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// might be equal. We don't worry about that case here, because spill slot
// coloring happens later, and because we have special code to remove
// MVCs that turn out to be redundant.
- if (OpNum == 0 && MI->hasOneMemOperand()) {
- MachineMemOperand *MMO = *MI->memoperands_begin();
+ if (OpNum == 0 && MI.hasOneMemOperand()) {
+ MachineMemOperand *MMO = *MI.memoperands_begin();
if (MMO->getSize() == Size && !MMO->isVolatile()) {
// Handle conversion of loads.
- if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) {
- return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXLoad)) {
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(SystemZ::MVC))
.addFrameIndex(FrameIndex)
.addImm(0)
.addImm(Size)
- .addOperand(MI->getOperand(1))
- .addImm(MI->getOperand(2).getImm())
+ .addOperand(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
.addMemOperand(MMO);
}
// Handle conversion of stores.
- if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) {
- return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) {
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(SystemZ::MVC))
- .addOperand(MI->getOperand(1))
- .addImm(MI->getOperand(2).getImm())
+ .addOperand(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
.addImm(Size)
.addFrameIndex(FrameIndex)
.addImm(0)
@@ -866,7 +1002,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// into <INSN>.
int MemOpcode = SystemZ::getMemOpcode(Opcode);
if (MemOpcode >= 0) {
- unsigned NumOps = MI->getNumExplicitOperands();
+ unsigned NumOps = MI.getNumExplicitOperands();
if (OpNum == NumOps - 1) {
const MCInstrDesc &MemDesc = get(MemOpcode);
uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
@@ -874,12 +1010,13 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
assert(AccessBytes <= Size && "Access outside the frame index");
uint64_t Offset = Size - AccessBytes;
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
- MI->getDebugLoc(), get(MemOpcode));
+ MI.getDebugLoc(), get(MemOpcode));
for (unsigned I = 0; I < OpNum; ++I)
- MIB.addOperand(MI->getOperand(I));
+ MIB.addOperand(MI.getOperand(I));
MIB.addFrameIndex(FrameIndex).addImm(Offset);
if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0);
+ transferDeadCC(&MI, MIB);
return MIB;
}
}
@@ -888,14 +1025,14 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
}
MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
- MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+ LiveIntervals *LIS) const {
return nullptr;
}
-bool
-SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- switch (MI->getOpcode()) {
+bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case SystemZ::L128:
splitMove(MI, SystemZ::LG);
return true;
@@ -1033,13 +1170,13 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return true;
case SystemZ::RISBMux: {
- bool DestIsHigh = isHighReg(MI->getOperand(0).getReg());
- bool SrcIsHigh = isHighReg(MI->getOperand(2).getReg());
+ bool DestIsHigh = isHighReg(MI.getOperand(0).getReg());
+ bool SrcIsHigh = isHighReg(MI.getOperand(2).getReg());
if (SrcIsHigh == DestIsHigh)
- MI->setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL));
+ MI.setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL));
else {
- MI->setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH));
- MI->getOperand(5).setImm(MI->getOperand(5).getImm() ^ 32);
+ MI.setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH));
+ MI.getOperand(5).setImm(MI.getOperand(5).getImm() ^ 32);
}
return true;
}
@@ -1048,62 +1185,65 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
splitAdjDynAlloc(MI);
return true;
+ case TargetOpcode::LOAD_STACK_GUARD:
+ expandLoadStackGuard(&MI);
+ return true;
+
default:
return false;
}
}
-uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const {
- if (MI->getOpcode() == TargetOpcode::INLINEASM) {
- const MachineFunction *MF = MI->getParent()->getParent();
- const char *AsmStr = MI->getOperand(0).getSymbolName();
+uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const char *AsmStr = MI.getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
- return MI->getDesc().getSize();
+ return MI.getDesc().getSize();
}
SystemZII::Branch
-SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case SystemZ::BR:
case SystemZ::J:
case SystemZ::JG:
return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY,
- SystemZ::CCMASK_ANY, &MI->getOperand(0));
+ SystemZ::CCMASK_ANY, &MI.getOperand(0));
case SystemZ::BRC:
case SystemZ::BRCL:
- return SystemZII::Branch(SystemZII::BranchNormal,
- MI->getOperand(0).getImm(),
- MI->getOperand(1).getImm(), &MI->getOperand(2));
+ return SystemZII::Branch(SystemZII::BranchNormal, MI.getOperand(0).getImm(),
+ MI.getOperand(1).getImm(), &MI.getOperand(2));
case SystemZ::BRCT:
return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP,
- SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
+ SystemZ::CCMASK_CMP_NE, &MI.getOperand(2));
case SystemZ::BRCTG:
return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP,
- SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
+ SystemZ::CCMASK_CMP_NE, &MI.getOperand(2));
case SystemZ::CIJ:
case SystemZ::CRJ:
return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP,
- MI->getOperand(2).getImm(), &MI->getOperand(3));
+ MI.getOperand(2).getImm(), &MI.getOperand(3));
case SystemZ::CLIJ:
case SystemZ::CLRJ:
return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP,
- MI->getOperand(2).getImm(), &MI->getOperand(3));
+ MI.getOperand(2).getImm(), &MI.getOperand(3));
case SystemZ::CGIJ:
case SystemZ::CGRJ:
return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP,
- MI->getOperand(2).getImm(), &MI->getOperand(3));
+ MI.getOperand(2).getImm(), &MI.getOperand(3));
case SystemZ::CLGIJ:
case SystemZ::CLGRJ:
return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP,
- MI->getOperand(2).getImm(), &MI->getOperand(3));
+ MI.getOperand(2).getImm(), &MI.getOperand(3));
default:
llvm_unreachable("Unrecognized branch opcode");
@@ -1250,28 +1390,107 @@ bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize,
return false;
}
-unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode,
- const MachineInstr *MI) const {
+unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
+ SystemZII::FusedCompareType Type,
+ const MachineInstr *MI) const {
switch (Opcode) {
- case SystemZ::CR:
- return SystemZ::CRJ;
- case SystemZ::CGR:
- return SystemZ::CGRJ;
case SystemZ::CHI:
- return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CIJ : 0;
case SystemZ::CGHI:
- return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CGIJ : 0;
- case SystemZ::CLR:
- return SystemZ::CLRJ;
- case SystemZ::CLGR:
- return SystemZ::CLGRJ;
+ if (!(MI && isInt<8>(MI->getOperand(1).getImm())))
+ return 0;
+ break;
case SystemZ::CLFI:
- return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLIJ : 0;
case SystemZ::CLGFI:
- return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLGIJ : 0;
- default:
- return 0;
+ if (!(MI && isUInt<8>(MI->getOperand(1).getImm())))
+ return 0;
}
+ switch (Type) {
+ case SystemZII::CompareAndBranch:
+ switch (Opcode) {
+ case SystemZ::CR:
+ return SystemZ::CRJ;
+ case SystemZ::CGR:
+ return SystemZ::CGRJ;
+ case SystemZ::CHI:
+ return SystemZ::CIJ;
+ case SystemZ::CGHI:
+ return SystemZ::CGIJ;
+ case SystemZ::CLR:
+ return SystemZ::CLRJ;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRJ;
+ case SystemZ::CLFI:
+ return SystemZ::CLIJ;
+ case SystemZ::CLGFI:
+ return SystemZ::CLGIJ;
+ default:
+ return 0;
+ }
+ case SystemZII::CompareAndReturn:
+ switch (Opcode) {
+ case SystemZ::CR:
+ return SystemZ::CRBReturn;
+ case SystemZ::CGR:
+ return SystemZ::CGRBReturn;
+ case SystemZ::CHI:
+ return SystemZ::CIBReturn;
+ case SystemZ::CGHI:
+ return SystemZ::CGIBReturn;
+ case SystemZ::CLR:
+ return SystemZ::CLRBReturn;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRBReturn;
+ case SystemZ::CLFI:
+ return SystemZ::CLIBReturn;
+ case SystemZ::CLGFI:
+ return SystemZ::CLGIBReturn;
+ default:
+ return 0;
+ }
+ case SystemZII::CompareAndSibcall:
+ switch (Opcode) {
+ case SystemZ::CR:
+ return SystemZ::CRBCall;
+ case SystemZ::CGR:
+ return SystemZ::CGRBCall;
+ case SystemZ::CHI:
+ return SystemZ::CIBCall;
+ case SystemZ::CGHI:
+ return SystemZ::CGIBCall;
+ case SystemZ::CLR:
+ return SystemZ::CLRBCall;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRBCall;
+ case SystemZ::CLFI:
+ return SystemZ::CLIBCall;
+ case SystemZ::CLGFI:
+ return SystemZ::CLGIBCall;
+ default:
+ return 0;
+ }
+ case SystemZII::CompareAndTrap:
+ switch (Opcode) {
+ case SystemZ::CR:
+ return SystemZ::CRT;
+ case SystemZ::CGR:
+ return SystemZ::CGRT;
+ case SystemZ::CHI:
+ return SystemZ::CIT;
+ case SystemZ::CGHI:
+ return SystemZ::CGIT;
+ case SystemZ::CLR:
+ return SystemZ::CLRT;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRT;
+ case SystemZ::CLFI:
+ return SystemZ::CLFIT;
+ case SystemZ::CLGFI:
+ return SystemZ::CLGIT;
+ default:
+ return 0;
+ }
+ }
+ return 0;
}
void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index d9094ba93658..010010b89dc8 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -111,6 +111,22 @@ struct Branch {
const MachineOperand *target)
: Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {}
};
+// Kinds of fused compares in compare-and-* instructions. Together with type
+// of the converted compare, this identifies the compare-and-*
+// instruction.
+enum FusedCompareType {
+ // Relative branch - CRJ etc.
+ CompareAndBranch,
+
+ // Indirect branch, used for return - CRBReturn etc.
+ CompareAndReturn,
+
+ // Indirect branch, used for sibcall - CRBCall etc.
+ CompareAndSibcall,
+
+ // Trap
+ CompareAndTrap
+};
} // end namespace SystemZII
class SystemZSubtarget;
@@ -120,16 +136,17 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
- void expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
- unsigned HighOpcode, bool ConvertHigh) const;
- void expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+ void expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode,
+ bool ConvertHigh) const;
+ void expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned LowOpcodeK, unsigned HighOpcode) const;
- void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+ void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
- void expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+ void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
+ void expandLoadStackGuard(MachineInstr *MI) const;
void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
virtual void anchor();
@@ -137,26 +154,26 @@ public:
explicit SystemZInstrInfo(SystemZSubtarget &STI);
// Override TargetInstrInfo.
- unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- unsigned isStoreToStackSlot(const MachineInstr *MI,
+ unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- bool isStackSlotCopy(const MachineInstr *MI, int &DestFrameIndex,
+ bool isStackSlotCopy(const MachineInstr &MI, int &DestFrameIndex,
int &SrcFrameIndex) const override;
- bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- DebugLoc DL) const override;
- bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ const DebugLoc &DL) const override;
+ bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &Mask, int &Value) const override;
- bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const override;
- bool isPredicable(MachineInstr *MI) const override;
+ bool isPredicable(MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
BranchProbability Probability) const override;
@@ -165,10 +182,12 @@ public:
MachineBasicBlock &FMBB,
unsigned NumCyclesF, unsigned ExtraPredCyclesF,
BranchProbability Probability) const override;
- bool PredicateInstruction(MachineInstr *MI,
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const override;
+ bool PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -181,17 +200,18 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
+ MachineInstr &MI,
LiveVariables *LV) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt,
- int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt,
- MachineInstr *LoadMI) const override;
- bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override;
+ MachineInstr *
+ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
+ ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex,
+ LiveIntervals *LIS = nullptr) const override;
+ MachineInstr *foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+ LiveIntervals *LIS = nullptr) const override;
+ bool expandPostRAPseudo(MachineInstr &MBBI) const override;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
override;
@@ -199,14 +219,14 @@ public:
const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
// Return the size in bytes of MI.
- uint64_t getInstSizeInBytes(const MachineInstr *MI) const;
+ uint64_t getInstSizeInBytes(const MachineInstr &MI) const;
// Return true if MI is a conditional or unconditional branch.
// When returning true, set Cond to the mask of condition-code
// values on which the instruction will branch, and set Target
// to the operand that contains the branch target. This target
// can be a register or a basic block.
- SystemZII::Branch getBranchInfo(const MachineInstr *MI) const;
+ SystemZII::Branch getBranchInfo(const MachineInstr &MI) const;
// Get the load and store opcodes for a given register class.
void getLoadStoreOpcodes(const TargetRegisterClass *RC,
@@ -229,11 +249,12 @@ public:
bool isRxSBGMask(uint64_t Mask, unsigned BitSize,
unsigned &Start, unsigned &End) const;
- // If Opcode is a COMPARE opcode for which an associated COMPARE AND
- // BRANCH exists, return the opcode for the latter, otherwise return 0.
+ // If Opcode is a COMPARE opcode for which an associated fused COMPARE AND *
+ // operation exists, return the opcode for the latter, otherwise return 0.
// MI, if nonnull, is the compare instruction.
- unsigned getCompareAndBranch(unsigned Opcode,
- const MachineInstr *MI = nullptr) const;
+ unsigned getFusedCompare(unsigned Opcode,
+ SystemZII::FusedCompareType Type,
+ const MachineInstr *MI = nullptr) const;
// Emit code before MBBI in MI to move immediate value Value into
// physical register Reg.
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index d5dabc2cd6ab..c510ca774be3 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -36,6 +36,22 @@ let hasSideEffects = 0 in {
let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
+// A conditional return instruction (bcr <cond>, %r14).
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in
+ def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+
+// Fused compare and conditional returns.
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def CRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
+ def CGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
+ def CLRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CLGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CLIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
+ def CLGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
+}
+
// Unconditional branches. R1 is the condition-code mask (all 1s).
let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
let isIndirectBranch = 1 in
@@ -51,6 +67,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg\t$I2", []>;
}
+// FIXME: This trap instruction should be marked as isTerminator, but there is
+// currently a general bug that allows non-terminators to be placed between
+// terminators. Temporarily leave this unmarked until the bug is fixed.
+let isBarrier = 1, hasCtrlDep = 1 in {
+ def Trap : Alias<4, (outs), (ins), [(trap)]>;
+}
+
+let isTerminator = 1, hasCtrlDep = 1, Uses = [CC] in {
+ def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>;
+}
+
// Conditional branches. It's easier for LLVM to handle these branches
// in their raw BRC/BRCL form, with the 4-bit condition-code mask being
// the first operand. It seems friendlier to use mnemonic forms like
@@ -62,15 +89,25 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
[(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>;
def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1,
brtarget32:$I2), "jg$R1\t$I2", []>;
+ let isIndirectBranch = 1 in
+ def BCR : InstRR<0x07, (outs), (ins cond4:$valid, cond4:$R1, GR64:$R2),
+ "b${R1}r\t$R2", []>;
}
def AsmBRC : InstRI<0xA74, (outs), (ins imm32zx4:$R1, brtarget16:$I2),
"brc\t$R1, $I2", []>;
def AsmBRCL : InstRIL<0xC04, (outs), (ins imm32zx4:$R1, brtarget32:$I2),
"brcl\t$R1, $I2", []>;
- def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2),
- "bcr\t$R1, $R2", []>;
+ let isIndirectBranch = 1 in {
+ def AsmBC : InstRX<0x47, (outs), (ins imm32zx4:$R1, bdxaddr12only:$XBD2),
+ "bc\t$R1, $XBD2", []>;
+ def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2),
+ "bcr\t$R1, $R2", []>;
+ }
}
+def AsmNop : InstAlias<"nop\t$XBD", (AsmBC 0, bdxaddr12only:$XBD), 0>;
+def AsmNopR : InstAlias<"nopr\t$R", (AsmBCR 0, GR64:$R), 0>;
+
// Fused compare-and-branch instructions. As for normal branches,
// we handle these instructions internally in their raw CRJ-like form,
// but use assembly macros like CRJE when writing them out.
@@ -83,38 +120,83 @@ multiclass CompareBranches<Operand ccmask, string pos1, string pos2> {
let isBranch = 1, isTerminator = 1, Defs = [CC] in {
def RJ : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
brtarget16:$RI4),
- "crj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ "crj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
def GRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
brtarget16:$RI4),
- "cgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ "cgrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
def IJ : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3,
brtarget16:$RI4),
- "cij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ "cij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3,
brtarget16:$RI4),
- "cgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ "cgij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
def LRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
brtarget16:$RI4),
- "clrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ "clrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
brtarget16:$RI4),
- "clgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ "clgrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
def LIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3,
brtarget16:$RI4),
- "clij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ "clij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3,
brtarget16:$RI4),
- "clgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ "clgij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
+ let isIndirectBranch = 1 in {
+ def RB : InstRRS<0xECF6, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
+ bdaddr12only:$BD4),
+ "crb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
+ def GRB : InstRRS<0xECE4, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
+ bdaddr12only:$BD4),
+ "cgrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
+ def IB : InstRIS<0xECFE, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3,
+ bdaddr12only:$BD4),
+ "cib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
+ def GIB : InstRIS<0xECFC, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3,
+ bdaddr12only:$BD4),
+ "cgib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
+ def LRB : InstRRS<0xECF7, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
+ bdaddr12only:$BD4),
+ "clrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
+ def LGRB : InstRRS<0xECE5, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
+ bdaddr12only:$BD4),
+ "clgrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
+ def LIB : InstRIS<0xECFF, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3,
+ bdaddr12only:$BD4),
+ "clib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
+ def LGIB : InstRIS<0xECFD, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3,
+ bdaddr12only:$BD4),
+ "clgib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
+ }
+ }
+
+ let isTerminator = 1, hasCtrlDep = 1 in {
+ def RT : InstRRFc<0xB972, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3),
+ "crt"##pos1##"\t$R1, $R2"##pos2, []>;
+ def GRT : InstRRFc<0xB960, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3),
+ "cgrt"##pos1##"\t$R1, $R2"##pos2, []>;
+ def LRT : InstRRFc<0xB973, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3),
+ "clrt"##pos1##"\t$R1, $R2"##pos2, []>;
+ def LGRT : InstRRFc<0xB961, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3),
+ "clgrt"##pos1##"\t$R1, $R2"##pos2, []>;
+ def IT : InstRIEa<0xEC72, (outs), (ins GR32:$R1, imm32sx16:$I2, ccmask:$M3),
+ "cit"##pos1##"\t$R1, $I2"##pos2, []>;
+ def GIT : InstRIEa<0xEC70, (outs), (ins GR64:$R1, imm32sx16:$I2, ccmask:$M3),
+ "cgit"##pos1##"\t$R1, $I2"##pos2, []>;
+ def LFIT : InstRIEa<0xEC73, (outs), (ins GR32:$R1, imm32zx16:$I2, ccmask:$M3),
+ "clfit"##pos1##"\t$R1, $I2"##pos2, []>;
+ def LGIT : InstRIEa<0xEC71, (outs), (ins GR64:$R1, imm32zx16:$I2, ccmask:$M3),
+ "clgit"##pos1##"\t$R1, $I2"##pos2, []>;
}
}
let isCodeGenOnly = 1 in
defm C : CompareBranches<cond4, "$M3", "">;
-defm AsmC : CompareBranches<imm32zx4, "", "$M3, ">;
+defm AsmC : CompareBranches<imm32zx4, "", ", $M3">;
// Define AsmParser mnemonics for each general condition-code mask
// (integer or floating-point)
-multiclass CondExtendedMnemonic<bits<4> ccmask, string name> {
- let R1 = ccmask in {
+multiclass CondExtendedMnemonicA<bits<4> ccmask, string name> {
+ let isBranch = 1, isTerminator = 1, R1 = ccmask in {
def J : InstRI<0xA74, (outs), (ins brtarget16:$I2),
"j"##name##"\t$I2", []>;
def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2),
@@ -123,25 +205,36 @@ multiclass CondExtendedMnemonic<bits<4> ccmask, string name> {
}
def LOCR : FixedCondUnaryRRF<"locr"##name, 0xB9F2, GR32, GR32, ccmask>;
def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>;
+ def LOCHI : FixedCondUnaryRIE<"lochi"##name, 0xEC42, GR64, imm32sx16,
+ ccmask>;
+ def LOCGHI: FixedCondUnaryRIE<"locghi"##name, 0xEC46, GR64, imm64sx16,
+ ccmask>;
def LOC : FixedCondUnaryRSY<"loc"##name, 0xEBF2, GR32, ccmask, 4>;
def LOCG : FixedCondUnaryRSY<"locg"##name, 0xEBE2, GR64, ccmask, 8>;
def STOC : FixedCondStoreRSY<"stoc"##name, 0xEBF3, GR32, ccmask, 4>;
def STOCG : FixedCondStoreRSY<"stocg"##name, 0xEBE3, GR64, ccmask, 8>;
}
-defm AsmO : CondExtendedMnemonic<1, "o">;
-defm AsmH : CondExtendedMnemonic<2, "h">;
-defm AsmNLE : CondExtendedMnemonic<3, "nle">;
-defm AsmL : CondExtendedMnemonic<4, "l">;
-defm AsmNHE : CondExtendedMnemonic<5, "nhe">;
-defm AsmLH : CondExtendedMnemonic<6, "lh">;
-defm AsmNE : CondExtendedMnemonic<7, "ne">;
-defm AsmE : CondExtendedMnemonic<8, "e">;
-defm AsmNLH : CondExtendedMnemonic<9, "nlh">;
-defm AsmHE : CondExtendedMnemonic<10, "he">;
-defm AsmNL : CondExtendedMnemonic<11, "nl">;
-defm AsmLE : CondExtendedMnemonic<12, "le">;
-defm AsmNH : CondExtendedMnemonic<13, "nh">;
-defm AsmNO : CondExtendedMnemonic<14, "no">;
+
+multiclass CondExtendedMnemonic<bits<4> ccmask, string name1, string name2>
+ : CondExtendedMnemonicA<ccmask, name1> {
+ let isAsmParserOnly = 1 in
+ defm Alt : CondExtendedMnemonicA<ccmask, name2>;
+}
+
+defm AsmO : CondExtendedMnemonicA<1, "o">;
+defm AsmH : CondExtendedMnemonic<2, "h", "p">;
+defm AsmNLE : CondExtendedMnemonicA<3, "nle">;
+defm AsmL : CondExtendedMnemonic<4, "l", "m">;
+defm AsmNHE : CondExtendedMnemonicA<5, "nhe">;
+defm AsmLH : CondExtendedMnemonicA<6, "lh">;
+defm AsmNE : CondExtendedMnemonic<7, "ne", "nz">;
+defm AsmE : CondExtendedMnemonic<8, "e", "z">;
+defm AsmNLH : CondExtendedMnemonicA<9, "nlh">;
+defm AsmHE : CondExtendedMnemonicA<10, "he">;
+defm AsmNL : CondExtendedMnemonic<11, "nl", "nm">;
+defm AsmLE : CondExtendedMnemonicA<12, "le">;
+defm AsmNH : CondExtendedMnemonic<13, "nh", "np">;
+defm AsmNO : CondExtendedMnemonicA<14, "no">;
// Define AsmParser mnemonics for each integer condition-code mask.
// This is like the list above, except that condition 3 is not possible
@@ -151,31 +244,76 @@ defm AsmNO : CondExtendedMnemonic<14, "no">;
// We don't make one of the two names an alias of the other because
// we need the custom parsing routines to select the correct register class.
multiclass IntCondExtendedMnemonicA<bits<4> ccmask, string name> {
- let M3 = ccmask in {
- def CR : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2,
- brtarget16:$RI4),
- "crj"##name##"\t$R1, $R2, $RI4", []>;
- def CGR : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2,
- brtarget16:$RI4),
- "cgrj"##name##"\t$R1, $R2, $RI4", []>;
- def CI : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2,
- brtarget16:$RI4),
- "cij"##name##"\t$R1, $I2, $RI4", []>;
- def CGI : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2,
- brtarget16:$RI4),
- "cgij"##name##"\t$R1, $I2, $RI4", []>;
- def CLR : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2,
- brtarget16:$RI4),
- "clrj"##name##"\t$R1, $R2, $RI4", []>;
- def CLGR : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2,
+ let isBranch = 1, isTerminator = 1, M3 = ccmask in {
+ def CRJ : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2,
+ brtarget16:$RI4),
+ "crj"##name##"\t$R1, $R2, $RI4", []>;
+ def CGRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2,
+ brtarget16:$RI4),
+ "cgrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CIJ : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2,
brtarget16:$RI4),
- "clgrj"##name##"\t$R1, $R2, $RI4", []>;
- def CLI : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2,
+ "cij"##name##"\t$R1, $I2, $RI4", []>;
+ def CGIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2,
brtarget16:$RI4),
- "clij"##name##"\t$R1, $I2, $RI4", []>;
- def CLGI : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2,
+ "cgij"##name##"\t$R1, $I2, $RI4", []>;
+ def CLRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2,
brtarget16:$RI4),
- "clgij"##name##"\t$R1, $I2, $RI4", []>;
+ "clrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CLGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2,
+ brtarget16:$RI4),
+ "clgrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CLIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2,
+ brtarget16:$RI4),
+ "clij"##name##"\t$R1, $I2, $RI4", []>;
+ def CLGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2,
+ brtarget16:$RI4),
+ "clgij"##name##"\t$R1, $I2, $RI4", []>;
+ let isIndirectBranch = 1 in {
+ def CRB : InstRRS<0xECF6, (outs), (ins GR32:$R1, GR32:$R2,
+ bdaddr12only:$BD4),
+ "crb"##name##"\t$R1, $R2, $BD4", []>;
+ def CGRB : InstRRS<0xECE4, (outs), (ins GR64:$R1, GR64:$R2,
+ bdaddr12only:$BD4),
+ "cgrb"##name##"\t$R1, $R2, $BD4", []>;
+ def CIB : InstRIS<0xECFE, (outs), (ins GR32:$R1, imm32sx8:$I2,
+ bdaddr12only:$BD4),
+ "cib"##name##"\t$R1, $I2, $BD4", []>;
+ def CGIB : InstRIS<0xECFC, (outs), (ins GR64:$R1, imm64sx8:$I2,
+ bdaddr12only:$BD4),
+ "cgib"##name##"\t$R1, $I2, $BD4", []>;
+ def CLRB : InstRRS<0xECF7, (outs), (ins GR32:$R1, GR32:$R2,
+ bdaddr12only:$BD4),
+ "clrb"##name##"\t$R1, $R2, $BD4", []>;
+ def CLGRB : InstRRS<0xECE5, (outs), (ins GR64:$R1, GR64:$R2,
+ bdaddr12only:$BD4),
+ "clgrb"##name##"\t$R1, $R2, $BD4", []>;
+ def CLIB : InstRIS<0xECFF, (outs), (ins GR32:$R1, imm32zx8:$I2,
+ bdaddr12only:$BD4),
+ "clib"##name##"\t$R1, $I2, $BD4", []>;
+ def CLGIB : InstRIS<0xECFD, (outs), (ins GR64:$R1, imm64zx8:$I2,
+ bdaddr12only:$BD4),
+ "clgib"##name##"\t$R1, $I2, $BD4", []>;
+ }
+ }
+
+ let hasCtrlDep = 1, isTerminator = 1, M3 = ccmask in {
+ def CRT : InstRRFc<0xB972, (outs), (ins GR32:$R1, GR32:$R2),
+ "crt"##name##"\t$R1, $R2", []>;
+ def CGRT : InstRRFc<0xB960, (outs), (ins GR64:$R1, GR64:$R2),
+ "cgrt"##name##"\t$R1, $R2", []>;
+ def CLRT : InstRRFc<0xB973, (outs), (ins GR32:$R1, GR32:$R2),
+ "clrt"##name##"\t$R1, $R2", []>;
+ def CLGRT : InstRRFc<0xB961, (outs), (ins GR64:$R1, GR64:$R2),
+ "clgrt"##name##"\t$R1, $R2", []>;
+ def CIT : InstRIEa<0xEC72, (outs), (ins GR32:$R1, imm32sx16:$I2),
+ "cit"##name##"\t$R1, $I2", []>;
+ def CGIT : InstRIEa<0xEC70, (outs), (ins GR64:$R1, imm32sx16:$I2),
+ "cgit"##name##"\t$R1, $I2", []>;
+ def CLFIT : InstRIEa<0xEC73, (outs), (ins GR32:$R1, imm32zx16:$I2),
+ "clfit"##name##"\t$R1, $I2", []>;
+ def CLGIT : InstRIEa<0xEC71, (outs), (ins GR64:$R1, imm32zx16:$I2),
+ "clgit"##name##"\t$R1, $I2", []>;
}
}
multiclass IntCondExtendedMnemonic<bits<4> ccmask, string name1, string name2>
@@ -249,6 +387,26 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
}
+let CCMaskFirst = 1, isCall = 1, isTerminator = 1, isReturn = 1 in {
+ def CallBRCL : Alias<6, (outs), (ins cond4:$valid, cond4:$R1,
+ pcrel32:$I2), []>;
+
+ let Uses = [R1D] in
+ def CallBCR : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+}
+
+// Fused compare and conditional sibling calls.
+let isCall = 1, isTerminator = 1, isReturn = 1, Uses = [R1D] in {
+ def CRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CIBCall : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
+ def CGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
+ def CLRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CLGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CLIBCall : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
+ def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
+}
+
// TLS calls. These will be lowered into a call to __tls_get_offset,
// with an extra relocation specifying the TLS symbol.
let isCall = 1, Defs = [R14D, CC] in {
@@ -261,12 +419,14 @@ let isCall = 1, Defs = [R14D, CC] in {
// Define the general form of the call instructions for the asm parser.
// These instructions don't hard-code %r14 as the return address register.
// Allow an optional TLS marker symbol to generate TLS call relocations.
-def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2),
- "bras\t$R1, $I2", []>;
-def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2),
- "brasl\t$R1, $I2", []>;
-def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
- "basr\t$R1, $R2", []>;
+let isCall = 1, Defs = [CC] in {
+ def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2),
+ "bras\t$R1, $I2", []>;
+ def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2),
+ "brasl\t$R1, $I2", []>;
+ def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
+ "basr\t$R1, $R2", []>;
+}
//===----------------------------------------------------------------------===//
// Move instructions
@@ -294,6 +454,14 @@ let Uses = [CC] in {
def AsmLOCR : AsmCondUnaryRRF<"loc", 0xB9F2, GR32, GR32>;
def AsmLOCGR : AsmCondUnaryRRF<"locg", 0xB9E2, GR64, GR64>;
}
+let isCodeGenOnly = 1, Uses = [CC] in {
+ def LOCHI : CondUnaryRIE<"lochi", 0xEC42, GR32, imm32sx16>;
+ def LOCGHI : CondUnaryRIE<"locghi", 0xEC46, GR64, imm64sx16>;
+}
+let Uses = [CC] in {
+ def AsmLOCHI : AsmCondUnaryRIE<"lochi", 0xEC42, GR32, imm32sx16>;
+ def AsmLOCGHI : AsmCondUnaryRIE<"locghi", 0xEC46, GR64, imm64sx16>;
+}
// Immediate moves.
let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
@@ -546,10 +714,14 @@ def : StoreGR64PC<STRL, aligned_truncstorei32>;
//===----------------------------------------------------------------------===//
// Multi-register loads.
+defm LM : LoadMultipleRSPair<"lm", 0x98, 0xEB98, GR32>;
def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>;
+def LMH : LoadMultipleRSY<"lmh", 0xEB96, GRH32>;
// Multi-register stores.
+defm STM : StoreMultipleRSPair<"stm", 0x90, 0xEB90, GR32>;
def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>;
+def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>;
//===----------------------------------------------------------------------===//
// Byte swaps
@@ -563,13 +735,14 @@ let hasSideEffects = 0 in {
// Byte-swapping loads. Unlike normal loads, these instructions are
// allowed to access storage more than once.
-def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>;
-def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>;
+def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>;
+def LRV : UnaryRXY<"lrv", 0xE31E, z_lrv, GR32, 4>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>;
// Likewise byte-swapping stores.
-def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>;
-def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>,
- GR64, 8>;
+def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>;
+def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>;
+def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;
//===----------------------------------------------------------------------===//
// Load address instructions
@@ -657,6 +830,11 @@ defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>;
defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>;
defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>;
+let Defs = [CC] in {
+ defm ICM : TernaryRSPair<"icm", 0xBF, 0xEB81, GR32, 0>;
+ def ICMH : TernaryRSY<"icmh", 0xEB80, GRH32, 0>;
+}
+
// Insertions of a 16-bit immediate, leaving other bits unaffected.
// We don't have or_as_insert equivalents of these operations because
// OI is available instead.
@@ -812,7 +990,7 @@ defm : ZXB<subc, GR64, SLGFR>;
let Defs = [CC], Uses = [CC] in {
// Subtraction of a register.
def SLBR : BinaryRRE<"slb", 0xB999, sube, GR32, GR32>;
- def SLGBR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>;
+ def SLBGR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>;
// Subtraction of memory.
def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>;
@@ -865,7 +1043,7 @@ let Defs = [CC] in {
// ANDs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
- def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+ def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
}
// AND to memory
@@ -1030,6 +1208,7 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
// Shift left.
let hasSideEffects = 0 in {
defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+ defm SLA : BinaryRSAndK<"sla", 0x8B, 0xEBDD, null_frag, GR32>;
def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
}
@@ -1208,6 +1387,9 @@ let Defs = [CC] in {
defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>;
}
+def TML : InstAlias<"tml\t$R, $I", (TMLL GR32:$R, imm32ll16:$I), 0>;
+def TMH : InstAlias<"tmh\t$R, $I", (TMLH GR32:$R, imm32lh16:$I), 0>;
+
//===----------------------------------------------------------------------===//
// Prefetch
//===----------------------------------------------------------------------===//
@@ -1224,6 +1406,10 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
let hasSideEffects = 1 in
def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
+// A pseudo instruction that serves as a compiler barrier.
+let hasSideEffects = 1 in
+def MemBarrier : Pseudo<(outs), (ins), [(z_membarrier)]>;
+
let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_32, GR32>;
def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_64, GR64>;
@@ -1466,6 +1652,10 @@ let mayLoad = 1, Defs = [CC] in
defm SRST : StringRRE<"srst", 0xb25e, z_search_string>;
// Other instructions for inline assembly
+let hasSideEffects = 1, Defs = [CC], isCall = 1 in
+ def SVC : InstI<0x0A, (outs), (ins imm32zx8:$I1),
+ "svc\t$I1",
+ []>;
let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
def STCK : InstS<0xB205, (outs), (ins bdaddr12only:$BD2),
"stck\t$BD2",
@@ -1483,6 +1673,12 @@ let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
"stfle\t$BD2",
[]>;
+let hasSideEffects = 1 in {
+ def EX : InstRX<0x44, (outs), (ins GR64:$R1, bdxaddr12only:$XBD2),
+ "ex\t$R1, $XBD2", []>;
+ def EXRL : InstRIL<0xC60, (outs), (ins GR64:$R1, pcrel32:$I2),
+ "exrl\t$R1, $I2", []>;
+}
//===----------------------------------------------------------------------===//
@@ -1515,6 +1711,42 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid,
(i32 63)),
(Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
+// Avoid generating 2 XOR instructions. (xor (and x, y), y) is
+// equivalent to (and (xor x, -1), y)
+def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
+ (XGR GR64:$y, (NGR GR64:$y, GR64:$x))>;
+
+// Shift/rotate instructions only use the last 6 bits of the second operand
+// register, so we can safely use NILL (16 fewer bits than NILF) to only AND the
+// last 16 bits.
+// Complexity is added so that we match this before we match NILF on the AND
+// operation alone.
+let AddedComplexity = 4 in {
+ def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+}
+
// Peepholes for turning scalar operations into block operations.
defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
XCSequence, 1>;
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
index 24165be29ae7..2cdf2f9bf990 100644
--- a/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -64,6 +64,9 @@ void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
+ if (skipFunction(*F.getFunction()))
+ return false;
+
TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
MF = &F;
@@ -92,9 +95,9 @@ bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node,
switch (I->getOpcode()) {
case SystemZ::TLS_LDCALL:
if (TLSBaseAddrReg)
- I = ReplaceTLSCall(I, TLSBaseAddrReg);
+ I = ReplaceTLSCall(&*I, TLSBaseAddrReg);
else
- I = SetRegister(I, &TLSBaseAddrReg);
+ I = SetRegister(&*I, &TLSBaseAddrReg);
Changed = true;
break;
default:
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 8dab44e7f8af..a24d47d2d16b 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -58,7 +58,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -139,12 +138,16 @@ public:
}
bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
private:
void skipNonTerminators(BlockPosition &Position, MBBInfo &Block);
void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
bool AssumeRelaxed);
- TerminatorInfo describeTerminator(MachineInstr *MI);
+ TerminatorInfo describeTerminator(MachineInstr &MI);
uint64_t initMBBInfo();
bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
bool mustRelaxABranch();
@@ -207,11 +210,11 @@ void SystemZLongBranch::skipTerminator(BlockPosition &Position,
}
// Return a description of terminator instruction MI.
-TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
+TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) {
TerminatorInfo Terminator;
Terminator.Size = TII->getInstSizeInBytes(MI);
- if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) {
- switch (MI->getOpcode()) {
+ if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
+ switch (MI.getOpcode()) {
case SystemZ::J:
// Relaxes to JG, which is 2 bytes longer.
Terminator.ExtraRelaxSize = 2;
@@ -248,7 +251,7 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
default:
llvm_unreachable("Unrecognized branch instruction");
}
- Terminator.Branch = MI;
+ Terminator.Branch = &MI;
Terminator.TargetBlock =
TII->getBranchInfo(MI).Target->getMBB()->getNumber();
}
@@ -280,7 +283,7 @@ uint64_t SystemZLongBranch::initMBBInfo() {
MachineBasicBlock::iterator MI = MBB->begin();
MachineBasicBlock::iterator End = MBB->end();
while (MI != End && !MI->isTerminator()) {
- Block.Size += TII->getInstSizeInBytes(MI);
+ Block.Size += TII->getInstSizeInBytes(*MI);
++MI;
}
skipNonTerminators(Position, Block);
@@ -289,7 +292,7 @@ uint64_t SystemZLongBranch::initMBBInfo() {
while (MI != End) {
if (!MI->isDebugValue()) {
assert(MI->isTerminator() && "Terminator followed by non-terminator");
- Terminators.push_back(describeTerminator(MI));
+ Terminators.push_back(describeTerminator(*MI));
skipTerminator(Position, Terminators.back(), false);
++Block.NumTerminators;
}
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index f4a517bd54df..4f64f4c65f1d 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -22,14 +22,15 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
unsigned VarArgsFirstFPR;
unsigned VarArgsFrameIndex;
unsigned RegSaveFrameIndex;
+ int FramePointerSaveIndex;
bool ManipulatesSP;
unsigned NumLocalDynamics;
public:
explicit SystemZMachineFunctionInfo(MachineFunction &MF)
: LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
- VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false),
- NumLocalDynamics(0) {}
+ VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0),
+ ManipulatesSP(false), NumLocalDynamics(0) {}
// Get and set the first call-saved GPR that should be saved and restored
// by this function. This is 0 if no GPRs need to be saved or restored.
@@ -59,6 +60,10 @@ public:
unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; }
+ // Get and set the frame index of where the old frame pointer is stored.
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
// Get and set whether the function directly manipulates the stack pointer,
// e.g. through STACKSAVE or STACKRESTORE.
bool getManipulatesSP() const { return ManipulatesSP; }
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 9af90d492cf8..17b076d88a34 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -451,11 +451,11 @@ def PCRelTLS32 : PCRelTLSAsmOperand<"32">;
// and multiplied by 2.
def brtarget16 : PCRelOperand<OtherVT, PCRel16> {
let EncoderMethod = "getPC16DBLEncoding";
- let DecoderMethod = "decodePC16DBLOperand";
+ let DecoderMethod = "decodePC16DBLBranchOperand";
}
def brtarget32 : PCRelOperand<OtherVT, PCRel32> {
let EncoderMethod = "getPC32DBLEncoding";
- let DecoderMethod = "decodePC32DBLOperand";
+ let DecoderMethod = "decodePC32DBLBranchOperand";
}
// Variants of brtarget16/32 with an optional additional TLS symbol.
@@ -464,12 +464,12 @@ def tlssym : Operand<i64> { }
def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> {
let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym);
let EncoderMethod = "getPC16DBLTLSEncoding";
- let DecoderMethod = "decodePC16DBLOperand";
+ let DecoderMethod = "decodePC16DBLBranchOperand";
}
def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> {
let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym);
let EncoderMethod = "getPC32DBLTLSEncoding";
- let DecoderMethod = "decodePC32DBLOperand";
+ let DecoderMethod = "decodePC32DBLBranchOperand";
}
// A PC-relative offset of a global value. The offset is sign-extended
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 3c95a1e11b45..8d031f1ea05d 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -79,6 +79,14 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
+def SDT_ZLoadBSwap : SDTypeProfile<1, 2,
+ [SDTCisInt<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDT_ZStoreBSwap : SDTypeProfile<0, 3,
+ [SDTCisInt<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
def SDT_ZTBegin : SDTypeProfile<0, 2,
[SDTCisPtrTy<0>,
SDTCisVT<1, i32>]>;
@@ -137,6 +145,7 @@ def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisVT<4, i32>]>;
+def SDT_ZTest : SDTypeProfile<0, 2, [SDTCisVT<1, i64>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -188,6 +197,15 @@ def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
[SDNPHasChain, SDNPMayStore]>;
+def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest, [SDNPOutGlue]>;
// Defined because the index is an i32 rather than a pointer.
def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
@@ -329,6 +347,17 @@ def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
// Pattern fragments
//===----------------------------------------------------------------------===//
+def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
+def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
+def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;
+
+def z_strvh : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr, i16)>;
+def z_strv : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr, i32)>;
+def z_strvg : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr, i64)>;
+
// Signed and unsigned comparisons.
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index 32fbe5ae9ef9..9adc0189e650 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -29,6 +29,11 @@ def FeatureLoadStoreOnCond : SystemZFeature<
"Assume that the load/store-on-condition facility is installed"
>;
+def FeatureLoadStoreOnCond2 : SystemZFeature<
+ "load-store-on-cond-2", "LoadStoreOnCond2",
+ "Assume that the load/store-on-condition facility 2 is installed"
+>;
+
def FeatureHighWord : SystemZFeature<
"high-word", "HighWord",
"Assume that the high-word facility is installed"
@@ -92,5 +97,6 @@ def : Processor<"z13", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
FeatureFPExtension, FeaturePopulationCount,
FeatureFastSerialization, FeatureInterlockedAccess1,
+ FeatureMiscellaneousExtensions,
FeatureTransactionalExecution, FeatureProcessorAssist,
- FeatureVector]>;
+ FeatureVector, FeatureLoadStoreOnCond2]>;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 6fd24e3df625..b5e5fd4bfc4f 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -24,12 +24,20 @@ SystemZRegisterInfo::SystemZRegisterInfo()
const MCPhysReg *
SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ if (MF->getSubtarget().getTargetLowering()->supportSwiftError() &&
+ MF->getFunction()->getAttributes().hasAttrSomewhere(
+ Attribute::SwiftError))
+ return CSR_SystemZ_SwiftError_SaveList;
return CSR_SystemZ_SaveList;
}
const uint32_t *
SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
+ if (MF.getSubtarget().getTargetLowering()->supportSwiftError() &&
+ MF.getFunction()->getAttributes().hasAttrSomewhere(
+ Attribute::SwiftError))
+ return CSR_SystemZ_SwiftError_RegMask;
return CSR_SystemZ_RegMask;
}
@@ -84,8 +92,14 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// accepts the offset exists.
unsigned Opcode = MI->getOpcode();
unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset);
- if (OpcodeForOffset)
+ if (OpcodeForOffset) {
+ if (OpcodeForOffset == SystemZ::LE &&
+ MF.getSubtarget<SystemZSubtarget>().hasVector()) {
+ // If LE is ok for offset, use LDE instead on z13.
+ OpcodeForOffset = SystemZ::LDE32;
+ }
MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ }
else {
// Create an anchor point that is in range. Start at 0xffff so that
// can use LLILH to load the immediate.
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index a0db5a9c188f..e41c06c98af2 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -33,6 +33,15 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
public:
SystemZRegisterInfo();
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is currently only used by LOAD_STACK_GUARD, which requires a non-%r0
+ /// register, hence ADDR64.
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind=0) const override {
+ return &SystemZ::ADDR64BitRegClass;
+ }
+
// Override TargetRegisterInfo.h.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 178aa3817311..657482504045 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
// address Dest. Sequence is the opcode to use for straight-line code
// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
// Return the chain for the completed operation.
-static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
+static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
unsigned Loop, SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size) {
EVT PtrVT = Src.getValueType();
@@ -46,12 +46,10 @@ static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
DAG.getConstant(Size, DL, PtrVT));
}
-SDValue SystemZSelectionDAGInfo::
-EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
- bool IsVolatile, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const {
+SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
if (IsVolatile)
return SDValue();
@@ -64,24 +62,21 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
// Chain, Dst, ByteVal and Size. These cases are expected to use
// MVI, MVHHI, MVHI and MVGHI respectively.
-static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue Dst, uint64_t ByteVal, uint64_t Size,
- unsigned Align,
- MachinePointerInfo DstPtrInfo) {
+ unsigned Align, MachinePointerInfo DstPtrInfo) {
uint64_t StoreVal = ByteVal;
for (unsigned I = 1; I < Size; ++I)
StoreVal |= ByteVal << (I * 8);
- return DAG.getStore(Chain, DL,
- DAG.getConstant(StoreVal, DL,
- MVT::getIntegerVT(Size * 8)),
- Dst, DstPtrInfo, false, false, Align);
+ return DAG.getStore(
+ Chain, DL, DAG.getConstant(StoreVal, DL, MVT::getIntegerVT(Size * 8)),
+ Dst, DstPtrInfo, Align);
}
-SDValue SystemZSelectionDAGInfo::
-EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Dst, SDValue Byte, SDValue Size,
- unsigned Align, bool IsVolatile,
- MachinePointerInfo DstPtrInfo) const {
+SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst,
+ SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile,
+ MachinePointerInfo DstPtrInfo) const {
EVT PtrVT = Dst.getValueType();
if (IsVolatile)
@@ -116,15 +111,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
} else {
// Handle one and two bytes using STC.
if (Bytes <= 2) {
- SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
- false, false, Align);
+ SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
if (Bytes == 1)
return Chain1;
SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(1, DL, PtrVT));
- SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
- DstPtrInfo.getWithOffset(1),
- false, false, 1);
+ SDValue Chain2 =
+ DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1),
+ /* Alignment = */ 1);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
}
}
@@ -138,8 +132,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// Copy the byte to the first location and then use MVC to copy
// it to the rest.
- Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
- false, false, Align);
+ Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(1, DL, PtrVT));
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
@@ -150,7 +143,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
// deciding whether to use a loop or straight-line code.
-static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue Src1, SDValue Src2, uint64_t Size) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
EVT PtrVT = Src1.getValueType();
@@ -174,7 +167,8 @@ static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// less than zero if CC == 1 and greater than zero if CC >= 2.
// The sequence starts with IPM, which puts CC into bits 29 and 28
// of an integer and clears bits 30 and 31.
-static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
+static SDValue addIPMSequence(const SDLoc &DL, SDValue Glue,
+ SelectionDAG &DAG) {
SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
@@ -183,11 +177,10 @@ static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
return ROTL;
}
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Src1, SDValue Src2, SDValue Size,
- MachinePointerInfo Op1PtrInfo,
- MachinePointerInfo Op2PtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
+ SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
assert(Bytes > 0 && "Caller should have handled 0-size case");
@@ -198,10 +191,9 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
return std::make_pair(SDValue(), SDValue());
}
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Src, SDValue Char, SDValue Length,
- MachinePointerInfo SrcPtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src,
+ SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const {
// Use SRST to find the character. End is its address on success.
EVT PtrVT = Src.getValueType();
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
@@ -226,22 +218,20 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
return std::make_pair(End, Chain);
}
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Dest, SDValue Src,
- MachinePointerInfo DestPtrInfo,
- MachinePointerInfo SrcPtrInfo, bool isStpcpy) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcpy(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest,
+ SDValue Src, MachinePointerInfo DestPtrInfo, MachinePointerInfo SrcPtrInfo,
+ bool isStpcpy) const {
SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other);
SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src,
DAG.getConstant(0, DL, MVT::i32));
return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1));
}
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Src1, SDValue Src2,
- MachinePointerInfo Op1PtrInfo,
- MachinePointerInfo Op2PtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
+ SDValue Src2, MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
DAG.getConstant(0, DL, MVT::i32));
@@ -255,7 +245,8 @@ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// and the second being the out chain.
//
// This can be used for strlen by setting Limit to 0.
-static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
+static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG,
+ const SDLoc &DL,
SDValue Chain, SDValue Src,
SDValue Limit) {
EVT PtrVT = Src.getValueType();
@@ -265,19 +256,18 @@ static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
Chain = End.getValue(1);
SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src);
return std::make_pair(Len, Chain);
-}
+}
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Src, MachinePointerInfo SrcPtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrlen(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src,
+ MachinePointerInfo SrcPtrInfo) const {
EVT PtrVT = Src.getValueType();
return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT));
}
-std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
-EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Src, SDValue MaxLength,
- MachinePointerInfo SrcPtrInfo) const {
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrnlen(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src,
+ SDValue MaxLength, MachinePointerInfo SrcPtrInfo) const {
EVT PtrVT = Src.getValueType();
MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT);
SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength);
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 246fa3e5e656..93cd970c30c6 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -7,66 +7,64 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
+// This file defines the SystemZ subclass for SelectionDAGTargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
-#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
namespace llvm {
class SystemZTargetMachine;
-class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
+class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo {
public:
explicit SystemZSelectionDAGInfo() = default;
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool IsVolatile, bool AlwaysInline,
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool IsVolatile,
+ bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
- SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL,
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Dst, SDValue Byte,
SDValue Size, unsigned Align, bool IsVolatile,
MachinePointerInfo DstPtrInfo) const override;
std::pair<SDValue, SDValue>
- EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue Src1, SDValue Src2, SDValue Size,
MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const override;
std::pair<SDValue, SDValue>
- EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ EmitTargetCodeForMemchr(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue Src, SDValue Char, SDValue Length,
MachinePointerInfo SrcPtrInfo) const override;
- std::pair<SDValue, SDValue>
- EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Dest, SDValue Src,
- MachinePointerInfo DestPtrInfo,
- MachinePointerInfo SrcPtrInfo,
- bool isStpcpy) const override;
+ std::pair<SDValue, SDValue> EmitTargetCodeForStrcpy(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest,
+ SDValue Src, MachinePointerInfo DestPtrInfo,
+ MachinePointerInfo SrcPtrInfo, bool isStpcpy) const override;
std::pair<SDValue, SDValue>
- EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ EmitTargetCodeForStrcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue Src1, SDValue Src2,
MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const override;
std::pair<SDValue, SDValue>
- EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ EmitTargetCodeForStrlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue Src,
MachinePointerInfo SrcPtrInfo) const override;
std::pair<SDValue, SDValue>
- EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ EmitTargetCodeForStrnlen(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue Src, SDValue MaxLength,
MachinePointerInfo SrcPtrInfo) const override;
};
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 846edd51341a..7f26a3519e50 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -35,6 +35,10 @@ public:
bool processBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
private:
bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH);
@@ -68,18 +72,20 @@ static void tieOpsIfNeeded(MachineInstr &MI) {
// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH
// are the halfword immediate loads for the same word. Try to use one of them
-// instead of IIxF.
-bool SystemZShortenInst::shortenIIF(MachineInstr &MI,
- unsigned LLIxL, unsigned LLIxH) {
+// instead of IIxF.
+bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL,
+ unsigned LLIxH) {
unsigned Reg = MI.getOperand(0).getReg();
// The new opcode will clear the other half of the GR64 reg, so
// cancel if that is live.
- unsigned thisSubRegIdx = (SystemZ::GRH32BitRegClass.contains(Reg) ?
- SystemZ::subreg_h32 : SystemZ::subreg_l32);
- unsigned otherSubRegIdx = (thisSubRegIdx == SystemZ::subreg_l32 ?
- SystemZ::subreg_h32 : SystemZ::subreg_l32);
- unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx,
- &SystemZ::GR64BitRegClass);
+ unsigned thisSubRegIdx =
+ (SystemZ::GRH32BitRegClass.contains(Reg) ? SystemZ::subreg_h32
+ : SystemZ::subreg_l32);
+ unsigned otherSubRegIdx =
+ (thisSubRegIdx == SystemZ::subreg_l32 ? SystemZ::subreg_h32
+ : SystemZ::subreg_l32);
+ unsigned GR64BitReg =
+ TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass);
unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx);
if (LiveRegs.contains(OtherReg))
return false;
@@ -135,11 +141,10 @@ bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
// Calls shortenOn001 if CCLive is false. CC def operand is added in
// case of success.
-bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI,
- unsigned Opcode) {
+bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, unsigned Opcode) {
if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) {
MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
- .addReg(SystemZ::CC, RegState::ImplicitDefine);
+ .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
return true;
}
return false;
@@ -177,7 +182,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
// Set up the set of live registers at the end of MBB (live out)
LiveRegs.clear();
- LiveRegs.addLiveOuts(&MBB);
+ LiveRegs.addLiveOuts(MBB);
// Iterate backwards through the block looking for instructions to change.
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
@@ -264,6 +269,9 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
}
bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
+ if (skipFunction(*F.getFunction()))
+ return false;
+
const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>();
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 0b49fcdd8f78..67d5e0179fe2 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -40,21 +40,11 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasPopulationCount(false), HasFastSerialization(false),
HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
HasTransactionalExecution(false), HasProcessorAssist(false),
- HasVector(false), TargetTriple(TT),
+ HasVector(false), HasLoadStoreOnCond2(false), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
TSInfo(), FrameLowering() {}
-// Return true if GV binds locally under reloc model RM.
-static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
- // For non-PIC, all symbols bind locally.
- if (RM == Reloc::Static)
- return true;
-
- return GV->hasLocalLinkage() || !GV->hasDefaultVisibility();
-}
-
bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
- Reloc::Model RM,
CodeModel::Model CM) const {
// PC32DBL accesses require the low bit to be clear. Note that a zero
// value selects the default alignment and is therefore OK.
@@ -63,7 +53,7 @@ bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
// For the small model, all locally-binding symbols are in range.
if (CM == CodeModel::Small)
- return bindsLocally(GV, RM);
+ return TLInfo.getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
// For Medium and above, assume that the symbol is not within the 4GB range.
// Taking the address of locally-defined text would be OK, but that
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index f7eaf01cb77e..6007f6fc9c4c 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -45,6 +45,7 @@ protected:
bool HasTransactionalExecution;
bool HasProcessorAssist;
bool HasVector;
+ bool HasLoadStoreOnCond2;
private:
Triple TargetTriple;
@@ -69,7 +70,7 @@ public:
const SystemZTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
@@ -85,6 +86,9 @@ public:
// Return true if the target has the load/store-on-condition facility.
bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; }
+ // Return true if the target has the load/store-on-condition facility 2.
+ bool hasLoadStoreOnCond2() const { return HasLoadStoreOnCond2; }
+
// Return true if the target has the high-word facility.
bool hasHighWord() const { return HasHighWord; }
@@ -116,8 +120,7 @@ public:
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
- bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
- CodeModel::Model CM) const;
+ bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
};
diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp
new file mode 100644
index 000000000000..96a9ef82c125
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTDC.cpp
@@ -0,0 +1,382 @@
+//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for instructions that can be replaced by a Test Data Class
+// instruction, and replaces them when profitable.
+//
+// Roughly, the following rules are recognized:
+//
+// 1: fcmp pred X, 0 -> tdc X, mask
+// 2: fcmp pred X, +-inf -> tdc X, mask
+// 3: fcmp pred X, +-minnorm -> tdc X, mask
+// 4: tdc (fabs X), mask -> tdc X, newmask
+// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
+// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
+// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
+// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
+// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
+// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
+//
+// The pass works in 4 steps:
+//
+// 1. All fcmp and icmp instructions in a function are checked for a match
+// with rules 1-3 and 5-7. Their TDC equivalents are stored in
+// the ConvertedInsts mapping. If the operand of a fcmp instruction is
+// a fabs, it's also folded according to rule 4.
+// 2. All and/or/xor i1 instructions whose both operands have been already
+// mapped are mapped according to rules 8-10. LogicOpsWorklist is used
+// as a queue of instructions to check.
+// 3. All mapped instructions that are considered worthy of conversion (ie.
+// replacing them will actually simplify the final code) are replaced
+// with a call to the s390.tdc intrinsic.
+// 4. All intermediate results of replaced instructions are removed if unused.
+//
+// Instructions that match rules 1-3 are considered unworthy of conversion
+// on their own (since a comparison instruction is superior), but are mapped
+// in the hopes of folding the result using rules 4 and 8-10 (likely removing
+// the original comparison in the process).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include <deque>
+#include <set>
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeSystemZTDCPassPass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZTDCPass : public FunctionPass {
+public:
+ static char ID;
+ SystemZTDCPass() : FunctionPass(ID) {
+ initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+private:
+ // Maps seen instructions that can be mapped to a TDC, values are
+ // (TDC operand, TDC mask, worthy flag) triples.
+ MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
+ // The queue of and/or/xor i1 instructions to be potentially folded.
+ std::vector<BinaryOperator *> LogicOpsWorklist;
+ // Instructions matched while folding, to be removed at the end if unused.
+ std::set<Instruction *> PossibleJunk;
+
+ // Tries to convert a fcmp instruction.
+ void convertFCmp(CmpInst &I);
+
+ // Tries to convert an icmp instruction.
+ void convertICmp(CmpInst &I);
+
+ // Tries to convert an i1 and/or/xor instruction, whose both operands
+ // have been already converted.
+ void convertLogicOp(BinaryOperator &I);
+
+ // Marks an instruction as converted - adds it to ConvertedInsts and adds
+ // any and/or/xor i1 users to the queue.
+ void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
+ ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
+ auto &M = *I->getFunction()->getParent();
+ auto &Ctx = M.getContext();
+ for (auto *U : I->users()) {
+ auto *LI = dyn_cast<BinaryOperator>(U);
+ if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
+ (LI->getOpcode() == Instruction::And ||
+ LI->getOpcode() == Instruction::Or ||
+ LI->getOpcode() == Instruction::Xor)) {
+ LogicOpsWorklist.push_back(LI);
+ }
+ }
+ }
+};
+
+} // end anonymous namespace
+
+char SystemZTDCPass::ID = 0;
+INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
+ "SystemZ Test Data Class optimization", false, false)
+
+FunctionPass *llvm::createSystemZTDCPass() {
+ return new SystemZTDCPass();
+}
+
+void SystemZTDCPass::convertFCmp(CmpInst &I) {
+ Value *Op0 = I.getOperand(0);
+ auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
+ auto Pred = I.getPredicate();
+ // Only comparisons with consts are interesting.
+ if (!Const)
+ return;
+ // Compute the smallest normal number (and its negation).
+ auto &Sem = Op0->getType()->getFltSemantics();
+ APFloat Smallest = APFloat::getSmallestNormalized(Sem);
+ APFloat NegSmallest = Smallest;
+ NegSmallest.changeSign();
+ // Check if Const is one of our recognized consts.
+ int WhichConst;
+ if (Const->isZero()) {
+ // All comparisons with 0 can be converted.
+ WhichConst = 0;
+ } else if (Const->isInfinity()) {
+ // Likewise for infinities.
+ WhichConst = Const->isNegative() ? 2 : 1;
+ } else if (Const->isExactlyValue(Smallest)) {
+ // For Smallest, we cannot do EQ separately from GT.
+ if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
+ (Pred & CmpInst::FCMP_OGE) != 0)
+ return;
+ WhichConst = 3;
+ } else if (Const->isExactlyValue(NegSmallest)) {
+ // Likewise for NegSmallest, we cannot do EQ separately from LT.
+ if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
+ (Pred & CmpInst::FCMP_OLE) != 0)
+ return;
+ WhichConst = 4;
+ } else {
+ // Not one of our special constants.
+ return;
+ }
+ // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
+ static const int Masks[][4] = {
+ { // 0
+ SystemZ::TDCMASK_ZERO, // eq
+ SystemZ::TDCMASK_POSITIVE, // gt
+ SystemZ::TDCMASK_NEGATIVE, // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // inf
+ SystemZ::TDCMASK_INFINITY_PLUS, // eq
+ 0, // gt
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_NEGATIVE |
+ SystemZ::TDCMASK_NORMAL_PLUS |
+ SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // -inf
+ SystemZ::TDCMASK_INFINITY_MINUS, // eq
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_POSITIVE |
+ SystemZ::TDCMASK_NORMAL_MINUS |
+ SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+ 0, // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // minnorm
+ 0, // eq (unsupported)
+ (SystemZ::TDCMASK_NORMAL_PLUS |
+ SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_NEGATIVE |
+ SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // -minnorm
+ 0, // eq (unsupported)
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_POSITIVE |
+ SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+ (SystemZ::TDCMASK_NORMAL_MINUS |
+ SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
+ SystemZ::TDCMASK_NAN, // un
+ }
+ };
+ // Construct the mask as a combination of the partial masks.
+ int Mask = 0;
+ if (Pred & CmpInst::FCMP_OEQ)
+ Mask |= Masks[WhichConst][0];
+ if (Pred & CmpInst::FCMP_OGT)
+ Mask |= Masks[WhichConst][1];
+ if (Pred & CmpInst::FCMP_OLT)
+ Mask |= Masks[WhichConst][2];
+ if (Pred & CmpInst::FCMP_UNO)
+ Mask |= Masks[WhichConst][3];
+ // A lone fcmp is unworthy of tdc conversion on its own, but may become
+ // worthy if combined with fabs.
+ bool Worthy = false;
+ if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
+ Function *F = CI->getCalledFunction();
+ if (F && F->getIntrinsicID() == Intrinsic::fabs) {
+ // Fold with fabs - adjust the mask appropriately.
+ Mask &= SystemZ::TDCMASK_PLUS;
+ Mask |= Mask >> 1;
+ Op0 = CI->getArgOperand(0);
+ // A combination of fcmp with fabs is a win, unless the constant
+ // involved is 0 (which is handled by later passes).
+ Worthy = WhichConst != 0;
+ PossibleJunk.insert(CI);
+ }
+ }
+ converted(&I, Op0, Mask, Worthy);
+}
+
+void SystemZTDCPass::convertICmp(CmpInst &I) {
+ Value *Op0 = I.getOperand(0);
+ auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
+ auto Pred = I.getPredicate();
+ // All our icmp rules involve comparisons with consts.
+ if (!Const)
+ return;
+ if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
+ // Check for icmp+bitcast used for signbit.
+ if (!Cast->getSrcTy()->isFloatTy() &&
+ !Cast->getSrcTy()->isDoubleTy() &&
+ !Cast->getSrcTy()->isFP128Ty())
+ return;
+ Value *V = Cast->getOperand(0);
+ int Mask;
+ if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
+ // icmp slt (bitcast X), 0 - set if sign bit true
+ Mask = SystemZ::TDCMASK_MINUS;
+ } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
+ // icmp sgt (bitcast X), -1 - set if sign bit false
+ Mask = SystemZ::TDCMASK_PLUS;
+ } else {
+ // Not a sign bit check.
+ return;
+ }
+ PossibleJunk.insert(Cast);
+ converted(&I, V, Mask, true);
+ } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
+ // Check if this is a pre-existing call of our tdc intrinsic.
+ Function *F = CI->getCalledFunction();
+ if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
+ return;
+ if (!Const->isZero())
+ return;
+ Value *V = CI->getArgOperand(0);
+ auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ // Bail if the mask is not a constant.
+ if (!MaskC)
+ return;
+ int Mask = MaskC->getZExtValue();
+ Mask &= SystemZ::TDCMASK_ALL;
+ if (Pred == CmpInst::ICMP_NE) {
+ // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
+ } else if (Pred == CmpInst::ICMP_EQ) {
+ // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
+ Mask ^= SystemZ::TDCMASK_ALL;
+ } else {
+ // An unknown comparison - ignore.
+ return;
+ }
+ PossibleJunk.insert(CI);
+ converted(&I, V, Mask, false);
+ }
+}
+
+void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
+ Value *Op0, *Op1;
+ int Mask0, Mask1;
+ bool Worthy0, Worthy1;
+ std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
+ std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
+ if (Op0 != Op1)
+ return;
+ int Mask;
+ switch (I.getOpcode()) {
+ case Instruction::And:
+ Mask = Mask0 & Mask1;
+ break;
+ case Instruction::Or:
+ Mask = Mask0 | Mask1;
+ break;
+ case Instruction::Xor:
+ Mask = Mask0 ^ Mask1;
+ break;
+ default:
+ llvm_unreachable("Unknown op in convertLogicOp");
+ }
+ converted(&I, Op0, Mask, true);
+}
+
+bool SystemZTDCPass::runOnFunction(Function &F) {
+ ConvertedInsts.clear();
+ LogicOpsWorklist.clear();
+ PossibleJunk.clear();
+
+ // Look for icmp+fcmp instructions.
+ for (auto &I : instructions(F)) {
+ if (I.getOpcode() == Instruction::FCmp)
+ convertFCmp(cast<CmpInst>(I));
+ else if (I.getOpcode() == Instruction::ICmp)
+ convertICmp(cast<CmpInst>(I));
+ }
+
+ // If none found, bail already.
+ if (ConvertedInsts.empty())
+ return false;
+
+ // Process the queue of logic instructions.
+ while (!LogicOpsWorklist.empty()) {
+ BinaryOperator *Op = LogicOpsWorklist.back();
+ LogicOpsWorklist.pop_back();
+ // If both operands mapped, and the instruction itself not yet mapped,
+ // convert it.
+ if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
+ ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
+ !ConvertedInsts.count(Op))
+ convertLogicOp(*Op);
+ }
+
+ // Time to actually replace the instructions. Do it in the reverse order
+ // of finding them, since there's a good chance the earlier ones will be
+ // unused (due to being folded into later ones).
+ Module &M = *F.getParent();
+ auto &Ctx = M.getContext();
+ Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ bool MadeChange = false;
+ for (auto &It : reverse(ConvertedInsts)) {
+ Instruction *I = It.first;
+ Value *V;
+ int Mask;
+ bool Worthy;
+ std::tie(V, Mask, Worthy) = It.second;
+ if (!I->user_empty()) {
+ // If used and unworthy of conversion, skip it.
+ if (!Worthy)
+ continue;
+ // Call the intrinsic, compare result with 0.
+ Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
+ V->getType());
+ IRBuilder<> IRB(I);
+ Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
+ Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
+ Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
+ I->replaceAllUsesWith(ICmp);
+ }
+ // If unused, or used and converted, remove it.
+ I->eraseFromParent();
+ MadeChange = true;
+ }
+
+ if (!MadeChange)
+ return false;
+
+ // We've actually done something - now clear misc accumulated junk (fabs,
+ // bitcast).
+ for (auto *I : PossibleJunk)
+ if (I->user_empty())
+ I->eraseFromParent();
+
+ return true;
+}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f305e85f6cfe..85a3f6f4a8be 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -10,6 +10,7 @@
#include "SystemZTargetMachine.h"
#include "SystemZTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -79,13 +80,22 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
return Ret;
}
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
+ // Static code is suitable for use in a dynamic executable; there is no
+ // separate DynamicNoPIC model.
+ if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
+ return Reloc::Static;
+ return *RM;
+}
+
SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
- RM, CM, OL),
+ getEffectiveRelocModel(RM), CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -112,6 +122,9 @@ public:
} // end anonymous namespace
void SystemZPassConfig::addIRPasses() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZTDCPass());
+
TargetPassConfig::addIRPasses();
}
@@ -125,8 +138,7 @@ bool SystemZPassConfig::addInstSelector() {
}
void SystemZPassConfig::addPreSched2() {
- if (getOptLevel() != CodeGenOpt::None &&
- getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond())
+ if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 1a8f1f7f3aaa..69cf9bc6e525 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -29,7 +29,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
public:
SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~SystemZTargetMachine() override;
diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile
deleted file mode 100644
index 0be80eb4e6ad..000000000000
--- a/lib/Target/SystemZ/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/SystemZ/TargetInfo/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common