diff options
Diffstat (limited to 'llvm/lib/Target/SystemZ')
33 files changed, 1093 insertions, 416 deletions
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 93c4ce4b5ccc..607266d552a6 100644 --- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -1304,14 +1304,23 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, if (getParser().parseExpression(Expr)) return MatchOperand_NoMatch; + auto isOutOfRangeConstant = [&](const MCExpr *E) -> bool { + if (auto *CE = dyn_cast<MCConstantExpr>(E)) { + int64_t Value = CE->getValue(); + if ((Value & 1) || Value < MinVal || Value > MaxVal) + return true; + } + return false; + }; + // For consistency with the GNU assembler, treat immediates as offsets // from ".". if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) { - int64_t Value = CE->getValue(); - if ((Value & 1) || Value < MinVal || Value > MaxVal) { + if (isOutOfRangeConstant(CE)) { Error(StartLoc, "offset out of range"); return MatchOperand_ParseFail; } + int64_t Value = CE->getValue(); MCSymbol *Sym = Ctx.createTempSymbol(); Out.EmitLabel(Sym); const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, @@ -1319,6 +1328,15 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx); } + // For consistency with the GNU assembler, conservatively assume that a + // constant offset must by itself be within the given size range. + if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr)) + if (isOutOfRangeConstant(BE->getLHS()) || + isOutOfRangeConstant(BE->getRHS())) { + Error(StartLoc, "offset out of range"); + return MatchOperand_ParseFail; + } + // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol. const MCExpr *Sym = nullptr; if (AllowTLS && getLexer().is(AsmToken::Colon)) { @@ -1371,6 +1389,6 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, } // Force static initialization. -extern "C" void LLVMInitializeSystemZAsmParser() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmParser() { RegisterMCAsmParser<SystemZAsmParser> X(getTheSystemZTarget()); } diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 70c26db33ced..e42aa14fe589 100644 --- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -34,7 +34,6 @@ public: DecodeStatus getInstruction(MCInst &instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, - raw_ostream &VStream, raw_ostream &CStream) const override; }; @@ -46,7 +45,7 @@ static MCDisassembler *createSystemZDisassembler(const Target &T, return new SystemZDisassembler(STI, Ctx); } -extern "C" void LLVMInitializeSystemZDisassembler() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZDisassembler() { // Register the disassembler. TargetRegistry::RegisterMCDisassembler(getTheSystemZTarget(), createSystemZDisassembler); @@ -449,7 +448,6 @@ static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field, DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, - raw_ostream &OS, raw_ostream &CS) const { // Get the first two bytes of the instruction. Size = 0; diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp index 91cb35dd72f2..5893b227c08c 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp @@ -41,8 +41,12 @@ void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI, raw_ostream &O) { - if (MO.isReg()) - O << '%' << getRegisterName(MO.getReg()); + if (MO.isReg()) { + if (!MO.getReg()) + O << '0'; + else + O << '%' << getRegisterName(MO.getReg()); + } else if (MO.isImm()) O << MO.getImm(); else if (MO.isExpr()) @@ -51,10 +55,10 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI, llvm_unreachable("Invalid operand"); } -void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot, - const MCSubtargetInfo &STI) { - printInstruction(MI, O); +void SystemZInstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, const MCSubtargetInfo &STI, + raw_ostream &O) { + printInstruction(MI, Address, O); printAnnotation(O, Annot); } diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h index 4235d4e21792..5628e9252f03 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h @@ -27,7 +27,7 @@ public: : MCInstPrinter(MAI, MII, MRI) {} // Automatically generated by tblgen. - void printInstruction(const MCInst *MI, raw_ostream &O); + void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); // Print an address with the given base, displacement and index. @@ -40,8 +40,8 @@ public: // Override MCInstPrinter. void printRegName(raw_ostream &O, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, - const MCSubtargetInfo &STI) override; + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &O) override; private: // Print various types of operand. diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 3c0300cfd8f0..eb2112674a12 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -147,7 +147,8 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) { } static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, - const Triple &TT) { + const Triple &TT, + const MCTargetOptions &Options) { MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, @@ -182,7 +183,7 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, return new SystemZInstPrinter(MAI, MII, MRI); } -extern "C" void LLVMInitializeSystemZTargetMC() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() { // Register the MCAsmInfo. TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(), createSystemZMCAsmInfo); diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index 88cf589a3f10..0808160f627c 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -55,7 +55,7 @@ const unsigned CCMASK_ARITH = CCMASK_ANY; // Condition-code mask assignments for logical operations. const unsigned CCMASK_LOGICAL_ZERO = CCMASK_0 | CCMASK_2; -const unsigned CCMASK_LOGICAL_NONZERO = CCMASK_1 | CCMASK_2; +const unsigned CCMASK_LOGICAL_NONZERO = CCMASK_1 | CCMASK_3; const unsigned CCMASK_LOGICAL_CARRY = CCMASK_2 | CCMASK_3; const unsigned CCMASK_LOGICAL_NOCARRY = CCMASK_0 | CCMASK_1; const unsigned CCMASK_LOGICAL_BORROW = CCMASK_LOGICAL_NOCARRY; diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 10023e9e169c..67c4aa08f90d 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -16,11 +16,13 @@ #include "SystemZConstantPoolValue.h" #include "SystemZMCInstLower.h" #include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/TargetRegistry.h" @@ -543,9 +545,9 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer, else { MCSymbol *DotSym = OutContext.createTempSymbol(); const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext); + OutStreamer.EmitLabel(DotSym); OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BRCLAsm) .addImm(0).addExpr(Dot), STI); - OutStreamer.EmitLabel(DotSym); return 6; } } @@ -553,8 +555,17 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer, void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &Lower) { MCContext &Ctx = MF->getContext(); - if (MF->getFunction().getFnAttribute("mnop-mcount") - .getValueAsString() == "true") { + if (MF->getFunction().hasFnAttribute("mrecord-mcount")) { + MCSymbol *DotSym = OutContext.createTempSymbol(); + OutStreamer->PushSection(); + OutStreamer->SwitchSection( + Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC)); + OutStreamer->EmitSymbolValue(DotSym, 8); + OutStreamer->PopSection(); + OutStreamer->EmitLabel(DotSym); + } + + if (MF->getFunction().hasFnAttribute("mnop-mcount")) { EmitNop(Ctx, *OutStreamer, 6, getSubtargetInfo()); return; } @@ -572,7 +583,11 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { unsigned NumNOPBytes = MI.getOperand(1).getImm(); - SM.recordStackMap(MI); + auto &Ctx = OutStreamer->getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer->EmitLabel(MILabel); + + SM.recordStackMap(*MILabel, MI); assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!"); // Scan ahead to trim the shadow. @@ -601,7 +616,11 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { // [<def>], <id>, <numBytes>, <target>, <numArgs> void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower) { - SM.recordPatchPoint(MI); + auto &Ctx = OutStreamer->getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer->EmitLabel(MILabel); + + SM.recordPatchPoint(*MILabel, MI); PatchPointOpers Opers(&MI); unsigned EncodedBytes = 0; @@ -705,6 +724,6 @@ void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { } // Force static initialization. -extern "C" void LLVMInitializeSystemZAsmPrinter() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmPrinter() { RegisterAsmPrinter<SystemZAsmPrinter> X(getTheSystemZTarget()); } diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h index 82f29b6361f1..4432adc6a269 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -124,6 +124,13 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT, return true; } +inline bool CC_SystemZ_GHC_Error(unsigned &, MVT &, MVT &, + CCValAssign::LocInfo &, ISD::ArgFlagsTy &, + CCState &) { + report_fatal_error("No registers left in GHC calling convention"); + return false; +} + } // end namespace llvm #endif diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td index bbd51546ac9f..b1b7ad47671f 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -58,9 +58,34 @@ def RetCC_SystemZ : CallingConv<[ ]>; //===----------------------------------------------------------------------===// +// z/Linux argument calling conventions for GHC +//===----------------------------------------------------------------------===// +def CC_SystemZ_GHC : CallingConv<[ + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, R8, SpLim + CCIfType<[i64], CCAssignToReg<[R7D, R8D, R10D, R11D, R12D, R13D, + R6D, R2D, R3D, R4D, R5D, R9D]>>, + + // Pass in STG registers: F1, ..., F6 + CCIfType<[f32], CCAssignToReg<[F8S, F9S, F10S, F11S, F0S, F1S]>>, + + // Pass in STG registers: D1, ..., D6 + CCIfType<[f64], CCAssignToReg<[F12D, F13D, F14D, F15D, F2D, F3D]>>, + + // Pass in STG registers: XMM1, ..., XMM6 + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfFixed<CCAssignToReg<[V16, V17, V18, V19, V20, V21]>>>>, + + // Fail otherwise + CCCustom<"CC_SystemZ_GHC_Error"> +]>; + +//===----------------------------------------------------------------------===// // z/Linux argument calling conventions //===----------------------------------------------------------------------===// def CC_SystemZ : CallingConv<[ + CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_SystemZ_GHC>>, + // Promote i32 to i64 if it has an explicit extension type. // The convention is that true integer arguments that are smaller // than 64 bits should be marked as extended, but structures that @@ -128,3 +153,5 @@ def CSR_SystemZ_AllRegs : CalleeSavedRegs<(add (sequence "R%dD", 2, 15), def CSR_SystemZ_AllRegs_Vector : CalleeSavedRegs<(add (sequence "R%dD", 2, 15), (sequence "V%d", 0, 31))>; +def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>; + diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp index 946eb2ba7c79..2f0cf0317029 100644 --- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -87,6 +88,8 @@ private: SmallVectorImpl<MachineInstr *> &CCUsers); bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers); + bool convertToLogical(MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers); bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers, unsigned ConvOpc = 0); @@ -103,14 +106,6 @@ char SystemZElimCompare::ID = 0; } // end anonymous namespace -// Return true if CC is live out of MBB. -static bool isCCLiveOut(MachineBasicBlock &MBB) { - for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(SystemZ::CC)) - return true; - return false; -} - // Returns true if MI is an instruction whose output equals the value in Reg. static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { switch (MI.getOpcode()) { @@ -302,9 +297,60 @@ bool SystemZElimCompare::convertToLoadAndTest( MIB.setMemRefs(MI.memoperands()); MI.eraseFromParent(); + // Mark instruction as not raising an FP exception if applicable. We already + // verified earlier that this move is valid. + if (!Compare.mayRaiseFPException()) + MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept); + + return true; +} + +// See if MI is an instruction with an equivalent "logical" opcode that can +// be used and replace MI. This is useful for EQ/NE comparisons where the +// "nsw" flag is missing since the "logical" opcode always sets CC to reflect +// the result being zero or non-zero. +bool SystemZElimCompare::convertToLogical( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl<MachineInstr *> &CCUsers) { + + unsigned ConvOpc = 0; + switch (MI.getOpcode()) { + case SystemZ::AR: ConvOpc = SystemZ::ALR; break; + case SystemZ::ARK: ConvOpc = SystemZ::ALRK; break; + case SystemZ::AGR: ConvOpc = SystemZ::ALGR; break; + case SystemZ::AGRK: ConvOpc = SystemZ::ALGRK; break; + case SystemZ::A: ConvOpc = SystemZ::AL; break; + case SystemZ::AY: ConvOpc = SystemZ::ALY; break; + case SystemZ::AG: ConvOpc = SystemZ::ALG; break; + default: break; + } + if (!ConvOpc || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc)) + return false; + + // Operands should be identical, so just change the opcode and remove the + // dead flag on CC. + MI.setDesc(TII->get(ConvOpc)); + MI.clearRegisterDeads(SystemZ::CC); return true; } +#ifndef NDEBUG +static bool isAddWithImmediate(unsigned Opcode) { + switch(Opcode) { + case SystemZ::AHI: + case SystemZ::AHIK: + case SystemZ::AGHI: + case SystemZ::AGHIK: + case SystemZ::AFI: + case SystemZ::AIH: + case SystemZ::AGFI: + return true; + default: break; + } + return false; +} +#endif + // The CC users in CCUsers are testing the result of a comparison of some // value X against zero and we know that any CC value produced by MI would // also reflect the value of X. ConvOpc may be used to pass the transfomed @@ -315,65 +361,116 @@ bool SystemZElimCompare::adjustCCMasksForInstr( MachineInstr &MI, MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers, unsigned ConvOpc) { + unsigned CompareFlags = Compare.getDesc().TSFlags; + unsigned CompareCCValues = SystemZII::getCCValues(CompareFlags); int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode()); const MCInstrDesc &Desc = TII->get(Opcode); unsigned MIFlags = Desc.TSFlags; - // See which compare-style condition codes are available. - unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); + // If Compare may raise an FP exception, we can only eliminate it + // if MI itself would have already raised the exception. + if (Compare.mayRaiseFPException()) { + // If the caller will change MI to use ConvOpc, only test whether + // ConvOpc is suitable; it is on the caller to set the MI flag. + if (ConvOpc && !Desc.mayRaiseFPException()) + return false; + // If the caller will not change MI, we test the MI flag here. + if (!ConvOpc && !MI.mayRaiseFPException()) + return false; + } + // See which compare-style condition codes are available. + unsigned CCValues = SystemZII::getCCValues(MIFlags); + unsigned ReusableCCMask = CCValues; // For unsigned comparisons with zero, only equality makes sense. - unsigned CompareFlags = Compare.getDesc().TSFlags; if (CompareFlags & SystemZII::IsLogical) ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; - + unsigned OFImplies = 0; + bool LogicalMI = false; + bool MIEquivalentToCmp = false; + if (MI.getFlag(MachineInstr::NoSWrap) && + (MIFlags & SystemZII::CCIfNoSignedWrap)) { + // If MI has the NSW flag set in combination with the + // SystemZII::CCIfNoSignedWrap flag, all CCValues are valid. + } + else if ((MIFlags & SystemZII::CCIfNoSignedWrap) && + MI.getOperand(2).isImm()) { + // Signed addition of immediate. If adding a positive immediate + // overflows, the result must be less than zero. If adding a negative + // immediate overflows, the result must be larger than zero (except in + // the special case of adding the minimum value of the result range, in + // which case we cannot predict whether the result is larger than or + // equal to zero). + assert(isAddWithImmediate(Opcode) && "Expected an add with immediate."); + assert(!MI.mayLoadOrStore() && "Expected an immediate term."); + int64_t RHS = MI.getOperand(2).getImm(); + if (SystemZ::GRX32BitRegClass.contains(MI.getOperand(0).getReg()) && + RHS == INT32_MIN) + return false; + OFImplies = (RHS > 0 ? SystemZ::CCMASK_CMP_LT : SystemZ::CCMASK_CMP_GT); + } + else if ((MIFlags & SystemZII::IsLogical) && CCValues) { + // Use CCMASK_CMP_EQ to match with CCUsers. On success CCMask:s will be + // converted to CCMASK_LOGICAL_ZERO or CCMASK_LOGICAL_NONZERO. + LogicalMI = true; + ReusableCCMask = SystemZ::CCMASK_CMP_EQ; + } + else { + ReusableCCMask &= SystemZII::getCompareZeroCCMask(MIFlags); + assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); + MIEquivalentToCmp = + ReusableCCMask == CCValues && CCValues == CompareCCValues; + } if (ReusableCCMask == 0) return false; - unsigned CCValues = SystemZII::getCCValues(MIFlags); - assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); - - bool MIEquivalentToCmp = - (ReusableCCMask == CCValues && - CCValues == SystemZII::getCCValues(CompareFlags)); - if (!MIEquivalentToCmp) { // Now check whether these flags are enough for all users. SmallVector<MachineOperand *, 4> AlterMasks; for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { - MachineInstr *MI = CCUsers[I]; + MachineInstr *CCUserMI = CCUsers[I]; // Fail if this isn't a use of CC that we understand. - unsigned Flags = MI->getDesc().TSFlags; + unsigned Flags = CCUserMI->getDesc().TSFlags; unsigned FirstOpNum; if (Flags & SystemZII::CCMaskFirst) FirstOpNum = 0; else if (Flags & SystemZII::CCMaskLast) - FirstOpNum = MI->getNumExplicitOperands() - 2; + FirstOpNum = CCUserMI->getNumExplicitOperands() - 2; else return false; // Check whether the instruction predicate treats all CC values // outside of ReusableCCMask in the same way. In that case it // doesn't matter what those CC values mean. - unsigned CCValid = MI->getOperand(FirstOpNum).getImm(); - unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm(); + unsigned CCValid = CCUserMI->getOperand(FirstOpNum).getImm(); + unsigned CCMask = CCUserMI->getOperand(FirstOpNum + 1).getImm(); + assert(CCValid == CompareCCValues && (CCMask & ~CCValid) == 0 && + "Corrupt CC operands of CCUser."); unsigned OutValid = ~ReusableCCMask & CCValid; unsigned OutMask = ~ReusableCCMask & CCMask; if (OutMask != 0 && OutMask != OutValid) return false; - AlterMasks.push_back(&MI->getOperand(FirstOpNum)); - AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1)); + AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum)); + AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum + 1)); } // All users are OK. Adjust the masks for MI. for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { AlterMasks[I]->setImm(CCValues); unsigned CCMask = AlterMasks[I + 1]->getImm(); - if (CCMask & ~ReusableCCMask) - AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | - (CCValues & ~ReusableCCMask)); + if (LogicalMI) { + // Translate the CCMask into its "logical" value. + CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ? + SystemZ::CCMASK_LOGICAL_ZERO : SystemZ::CCMASK_LOGICAL_NONZERO); + CCMask &= CCValues; // Logical subtracts never set CC=0. + } else { + if (CCMask & ~ReusableCCMask) + CCMask = (CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask); + CCMask |= (CCMask & OFImplies) ? SystemZ::CCMASK_ARITH_OVERFLOW : 0; + } + AlterMasks[I + 1]->setImm(CCMask); } } @@ -450,7 +547,9 @@ bool SystemZElimCompare::optimizeCompareZero( } // Try to eliminate Compare by reusing a CC result from MI. if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) || - (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) { + (!CCRefs.Def && + (adjustCCMasksForInstr(MI, Compare, CCUsers) || + convertToLogical(MI, Compare, CCUsers)))) { EliminatedComparisons += 1; return true; } @@ -461,6 +560,12 @@ bool SystemZElimCompare::optimizeCompareZero( CCRefs |= getRegReferences(MI, SystemZ::CC); if (CCRefs.Use && CCRefs.Def) break; + // Eliminating a Compare that may raise an FP exception will move + // raising the exception to some earlier MI. We cannot do this if + // there is anything in between that might change exception flags. + if (Compare.mayRaiseFPException() && + (MI.isCall() || MI.hasUnmodeledSideEffects())) + break; } // Also do a forward search to handle cases where an instruction after the @@ -595,7 +700,9 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { // Walk backwards through the block looking for comparisons, recording // all CC users as we go. The subroutines can delete Compare and // instructions before it. - bool CompleteCCUsers = !isCCLiveOut(MBB); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(MBB); + bool CompleteCCUsers = !LiveRegs.contains(SystemZ::CC); SmallVector<MachineInstr *, 4> CCUsers; MachineBasicBlock::iterator MBBI = MBB.end(); while (MBBI != MBB.begin()) { diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 0b8b6880accc..3cdf6bf98ee0 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -21,8 +21,8 @@ using namespace llvm; namespace { -// The ABI-defined register save slots, relative to the incoming stack -// pointer. +// The ABI-defined register save slots, relative to the CFA (i.e. +// incoming stack pointer + SystemZMC::CallFrameSize). static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { { SystemZ::R2D, 0x10 }, { SystemZ::R3D, 0x18 }, @@ -47,18 +47,125 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { SystemZFrameLowering::SystemZFrameLowering() : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), - -SystemZMC::CallFrameSize, Align(8), - false /* StackRealignable */) { + 0, Align(8), false /* StackRealignable */), + RegSpillOffsets(0) { + // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not + // equal to the incoming stack pointer, but to incoming stack pointer plus + // 160. Instead of using a Local Area Offset, the Register save area will + // be occupied by fixed frame objects, and all offsets are actually + // relative to CFA. + // Create a mapping from register number to save slot offset. + // These offsets are relative to the start of the register save area. RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset; } -const TargetFrameLowering::SpillSlot * -SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { - NumEntries = array_lengthof(SpillOffsetTable); - return SpillOffsetTable; +static bool usePackedStack(MachineFunction &MF) { + bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack"); + bool IsVarArg = MF.getFunction().isVarArg(); + bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC; + bool BackChain = MF.getFunction().hasFnAttribute("backchain"); + bool FramAddressTaken = MF.getFrameInfo().isFrameAddressTaken(); + if (HasPackedStackAttr && BackChain) + report_fatal_error("packed-stack with backchain is currently unsupported."); + return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain && + !FramAddressTaken; +} + +bool SystemZFrameLowering:: +assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + bool IsVarArg = MF.getFunction().isVarArg(); + if (CSI.empty()) + return true; // Early exit if no callee saved registers are modified! + + unsigned LowGPR = 0; + unsigned HighGPR = SystemZ::R15D; + int StartSPOffset = SystemZMC::CallFrameSize; + int CurrOffset; + if (!usePackedStack(MF)) { + for (auto &CS : CSI) { + unsigned Reg = CS.getReg(); + int Offset = RegSpillOffsets[Reg]; + if (Offset) { + if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) { + LowGPR = Reg; + StartSPOffset = Offset; + } + Offset -= SystemZMC::CallFrameSize; + int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset); + CS.setFrameIdx(FrameIdx); + } else + CS.setFrameIdx(INT32_MAX); + } + + // Save the range of call-saved registers, for use by the + // prologue/epilogue inserters. + ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset); + if (IsVarArg) { + // Also save the GPR varargs, if any. R6D is call-saved, so would + // already be included, but we also need to handle the call-clobbered + // argument registers. + unsigned FirstGPR = ZFI->getVarArgsFirstGPR(); + if (FirstGPR < SystemZ::NumArgGPRs) { + unsigned Reg = SystemZ::ArgGPRs[FirstGPR]; + int Offset = RegSpillOffsets[Reg]; + if (StartSPOffset > Offset) { + LowGPR = Reg; StartSPOffset = Offset; + } + } + } + ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset); + + CurrOffset = -SystemZMC::CallFrameSize; + } else { + // Packed stack: put all the GPRs at the top of the Register save area. + uint32_t LowGR64Num = UINT32_MAX; + for (auto &CS : CSI) { + unsigned Reg = CS.getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) { + unsigned GR64Num = SystemZMC::getFirstReg(Reg); + int Offset = -8 * (15 - GR64Num + 1); + if (LowGR64Num > GR64Num) { + LowGR64Num = GR64Num; + StartSPOffset = SystemZMC::CallFrameSize + Offset; + } + int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset); + CS.setFrameIdx(FrameIdx); + } else + CS.setFrameIdx(INT32_MAX); + } + if (LowGR64Num < UINT32_MAX) + LowGPR = SystemZMC::GR64Regs[LowGR64Num]; + + // Save the range of call-saved registers, for use by the + // prologue/epilogue inserters. + ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset); + ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset); + + CurrOffset = LowGPR ? -(SystemZMC::CallFrameSize - StartSPOffset) : 0; + } + + // Create fixed stack objects for the remaining registers. + for (auto &CS : CSI) { + if (CS.getFrameIdx() != INT32_MAX) + continue; + unsigned Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + unsigned Size = TRI->getSpillSize(*RC); + CurrOffset -= Size; + assert(CurrOffset % 8 == 0 && + "8-byte alignment required for for all register save slots"); + int FrameIdx = MFFrame.CreateFixedSpillStackObject(Size, CurrOffset); + CS.setFrameIdx(FrameIdx); + } + + return true; } void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF, @@ -141,53 +248,21 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool IsVarArg = MF.getFunction().isVarArg(); DebugLoc DL; - // Scan the call-saved GPRs and find the bounds of the register spill area. - unsigned LowGPR = 0; - unsigned HighGPR = SystemZ::R15D; - unsigned StartOffset = -1U; - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); - if (SystemZ::GR64BitRegClass.contains(Reg)) { - unsigned Offset = RegSpillOffsets[Reg]; - assert(Offset && "Unexpected GPR save"); - if (StartOffset > Offset) { - LowGPR = Reg; - StartOffset = Offset; - } - } - } - - // Save the range of call-saved registers, for use by the epilogue inserter. - ZFI->setLowSavedGPR(LowGPR); - ZFI->setHighSavedGPR(HighGPR); - - // Include the GPR varargs, if any. R6D is call-saved, so would - // be included by the loop above, but we also need to handle the - // call-clobbered argument registers. - if (IsVarArg) { - unsigned FirstGPR = ZFI->getVarArgsFirstGPR(); - if (FirstGPR < SystemZ::NumArgGPRs) { - unsigned Reg = SystemZ::ArgGPRs[FirstGPR]; - unsigned Offset = RegSpillOffsets[Reg]; - if (StartOffset > Offset) { - LowGPR = Reg; StartOffset = Offset; - } - } - } - // Save GPRs - if (LowGPR) { - assert(LowGPR != HighGPR && "Should be saving %r15 and something else"); + SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs(); + if (SpillGPRs.LowGPR) { + assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR && + "Should be saving %r15 and something else"); // Build an STMG instruction. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG)); // Add the explicit register operands. - addSavedGPR(MBB, MIB, LowGPR, false); - addSavedGPR(MBB, MIB, HighGPR, false); + addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false); + addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false); // Add the address. - MIB.addReg(SystemZ::R15D).addImm(StartOffset); + MIB.addReg(SystemZ::R15D).addImm(SpillGPRs.GPROffset); // Make sure all call-saved GPRs are included as operands and are // marked as live on entry. @@ -248,30 +323,29 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // Restore call-saved GPRs (but not call-clobbered varargs, which at // this point might hold return values). - unsigned LowGPR = ZFI->getLowSavedGPR(); - unsigned HighGPR = ZFI->getHighSavedGPR(); - unsigned StartOffset = RegSpillOffsets[LowGPR]; - if (LowGPR) { + SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs(); + if (RestoreGPRs.LowGPR) { // If we saved any of %r2-%r5 as varargs, we should also be saving // and restoring %r6. If we're saving %r6 or above, we should be // restoring it too. - assert(LowGPR != HighGPR && "Should be loading %r15 and something else"); + assert(RestoreGPRs.LowGPR != RestoreGPRs.HighGPR && + "Should be loading %r15 and something else"); // Build an LMG instruction. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG)); // Add the explicit register operands. - MIB.addReg(LowGPR, RegState::Define); - MIB.addReg(HighGPR, RegState::Define); + MIB.addReg(RestoreGPRs.LowGPR, RegState::Define); + MIB.addReg(RestoreGPRs.HighGPR, RegState::Define); // Add the address. MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D); - MIB.addImm(StartOffset); + MIB.addImm(RestoreGPRs.GPROffset); // Do a second scan adding regs as being defined by instruction for (unsigned I = 0, E = CSI.size(); I != E; ++I) { unsigned Reg = CSI[I].getReg(); - if (Reg != LowGPR && Reg != HighGPR && + if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR && SystemZ::GR64BitRegClass.contains(Reg)) MIB.addReg(Reg, RegState::ImplicitDefine); } @@ -284,16 +358,20 @@ void SystemZFrameLowering:: processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFFrame = MF.getFrameInfo(); + + if (!usePackedStack(MF)) + // Always create the full incoming register save area. + getOrCreateFramePointerSaveIndex(MF); + // Get the size of our stack frame to be allocated ... uint64_t StackSize = (MFFrame.estimateStackSize(MF) + SystemZMC::CallFrameSize); // ... and the maximum offset we may need to reach into the // caller's frame to access the save area or stack arguments. - int64_t MaxArgOffset = SystemZMC::CallFrameSize; + int64_t MaxArgOffset = 0; for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) if (MFFrame.getObjectOffset(I) >= 0) { - int64_t ArgOffset = SystemZMC::CallFrameSize + - MFFrame.getObjectOffset(I) + + int64_t ArgOffset = MFFrame.getObjectOffset(I) + MFFrame.getObjectSize(I); MaxArgOffset = std::max(MaxArgOffset, ArgOffset); } @@ -351,6 +429,23 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo(); bool HasFP = hasFP(MF); + // In GHC calling convention C stack space, including the ABI-defined + // 160-byte base area, is (de)allocated by GHC itself. This stack space may + // be used by LLVM as spill slots for the tail recursive GHC functions. Thus + // do not allocate stack space here, too. + if (MF.getFunction().getCallingConv() == CallingConv::GHC) { + if (MFFrame.getStackSize() > 2048 * sizeof(long)) { + report_fatal_error( + "Pre allocated stack space for GHC function is too small"); + } + if (HasFP) { + report_fatal_error( + "In GHC calling convention a frame pointer is not supported"); + } + MFFrame.setStackSize(MFFrame.getStackSize() + SystemZMC::CallFrameSize); + return; + } + // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; @@ -358,7 +453,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, // The current offset of the stack pointer from the CFA. int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP; - if (ZFI->getLowSavedGPR()) { + if (ZFI->getSpillGPRRegs().LowGPR) { // Skip over the GPR saves. if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG) ++MBBI; @@ -369,7 +464,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, for (auto &Save : CSI) { unsigned Reg = Save.getReg(); if (SystemZ::GR64BitRegClass.contains(Reg)) { - int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg]; + int FI = Save.getFrameIdx(); + int64_t Offset = MFFrame.getObjectOffset(FI); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -382,10 +478,19 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, // We need to allocate the ABI-defined 160-byte base area whenever // we allocate stack space for our own use and whenever we call another // function. - if (StackSize || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls()) { + bool HasStackObject = false; + for (unsigned i = 0, e = MFFrame.getObjectIndexEnd(); i != e; ++i) + if (!MFFrame.isDeadObjectIndex(i)) { + HasStackObject = true; + break; + } + if (HasStackObject || MFFrame.hasCalls()) StackSize += SystemZMC::CallFrameSize; - MFFrame.setStackSize(StackSize); - } + // Don't allocate the incoming reg save area. + StackSize = StackSize > SystemZMC::CallFrameSize + ? StackSize - SystemZMC::CallFrameSize + : 0; + MFFrame.setStackSize(StackSize); if (StackSize) { // Determine if we want to store a backchain. @@ -410,7 +515,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, if (StoreBackchain) BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) - .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0); + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0) + .addReg(0); } if (HasFP) { @@ -478,11 +584,15 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF, SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); MachineFrameInfo &MFFrame = MF.getFrameInfo(); + // See SystemZFrameLowering::emitPrologue + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + return; + // Skip the return instruction. assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks"); uint64_t StackSize = MFFrame.getStackSize(); - if (ZFI->getLowSavedGPR()) { + if (ZFI->getRestoreGPRRegs().LowGPR) { --MBBI; unsigned Opcode = MBBI->getOpcode(); if (Opcode != SystemZ::LMG) @@ -527,6 +637,16 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return true; } +int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + // Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so + // add that difference here. + int64_t Offset = + TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg); + return Offset + SystemZMC::CallFrameSize; +} + MachineBasicBlock::iterator SystemZFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, @@ -543,3 +663,15 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, llvm_unreachable("Unexpected call frame instruction"); } } + +int SystemZFrameLowering:: +getOrCreateFramePointerSaveIndex(MachineFunction &MF) const { + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + int FI = ZFI->getFramePointerSaveIndex(); + if (!FI) { + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + FI = MFFrame.CreateFixedObject(8, -SystemZMC::CallFrameSize, false); + ZFI->setFramePointerSaveIndex(FI); + } + return FI; +} diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 71ef3e4dc240..4189a92b8294 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -24,8 +24,10 @@ public: // Override TargetFrameLowering. bool isFPCloseToIncomingSP() const override { return false; } - const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const - override; + bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const override; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, @@ -43,6 +45,8 @@ public: void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; @@ -52,6 +56,9 @@ public: unsigned getRegSpillOffset(unsigned Reg) const { return RegSpillOffsets[Reg]; } + + // Get or create the frame index of where the old frame pointer is stored. + int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const; }; } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 751034c2d41a..3927a977e6fc 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -347,9 +347,12 @@ public: bool runOnMachineFunction(MachineFunction &MF) override { const Function &F = MF.getFunction(); - if (F.getFnAttribute("mnop-mcount").getValueAsString() == "true" && - F.getFnAttribute("fentry-call").getValueAsString() != "true") - report_fatal_error("mnop-mcount only supported with fentry-call"); + if (F.getFnAttribute("fentry-call").getValueAsString() != "true") { + if (F.hasFnAttribute("mnop-mcount")) + report_fatal_error("mnop-mcount only supported with fentry-call"); + if (F.hasFnAttribute("mrecord-mcount")) + report_fatal_error("mrecord-mcount only supported with fentry-call"); + } Subtarget = &MF.getSubtarget<SystemZSubtarget>(); return SelectionDAGISel::runOnMachineFunction(MF); @@ -1494,8 +1497,9 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR || ChildOpcode == ISD::XOR) break; - // Check whether this expression matches OR-with-complement. - if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) { + // Check whether this expression matches OR-with-complement + // (or matches an alternate pattern for NXOR). + if (ChildOpcode == ISD::XOR) { auto Op0 = Node->getOperand(0); if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1))) if (Op0Op1->getZExtValue() == (uint64_t)-1) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index e0ca9da93561..c73905d3357a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -19,8 +19,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsS390.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include <cctype> @@ -32,12 +33,16 @@ using namespace llvm; namespace { // Represents information about a comparison. struct Comparison { - Comparison(SDValue Op0In, SDValue Op1In) - : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} + Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) + : Op0(Op0In), Op1(Op1In), Chain(ChainIn), + Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} // The operands to the comparison. SDValue Op0, Op1; + // Chain if this is a strict floating-point comparison. + SDValue Chain; + // The opcode that should be used to compare Op0 and Op1. unsigned Opcode; @@ -132,6 +137,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, if (isTypeLegal(VT)) { // Lower SET_CC into an IPM-based sequence. setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). setOperationAction(ISD::SELECT, VT, Expand); @@ -212,6 +219,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); if (Subtarget.hasFPExtension()) setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); + + // And similarly for STRICT_[SU]INT_TO_FP. + setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); + if (Subtarget.hasFPExtension()) + setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); } } @@ -251,6 +263,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, if (!Subtarget.hasFPExtension()) { setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); } // We have native support for a 64-bit CTLZ, via FLOGR. @@ -373,6 +387,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands // and inverting the result as necessary. setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); } } @@ -392,6 +409,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal); } if (Subtarget.hasVectorEnhancements2()) { @@ -408,6 +429,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal); } // Handle floating-point types. @@ -558,16 +583,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, MVT::v4f32, MVT::v2f64 }) { setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); setOperationAction(ISD::STRICT_FMINNUM, VT, Legal); + setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal); + setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal); } } - // We have fused multiply-addition for f32 and f64 but not f128. - setOperationAction(ISD::FMA, MVT::f32, Legal); - setOperationAction(ISD::FMA, MVT::f64, Legal); - if (Subtarget.hasVectorEnhancements1()) - setOperationAction(ISD::FMA, MVT::f128, Legal); - else + // We only have fused f128 multiply-addition on vector registers. + if (!Subtarget.hasVectorEnhancements1()) { setOperationAction(ISD::FMA, MVT::f128, Expand); + setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand); + } // We don't have a copysign instruction on vector registers. if (Subtarget.hasVectorEnhancements1()) @@ -612,7 +637,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::STRICT_FP_ROUND); setTargetDAGCombine(ISD::FP_EXTEND); + setTargetDAGCombine(ISD::STRICT_FP_EXTEND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::SDIV); setTargetDAGCombine(ISD::UDIV); @@ -634,6 +661,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // than "STC;MVC". Handle the choice in target-specific code instead. MaxStoresPerMemset = 0; MaxStoresPerMemsetOptSize = 0; + + // Default to having -disable-strictnode-mutation on + IsStrictFPEnabled = true; } EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, @@ -643,7 +673,8 @@ EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, return VT.changeVectorElementTypeToInteger(); } -bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { +bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( + const MachineFunction &MF, EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) @@ -1406,7 +1437,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments( // ...and a similar frame index for the caller-allocated save area // that will be used to store the incoming registers. - int64_t RegSaveOffset = TFL->getOffsetOfLocalArea(); + int64_t RegSaveOffset = -SystemZMC::CallFrameSize; unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true); FuncInfo->setRegSaveFrameIndex(RegSaveIndex); @@ -1675,6 +1706,9 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (RetLocs.empty()) return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain); + if (CallConv == CallingConv::GHC) + report_fatal_error("GHC functions return void only"); + // Copy the result values into the output registers. SDValue Glue; SmallVector<SDValue, 4> RetOps; @@ -2161,6 +2195,10 @@ static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, // negation to set CC, so avoiding separate LOAD AND TEST and // LOAD (NEGATIVE/COMPLEMENT) instructions. static void adjustForFNeg(Comparison &C) { + // This optimization is invalid for strict comparisons, since FNEG + // does not raise any exceptions. + if (C.Chain) + return; auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1); if (C1 && C1->isZero()) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { @@ -2448,7 +2486,7 @@ static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond) { - Comparison C(Call, SDValue()); + Comparison C(Call, SDValue(), SDValue()); C.Opcode = Opcode; C.CCValid = CCValid; if (Cond == ISD::SETEQ) @@ -2479,8 +2517,11 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond, const SDLoc &DL) { + ISD::CondCode Cond, const SDLoc &DL, + SDValue Chain = SDValue(), + bool IsSignaling = false) { if (CmpOp1.getOpcode() == ISD::Constant) { + assert(!Chain); uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); unsigned Opcode, CCValid; if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && @@ -2492,13 +2533,19 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); } - Comparison C(CmpOp0, CmpOp1); + Comparison C(CmpOp0, CmpOp1, Chain); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { C.CCValid = SystemZ::CCMASK_FCMP; - C.Opcode = SystemZISD::FCMP; + if (!C.Chain) + C.Opcode = SystemZISD::FCMP; + else if (!IsSignaling) + C.Opcode = SystemZISD::STRICT_FCMP; + else + C.Opcode = SystemZISD::STRICT_FCMPS; adjustForFNeg(C); } else { + assert(!C.Chain); C.CCValid = SystemZ::CCMASK_ICMP; C.Opcode = SystemZISD::ICMP; // Choose the type of comparison. Equality and inequality tests can @@ -2556,6 +2603,10 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); } + if (C.Chain) { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); + } return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); } @@ -2600,24 +2651,51 @@ static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, } // Return the SystemISD vector comparison operation for CC, or 0 if it cannot -// be done directly. IsFP is true if CC is for a floating-point rather than -// integer comparison. -static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { +// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP +// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet) +// floating-point comparisons, and CmpMode::SignalingFP for strict signaling +// floating-point comparisons. +enum class CmpMode { Int, FP, StrictFP, SignalingFP }; +static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) { switch (CC) { case ISD::SETOEQ: case ISD::SETEQ: - return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE; + switch (Mode) { + case CmpMode::Int: return SystemZISD::VICMPE; + case CmpMode::FP: return SystemZISD::VFCMPE; + case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE; + case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES; + } + llvm_unreachable("Bad mode"); case ISD::SETOGE: case ISD::SETGE: - return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0); + switch (Mode) { + case CmpMode::Int: return 0; + case CmpMode::FP: return SystemZISD::VFCMPHE; + case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE; + case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES; + } + llvm_unreachable("Bad mode"); case ISD::SETOGT: case ISD::SETGT: - return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH; + switch (Mode) { + case CmpMode::Int: return SystemZISD::VICMPH; + case CmpMode::FP: return SystemZISD::VFCMPH; + case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH; + case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS; + } + llvm_unreachable("Bad mode"); case ISD::SETUGT: - return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL; + switch (Mode) { + case CmpMode::Int: return SystemZISD::VICMPHL; + case CmpMode::FP: return 0; + case CmpMode::StrictFP: return 0; + case CmpMode::SignalingFP: return 0; + } + llvm_unreachable("Bad mode"); default: return 0; @@ -2626,17 +2704,16 @@ static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { // Return the SystemZISD vector comparison operation for CC or its inverse, // or 0 if neither can be done directly. Indicate in Invert whether the -// result is for the inverse of CC. IsFP is true if CC is for a -// floating-point rather than integer comparison. -static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, +// result is for the inverse of CC. Mode is as above. +static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert) { - if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = false; return Opcode; } - CC = ISD::getSetCCInverse(CC, !IsFP); - if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32); + if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = true; return Opcode; } @@ -2645,44 +2722,73 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, } // Return a v2f64 that contains the extended form of elements Start and Start+1 -// of v4f32 value Op. +// of v4f32 value Op. If Chain is nonnull, return the strict form. static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, - SDValue Op) { + SDValue Op, SDValue Chain) { int Mask[] = { Start, -1, Start + 1, -1 }; Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); + if (Chain) { + SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); + return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); + } return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); } // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, -// producing a result of type VT. +// producing a result of type VT. If Chain is nonnull, return the strict form. SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, EVT VT, SDValue CmpOp0, - SDValue CmpOp1) const { + SDValue CmpOp1, + SDValue Chain) const { // There is no hardware support for v4f32 (unless we have the vector // enhancements facility 1), so extend the vector into two v2f64s // and compare those. if (CmpOp0.getValueType() == MVT::v4f32 && !Subtarget.hasVectorEnhancements1()) { - SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); - SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); - SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); - SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1); + SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); + SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); + SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); + SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); + if (Chain) { + SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); + SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); + SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); + SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); + SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), + H1.getValue(1), L1.getValue(1), + HRes.getValue(1), LRes.getValue(1) }; + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue Ops[2] = { Res, NewChain }; + return DAG.getMergeValues(Ops, DL); + } SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); } + if (Chain) { + SDVTList VTs = DAG.getVTList(VT, MVT::Other); + return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); + } return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); } // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing -// an integer mask of type VT. +// an integer mask of type VT. If Chain is nonnull, we have a strict +// floating-point comparison. If in addition IsSignaling is true, we have +// a strict signaling floating-point comparison. SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, ISD::CondCode CC, SDValue CmpOp0, - SDValue CmpOp1) const { + SDValue CmpOp1, + SDValue Chain, + bool IsSignaling) const { bool IsFP = CmpOp0.getValueType().isFloatingPoint(); + assert (!Chain || IsFP); + assert (!IsSignaling || Chain); + CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : + Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; bool Invert = false; SDValue Cmp; switch (CC) { @@ -2692,9 +2798,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, LLVM_FALLTHROUGH; case ISD::SETO: { assert(IsFP && "Unexpected integer comparison"); - SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); - SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1); + SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp1, CmpOp0, Chain); + SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), + DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + LT.getValue(1), GE.getValue(1)); break; } @@ -2704,9 +2815,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, LLVM_FALLTHROUGH; case ISD::SETONE: { assert(IsFP && "Unexpected integer comparison"); - SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); - SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1); + SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp1, CmpOp0, Chain); + SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + LT.getValue(1), GT.getValue(1)); break; } @@ -2714,15 +2830,17 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, // matter whether we try the inversion or the swap first, since // there are no cases where both work. default: - if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) - Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); else { CC = ISD::getSetCCSwappedOperands(CC); - if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) - Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); else llvm_unreachable("Unhandled comparison"); } + if (Chain) + Chain = Cmp.getValue(1); break; } if (Invert) { @@ -2730,6 +2848,10 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } + if (Chain && Chain.getNode() != Cmp.getNode()) { + SDValue Ops[2] = { Cmp, Chain }; + Cmp = DAG.getMergeValues(Ops, DL); + } return Cmp; } @@ -2748,6 +2870,29 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); } +SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, + SelectionDAG &DAG, + bool IsSignaling) const { + SDValue Chain = Op.getOperand(0); + SDValue CmpOp0 = Op.getOperand(1); + SDValue CmpOp1 = Op.getOperand(2); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get(); + SDLoc DL(Op); + EVT VT = Op.getNode()->getValueType(0); + if (VT.isVector()) { + SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, + Chain, IsSignaling); + return Res.getValue(Op.getResNo()); + } + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling)); + SDValue CCReg = emitCmp(DAG, DL, C); + CCReg->setFlags(Op->getFlags()); + SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); + SDValue Ops[2] = { Result, CCReg.getValue(1) }; + return DAG.getMergeValues(Ops, DL); +} + SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); @@ -2828,17 +2973,26 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, SDValue Result; if (Subtarget.isPC32DBLSymbol(GV, CM)) { - // Assign anchors at 1<<12 byte boundaries. - uint64_t Anchor = Offset & ~uint64_t(0xfff); - Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); - Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); - - // The offset can be folded into the address if it is aligned to a halfword. - Offset -= Anchor; - if (Offset != 0 && (Offset & 1) == 0) { - SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); - Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); - Offset = 0; + if (isInt<32>(Offset)) { + // Assign anchors at 1<<12 byte boundaries. + uint64_t Anchor = Offset & ~uint64_t(0xfff); + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + + // The offset can be folded into the address if it is aligned to a + // halfword. + Offset -= Anchor; + if (Offset != 0 && (Offset & 1) == 0) { + SDValue Full = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); + Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); + Offset = 0; + } + } else { + // Conservatively load a constant offset greater than 32 bits into a + // register below. + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } } else { Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); @@ -2865,6 +3019,10 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, SDValue Chain = DAG.getEntryNode(); SDValue Glue; + if (DAG.getMachineFunction().getFunction().getCallingConv() == + CallingConv::GHC) + report_fatal_error("In GHC calling convention TLS is not supported"); + // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); @@ -2931,6 +3089,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); + if (DAG.getMachineFunction().getFunction().getCallingConv() == + CallingConv::GHC) + report_fatal_error("In GHC calling convention TLS is not supported"); + SDValue TP = lowerThreadPointer(DL, DAG); // Get the offset of GA from the thread pointer, based on the TLS model. @@ -3060,14 +3222,10 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); - // If the back chain frame index has not been allocated yet, do so. - SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>(); - int BackChainIdx = FI->getFramePointerSaveIndex(); - if (!BackChainIdx) { - // By definition, the frame address is the address of the back chain. - BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false); - FI->setFramePointerSaveIndex(BackChainIdx); - } + // By definition, the frame address is the address of the back chain. + auto *TFL = + static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering()); + int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); // FIXME The frontend should detect this case. @@ -3585,7 +3743,7 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, // Get the known-zero mask for the operand. KnownBits Known = DAG.computeKnownBits(Op); - unsigned NumSignificantBits = (~Known.Zero).getActiveBits(); + unsigned NumSignificantBits = Known.getMaxValue().getActiveBits(); if (NumSignificantBits == 0) return DAG.getConstant(0, DL, VT); @@ -3861,6 +4019,9 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + report_fatal_error("Variable-sized stack allocations are not supported " + "in GHC calling convention"); return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), SystemZ::R15D, Op.getValueType()); } @@ -3871,6 +4032,10 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + report_fatal_error("Variable-sized stack allocations are not supported " + "in GHC calling convention"); + SDValue Chain = Op.getOperand(0); SDValue NewSP = Op.getOperand(1); SDValue Backchain; @@ -4935,6 +5100,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSELECT_CC(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); + case ISD::STRICT_FSETCC: + return lowerSTRICT_FSETCC(Op, DAG, false); + case ISD::STRICT_FSETCCS: + return lowerSTRICT_FSETCC(Op, DAG, true); case ISD::GlobalAddress: return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); case ISD::GlobalTLSAddress: @@ -5140,6 +5309,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(IABS); OPCODE(ICMP); OPCODE(FCMP); + OPCODE(STRICT_FCMP); + OPCODE(STRICT_FCMPS); OPCODE(TM); OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); @@ -5202,14 +5373,22 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VICMPHS); OPCODE(VICMPHLS); OPCODE(VFCMPE); + OPCODE(STRICT_VFCMPE); + OPCODE(STRICT_VFCMPES); OPCODE(VFCMPH); + OPCODE(STRICT_VFCMPH); + OPCODE(STRICT_VFCMPHS); OPCODE(VFCMPHE); + OPCODE(STRICT_VFCMPHE); + OPCODE(STRICT_VFCMPHES); OPCODE(VFCMPES); OPCODE(VFCMPHS); OPCODE(VFCMPHES); OPCODE(VFTCI); OPCODE(VEXTEND); + OPCODE(STRICT_VEXTEND); OPCODE(VROUND); + OPCODE(STRICT_VROUND); OPCODE(VTM); OPCODE(VFAE_CC); OPCODE(VFAEZ_CC); @@ -5732,6 +5911,19 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS( return SDValue(); } +static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { + SDValue Chain1 = N1->getOperand(0); + SDValue Chain2 = N2->getOperand(0); + + // Trivial case: both nodes take the same chain. + if (Chain1 == Chain2) + return Chain1; + + // FIXME - we could handle more complex cases via TokenFactor, + // assuming we can verify that this would not create a cycle. + return SDValue(); +} + SDValue SystemZTargetLowering::combineFP_ROUND( SDNode *N, DAGCombinerInfo &DCI) const { @@ -5744,8 +5936,9 @@ SDValue SystemZTargetLowering::combineFP_ROUND( // (extract_vector_elt (VROUND X) 2) // // This is a special case since the target doesn't really support v2f32s. + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; SelectionDAG &DAG = DCI.DAG; - SDValue Op0 = N->getOperand(0); + SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && @@ -5761,20 +5954,34 @@ SDValue SystemZTargetLowering::combineFP_ROUND( U->getOperand(1).getOpcode() == ISD::Constant && cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) { SDValue OtherRound = SDValue(*U->use_begin(), 0); - if (OtherRound.getOpcode() == ISD::FP_ROUND && - OtherRound.getOperand(0) == SDValue(U, 0) && + if (OtherRound.getOpcode() == N->getOpcode() && + OtherRound.getOperand(OpNo) == SDValue(U, 0) && OtherRound.getValueType() == MVT::f32) { - SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), - MVT::v4f32, Vec); + SDValue VRound, Chain; + if (N->isStrictFPOpcode()) { + Chain = MergeInputChains(N, OtherRound.getNode()); + if (!Chain) + continue; + VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), + {MVT::v4f32, MVT::Other}, {Chain, Vec}); + Chain = VRound.getValue(1); + } else + VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), + MVT::v4f32, Vec); DCI.AddToWorklist(VRound.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); + if (Chain) + DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + if (Chain) + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), + N->getVTList(), Extract0, Chain); return Extract0; } } @@ -5795,8 +6002,9 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( // (extract_vector_elt (VEXTEND X) 1) // // This is a special case since the target doesn't really support v2f32s. + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; SelectionDAG &DAG = DCI.DAG; - SDValue Op0 = N->getOperand(0); + SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && @@ -5812,20 +6020,34 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( U->getOperand(1).getOpcode() == ISD::Constant && cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) { SDValue OtherExtend = SDValue(*U->use_begin(), 0); - if (OtherExtend.getOpcode() == ISD::FP_EXTEND && - OtherExtend.getOperand(0) == SDValue(U, 0) && + if (OtherExtend.getOpcode() == N->getOpcode() && + OtherExtend.getOperand(OpNo) == SDValue(U, 0) && OtherExtend.getValueType() == MVT::f64) { - SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), - MVT::v2f64, Vec); + SDValue VExtend, Chain; + if (N->isStrictFPOpcode()) { + Chain = MergeInputChains(N, OtherExtend.getNode()); + if (!Chain) + continue; + VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), + {MVT::v2f64, MVT::Other}, {Chain, Vec}); + Chain = VExtend.getValue(1); + } else + VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), + MVT::v2f64, Vec); DCI.AddToWorklist(VExtend.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); + if (Chain) + DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + if (Chain) + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), + N->getVTList(), Extract0, Chain); return Extract0; } } @@ -6165,7 +6387,9 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); case ISD::BSWAP: return combineBSWAP(N, DCI); case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); @@ -7523,7 +7747,8 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( // Replace pseudo with a normal load-and-test that models the def as // well. BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) - .addReg(SrcReg); + .addReg(SrcReg) + .setMIFlags(MI.getFlags()); MI.eraseFromParent(); return MBB; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 23cdcc72bc42..defcaa6eb6eb 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -245,7 +245,7 @@ enum NodeType : unsigned { VICMPHS, VICMPHLS, - // Compare floating-point vector operands 0 and 1 to preoduce the usual 0/-1 + // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and // greater than" and VFCMPHE for "ordered and greater than or equal to". VFCMPE, @@ -290,6 +290,24 @@ enum NodeType : unsigned { // Operand 1: the bit mask TDC, + // Strict variants of scalar floating-point comparisons. + // Quiet and signaling versions. + STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCMPS, + + // Strict variants of vector floating-point comparisons. + // Quiet and signaling versions. + STRICT_VFCMPE, + STRICT_VFCMPH, + STRICT_VFCMPHE, + STRICT_VFCMPES, + STRICT_VFCMPHS, + STRICT_VFCMPHES, + + // Strict variants of VEXTEND and VROUND. + STRICT_VEXTEND, + STRICT_VROUND, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // @@ -404,7 +422,8 @@ public: bool isCheapToSpeculateCtlz() const override { return true; } EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override; - bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; bool isLegalICmpImmediate(int64_t Imm) const override; @@ -530,11 +549,15 @@ private: // Implement LowerOperation for individual opcodes. SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue CmpOp0, SDValue CmpOp1) const; + SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const; SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, ISD::CondCode CC, - SDValue CmpOp0, SDValue CmpOp1) const; + SDValue CmpOp0, SDValue CmpOp1, + SDValue Chain = SDValue(), + bool IsSignaling = false) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG, + bool IsSignaling) const; SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 9c95e8aec940..6d03274fe8a6 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -221,13 +221,13 @@ let Predicates = [FeatureNoVectorEnhancements1] in { // Convert a signed integer register value to a floating-point one. let Uses = [FPC], mayRaiseFPException = 1 in { - def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; - def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; - def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; + def CEFBR : UnaryRRE<"cefbr", 0xB394, any_sint_to_fp, FP32, GR32>; + def CDFBR : UnaryRRE<"cdfbr", 0xB395, any_sint_to_fp, FP64, GR32>; + def CXFBR : UnaryRRE<"cxfbr", 0xB396, any_sint_to_fp, FP128, GR32>; - def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; - def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; - def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; + def CEGBR : UnaryRRE<"cegbr", 0xB3A4, any_sint_to_fp, FP32, GR64>; + def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, any_sint_to_fp, FP64, GR64>; + def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, any_sint_to_fp, FP128, GR64>; } // The FP extension feature provides versions of the above that allow @@ -254,13 +254,13 @@ let Predicates = [FeatureFPExtension] in { def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>; } - def : Pat<(f32 (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>; - def : Pat<(f64 (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>; - def : Pat<(f128 (uint_to_fp GR32:$src)), (CXLFBR 0, GR32:$src, 0)>; + def : Pat<(f32 (any_uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>; + def : Pat<(f64 (any_uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>; + def : Pat<(f128 (any_uint_to_fp GR32:$src)), (CXLFBR 0, GR32:$src, 0)>; - def : Pat<(f32 (uint_to_fp GR64:$src)), (CELGBR 0, GR64:$src, 0)>; - def : Pat<(f64 (uint_to_fp GR64:$src)), (CDLGBR 0, GR64:$src, 0)>; - def : Pat<(f128 (uint_to_fp GR64:$src)), (CXLGBR 0, GR64:$src, 0)>; + def : Pat<(f32 (any_uint_to_fp GR64:$src)), (CELGBR 0, GR64:$src, 0)>; + def : Pat<(f64 (any_uint_to_fp GR64:$src)), (CDLGBR 0, GR64:$src, 0)>; + def : Pat<(f128 (any_uint_to_fp GR64:$src)), (CXLGBR 0, GR64:$src, 0)>; } // Convert a floating-point register value to a signed integer value, @@ -467,16 +467,16 @@ let Uses = [FPC], mayRaiseFPException = 1 in { // f64 multiplication of two FP32 registers. let Uses = [FPC], mayRaiseFPException = 1 in def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; -def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), - (f64 (fpextend FP32:$src2))), +def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)), + (f64 (any_fpextend FP32:$src2))), (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), FP32:$src2)>; // f64 multiplication of an FP32 register and an f32 memory. let Uses = [FPC], mayRaiseFPException = 1 in def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; -def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), - (f64 (extloadf32 bdxaddr12only:$addr))), +def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)), + (f64 (any_extloadf32 bdxaddr12only:$addr))), (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), bdxaddr12only:$addr)>; @@ -484,8 +484,8 @@ def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), let Uses = [FPC], mayRaiseFPException = 1 in def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; let Predicates = [FeatureNoVectorEnhancements1] in - def : Pat<(any_fmul (f128 (fpextend FP64:$src1)), - (f128 (fpextend FP64:$src2))), + def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)), + (f128 (any_fpextend FP64:$src2))), (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), FP64:$src2)>; @@ -493,8 +493,8 @@ let Predicates = [FeatureNoVectorEnhancements1] in let Uses = [FPC], mayRaiseFPException = 1 in def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; let Predicates = [FeatureNoVectorEnhancements1] in - def : Pat<(any_fmul (f128 (fpextend FP64:$src1)), - (f128 (extloadf64 bdxaddr12only:$addr))), + def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)), + (f128 (any_extloadf64 bdxaddr12only:$addr))), (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), bdxaddr12only:$addr)>; @@ -537,19 +537,19 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { //===----------------------------------------------------------------------===// let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in { - def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>; - def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>; - def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>; + def CEBR : CompareRRE<"cebr", 0xB309, z_any_fcmp, FP32, FP32>; + def CDBR : CompareRRE<"cdbr", 0xB319, z_any_fcmp, FP64, FP64>; + def CXBR : CompareRRE<"cxbr", 0xB349, z_any_fcmp, FP128, FP128>; - def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>; - def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>; + def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, load, 8>; - def KEBR : CompareRRE<"kebr", 0xB308, null_frag, FP32, FP32>; - def KDBR : CompareRRE<"kdbr", 0xB318, null_frag, FP64, FP64>; - def KXBR : CompareRRE<"kxbr", 0xB348, null_frag, FP128, FP128>; + def KEBR : CompareRRE<"kebr", 0xB308, z_strict_fcmps, FP32, FP32>; + def KDBR : CompareRRE<"kdbr", 0xB318, z_strict_fcmps, FP64, FP64>; + def KXBR : CompareRRE<"kxbr", 0xB348, z_strict_fcmps, FP128, FP128>; - def KEB : CompareRXE<"keb", 0xED08, null_frag, FP32, load, 4>; - def KDB : CompareRXE<"kdb", 0xED18, null_frag, FP64, load, 8>; + def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, load, 4>; + def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, load, 8>; } // Test Data Class. diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index c9dbe3da686d..f064d33ac2f3 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -75,8 +75,9 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, // SystemZ::CCMASK_*. bits<4> CCValues = 0; - // The subset of CCValues that have the same meaning as they would after - // a comparison of the first operand against zero. + // The subset of CCValues that have the same meaning as they would after a + // comparison of the first operand against zero. "Logical" instructions + // leave this blank as they set CC in a different way. bits<4> CompareZeroCCMask = 0; // True if the instruction is conditional and if the CC mask operand @@ -87,9 +88,16 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, bit CCMaskLast = 0; // True if the instruction is the "logical" rather than "arithmetic" form, - // in cases where a distinction exists. + // in cases where a distinction exists. Except for logical compares, if the + // instruction sets this flag along with a non-zero CCValues field, it is + // assumed to set CC to either CCMASK_LOGICAL_ZERO or + // CCMASK_LOGICAL_NONZERO. bit IsLogical = 0; + // True if the (add or sub) instruction sets CC like a compare of the + // result against zero, but only if the 'nsw' flag is set. + bit CCIfNoSignedWrap = 0; + let TSFlags{0} = SimpleBDXLoad; let TSFlags{1} = SimpleBDXStore; let TSFlags{2} = Has20BitOffset; @@ -101,6 +109,7 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, let TSFlags{18} = CCMaskFirst; let TSFlags{19} = CCMaskLast; let TSFlags{20} = IsLogical; + let TSFlags{21} = CCIfNoSignedWrap; } //===----------------------------------------------------------------------===// @@ -3200,6 +3209,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let CCMaskLast = 1; + let NumOpsKey = !subst("loc", "sel", mnemonic); + let NumOpsValue = "2"; } // Like CondBinaryRRF, but used for the raw assembly form. The condition-code @@ -3239,6 +3250,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1, [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3, cond4:$valid, cond4:$M4))]> { let CCMaskLast = 1; + let NumOpsKey = mnemonic; + let NumOpsValue = "3"; } // Like CondBinaryRRFa, but used for the raw assembly form. The condition-code @@ -4789,7 +4802,8 @@ class TestBinarySILPseudo<SDPatternOperator operator, ImmOpWithPattern imm> // Like CondBinaryRRF, but expanded after RA depending on the choice of // register. -class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2> +class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1, + RegisterOperand cls2> : Pseudo<(outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls1:$R1src, @@ -4797,17 +4811,21 @@ class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2> let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let CCMaskLast = 1; + let NumOpsKey = !subst("loc", "sel", mnemonic); + let NumOpsValue = "2"; } // Like CondBinaryRRFa, but expanded after RA depending on the choice of // register. -class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2, - RegisterOperand cls3> +class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1, + RegisterOperand cls2, RegisterOperand cls3> : Pseudo<(outs cls1:$R1), (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4), [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3, cond4:$valid, cond4:$M4))]> { let CCMaskLast = 1; + let NumOpsKey = mnemonic; + let NumOpsValue = "3"; } // Like CondBinaryRIE, but expanded after RA depending on the choice of diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index bc783608d45b..97c8fa7aa32e 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -765,8 +765,8 @@ bool SystemZInstrInfo::PredicateInstruction( void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DestReg, - unsigned SrcReg, bool KillSrc) const { + const DebugLoc &DL, MCRegister DestReg, + MCRegister SrcReg, bool KillSrc) const { // Split 128-bit GPR moves into two 64-bit moves. Add implicit uses of the // super register in case one of the subregs is undefined. // This handles ADDR128 too. @@ -791,12 +791,12 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Move 128-bit floating-point values between VR128 and FP128. if (SystemZ::VR128BitRegClass.contains(DestReg) && SystemZ::FP128BitRegClass.contains(SrcReg)) { - unsigned SrcRegHi = - RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64), - SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); - unsigned SrcRegLo = - RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64), - SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); + MCRegister SrcRegHi = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64), + SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); + MCRegister SrcRegLo = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64), + SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg) .addReg(SrcRegHi, getKillRegState(KillSrc)) @@ -805,12 +805,12 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } if (SystemZ::FP128BitRegClass.contains(DestReg) && SystemZ::VR128BitRegClass.contains(SrcReg)) { - unsigned DestRegHi = - RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64), - SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); - unsigned DestRegLo = - RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64), - SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); + MCRegister DestRegHi = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64), + SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); + MCRegister DestRegLo = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64), + SystemZ::subreg_h64, &SystemZ::VR128BitRegClass); if (DestRegHi != SrcReg) copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false); @@ -945,6 +945,12 @@ static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) { } } +static void transferMIFlag(MachineInstr *OldMI, MachineInstr *NewMI, + MachineInstr::MIFlag Flag) { + if (OldMI->getFlag(Flag)) + NewMI->setFlag(Flag); +} + MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { MachineBasicBlock *MBB = MI.getParent(); @@ -1050,6 +1056,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( .addImm(0) .addImm(MI.getOperand(2).getImm()); transferDeadCC(&MI, BuiltMI); + transferMIFlag(&MI, BuiltMI, MachineInstr::NoSWrap); return BuiltMI; } @@ -1200,6 +1207,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); transferDeadCC(&MI, MIB); + transferMIFlag(&MI, MIB, MachineInstr::NoSWrap); return MIB; } } @@ -1748,6 +1756,28 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value); } +bool SystemZInstrInfo::verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const { + const MCInstrDesc &MCID = MI.getDesc(); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + if (I >= MCID.getNumOperands()) + break; + const MachineOperand &Op = MI.getOperand(I); + const MCOperandInfo &MCOI = MCID.OpInfo[I]; + // Addressing modes have register and immediate operands. Op should be a + // register (or frame index) operand if MCOI.RegClass contains a valid + // register class, or an immediate otherwise. + if (MCOI.OperandType == MCOI::OPERAND_MEMORY && + ((MCOI.RegClass != -1 && !Op.isReg() && !Op.isFI()) || + (MCOI.RegClass == -1 && !Op.isImm()))) { + ErrInfo = "Addressing mode operands corrupt!"; + return false; + } + } + + return true; +} + bool SystemZInstrInfo:: areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const { diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 6dc6e72aa52a..8391970c7d9d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -46,7 +46,8 @@ enum { CompareZeroCCMaskShift = 14, CCMaskFirst = (1 << 18), CCMaskLast = (1 << 19), - IsLogical = (1 << 20) + IsLogical = (1 << 20), + CCIfNoSignedWrap = (1 << 21) }; static inline unsigned getAccessSize(unsigned int Flags) { @@ -242,7 +243,7 @@ public: bool PredicateInstruction(MachineInstr &MI, ArrayRef<MachineOperand> Pred) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -322,6 +323,10 @@ public: MachineBasicBlock::iterator MBBI, unsigned Reg, uint64_t Value) const; + // Perform target specific instruction verification. + bool verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const override; + // Sometimes, it is possible for the target to tell, even without // aliasing information, that two MIs access different memory // addresses. This function returns true if two MIs access different diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 8b334756611a..9579dcc0d1b6 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -492,7 +492,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in { let isCommutable = 1 in { // Expands to SELR or SELFHR or a branch-and-move sequence, // depending on the choice of registers. - def SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>; + def SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>; defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>; defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>; defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>; @@ -525,7 +525,7 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { let isCommutable = 1 in { // Expands to LOCR or LOCFHR or a branch-and-move sequence, // depending on the choice of registers. - def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>; + def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>; defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>; } @@ -915,7 +915,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), //===----------------------------------------------------------------------===// // Addition producing a signed overflow flag. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { +let Defs = [CC], CCValues = 0xF, CCIfNoSignedWrap = 1 in { // Addition of a register. let isCommutable = 1 in { defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>; @@ -957,7 +957,7 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { defm : SXB<z_sadd, GR64, AGFR>; // Addition producing a carry. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, IsLogical = 1 in { // Addition of a register. let isCommutable = 1 in { defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>; @@ -997,7 +997,7 @@ let Defs = [CC] in { defm : ZXB<z_uadd, GR64, ALGFR>; // Addition producing and using a carry. -let Defs = [CC], Uses = [CC] in { +let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in { // Addition of a register. def ALCR : BinaryRRE<"alcr", 0xB998, z_addcarry, GR32, GR32>; def ALCGR : BinaryRRE<"alcgr", 0xB988, z_addcarry, GR64, GR64>; @@ -1017,7 +1017,8 @@ def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>, //===----------------------------------------------------------------------===// // Subtraction producing a signed overflow flag. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8, + CCIfNoSignedWrap = 1 in { // Subtraction of a register. defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>; def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; @@ -1066,7 +1067,7 @@ def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2), (SGR GR64:$src1, (LLILF imm64lf32n:$src2))>; // Subtraction producing a carry. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0x7, IsLogical = 1 in { // Subtraction of a register. defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>; def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; @@ -1104,7 +1105,7 @@ def : Pat<(add GR64:$src1, imm64zx32n:$src2), (SLGFI GR64:$src1, imm64zx32n:$src2)>; // Subtraction producing and using a carry. -let Defs = [CC], Uses = [CC] in { +let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in { // Subtraction of a register. def SLBR : BinaryRRE<"slbr", 0xB999, z_subcarry, GR32, GR32>; def SLBGR : BinaryRRE<"slbgr", 0xB989, z_subcarry, GR64, GR64>; @@ -2069,7 +2070,7 @@ let Predicates = [FeatureProcessorAssist] in { def PPA : SideEffectTernaryRRFc<"ppa", 0xB2E8, GR64, GR64, imm32zx4>; def : Pat<(int_s390_ppa_txassist GR32:$src), (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), - 0, 1)>; + zero_reg, 1)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 02364bbda5c1..c945122ee577 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1034,7 +1034,7 @@ let Predicates = [FeatureVector] in { def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; } - def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>; + def : FPConversion<VCDGB, any_sint_to_fp, v128db, v128g, 0, 0>; let Predicates = [FeatureVectorEnhancements2] in { let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in @@ -1042,7 +1042,7 @@ let Predicates = [FeatureVector] in { def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>; def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>; } - def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>; + def : FPConversion<VCEFB, any_sint_to_fp, v128sb, v128f, 0, 0>; } // Convert from logical. @@ -1051,7 +1051,7 @@ let Predicates = [FeatureVector] in { def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; } - def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>; + def : FPConversion<VCDLGB, any_uint_to_fp, v128db, v128g, 0, 0>; let Predicates = [FeatureVectorEnhancements2] in { let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in @@ -1059,7 +1059,7 @@ let Predicates = [FeatureVector] in { def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>; def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>; } - def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>; + def : FPConversion<VCELFB, any_uint_to_fp, v128sb, v128f, 0, 0>; } // Convert to fixed. @@ -1134,7 +1134,7 @@ let Predicates = [FeatureVector] in { // Load lengthened. let Uses = [FPC], mayRaiseFPException = 1 in { def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; - def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>; def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>; } let Predicates = [FeatureVectorEnhancements1] in { @@ -1156,7 +1156,7 @@ let Predicates = [FeatureVector] in { def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; } - def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; + def : Pat<(v4f32 (z_any_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>; let Predicates = [FeatureVectorEnhancements1] in { let Uses = [FPC], mayRaiseFPException = 1 in { @@ -1175,7 +1175,7 @@ let Predicates = [FeatureVector] in { // Maximum. multiclass VectorMax<Instruction insn, TypedReg tr> { def : FPMinMax<insn, any_fmaxnum, tr, 4>; - def : FPMinMax<insn, fmaximum, tr, 1>; + def : FPMinMax<insn, any_fmaximum, tr, 1>; } let Predicates = [FeatureVectorEnhancements1] in { let Uses = [FPC], mayRaiseFPException = 1 in { @@ -1201,7 +1201,7 @@ let Predicates = [FeatureVector] in { // Minimum. multiclass VectorMin<Instruction insn, TypedReg tr> { def : FPMinMax<insn, any_fminnum, tr, 4>; - def : FPMinMax<insn, fminimum, tr, 1>; + def : FPMinMax<insn, any_fminimum, tr, 1>; } let Predicates = [FeatureVectorEnhancements1] in { let Uses = [FPC], mayRaiseFPException = 1 in { @@ -1364,32 +1364,32 @@ let Predicates = [FeatureVector] in { // Compare scalar. let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; - def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; + def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { - def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>; - def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>; + def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>; + def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>; } } // Compare and signal scalar. let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; - def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; + def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { - def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>; - def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>; + def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2>; + def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>; } } // Compare equal. let Uses = [FPC], mayRaiseFPException = 1 in { def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>; - defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_any_vfcmpe, z_vfcmpes, v128g, v128db, 3, 0>; defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, + defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_any_vfcmpe, z_vfcmpes, v128f, v128sb, 2, 0>; defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, v32f, v32sb, 2, 8>; @@ -1401,11 +1401,11 @@ let Predicates = [FeatureVector] in { // Compare and signal equal. let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureVectorEnhancements1] in { - defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag, + defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, z_strict_vfcmpes, null_frag, v128g, v128db, 3, 4>; defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 12>; - defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag, + defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, z_strict_vfcmpes, null_frag, v128f, v128sb, 2, 4>; defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag, v32f, v32sb, 2, 12>; @@ -1416,12 +1416,12 @@ let Predicates = [FeatureVector] in { // Compare high. let Uses = [FPC], mayRaiseFPException = 1 in { def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; - defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_any_vfcmph, z_vfcmphs, v128g, v128db, 3, 0>; defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, + defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_any_vfcmph, z_vfcmphs, v128f, v128sb, 2, 0>; defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, v32f, v32sb, 2, 8>; @@ -1433,11 +1433,11 @@ let Predicates = [FeatureVector] in { // Compare and signal high. let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureVectorEnhancements1] in { - defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag, + defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, z_strict_vfcmphs, null_frag, v128g, v128db, 3, 4>; defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 12>; - defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag, + defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, z_strict_vfcmphs, null_frag, v128f, v128sb, 2, 4>; defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag, v32f, v32sb, 2, 12>; @@ -1448,12 +1448,12 @@ let Predicates = [FeatureVector] in { // Compare high or equal. let Uses = [FPC], mayRaiseFPException = 1 in { def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; - defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, + defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_any_vfcmphe, z_vfcmphes, v128g, v128db, 3, 0>; defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, + defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_any_vfcmphe, z_vfcmphes, v128f, v128sb, 2, 0>; defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, v32f, v32sb, 2, 8>; @@ -1465,11 +1465,11 @@ let Predicates = [FeatureVector] in { // Compare and signal high or equal. let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureVectorEnhancements1] in { - defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag, + defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, z_strict_vfcmphes, null_frag, v128g, v128db, 3, 4>; defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 12>; - defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag, + defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, z_strict_vfcmphes, null_frag, v128f, v128sb, 2, 4>; defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag, v32f, v32sb, 2, 12>; diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp index 724111229569..b1964321c78a 100644 --- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -162,7 +162,7 @@ private: void relaxBranches(); const SystemZInstrInfo *TII = nullptr; - MachineFunction *MF; + MachineFunction *MF = nullptr; SmallVector<MBBInfo, 16> MBBs; SmallVector<TerminatorInfo, 16> Terminators; }; diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 9eec3f37bc28..d1f6511ceea3 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -13,10 +13,22 @@ namespace llvm { +namespace SystemZ { +// A struct to hold the low and high GPR registers to be saved/restored as +// well as the offset into the register save area of the low register. +struct GPRRegs { + unsigned LowGPR; + unsigned HighGPR; + unsigned GPROffset; + GPRRegs() : LowGPR(0), HighGPR(0), GPROffset(0) {} + }; +} + class SystemZMachineFunctionInfo : public MachineFunctionInfo { virtual void anchor(); - unsigned LowSavedGPR; - unsigned HighSavedGPR; + + SystemZ::GPRRegs SpillGPRRegs; + SystemZ::GPRRegs RestoreGPRRegs; unsigned VarArgsFirstGPR; unsigned VarArgsFirstFPR; unsigned VarArgsFrameIndex; @@ -27,19 +39,29 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) - : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), - VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0), - ManipulatesSP(false), NumLocalDynamics(0) {} - - // Get and set the first call-saved GPR that should be saved and restored - // by this function. This is 0 if no GPRs need to be saved or restored. - unsigned getLowSavedGPR() const { return LowSavedGPR; } - void setLowSavedGPR(unsigned Reg) { LowSavedGPR = Reg; } - - // Get and set the last call-saved GPR that should be saved and restored - // by this function. - unsigned getHighSavedGPR() const { return HighSavedGPR; } - void setHighSavedGPR(unsigned Reg) { HighSavedGPR = Reg; } + : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0), + RegSaveFrameIndex(0), FramePointerSaveIndex(0), ManipulatesSP(false), + NumLocalDynamics(0) {} + + // Get and set the first and last call-saved GPR that should be saved by + // this function and the SP offset for the STMG. These are 0 if no GPRs + // need to be saved or restored. + SystemZ::GPRRegs getSpillGPRRegs() const { return SpillGPRRegs; } + void setSpillGPRRegs(unsigned Low, unsigned High, unsigned Offs) { + SpillGPRRegs.LowGPR = Low; + SpillGPRRegs.HighGPR = High; + SpillGPRRegs.GPROffset = Offs; + } + + // Get and set the first and last call-saved GPR that should be restored by + // this function and the SP offset for the LMG. These are 0 if no GPRs + // need to be saved or restored. + SystemZ::GPRRegs getRestoreGPRRegs() const { return RestoreGPRRegs; } + void setRestoreGPRRegs(unsigned Low, unsigned High, unsigned Offs) { + RestoreGPRRegs.LowGPR = Low; + RestoreGPRRegs.HighGPR = High; + RestoreGPRRegs.GPROffset = Offs; + } // Get and set the number of fixed (as opposed to variable) arguments // that are passed in GPRs to this function. diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td index b2bab68a6274..bd40f6d7bf40 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -25,6 +25,7 @@ class ImmediateOp<ValueType vt, string asmop> : Operand<vt> { let PrintMethod = "print"##asmop##"Operand"; let DecoderMethod = "decode"##asmop##"Operand"; let ParserMatchClass = !cast<AsmOperandClass>(asmop); + let OperandType = "OPERAND_IMMEDIATE"; } class ImmOpWithPattern<ValueType vt, string asmop, code pred, SDNodeXForm xform, @@ -63,13 +64,15 @@ class PCRelTLSAsmOperand<string size> // Constructs an operand for a PC-relative address with address type VT. // ASMOP is the associated asm operand. -class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { - let PrintMethod = "printPCRelOperand"; - let ParserMatchClass = asmop; -} -class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { - let PrintMethod = "printPCRelTLSOperand"; - let ParserMatchClass = asmop; +let OperandType = "OPERAND_PCREL" in { + class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelOperand"; + let ParserMatchClass = asmop; + } + class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelTLSOperand"; + let ParserMatchClass = asmop; + } } // Constructs both a DAG pattern and instruction operand for a PC-relative @@ -105,6 +108,7 @@ class AddressOperand<string bitsize, string dispsize, string length, let EncoderMethod = "get"##format##dispsize##length##"Encoding"; let DecoderMethod = "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; + let OperandType = "OPERAND_MEMORY"; let MIOperandInfo = operands; let ParserMatchClass = !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length); @@ -508,7 +512,8 @@ defm imm64zx48 : Immediate<i64, [{ return isUInt<64>(N->getZExtValue()); }], UIMM48, "U48Imm">; -def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>; +let OperandType = "OPERAND_IMMEDIATE" in + def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>; //===----------------------------------------------------------------------===// // Floating-point immediates @@ -657,4 +662,5 @@ def bdvaddr12only : BDVMode< "64", "12">; def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>, Operand<i32> { let PrintMethod = "printCond4Operand"; + let OperandType = "OPERAND_IMMEDIATE"; } diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 6fe383e64b74..a6a72903e573 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -258,6 +258,10 @@ def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>; def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>; def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>; +def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, + [SDNPHasChain]>; +def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp, + [SDNPHasChain]>; def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>; def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, [SDNPHasChain]>; @@ -328,13 +332,29 @@ def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinaryCC>; def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinaryCC>; def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinaryCC>; def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; +def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; +def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS", + SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; +def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>; def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>; def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>; def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; +def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; +def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>; def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>; def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>; @@ -707,6 +727,26 @@ def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3), // Floating-point negative absolute. def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; +// Strict floating-point fragments. +def z_any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_fcmp node:$lhs, node:$rhs), + (z_fcmp node:$lhs, node:$rhs)]>; +def z_any_vfcmpe : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmpe node:$lhs, node:$rhs), + (z_vfcmpe node:$lhs, node:$rhs)]>; +def z_any_vfcmph : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmph node:$lhs, node:$rhs), + (z_vfcmph node:$lhs, node:$rhs)]>; +def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmphe node:$lhs, node:$rhs), + (z_vfcmphe node:$lhs, node:$rhs)]>; +def z_any_vextend : PatFrags<(ops node:$src), + [(z_strict_vextend node:$src), + (z_vextend node:$src)]>; +def z_any_vround : PatFrags<(ops node:$src), + [(z_strict_vround node:$src), + (z_vround node:$src)]>; + // Create a unary operator that loads from memory and then performs // the given operation on it. class loadu<SDPatternOperator operator, SDPatternOperator load = load> diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td index 65300fb47627..501a69488397 100644 --- a/llvm/lib/Target/SystemZ/SystemZPatterns.td +++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -148,9 +148,9 @@ multiclass BlockLoadStore<SDPatternOperator load, ValueType vt, // registers in CLS against zero. The instruction has separate R1 and R2 // operands, but they must be the same when the instruction is used like this. multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> { - def : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; + def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; // The sign of the zero makes no difference. - def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; + def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; } // Use INSN for performing binary operation OPERATION of type VT diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 39ace5594b7f..0d5e7af92523 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -87,6 +87,52 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( VirtReg, Order, Hints, MF, VRM, Matrix); + if (VRM != nullptr) { + // Add any two address hints after any copy hints. + SmallSet<unsigned, 4> TwoAddrHints; + for (auto &Use : MRI->reg_nodbg_instructions(VirtReg)) + if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) { + const MachineOperand *VRRegMO = nullptr; + const MachineOperand *OtherMO = nullptr; + const MachineOperand *CommuMO = nullptr; + if (VirtReg == Use.getOperand(0).getReg()) { + VRRegMO = &Use.getOperand(0); + OtherMO = &Use.getOperand(1); + if (Use.isCommutable()) + CommuMO = &Use.getOperand(2); + } else if (VirtReg == Use.getOperand(1).getReg()) { + VRRegMO = &Use.getOperand(1); + OtherMO = &Use.getOperand(0); + } else if (VirtReg == Use.getOperand(2).getReg() && + Use.isCommutable()) { + VRRegMO = &Use.getOperand(2); + OtherMO = &Use.getOperand(0); + } else + continue; + + auto tryAddHint = [&](const MachineOperand *MO) -> void { + Register Reg = MO->getReg(); + Register PhysReg = + Register::isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); + if (PhysReg) { + if (MO->getSubReg()) + PhysReg = getSubReg(PhysReg, MO->getSubReg()); + if (VRRegMO->getSubReg()) + PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(), + MRI->getRegClass(VirtReg)); + if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg)) + TwoAddrHints.insert(PhysReg); + } + }; + tryAddHint(OtherMO); + if (CommuMO) + tryAddHint(CommuMO); + } + for (MCPhysReg OrderReg : Order) + if (TwoAddrHints.count(OrderReg)) + Hints.push_back(OrderReg); + } + if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) { SmallVector<unsigned, 8> Worklist; SmallSet<unsigned, 4> DoneRegs; @@ -143,58 +189,14 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, } } - if (VRM == nullptr) - return BaseImplRetVal; - - // Add any two address hints after any copy hints. - SmallSet<unsigned, 4> TwoAddrHints; - for (auto &Use : MRI->reg_nodbg_instructions(VirtReg)) - if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) { - const MachineOperand *VRRegMO = nullptr; - const MachineOperand *OtherMO = nullptr; - const MachineOperand *CommuMO = nullptr; - if (VirtReg == Use.getOperand(0).getReg()) { - VRRegMO = &Use.getOperand(0); - OtherMO = &Use.getOperand(1); - if (Use.isCommutable()) - CommuMO = &Use.getOperand(2); - } else if (VirtReg == Use.getOperand(1).getReg()) { - VRRegMO = &Use.getOperand(1); - OtherMO = &Use.getOperand(0); - } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) { - VRRegMO = &Use.getOperand(2); - OtherMO = &Use.getOperand(0); - } else - continue; - - auto tryAddHint = [&](const MachineOperand *MO) -> void { - Register Reg = MO->getReg(); - Register PhysReg = - Register::isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); - if (PhysReg) { - if (MO->getSubReg()) - PhysReg = getSubReg(PhysReg, MO->getSubReg()); - if (VRRegMO->getSubReg()) - PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(), - MRI->getRegClass(VirtReg)); - if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg)) - TwoAddrHints.insert(PhysReg); - } - }; - tryAddHint(OtherMO); - if (CommuMO) - tryAddHint(CommuMO); - } - for (MCPhysReg OrderReg : Order) - if (TwoAddrHints.count(OrderReg)) - Hints.push_back(OrderReg); - return BaseImplRetVal; } const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>(); + if (MF->getFunction().getCallingConv() == CallingConv::GHC) + return CSR_SystemZ_NoRegs_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) return Subtarget.hasVector()? CSR_SystemZ_AllRegs_Vector_SaveList : CSR_SystemZ_AllRegs_SaveList; @@ -209,6 +211,8 @@ const uint32_t * SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + if (CC == CallingConv::GHC) + return CSR_SystemZ_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return Subtarget.hasVector()? CSR_SystemZ_AllRegs_Vector_RegMask : CSR_SystemZ_AllRegs_RegMask; diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index 2aca22c9082a..f6184cec795a 100644 --- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -46,7 +46,6 @@ private: bool shortenOn001(MachineInstr &MI, unsigned Opcode); bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode); bool shortenFPConv(MachineInstr &MI, unsigned Opcode); - bool shortenSelect(MachineInstr &MI, unsigned Opcode); const SystemZInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -176,23 +175,6 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { return false; } -// MI is a three-operand select instruction. If one of the sources match -// the destination, convert to the equivalent load-on-condition. -bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) { - if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { - MI.setDesc(TII->get(Opcode)); - MI.tieOperands(0, 1); - return true; - } - if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { - TII->commuteInstruction(MI, false, 1, 2); - MI.setDesc(TII->get(Opcode)); - MI.tieOperands(0, 1); - return true; - } - return false; -} - // Process all instructions in MBB. Return true if something changed. bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { bool Changed = false; @@ -213,18 +195,6 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH); break; - case SystemZ::SELR: - Changed |= shortenSelect(MI, SystemZ::LOCR); - break; - - case SystemZ::SELFHR: - Changed |= shortenSelect(MI, SystemZ::LOCFHR); - break; - - case SystemZ::SELGR: - Changed |= shortenSelect(MI, SystemZ::LOCGR); - break; - case SystemZ::WFADB: Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); break; @@ -313,6 +283,14 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenOn01(MI, SystemZ::CEBR); break; + case SystemZ::WFKDB: + Changed |= shortenOn01(MI, SystemZ::KDBR); + break; + + case SystemZ::WFKSB: + Changed |= shortenOn01(MI, SystemZ::KEBR); + break; + case SystemZ::VL32: // For z13 we prefer LDE over LE to avoid partial register dependencies. Changed |= shortenOn0(MI, SystemZ::LDE32); diff --git a/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/llvm/lib/Target/SystemZ/SystemZTDC.cpp index 478848c30701..f103812eb096 100644 --- a/llvm/lib/Target/SystemZ/SystemZTDC.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTDC.cpp @@ -50,6 +50,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsS390.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include <deque> diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index 20865037fe38..dfcdb5356485 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -29,7 +29,7 @@ using namespace llvm; -extern "C" void LLVMInitializeSystemZTarget() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() { // Register the target. RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget()); } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 11c99aa11174..acec3c533585 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -62,7 +62,7 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { return 4 * TTI::TCC_Basic; } -int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, +int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); @@ -180,8 +180,8 @@ int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, return SystemZTTIImpl::getIntImmCost(Imm, Ty); } -int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -259,7 +259,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, } if (isa<StoreInst>(&I)) { Type *MemAccessTy = I.getOperand(0)->getType(); - NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0); + NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0); } } @@ -348,11 +348,10 @@ static unsigned getNumVectorRegs(Type *Ty) { } int SystemZTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, - TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, - TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args) { + unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, + TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, + const Instruction *CxtI) { // TODO: return a good value for BB-VECTORIZER that includes the // immediate loads, which we do not want to count for the loop @@ -508,7 +507,7 @@ int SystemZTTIImpl::getArithmeticInstrCost( // Fallback to the default implementation. return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo, Args); + Opd1PropInfo, Opd2PropInfo, Args, CxtI); } int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, @@ -995,7 +994,7 @@ static bool isBswapIntrinsicCall(const Value *V) { } int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, unsigned AddressSpace, + MaybeAlign Alignment, unsigned AddressSpace, const Instruction *I) { assert(!Src->isVoidTy() && "Invalid type"); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 3ba80b31439f..bc4d066881c1 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -40,9 +40,9 @@ public: int getIntImmCost(const APInt &Imm, Type *Ty); - int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); - int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); @@ -75,7 +75,8 @@ public: TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef<const Value *> Args = ArrayRef<const Value *>()); + ArrayRef<const Value *> Args = ArrayRef<const Value *>(), + const Instruction *CxtI = nullptr); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); @@ -87,7 +88,7 @@ public: const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); - int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, diff --git a/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp index 713a55ee8400..36291e079882 100644 --- a/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp +++ b/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -16,7 +16,7 @@ Target &llvm::getTheSystemZTarget() { return TheSystemZTarget; } -extern "C" void LLVMInitializeSystemZTargetInfo() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetInfo() { RegisterTarget<Triple::systemz, /*HasJIT=*/true> X( getTheSystemZTarget(), "systemz", "SystemZ", "SystemZ"); } |